From 3d6f21348ff0e328b6e9f1512fc04f30faf30b18 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 14 Aug 2024 15:38:15 -0700 Subject: [PATCH 01/51] move instance migration to internal API (#6311) Move instance migration from the external API to the internal API per the determinations in RFD 494. This is almost a purely mechanical change (I have not, for example, changed any API signatures or return values); the only unusual bit is one small adjustment to `Nexus::instance_ensure_registered` to deal with the fact that migration no longer runs in a context where the caller is acting on behalf of a user in a silo. Tests: cargo test; spun up a dev cluster and verified that (1) the migrate endpoint is gone from the external API, (2) it's present on the internal API and reachable from the switch zone, and (3) it behaves as it did before (at least to the extent I could test migration's behavior on a single-machine dev cluster). --- nexus/internal-api/src/lib.rs | 16 +- nexus/src/app/instance.rs | 44 ++--- nexus/src/app/sagas/instance_migrate.rs | 30 +-- nexus/src/app/sagas/instance_start.rs | 23 ++- nexus/src/app/sagas/instance_update/mod.rs | 3 +- nexus/src/external_api/http_entrypoints.rs | 43 ---- nexus/src/internal_api/http_entrypoints.rs | 29 +++ nexus/tests/integration_tests/endpoints.rs | 18 -- nexus/tests/integration_tests/instances.rs | 22 ++- nexus/tests/output/nexus_tags.txt | 1 - nexus/types/src/external_api/params.rs | 6 - nexus/types/src/internal_api/params.rs | 7 + openapi/nexus-internal.json | 217 +++++++++++++++++++++ openapi/nexus.json | 69 ------- 14 files changed, 336 insertions(+), 192 deletions(-) diff --git a/nexus/internal-api/src/lib.rs b/nexus/internal-api/src/lib.rs index 6a98c44614..7ac3e42f57 100644 --- a/nexus/internal-api/src/lib.rs +++ b/nexus/internal-api/src/lib.rs @@ -20,15 +20,15 @@ use nexus_types::{ }, internal_api::{ params::{ - OximeterInfo, RackInitializationRequest, SledAgentInfo, - SwitchPutRequest, SwitchPutResponse, + 
InstanceMigrateRequest, OximeterInfo, RackInitializationRequest, + SledAgentInfo, SwitchPutRequest, SwitchPutResponse, }, views::{BackgroundTask, DemoSaga, Ipv4NatEntryView, Saga}, }, }; use omicron_common::{ api::{ - external::http_pagination::PaginatedById, + external::{http_pagination::PaginatedById, Instance}, internal::nexus::{ DiskRuntimeState, DownstairsClientStopRequest, DownstairsClientStopped, ProducerEndpoint, @@ -119,6 +119,16 @@ pub trait NexusInternalApi { new_runtime_state: TypedBody, ) -> Result; + #[endpoint { + method = POST, + path = "/instances/{instance_id}/migrate", + }] + async fn instance_migrate( + rqctx: RequestContext, + path_params: Path, + migrate_params: TypedBody, + ) -> Result, HttpError>; + /// Report updated state for a disk. #[endpoint { method = PUT, diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 344d2688f7..3106ab9f2a 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -191,6 +191,14 @@ enum InstanceStartDisposition { AlreadyStarted, } +/// The set of API resources needed when ensuring that an instance is registered +/// on a sled. 
+pub(crate) struct InstanceEnsureRegisteredApiResources { + pub(crate) authz_silo: nexus_auth::authz::Silo, + pub(crate) authz_project: nexus_auth::authz::Project, + pub(crate) authz_instance: nexus_auth::authz::Instance, +} + impl super::Nexus { pub fn instance_lookup<'a>( &'a self, @@ -473,14 +481,16 @@ impl super::Nexus { Ok(()) } - pub(crate) async fn project_instance_migrate( + pub(crate) async fn instance_migrate( self: &Arc, opctx: &OpContext, - instance_lookup: &lookup::Instance<'_>, - params: params::InstanceMigrate, + id: InstanceUuid, + params: nexus_types::internal_api::params::InstanceMigrateRequest, ) -> UpdateResult { - let (.., authz_instance) = - instance_lookup.lookup_for(authz::Action::Modify).await?; + let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) + .instance_id(id.into_untyped_uuid()) + .lookup_for(authz::Action::Modify) + .await?; let state = self .db_datastore @@ -867,7 +877,11 @@ impl super::Nexus { pub(crate) async fn instance_ensure_registered( &self, opctx: &OpContext, - authz_instance: &authz::Instance, + InstanceEnsureRegisteredApiResources { + authz_silo, + authz_project, + authz_instance, + }: &InstanceEnsureRegisteredApiResources, db_instance: &db::model::Instance, propolis_id: &PropolisUuid, initial_vmm: &db::model::Vmm, @@ -1067,23 +1081,9 @@ impl super::Nexus { let ssh_keys: Vec = ssh_keys.map(|ssh_key| ssh_key.public_key).collect(); - // Construct instance metadata used to track its statistics. - // - // This requires another fetch on the silo and project, to extract their - // IDs. - let (.., db_project) = self - .project_lookup( - opctx, - params::ProjectSelector { - project: NameOrId::Id(db_instance.project_id), - }, - )? 
- .fetch() - .await?; - let (_, db_silo) = self.current_silo_lookup(opctx)?.fetch().await?; let metadata = sled_agent_client::types::InstanceMetadata { - silo_id: db_silo.id(), - project_id: db_project.id(), + silo_id: authz_silo.id(), + project_id: authz_project.id(), }; // Ask the sled agent to begin the state change. Then update the diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index bb4bf282e4..19bef2f046 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -4,15 +4,15 @@ use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID}; use crate::app::instance::{ - InstanceRegisterReason, InstanceStateChangeError, - InstanceStateChangeRequest, + InstanceEnsureRegisteredApiResources, InstanceRegisterReason, + InstanceStateChangeError, InstanceStateChangeRequest, }; use crate::app::sagas::{ declare_saga_actions, instance_common::allocate_vmm_ipv6, }; -use crate::external_api::params; use nexus_db_queries::db::{identity::Resource, lookup::LookupPath}; use nexus_db_queries::{authn, authz, db}; +use nexus_types::internal_api::params::InstanceMigrateRequest; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, SledUuid}; use serde::Deserialize; use serde::Serialize; @@ -30,7 +30,7 @@ pub struct Params { pub serialized_authn: authn::saga::Serialized, pub instance: db::model::Instance, pub src_vmm: db::model::Vmm, - pub migrate_params: params::InstanceMigrate, + pub migrate_params: InstanceMigrateRequest, } // The migration saga is similar to the instance start saga: get a destination @@ -401,11 +401,12 @@ async fn sim_ensure_destination_propolis( "dst_propolis_id" => %vmm.id, "dst_vmm_state" => ?vmm); - let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(db_instance.id()) - .lookup_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; + let (authz_silo, authz_project, authz_instance) = + 
LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(db_instance.id()) + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; let src_propolis_id = PropolisUuid::from_untyped_uuid(params.src_vmm.id); let dst_propolis_id = PropolisUuid::from_untyped_uuid(vmm.id); @@ -413,7 +414,11 @@ async fn sim_ensure_destination_propolis( .nexus() .instance_ensure_registered( &opctx, - &authz_instance, + &InstanceEnsureRegisteredApiResources { + authz_silo, + authz_project, + authz_instance, + }, &db_instance, &dst_propolis_id, &vmm, @@ -565,6 +570,7 @@ async fn sim_instance_migrate( mod tests { use super::*; use crate::app::sagas::test_helpers; + use crate::external_api::params; use dropshot::test_util::ClientTestContext; use nexus_test_utils::resource_helpers::{ create_default_ip_pool, create_project, object_create, @@ -637,7 +643,7 @@ mod tests { serialized_authn: authn::saga::Serialized::for_opctx(&opctx), instance: state.instance().clone(), src_vmm: vmm.clone(), - migrate_params: params::InstanceMigrate { + migrate_params: InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), }, }; @@ -706,7 +712,7 @@ mod tests { ), instance: old_instance.clone(), src_vmm: old_vmm.clone(), - migrate_params: params::InstanceMigrate { + migrate_params: InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), }, } diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 9e4e010eea..55fc312ae7 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -10,8 +10,10 @@ use super::{ instance_common::allocate_vmm_ipv6, NexusActionContext, NexusSaga, SagaInitError, }; -use crate::app::instance::InstanceRegisterReason; -use crate::app::instance::InstanceStateChangeError; +use crate::app::instance::{ + InstanceEnsureRegisteredApiResources, InstanceRegisterReason, + InstanceStateChangeError, +}; use crate::app::sagas::declare_saga_actions; use 
chrono::Utc; use nexus_db_queries::db::{identity::Resource, lookup::LookupPath}; @@ -502,17 +504,22 @@ async fn sis_ensure_registered( "instance_id" => %instance_id, "sled_id" => %sled_id); - let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(instance_id) - .lookup_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; + let (authz_silo, authz_project, authz_instance) = + LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(instance_id) + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; osagactx .nexus() .instance_ensure_registered( &opctx, - &authz_instance, + &InstanceEnsureRegisteredApiResources { + authz_silo, + authz_project, + authz_instance, + }, &db_instance, &propolis_id, &vmm_record, diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index 71abe63bbd..5f226480b8 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -1403,6 +1403,7 @@ mod test { create_default_ip_pool, create_project, object_create, }; use nexus_test_utils_macros::nexus_test; + use nexus_types::internal_api::params::InstanceMigrateRequest; use omicron_common::api::internal::nexus::{ MigrationRuntimeState, MigrationState, Migrations, }; @@ -2358,7 +2359,7 @@ mod test { serialized_authn: authn::saga::Serialized::for_opctx(&opctx), instance: state.instance().clone(), src_vmm: vmm.clone(), - migrate_params: params::InstanceMigrate { + migrate_params: InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), }, }; diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a87bdd834d..8e8b63229b 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -167,7 +167,6 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(instance_view)?; 
api.register(instance_create)?; api.register(instance_delete)?; - api.register(instance_migrate)?; api.register(instance_reboot)?; api.register(instance_start)?; api.register(instance_stop)?; @@ -2866,48 +2865,6 @@ async fn instance_delete( .await } -// TODO should this be in the public API? -/// Migrate an instance -#[endpoint { - method = POST, - path = "/v1/instances/{instance}/migrate", - tags = ["instances"], -}] -async fn instance_migrate( - rqctx: RequestContext, - query_params: Query, - path_params: Path, - migrate_params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.context.nexus; - let path = path_params.into_inner(); - let query = query_params.into_inner(); - let migrate_instance_params = migrate_params.into_inner(); - let instance_selector = params::InstanceSelector { - project: query.project, - instance: path.instance, - }; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let instance_lookup = - nexus.instance_lookup(&opctx, instance_selector)?; - let instance = nexus - .project_instance_migrate( - &opctx, - &instance_lookup, - migrate_instance_params, - ) - .await?; - Ok(HttpResponseOk(instance.into())) - }; - apictx - .context - .external_latencies - .instrument_dropshot_handler(&rqctx, handler) - .await -} - /// Reboot an instance #[endpoint { method = POST, diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index c5322e3930..9965b6e21e 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -30,6 +30,7 @@ use nexus_types::external_api::params::UninitializedSledId; use nexus_types::external_api::shared::ProbeInfo; use nexus_types::external_api::shared::UninitializedSled; use nexus_types::external_api::views::SledPolicy; +use nexus_types::internal_api::params::InstanceMigrateRequest; use nexus_types::internal_api::params::SledAgentInfo; use 
nexus_types::internal_api::params::SwitchPutRequest; use nexus_types::internal_api::params::SwitchPutResponse; @@ -42,6 +43,7 @@ use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::PaginatedById; use omicron_common::api::external::http_pagination::ScanById; use omicron_common::api::external::http_pagination::ScanParams; +use omicron_common::api::external::Instance; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::DownstairsClientStopRequest; use omicron_common::api::internal::nexus::DownstairsClientStopped; @@ -190,6 +192,33 @@ impl NexusInternalApi for NexusInternalApiImpl { .await } + async fn instance_migrate( + rqctx: RequestContext, + path_params: Path, + migrate_params: TypedBody, + ) -> Result, HttpError> { + let apictx = &rqctx.context().context; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let migrate = migrate_params.into_inner(); + let handler = async { + let opctx = + crate::context::op_context_for_internal_api(&rqctx).await; + let instance = nexus + .instance_migrate( + &opctx, + InstanceUuid::from_untyped_uuid(path.instance_id), + migrate, + ) + .await?; + Ok(HttpResponseOk(instance.into())) + }; + apictx + .internal_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + async fn cpapi_disks_put( rqctx: RequestContext, path_params: Path, diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 6e4e59688a..9097082a20 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -359,12 +359,6 @@ pub static DEMO_INSTANCE_REBOOT_URL: Lazy = Lazy::new(|| { *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR ) }); -pub static DEMO_INSTANCE_MIGRATE_URL: Lazy = Lazy::new(|| { - format!( - "/v1/instances/{}/migrate?{}", - *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR - ) -}); pub static 
DEMO_INSTANCE_SERIAL_URL: Lazy = Lazy::new(|| { format!( "/v1/instances/{}/serial-console?{}", @@ -1823,18 +1817,6 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::Post(serde_json::Value::Null) ], }, - VerifyEndpoint { - url: &DEMO_INSTANCE_MIGRATE_URL, - visibility: Visibility::Protected, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![ - AllowedMethod::Post(serde_json::to_value( - params::InstanceMigrate { - dst_sled_id: uuid::Uuid::new_v4(), - } - ).unwrap()), - ], - }, VerifyEndpoint { url: &DEMO_INSTANCE_SERIAL_URL, visibility: Visibility::Protected, diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 2e41fac3a4..eb3c88eb38 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -48,6 +48,7 @@ use nexus_types::external_api::shared::SiloIdentityMode; use nexus_types::external_api::views::SshKey; use nexus_types::external_api::{params, views}; use nexus_types::identity::Resource; +use nexus_types::internal_api::params::InstanceMigrateRequest; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Disk; use omicron_common::api::external::DiskState; @@ -737,6 +738,7 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { } let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; let instance_name = "bird-ecology"; @@ -791,10 +793,10 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { }; let migrate_url = - format!("/v1/instances/{}/migrate", &instance_id.to_string()); + format!("/instances/{}/migrate", &instance_id.to_string()); let instance = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &migrate_url) - .body(Some(&params::InstanceMigrate { + RequestBuilder::new(internal_client, Method::POST, &migrate_url) + 
.body(Some(&InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), })) .expect_status(Some(StatusCode::OK)), @@ -907,6 +909,7 @@ async fn test_instance_migrate_v2p_and_routes( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; let datastore = nexus.datastore(); @@ -997,10 +1000,10 @@ async fn test_instance_migrate_v2p_and_routes( // Kick off migration and simulate its completion on the target. let migrate_url = - format!("/v1/instances/{}/migrate", &instance_id.to_string()); + format!("/instances/{}/migrate", &instance_id.to_string()); let _ = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &migrate_url) - .body(Some(&params::InstanceMigrate { + RequestBuilder::new(internal_client, Method::POST, &migrate_url) + .body(Some(&InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), })) .expect_status(Some(StatusCode::OK)), @@ -1293,6 +1296,7 @@ async fn test_instance_metrics_with_migration( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; + let internal_client = &cptestctx.internal_client; let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; let instance_name = "bird-ecology"; @@ -1381,10 +1385,10 @@ async fn test_instance_metrics_with_migration( }; let migrate_url = - format!("/v1/instances/{}/migrate", &instance_id.to_string()); + format!("/instances/{}/migrate", &instance_id.to_string()); let _ = NexusRequest::new( - RequestBuilder::new(client, Method::POST, &migrate_url) - .body(Some(&params::InstanceMigrate { + RequestBuilder::new(internal_client, Method::POST, &migrate_url) + .body(Some(&InstanceMigrateRequest { dst_sled_id: dst_sled_id.into_untyped_uuid(), })) .expect_status(Some(StatusCode::OK)), diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 4af018c5af..340d72569b 100644 
--- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -51,7 +51,6 @@ instance_ephemeral_ip_attach POST /v1/instances/{instance}/exter instance_ephemeral_ip_detach DELETE /v1/instances/{instance}/external-ips/ephemeral instance_external_ip_list GET /v1/instances/{instance}/external-ips instance_list GET /v1/instances -instance_migrate POST /v1/instances/{instance}/migrate instance_network_interface_create POST /v1/network-interfaces instance_network_interface_delete DELETE /v1/network-interfaces/{interface} instance_network_interface_list GET /v1/network-interfaces diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 8dcce913b3..a7dd0a72cc 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1093,12 +1093,6 @@ impl JsonSchema for UserData { } } -/// Migration parameters for an `Instance` -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct InstanceMigrate { - pub dst_sled_id: Uuid, -} - /// Forwarded to a propolis server to request the contents of an Instance's serial console. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct InstanceSerialConsoleRequest { diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index 3a26dde4ba..c803f003f1 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -207,3 +207,10 @@ pub struct OximeterInfo { /// The address on which this oximeter instance listens for requests pub address: SocketAddr, } + +/// Parameters used when migrating an instance. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct InstanceMigrateRequest { + /// The ID of the sled to which to migrate the target instance. 
+ pub dst_sled_id: Uuid, +} diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 5dd7d3dea3..d054591f3a 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -784,6 +784,50 @@ } } }, + "/instances/{instance_id}/migrate": { + "post": { + "operationId": "instance_migrate", + "parameters": [ + { + "in": "path", + "name": "instance_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceMigrateRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Instance" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/metrics/collectors": { "post": { "summary": "Accept a notification of a new oximeter collection server.", @@ -3300,6 +3344,179 @@ } ] }, + "Instance": { + "description": "View of an Instance", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "hostname": { + "description": "RFC1035-compliant hostname for the Instance.", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "memory": { + "description": "memory allocated for this Instance", + "allOf": [ + { + "$ref": "#/components/schemas/ByteCount" + } + ] + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "ncpus": { + "description": "number of CPUs allocated for this Instance", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceCpuCount" + } + ] + }, + 
"project_id": { + "description": "id for the project containing this Instance", + "type": "string", + "format": "uuid" + }, + "run_state": { + "$ref": "#/components/schemas/InstanceState" + }, + "time_created": { + "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + }, + "time_run_state_updated": { + "type": "string", + "format": "date-time" + } + }, + "required": [ + "description", + "hostname", + "id", + "memory", + "name", + "ncpus", + "project_id", + "run_state", + "time_created", + "time_modified", + "time_run_state_updated" + ] + }, + "InstanceCpuCount": { + "description": "The number of CPUs in an Instance", + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "InstanceMigrateRequest": { + "description": "Parameters used when migrating an instance.", + "type": "object", + "properties": { + "dst_sled_id": { + "description": "The ID of the sled to which to migrate the target instance.", + "type": "string", + "format": "uuid" + } + }, + "required": [ + "dst_sled_id" + ] + }, + "InstanceState": { + "description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle", + "oneOf": [ + { + "description": "The instance is being created.", + "type": "string", + "enum": [ + "creating" + ] + }, + { + "description": "The instance is currently starting up.", + "type": "string", + "enum": [ + "starting" + ] + }, + { + "description": "The instance is currently running.", + "type": "string", + "enum": [ + "running" + ] + }, + { + "description": "The instance has been requested to stop and a transition to \"Stopped\" is imminent.", + "type": "string", + "enum": [ + "stopping" + ] + }, + { + "description": "The instance is 
currently stopped.", + "type": "string", + "enum": [ + "stopped" + ] + }, + { + "description": "The instance is in the process of rebooting - it will remain in the \"rebooting\" state until the VM is starting once more.", + "type": "string", + "enum": [ + "rebooting" + ] + }, + { + "description": "The instance is in the process of migrating - it will remain in the \"migrating\" state until the migration process is complete and the destination propolis is ready to continue execution.", + "type": "string", + "enum": [ + "migrating" + ] + }, + { + "description": "The instance is attempting to recover from a failure.", + "type": "string", + "enum": [ + "repairing" + ] + }, + { + "description": "The instance has encountered a failure.", + "type": "string", + "enum": [ + "failed" + ] + }, + { + "description": "The instance has been deleted.", + "type": "string", + "enum": [ + "destroyed" + ] + } + ] + }, "IpNet": { "x-rust-type": { "crate": "oxnet", diff --git a/openapi/nexus.json b/openapi/nexus.json index da77eec2a8..27e2870b6e 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -2276,62 +2276,6 @@ } } }, - "/v1/instances/{instance}/migrate": { - "post": { - "tags": [ - "instances" - ], - "summary": "Migrate an instance", - "operationId": "instance_migrate", - "parameters": [ - { - "in": "query", - "name": "project", - "description": "Name or ID of the project", - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, - { - "in": "path", - "name": "instance", - "description": "Name or ID of the instance", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InstanceMigrate" - } - } - }, - "required": true - }, - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Instance" - } - } - } - }, - "4XX": { - "$ref": 
"#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/v1/instances/{instance}/reboot": { "post": { "tags": [ @@ -15271,19 +15215,6 @@ } ] }, - "InstanceMigrate": { - "description": "Migration parameters for an `Instance`", - "type": "object", - "properties": { - "dst_sled_id": { - "type": "string", - "format": "uuid" - } - }, - "required": [ - "dst_sled_id" - ] - }, "InstanceNetworkInterface": { "description": "An `InstanceNetworkInterface` represents a virtual network interface device attached to an instance.", "type": "object", From 77230c7ac07b4a0516bf77146d0b0c8401351f3a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 14 Aug 2024 22:49:42 +0000 Subject: [PATCH 02/51] Update Rust crate russh to 0.44.1 (#6335) --- Cargo.lock | 147 ++++++++++++++++++++++++++++++++---- end-to-end-tests/Cargo.toml | 4 +- workspace-hack/Cargo.toml | 14 +++- 3 files changed, 146 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cd12b9de9a..f827582501 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6291,6 +6291,7 @@ dependencies = [ "anyhow", "base16ct", "base64 0.22.1", + "base64ct", "bit-set", "bit-vec", "bitflags 1.3.2", @@ -6346,7 +6347,7 @@ dependencies = [ "mio 0.8.11", "nix 0.28.0", "nom", - "num-bigint", + "num-bigint-dig", "num-integer", "num-iter", "num-traits", @@ -6355,6 +6356,7 @@ dependencies = [ "peg-runtime", "pem-rfc7468", "petgraph", + "pkcs8", "postgres-types", "predicates", "proc-macro2", @@ -6363,12 +6365,14 @@ dependencies = [ "regex-syntax 0.8.4", "reqwest", "ring 0.17.8", + "rsa", "rustix", "schemars", "scopeguard", "semver 1.0.23", "serde", "serde_json", + "sha1", "sha2", "signal-hook-mio", "similar", @@ -6397,7 +6401,6 @@ dependencies = [ "usdt", "usdt-impl", "uuid", - "yasna", "zerocopy 0.7.34", "zeroize", ] @@ -6941,6 +6944,18 @@ dependencies = [ "sha2", ] +[[package]] +name = "p384" +version = "0.13.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "70786f51bcc69f6a4c0360e063a4cac5419ef7c5cd5b3c99ad70f3be5ba79209" +dependencies = [ + "ecdsa", + "elliptic-curve", + "primeorder", + "sha2", +] + [[package]] name = "p521" version = "0.13.3" @@ -7141,6 +7156,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" dependencies = [ "digest", + "hmac", ] [[package]] @@ -7322,6 +7338,21 @@ dependencies = [ "spki", ] +[[package]] +name = "pkcs5" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" +dependencies = [ + "aes", + "cbc", + "der", + "pbkdf2 0.12.2", + "scrypt", + "sha2", + "spki", +] + [[package]] name = "pkcs8" version = "0.10.2" @@ -7329,6 +7360,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" dependencies = [ "der", + "pkcs5", + "rand_core 0.6.4", "spki", ] @@ -8452,19 +8485,21 @@ dependencies = [ [[package]] name = "russh" -version = "0.43.0" +version = "0.44.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c9534703dc13be1eefc5708618f4c346da8e4f04f260218613f351ed5e94259" +checksum = "6500eedfaf8cd81597899d896908a4b9cd5cb566db875e843c04ccf92add2c16" dependencies = [ "aes", "aes-gcm", "async-trait", "bitflags 2.6.0", "byteorder", + "cbc", "chacha20", "ctr", "curve25519-dalek", "digest", + "elliptic-curve", "flate2", "futures", "generic-array", @@ -8473,16 +8508,21 @@ dependencies = [ "log", "num-bigint", "once_cell", + "p256", + "p384", + "p521", "poly1305", "rand 0.8.5", + "rand_core 0.6.4", "russh-cryptovec", "russh-keys", "sha1", "sha2", + "ssh-encoding", + "ssh-key", "subtle", "thiserror", "tokio", - "tokio-util", ] [[package]] @@ -8497,41 +8537,53 @@ dependencies = [ [[package]] name = "russh-keys" 
-version = "0.43.0" +version = "0.44.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa4a5afa2fab6fd49d0c470a3b75c3c70a4f363c38db32df5ae3b44a3abf5ab9" +checksum = "fb8c0bfe024d4edd242f65a2ac6c8bf38a892930050b9eb90909d8fc2c413c8d" dependencies = [ "aes", "async-trait", "bcrypt-pbkdf", - "bit-vec", "block-padding", "byteorder", "cbc", "ctr", "data-encoding", + "der", + "digest", "dirs", + "ecdsa", "ed25519-dalek", + "elliptic-curve", "futures", "hmac", "inout", "log", "md5", - "num-bigint", "num-integer", "p256", + "p384", "p521", "pbkdf2 0.11.0", - "rand 0.7.3", + "pkcs1", + "pkcs5", + "pkcs8", + "rand 0.8.5", "rand_core 0.6.4", + "rsa", "russh-cryptovec", + "sec1", "serde", "sha1", "sha2", + "spki", + "ssh-encoding", + "ssh-key", "thiserror", "tokio", "tokio-stream", - "yasna", + "typenum", + "zeroize", ] [[package]] @@ -8750,6 +8802,15 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +dependencies = [ + "cipher", +] + [[package]] name = "samael" version = "0.0.15" @@ -8857,6 +8918,17 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "scrypt" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" +dependencies = [ + "pbkdf2 0.12.2", + "salsa20", + "sha2", +] + [[package]] name = "sct" version = "0.7.1" @@ -9718,6 +9790,57 @@ dependencies = [ "syn 2.0.74", ] +[[package]] +name = "ssh-cipher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caac132742f0d33c3af65bfcde7f6aa8f62f0e991d80db99149eb9d44708784f" +dependencies = [ + "aes", + "aes-gcm", + "cbc", + 
"chacha20", + "cipher", + "ctr", + "poly1305", + "ssh-encoding", + "subtle", +] + +[[package]] +name = "ssh-encoding" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9242b9ef4108a78e8cd1a2c98e193ef372437f8c22be363075233321dd4a15" +dependencies = [ + "base64ct", + "pem-rfc7468", + "sha2", +] + +[[package]] +name = "ssh-key" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca9b366a80cf18bb6406f4cf4d10aebfb46140a8c0c33f666a144c5c76ecbafc" +dependencies = [ + "bcrypt-pbkdf", + "ed25519-dalek", + "num-bigint-dig", + "p256", + "p384", + "p521", + "rand_core 0.6.4", + "rsa", + "sec1", + "sha2", + "signature", + "ssh-cipher", + "ssh-encoding", + "subtle", + "zeroize", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -11998,8 +12121,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" dependencies = [ - "bit-vec", - "num-bigint", "time", ] diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index eb7cd68812..781f3fb1c6 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -19,8 +19,8 @@ omicron-test-utils.workspace = true oxide-client.workspace = true rand.workspace = true reqwest = { workspace = true, features = ["cookies"] } -russh = "0.43.0" -russh-keys = "0.43.0" +russh = "0.44.1" +russh-keys = "0.44.0" serde.workspace = true serde_json.workspace = true sled-agent-types.workspace = true diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index eff58519a3..688e1a0921 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -22,6 +22,7 @@ aho-corasick = { version = "1.1.3" } anyhow = { version = "1.0.86", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } base64 = { version = "0.22.1" } +base64ct = { version = 
"1.6.0", default-features = false, features = ["std"] } bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } @@ -72,7 +73,7 @@ log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.7.2" } nom = { version = "7.1.3" } -num-bigint = { version = "0.4.5", features = ["rand"] } +num-bigint-dig = { version = "0.8.4", default-features = false, features = ["i128", "prime", "serde", "u64_digit", "zeroize"] } num-integer = { version = "0.1.46", features = ["i128"] } num-iter = { version = "0.1.45", default-features = false, features = ["i128"] } num-traits = { version = "0.2.19", features = ["i128", "libm"] } @@ -80,6 +81,7 @@ openapiv3 = { version = "2.0.0", default-features = false, features = ["skip_ser peg-runtime = { version = "0.8.3", default-features = false, features = ["std"] } pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.5", features = ["serde-1"] } +pkcs8 = { version = "0.10.2", default-features = false, features = ["encryption", "pem", "std"] } postgres-types = { version = "0.2.7", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } predicates = { version = "3.1.2" } proc-macro2 = { version = "1.0.86" } @@ -88,11 +90,13 @@ regex-automata = { version = "0.4.6", default-features = false, features = ["dfa regex-syntax = { version = "0.8.4" } reqwest = { version = "0.11.27", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } ring = { version = "0.17.8", features = ["std"] } +rsa = { version = "0.9.6", features = ["serde", "sha2"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } serde = { version = "1.0.207", features = 
["alloc", "derive", "rc"] } serde_json = { version = "1.0.124", features = ["raw_value", "unbounded_depth"] } +sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.5.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -115,7 +119,6 @@ unicode-normalization = { version = "0.1.23" } usdt = { version = "0.5.0" } usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] } uuid = { version = "1.10.0", features = ["serde", "v4"] } -yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.34", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } @@ -125,6 +128,7 @@ aho-corasick = { version = "1.1.3" } anyhow = { version = "1.0.86", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } base64 = { version = "0.22.1" } +base64ct = { version = "1.6.0", default-features = false, features = ["std"] } bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } @@ -176,7 +180,7 @@ log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.7.2" } nom = { version = "7.1.3" } -num-bigint = { version = "0.4.5", features = ["rand"] } +num-bigint-dig = { version = "0.8.4", default-features = false, features = ["i128", "prime", "serde", "u64_digit", "zeroize"] } num-integer = { version = "0.1.46", features = ["i128"] } num-iter = { version = "0.1.45", default-features = false, features = ["i128"] } num-traits = { version = "0.2.19", features = ["i128", "libm"] } @@ -184,6 +188,7 @@ openapiv3 = { version = 
"2.0.0", default-features = false, features = ["skip_ser peg-runtime = { version = "0.8.3", default-features = false, features = ["std"] } pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.5", features = ["serde-1"] } +pkcs8 = { version = "0.10.2", default-features = false, features = ["encryption", "pem", "std"] } postgres-types = { version = "0.2.7", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } predicates = { version = "3.1.2" } proc-macro2 = { version = "1.0.86" } @@ -192,11 +197,13 @@ regex-automata = { version = "0.4.6", default-features = false, features = ["dfa regex-syntax = { version = "0.8.4" } reqwest = { version = "0.11.27", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } ring = { version = "0.17.8", features = ["std"] } +rsa = { version = "0.9.6", features = ["serde", "sha2"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } serde = { version = "1.0.207", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.124", features = ["raw_value", "unbounded_depth"] } +sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.5.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -222,7 +229,6 @@ unicode-xid = { version = "0.2.4" } usdt = { version = "0.5.0" } usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] } uuid = { version = "1.10.0", features = ["serde", "v4"] } -yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.34", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", 
"zeroize_derive"] } From 82d75b5895412665b0de6391cc21d70fe7bdef95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karen=20C=C3=A1rcamo?= Date: Thu, 15 Aug 2024 11:38:50 +1200 Subject: [PATCH 03/51] [reconfigurator] `clickhouse-admin` SMF service with dropshot server (#6304) ## Overview New SMF service in `clickhouse` and `clickhouse_keeper` zones which runs a dropshot server. The API contains a single `/node/address` endpoint to retrieve the node's listen address. Other endpoints will be added in future PRs. ## Purpose This server will be used to manage ClickHouse server and Keeper nodes. For now it performs a single basic action to keep the size of this PR small, but this server will perform other actions like generating the XML config files, retrieving the state of the node etc. ## Testing I've deployed locally with the following results: ```console root@oxz_switch:~# curl http://[fd00:1122:3344:101::e]:8888/node/address {"clickhouse_address":"[fd00:1122:3344:101::e]:8123"} ``` ```console root@oxz_clickhouse_2c213ff2:~# cat /var/svc/log/oxide-clickhouse-admin:default.log [ Aug 14 06:54:42 Enabled. ] [ Aug 14 06:54:42 Rereading configuration. ] [ Aug 14 06:54:45 Rereading configuration. ] [ Aug 14 06:54:46 Executing start method ("ctrun -l child -o noorphan,regent /opt/oxide/clickhouse-admin/bin/clickhouse-admin run -c /var/svc/manifest/site/clickhouse-admin/config.toml -a [fd00:1122:3344:101::e]:8123 -H [fd00:1122:3344:101::e]:8888 &"). ] [ Aug 14 06:54:46 Method "start" exited with status 0. 
] note: configured to log to "/dev/stdout" {"msg":"listening","v":0,"name":"clickhouse-admin","level":30,"time":"2024-08-14T06:54:46.721122327Z","hostname":"oxz_clickhouse_2c213ff2-6544-4316-939f-b51749cf3222","pid":5169,"local_addr":"[fd00:1122:3344:101::e]:8888","component":"dropshot","file":"/home/coatlicue/.cargo/git/checkouts/dropshot-a4a923d29dccc492/52d900a/dropshot/src/server.rs:205"} {"msg":"accepted connection","v":0,"name":"clickhouse-admin","level":30,"time":"2024-08-14T06:56:17.908877036Z","hostname":"oxz_clickhouse_2c213ff2-6544-4316-939f-b51749cf3222","pid":5169,"local_addr":"[fd00:1122:3344:101::e]:8888","component":"dropshot","file":"/home/coatlicue/.cargo/git/checkouts/dropshot-a4a923d29dccc492/52d900a/dropshot/src/server.rs:775","remote_addr":"[fd00:1122:3344:101::2]:37268"} {"msg":"request completed","v":0,"name":"clickhouse-admin","level":30,"time":"2024-08-14T06:56:17.91734856Z","hostname":"oxz_clickhouse_2c213ff2-6544-4316-939f-b51749cf3222","pid":5169,"uri":"/node/address","method":"GET","req_id":"62a3d8fc-e37e-42aa-a715-52dbce8aa493","remote_addr":"[fd00:1122:3344:101::2]:37268","local_addr":"[fd00:1122:3344:101::e]:8888","component":"dropshot","file":"/home/coatlicue/.cargo/git/checkouts/dropshot-a4a923d29dccc492/52d900a/dropshot/src/server.rs:914","latency_us":3151,"response_code":"200"} ``` Related: https://github.com/oxidecomputer/omicron/issues/5999 --- Cargo.lock | 48 +++++++++++ Cargo.toml | 6 ++ clickhouse-admin/Cargo.toml | 42 ++++++++++ clickhouse-admin/api/Cargo.toml | 16 ++++ clickhouse-admin/api/src/lib.rs | 28 +++++++ clickhouse-admin/src/bin/clickhouse-admin.rs | 68 ++++++++++++++++ clickhouse-admin/src/clickward.rs | 51 ++++++++++++ clickhouse-admin/src/config.rs | 43 ++++++++++ clickhouse-admin/src/context.rs | 21 +++++ clickhouse-admin/src/http_entrypoints.rs | 31 ++++++++ clickhouse-admin/src/lib.rs | 70 ++++++++++++++++ common/src/address.rs | 1 + dev-tools/openapi-manager/Cargo.toml | 1 + 
dev-tools/openapi-manager/src/spec.rs | 11 +++ openapi/clickhouse-admin.json | 84 ++++++++++++++++++++ package-manifest.toml | 16 ++++ sled-agent/src/services.rs | 56 ++++++++++++- smf/clickhouse-admin/config.toml | 10 +++ smf/clickhouse-admin/manifest.xml | 45 +++++++++++ 19 files changed, 646 insertions(+), 2 deletions(-) create mode 100644 clickhouse-admin/Cargo.toml create mode 100644 clickhouse-admin/api/Cargo.toml create mode 100644 clickhouse-admin/api/src/lib.rs create mode 100644 clickhouse-admin/src/bin/clickhouse-admin.rs create mode 100644 clickhouse-admin/src/clickward.rs create mode 100644 clickhouse-admin/src/config.rs create mode 100644 clickhouse-admin/src/context.rs create mode 100644 clickhouse-admin/src/http_entrypoints.rs create mode 100644 clickhouse-admin/src/lib.rs create mode 100644 openapi/clickhouse-admin.json create mode 100644 smf/clickhouse-admin/config.toml create mode 100644 smf/clickhouse-admin/manifest.xml diff --git a/Cargo.lock b/Cargo.lock index f827582501..781785b8ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1113,6 +1113,18 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +[[package]] +name = "clickhouse-admin-api" +version = "0.1.0" +dependencies = [ + "dropshot", + "omicron-common", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "schemars", + "serde", +] + [[package]] name = "clickward" version = "0.1.0" @@ -5654,6 +5666,41 @@ dependencies = [ "thiserror", ] +[[package]] +name = "omicron-clickhouse-admin" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "chrono", + "clap", + "clickhouse-admin-api", + "dropshot", + "expectorate", + "http 0.2.12", + "illumos-utils", + "nexus-test-utils", + "omicron-common", + "omicron-test-utils", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "openapi-lint", + "openapiv3", + "schemars", + "serde", + "serde_json", + "slog", + "slog-async", + 
"slog-dtrace", + "slog-error-chain", + "subprocess", + "thiserror", + "tokio", + "tokio-postgres", + "toml 0.8.19", + "url", +] + [[package]] name = "omicron-cockroach-admin" version = "0.1.0" @@ -6476,6 +6523,7 @@ dependencies = [ "bootstrap-agent-api", "camino", "clap", + "clickhouse-admin-api", "cockroach-admin-api", "dns-server-api", "dropshot", diff --git a/Cargo.toml b/Cargo.toml index 3dd5e61236..b7cf6f6fd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,8 @@ members = [ "api_identity", "bootstore", "certificates", + "clickhouse-admin", + "clickhouse-admin/api", "clients/bootstrap-agent-client", "clients/cockroach-admin-client", "clients/ddm-admin-client", @@ -112,6 +114,8 @@ default-members = [ "api_identity", "bootstore", "certificates", + "clickhouse-admin", + "clickhouse-admin/api", "clients/bootstrap-agent-client", "clients/cockroach-admin-client", "clients/ddm-admin-client", @@ -294,6 +298,7 @@ cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } ciborium = "0.2.2" clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] } +clickhouse-admin-api = { path = "clickhouse-admin/api" } clickward = { git = "https://github.com/oxidecomputer/clickward", rev = "ceec762e6a87d2a22bf56792a3025e145caa095e" } cockroach-admin-api = { path = "cockroach-admin/api" } cockroach-admin-client = { path = "clients/cockroach-admin-client" } @@ -417,6 +422,7 @@ nexus-test-utils = { path = "nexus/test-utils" } nexus-types = { path = "nexus/types" } num-integer = "0.1.46" num = { version = "0.4.3", default-features = false, features = [ "libm" ] } +omicron-clickhouse-admin = { path = "clickhouse-admin" } omicron-certificates = { path = "certificates" } omicron-cockroach-admin = { path = "cockroach-admin" } omicron-common = { path = "common" } diff --git a/clickhouse-admin/Cargo.toml b/clickhouse-admin/Cargo.toml new file mode 100644 index 0000000000..033836dfe0 --- /dev/null +++ b/clickhouse-admin/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name 
= "omicron-clickhouse-admin" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow.workspace = true +camino.workspace = true +chrono.workspace = true +clap.workspace = true +clickhouse-admin-api.workspace = true +dropshot.workspace = true +http.workspace = true +illumos-utils.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +schemars.workspace = true +slog.workspace = true +slog-async.workspace = true +slog-dtrace.workspace = true +slog-error-chain.workspace = true +serde.workspace = true +thiserror.workspace = true +tokio.workspace = true +tokio-postgres.workspace = true +toml.workspace = true + +omicron-workspace-hack.workspace = true + +[dev-dependencies] +expectorate.workspace = true +nexus-test-utils.workspace = true +omicron-test-utils.workspace = true +openapi-lint.workspace = true +openapiv3.workspace = true +serde_json.workspace = true +subprocess.workspace = true +url.workspace = true + +[lints] +workspace = true diff --git a/clickhouse-admin/api/Cargo.toml b/clickhouse-admin/api/Cargo.toml new file mode 100644 index 0000000000..ceec09f6c8 --- /dev/null +++ b/clickhouse-admin/api/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "clickhouse-admin-api" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +dropshot.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +omicron-workspace-hack.workspace = true +schemars.workspace = true +serde.workspace = true diff --git a/clickhouse-admin/api/src/lib.rs b/clickhouse-admin/api/src/lib.rs new file mode 100644 index 0000000000..9a011d4387 --- /dev/null +++ b/clickhouse-admin/api/src/lib.rs @@ -0,0 +1,28 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use dropshot::{HttpError, HttpResponseOk, RequestContext}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::net::SocketAddrV6; + +#[dropshot::api_description] +pub trait ClickhouseAdminApi { + type Context; + + /// Retrieve the address the ClickHouse server or keeper node is listening on + #[endpoint { + method = GET, + path = "/node/address", + }] + async fn clickhouse_address( + rqctx: RequestContext, + ) -> Result, HttpError>; +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct ClickhouseAddress { + pub clickhouse_address: SocketAddrV6, +} diff --git a/clickhouse-admin/src/bin/clickhouse-admin.rs b/clickhouse-admin/src/bin/clickhouse-admin.rs new file mode 100644 index 0000000000..6f28a82804 --- /dev/null +++ b/clickhouse-admin/src/bin/clickhouse-admin.rs @@ -0,0 +1,68 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Executable program to run the Omicron ClickHouse admin interface + +use anyhow::anyhow; +use camino::Utf8PathBuf; +use clap::Parser; +use omicron_clickhouse_admin::{Clickward, Config}; +use omicron_common::cmd::fatal; +use omicron_common::cmd::CmdError; +use std::net::{SocketAddr, SocketAddrV6}; + +#[derive(Debug, Parser)] +#[clap( + name = "clickhouse-admin", + about = "Omicron ClickHouse cluster admin server" +)] +enum Args { + /// Start the ClickHouse admin server + Run { + // TODO: This address is solely for testing now. We should remove it + // once we have more endpoints up and running. 
+ /// Socket address for a running clickhouse server or keeper instance + #[clap(long, short = 'a', action)] + clickhouse_address: SocketAddrV6, + + /// Address on which this server should run + #[clap(long, short = 'H', action)] + http_address: SocketAddrV6, + + /// Path to the server configuration file + #[clap(long, short, action)] + config: Utf8PathBuf, + }, +} + +#[tokio::main] +async fn main() { + if let Err(err) = main_impl().await { + fatal(err); + } +} + +async fn main_impl() -> Result<(), CmdError> { + let args = Args::parse(); + + match args { + Args::Run { clickhouse_address, http_address, config } => { + let mut config = Config::from_file(&config) + .map_err(|err| CmdError::Failure(anyhow!(err)))?; + config.dropshot.bind_address = SocketAddr::V6(http_address); + + let clickward = Clickward::new(clickhouse_address); + + let server = + omicron_clickhouse_admin::start_server(clickward, config) + .await + .map_err(|err| CmdError::Failure(anyhow!(err)))?; + server.await.map_err(|err| { + CmdError::Failure(anyhow!( + "server failed after starting: {err}" + )) + }) + } + } +} diff --git a/clickhouse-admin/src/clickward.rs b/clickhouse-admin/src/clickward.rs new file mode 100644 index 0000000000..114201e44b --- /dev/null +++ b/clickhouse-admin/src/clickward.rs @@ -0,0 +1,51 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use clickhouse_admin_api::ClickhouseAddress; +use dropshot::HttpError; +use slog_error_chain::{InlineErrorChain, SlogInlineError}; +use std::io; +use std::net::SocketAddrV6; + +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum ClickwardError { + #[error("clickward failure")] + Failure { + #[source] + err: io::Error, + }, +} + +impl From for HttpError { + fn from(err: ClickwardError) -> Self { + match err { + ClickwardError::Failure { .. 
} => { + let message = InlineErrorChain::new(&err).to_string(); + HttpError { + status_code: http::StatusCode::INTERNAL_SERVER_ERROR, + error_code: Some(String::from("Internal")), + external_message: message.clone(), + internal_message: message, + } + } + } + } +} + +#[derive(Debug)] +pub struct Clickward { + clickhouse_address: SocketAddrV6, +} + +impl Clickward { + pub fn new(clickhouse_address: SocketAddrV6) -> Self { + Self { clickhouse_address } + } + + pub fn clickhouse_address( + &self, + ) -> Result { + Ok(ClickhouseAddress { clickhouse_address: self.clickhouse_address }) + } +} diff --git a/clickhouse-admin/src/config.rs b/clickhouse-admin/src/config.rs new file mode 100644 index 0000000000..77a624835c --- /dev/null +++ b/clickhouse-admin/src/config.rs @@ -0,0 +1,43 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use camino::Utf8Path; +use camino::Utf8PathBuf; +use dropshot::ConfigDropshot; +use dropshot::ConfigLogging; +use serde::Deserialize; +use serde::Serialize; +use slog_error_chain::SlogInlineError; +use std::io; + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct Config { + pub dropshot: ConfigDropshot, + pub log: ConfigLogging, +} +impl Config { + /// Load a `Config` from the given TOML file + pub fn from_file(path: &Utf8Path) -> Result { + let contents = std::fs::read_to_string(path) + .map_err(|err| LoadError::Read { path: path.to_owned(), err })?; + toml::de::from_str(&contents) + .map_err(|err| LoadError::Parse { path: path.to_owned(), err }) + } +} + +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum LoadError { + #[error("failed to read {path}")] + Read { + path: Utf8PathBuf, + #[source] + err: io::Error, + }, + #[error("failed to parse {path} as TOML")] + Parse { + path: Utf8PathBuf, + #[source] + err: toml::de::Error, + }, +} diff --git 
a/clickhouse-admin/src/context.rs b/clickhouse-admin/src/context.rs new file mode 100644 index 0000000000..cab875fe1d --- /dev/null +++ b/clickhouse-admin/src/context.rs @@ -0,0 +1,21 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::Clickward; +use slog::Logger; + +pub struct ServerContext { + clickward: Clickward, + _log: Logger, +} + +impl ServerContext { + pub fn new(clickward: Clickward, _log: Logger) -> Self { + Self { clickward, _log } + } + + pub fn clickward(&self) -> &Clickward { + &self.clickward + } +} diff --git a/clickhouse-admin/src/http_entrypoints.rs b/clickhouse-admin/src/http_entrypoints.rs new file mode 100644 index 0000000000..05988a73b0 --- /dev/null +++ b/clickhouse-admin/src/http_entrypoints.rs @@ -0,0 +1,31 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use crate::context::ServerContext; +use clickhouse_admin_api::*; +use dropshot::HttpError; +use dropshot::HttpResponseOk; +use dropshot::RequestContext; +use std::sync::Arc; + +type ClickhouseApiDescription = dropshot::ApiDescription>; + +pub fn api() -> ClickhouseApiDescription { + clickhouse_admin_api_mod::api_description::() + .expect("registered entrypoints") +} + +enum ClickhouseAdminImpl {} + +impl ClickhouseAdminApi for ClickhouseAdminImpl { + type Context = Arc; + + async fn clickhouse_address( + rqctx: RequestContext, + ) -> Result, HttpError> { + let ctx = rqctx.context(); + let output = ctx.clickward().clickhouse_address()?; + Ok(HttpResponseOk(output)) + } +} diff --git a/clickhouse-admin/src/lib.rs b/clickhouse-admin/src/lib.rs new file mode 100644 index 0000000000..a48588c544 --- /dev/null +++ b/clickhouse-admin/src/lib.rs @@ -0,0 +1,70 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use context::ServerContext; +use omicron_common::FileKv; +use slog::{debug, error, Drain}; +use slog_dtrace::ProbeRegistration; +use slog_error_chain::SlogInlineError; +use std::error::Error; +use std::io; +use std::sync::Arc; + +mod clickward; +mod config; +mod context; +mod http_entrypoints; + +pub use clickward::Clickward; +pub use config::Config; + +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum StartError { + #[error("failed to initialize logger")] + InitializeLogger(#[source] io::Error), + #[error("failed to register dtrace probes: {0}")] + RegisterDtraceProbes(String), + #[error("failed to initialize HTTP server")] + InitializeHttpServer(#[source] Box), +} + +pub type Server = dropshot::HttpServer>; + +/// Start the dropshot server +pub async fn start_server( + clickward: Clickward, + server_config: Config, +) -> Result { + let (drain, registration) = slog_dtrace::with_drain( + server_config + .log + .to_logger("clickhouse-admin") + .map_err(StartError::InitializeLogger)?, + ); + let log = slog::Logger::root(drain.fuse(), slog::o!(FileKv)); + match registration { + ProbeRegistration::Success => { + debug!(log, "registered DTrace probes"); + } + ProbeRegistration::Failed(err) => { + let err = StartError::RegisterDtraceProbes(err); + error!(log, "failed to register DTrace probes"; &err); + return Err(err); + } + } + + let context = ServerContext::new( + clickward, + log.new(slog::o!("component" => "ServerContext")), + ); + let http_server_starter = dropshot::HttpServerStarter::new( + &server_config.dropshot, + http_entrypoints::api(), + Arc::new(context), + &log.new(slog::o!("component" => "dropshot")), + ) + .map_err(StartError::InitializeHttpServer)?; + + Ok(http_server_starter.start()) +} diff --git a/common/src/address.rs b/common/src/address.rs index 5ed5689289..ba1193c7f0 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -59,6 +59,7 @@ pub const COCKROACH_ADMIN_PORT: u16 = 32222; pub const CRUCIBLE_PORT: u16 = 32345; pub 
const CLICKHOUSE_PORT: u16 = 8123; pub const CLICKHOUSE_KEEPER_PORT: u16 = 9181; +pub const CLICKHOUSE_ADMIN_PORT: u16 = 8888; pub const OXIMETER_PORT: u16 = 12223; pub const DENDRITE_PORT: u16 = 12224; pub const LLDP_PORT: u16 = 12230; diff --git a/dev-tools/openapi-manager/Cargo.toml b/dev-tools/openapi-manager/Cargo.toml index 85d27aaafd..fe90737d9e 100644 --- a/dev-tools/openapi-manager/Cargo.toml +++ b/dev-tools/openapi-manager/Cargo.toml @@ -12,6 +12,7 @@ anyhow.workspace = true atomicwrites.workspace = true bootstrap-agent-api.workspace = true camino.workspace = true +clickhouse-admin-api.workspace = true cockroach-admin-api.workspace = true clap.workspace = true dns-server-api.workspace = true diff --git a/dev-tools/openapi-manager/src/spec.rs b/dev-tools/openapi-manager/src/spec.rs index 37a657ee93..29601a63d6 100644 --- a/dev-tools/openapi-manager/src/spec.rs +++ b/dev-tools/openapi-manager/src/spec.rs @@ -24,6 +24,17 @@ pub fn all_apis() -> Vec { filename: "bootstrap-agent.json", extra_validation: None, }, + ApiSpec { + title: "ClickHouse Cluster Admin API", + version: "0.0.1", + description: "API for interacting with the Oxide \ + control plane's ClickHouse cluster", + boundary: ApiBoundary::Internal, + api_description: + clickhouse_admin_api::clickhouse_admin_api_mod::stub_api_description, + filename: "clickhouse-admin.json", + extra_validation: None, + }, ApiSpec { title: "CockroachDB Cluster Admin API", version: "0.0.1", diff --git a/openapi/clickhouse-admin.json b/openapi/clickhouse-admin.json new file mode 100644 index 0000000000..6bb5367712 --- /dev/null +++ b/openapi/clickhouse-admin.json @@ -0,0 +1,84 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "ClickHouse Cluster Admin API", + "description": "API for interacting with the Oxide control plane's ClickHouse cluster", + "contact": { + "url": "https://oxide.computer", + "email": "api@oxide.computer" + }, + "version": "0.0.1" + }, + "paths": { + "/node/address": { + "get": { + "summary": 
"Retrieve the address the ClickHouse server or keeper node is listening on", + "operationId": "clickhouse_address", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ClickhouseAddress" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + } + }, + "components": { + "schemas": { + "ClickhouseAddress": { + "type": "object", + "properties": { + "clickhouse_address": { + "type": "string" + } + }, + "required": [ + "clickhouse_address" + ] + }, + "Error": { + "description": "Error information from a response.", + "type": "object", + "properties": { + "error_code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "request_id": { + "type": "string" + } + }, + "required": [ + "message", + "request_id" + ] + } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + } + } +} diff --git a/package-manifest.toml b/package-manifest.toml index e6cd464404..9189ed09a0 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -153,6 +153,9 @@ source.type = "composite" source.packages = [ "clickhouse_svc.tar.gz", "internal-dns-cli.tar.gz", + # TODO: This package is for solely for testing purposes. + # Remove once replicated clickhouse is up and running. 
+ "omicron-clickhouse-admin.tar.gz", "zone-setup.tar.gz", "zone-network-install.tar.gz" ] @@ -179,6 +182,7 @@ source.type = "composite" source.packages = [ "clickhouse_keeper_svc.tar.gz", "internal-dns-cli.tar.gz", + "omicron-clickhouse-admin.tar.gz", "zone-setup.tar.gz", "zone-network-install.tar.gz" ] @@ -198,6 +202,18 @@ output.type = "zone" output.intermediate_only = true setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" +[package.omicron-clickhouse-admin] +service_name = "clickhouse-admin" +only_for_targets.image = "standard" +source.type = "local" +source.rust.binary_names = ["clickhouse-admin"] +source.rust.release = true +source.paths = [ + { from = "smf/clickhouse-admin", to = "/var/svc/manifest/site/clickhouse-admin" }, +] +output.type = "zone" +output.intermediate_only = true + [package.cockroachdb] service_name = "cockroachdb" only_for_targets.image = "standard" diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index b822ae2963..32cf844e6d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -65,6 +65,7 @@ use nexus_config::{ConfigDropshotWithTls, DeploymentConfig}; use nexus_sled_agent_shared::inventory::{ OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, ZoneKind, }; +use omicron_common::address::CLICKHOUSE_ADMIN_PORT; use omicron_common::address::CLICKHOUSE_KEEPER_PORT; use omicron_common::address::CLICKHOUSE_PORT; use omicron_common::address::COCKROACH_PORT; @@ -1573,12 +1574,37 @@ impl ServiceManager { .add_property_group(config), ); + let ch_address = + SocketAddr::new(IpAddr::V6(listen_addr), CLICKHOUSE_PORT) + .to_string(); + + let admin_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_ADMIN_PORT, + ) + .to_string(); + + let clickhouse_admin_config = + PropertyGroupBuilder::new("config") + .add_property( + "clickhouse_address", + "astring", + ch_address, + ) + .add_property("http_address", "astring", admin_address); + let 
clickhouse_admin_service = + ServiceBuilder::new("oxide/clickhouse-admin").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(clickhouse_admin_config), + ); + let profile = ProfileBuilder::new("omicron") .add_service(nw_setup_service) .add_service(disabled_ssh_service) .add_service(clickhouse_service) .add_service(dns_service) - .add_service(enabled_dns_client_service); + .add_service(enabled_dns_client_service) + .add_service(clickhouse_admin_service); profile .add_to_zone(&self.inner.log, &installed_zone) .await @@ -1644,12 +1670,38 @@ impl ServiceManager { ServiceInstanceBuilder::new("default") .add_property_group(config), ); + + let ch_address = + SocketAddr::new(IpAddr::V6(listen_addr), CLICKHOUSE_PORT) + .to_string(); + + let admin_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_ADMIN_PORT, + ) + .to_string(); + + let clickhouse_admin_config = + PropertyGroupBuilder::new("config") + .add_property( + "clickhouse_address", + "astring", + ch_address, + ) + .add_property("http_address", "astring", admin_address); + let clickhouse_admin_service = + ServiceBuilder::new("oxide/clickhouse-admin").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(clickhouse_admin_config), + ); + let profile = ProfileBuilder::new("omicron") .add_service(nw_setup_service) .add_service(disabled_ssh_service) .add_service(clickhouse_keeper_service) .add_service(dns_service) - .add_service(enabled_dns_client_service); + .add_service(enabled_dns_client_service) + .add_service(clickhouse_admin_service); profile .add_to_zone(&self.inner.log, &installed_zone) .await diff --git a/smf/clickhouse-admin/config.toml b/smf/clickhouse-admin/config.toml new file mode 100644 index 0000000000..86ee2c5d4b --- /dev/null +++ b/smf/clickhouse-admin/config.toml @@ -0,0 +1,10 @@ +[dropshot] +# 1 MiB; we don't expect any requests of more than nominal size. 
+request_body_max_bytes = 1048576 + +[log] +# Show log messages of this level and more severe +level = "info" +mode = "file" +path = "/dev/stdout" +if_exists = "append" diff --git a/smf/clickhouse-admin/manifest.xml b/smf/clickhouse-admin/manifest.xml new file mode 100644 index 0000000000..435f8a86ac --- /dev/null +++ b/smf/clickhouse-admin/manifest.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 63397cc07ab04ee33d9d2c9abb410a110ba238e1 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:52:32 -0700 Subject: [PATCH 04/51] Update Rust crate indexmap to 2.4.0 (#6325) --- Cargo.lock | 38 +++++++++++++++++++------------------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 781785b8ec..ad0dc13987 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2228,7 +2228,7 @@ dependencies = [ "hostname 0.4.0", "http 0.2.12", "hyper 0.14.30", - "indexmap 2.3.0", + "indexmap 2.4.0", "multer", "openapiv3", "paste", @@ -3130,7 +3130,7 @@ dependencies = [ "debug-ignore", "fixedbitset", "guppy-workspace-hack", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.13.0", "nested", "once_cell", @@ -3162,7 +3162,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.3.0", + "indexmap 2.4.0", "slab", "tokio", "tokio-util", @@ -3785,9 +3785,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" +checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" dependencies = [ "equivalent", "hashbrown 0.14.5", @@ -5186,7 +5186,7 @@ dependencies = [ "debug-ignore", "expectorate", "gateway-client", - "indexmap 2.3.0", + "indexmap 2.4.0", "internal-dns", 
"ipnet", "maplit", @@ -6380,7 +6380,7 @@ dependencies = [ "hex", "hmac", "hyper 0.14.30", - "indexmap 2.3.0", + "indexmap 2.4.0", "inout", "itertools 0.10.5", "itertools 0.12.1", @@ -6508,7 +6508,7 @@ version = "0.4.0" source = "git+https://github.com/oxidecomputer/openapi-lint?branch=main#ef442ee4343e97b6d9c217d3e7533962fe7d7236" dependencies = [ "heck 0.4.1", - "indexmap 2.3.0", + "indexmap 2.4.0", "lazy_static", "openapiv3", "regex", @@ -6550,7 +6550,7 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc02deea53ffe807708244e5914f6b099ad7015a207ee24317c22112e17d9c5c" dependencies = [ - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_json", ] @@ -6822,7 +6822,7 @@ dependencies = [ "expectorate", "futures", "highway", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.13.0", "num", "omicron-common", @@ -7311,7 +7311,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_derive", ] @@ -7729,7 +7729,7 @@ dependencies = [ "getopts", "heck 0.5.0", "http 0.2.12", - "indexmap 2.3.0", + "indexmap 2.4.0", "openapiv3", "proc-macro2", "quote", @@ -8145,7 +8145,7 @@ dependencies = [ "dropshot", "expectorate", "humantime", - "indexmap 2.3.0", + "indexmap 2.4.0", "nexus-client", "nexus-db-queries", "nexus-reconfigurator-execution", @@ -9202,7 +9202,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_derive", "serde_json", @@ -9228,7 +9228,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.3.0", + "indexmap 2.4.0", "itoa", "ryu", "serde", @@ -10672,7 +10672,7 @@ version = "0.19.15" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_spanned", "toml_datetime", @@ -10685,7 +10685,7 @@ version = "0.22.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d" dependencies = [ - "indexmap 2.3.0", + "indexmap 2.4.0", "serde", "serde_spanned", "toml_datetime", @@ -11257,7 +11257,7 @@ dependencies = [ "derive-where", "either", "futures", - "indexmap 2.3.0", + "indexmap 2.4.0", "indicatif", "libsw", "linear-map", @@ -11655,7 +11655,7 @@ dependencies = [ "expectorate", "futures", "humantime", - "indexmap 2.3.0", + "indexmap 2.4.0", "indicatif", "itertools 0.13.0", "maplit", diff --git a/Cargo.toml b/Cargo.toml index b7cf6f6fd1..a87c88eeac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -373,7 +373,7 @@ hyper-rustls = "0.26.0" hyper-staticfile = "0.9.5" illumos-utils = { path = "illumos-utils" } indent_write = "2.2.0" -indexmap = "2.3.0" +indexmap = "2.4.0" indicatif = { version = "0.17.8", features = ["rayon"] } installinator = { path = "installinator" } installinator-api = { path = "installinator-api" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 688e1a0921..2e0520d82d 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -62,7 +62,7 @@ hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.30", features = ["full"] } -indexmap = { version = "2.3.0", features = ["serde"] } +indexmap = { version = "2.4.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = 
"itertools", version = "0.10.5" } @@ -169,7 +169,7 @@ hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.30", features = ["full"] } -indexmap = { version = "2.3.0", features = ["serde"] } +indexmap = { version = "2.4.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } From 2331f7d62e8b48e94ebfd8d3f05ba64e1276a123 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 14 Aug 2024 18:05:24 -0700 Subject: [PATCH 05/51] `omdb nexus background-tasks show` could support filtering (#6327) --- Cargo.lock | 1 + dev-tools/omdb/Cargo.toml | 1 + dev-tools/omdb/src/bin/omdb/nexus.rs | 71 ++++- dev-tools/omdb/tests/successes.out | 391 ++++++++++++++++++++++++ dev-tools/omdb/tests/test_all_output.rs | 14 + dev-tools/omdb/tests/usage_errors.out | 40 +++ 6 files changed, 511 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ad0dc13987..f38eece4d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6055,6 +6055,7 @@ dependencies = [ "indicatif", "internal-dns", "ipnetwork", + "itertools 0.13.0", "multimap", "nexus-client", "nexus-config", diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index 0990fdb11c..a92de1b6a9 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -28,6 +28,7 @@ gateway-messages.workspace = true gateway-test-utils.workspace = true humantime.workspace = true internal-dns.workspace = true +itertools.workspace = true nexus-client.workspace = true nexus-config.workspace = true nexus-db-model.workspace = true diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 1b6d2469f4..67a4180dd2 100644 --- 
a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -19,6 +19,7 @@ use clap::Subcommand; use clap::ValueEnum; use futures::future::try_join; use futures::TryStreamExt; +use itertools::Itertools; use nexus_client::types::ActivationReason; use nexus_client::types::BackgroundTask; use nexus_client::types::BackgroundTasksActivateRequest; @@ -46,6 +47,7 @@ use reedline::Reedline; use serde::Deserialize; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::str::FromStr; use tabled::Tabled; use uuid::Uuid; @@ -93,11 +95,21 @@ enum BackgroundTasksCommands { /// Print a summary of the status of all background tasks List, /// Print human-readable summary of the status of each background task - Show, + Show(BackgroundTasksShowArgs), /// Activate one or more background tasks Activate(BackgroundTasksActivateArgs), } +#[derive(Debug, Args)] +struct BackgroundTasksShowArgs { + /// Names of background tasks to show (default: all) + /// + /// You can use any background task name here or one of the special strings + /// "all", "dns_external", or "dns_internal". 
+ #[clap(value_name = "TASK_NAME")] + tasks: Vec, +} + #[derive(Debug, Args)] struct BackgroundTasksActivateArgs { /// Name of the background tasks to activate @@ -361,8 +373,8 @@ impl NexusArgs { command: BackgroundTasksCommands::List, }) => cmd_nexus_background_tasks_list(&client).await, NexusCommands::BackgroundTasks(BackgroundTasksArgs { - command: BackgroundTasksCommands::Show, - }) => cmd_nexus_background_tasks_show(&client).await, + command: BackgroundTasksCommands::Show(args), + }) => cmd_nexus_background_tasks_show(&client, args).await, NexusCommands::BackgroundTasks(BackgroundTasksArgs { command: BackgroundTasksCommands::Activate(args), }) => { @@ -523,7 +535,9 @@ async fn cmd_nexus_background_tasks_list( ) -> Result<(), anyhow::Error> { let response = client.bgtask_list().await.context("listing background tasks")?; - let tasks = response.into_inner(); + // Convert the HashMap to a BTreeMap because we want the keys in sorted + // order. + let tasks = response.into_inner().into_iter().collect::>(); let table_rows = tasks.values().map(BackgroundTaskStatusRow::from); let table = tabled::Table::new(table_rows) .with(tabled::settings::Style::empty()) @@ -536,6 +550,7 @@ async fn cmd_nexus_background_tasks_list( /// Runs `omdb nexus background-tasks show` async fn cmd_nexus_background_tasks_show( client: &nexus_client::Client, + args: &BackgroundTasksShowArgs, ) -> Result<(), anyhow::Error> { let response = client.bgtask_list().await.context("listing background tasks")?; @@ -544,8 +559,50 @@ async fn cmd_nexus_background_tasks_show( let mut tasks = response.into_inner().into_iter().collect::>(); - // We want to pick the order that we print some tasks intentionally. Then - // we want to print anything else that we find. + // Now, pick out the tasks that the user selected. 
+ // + // The set of user tasks may include: + // + // - nothing at all, in which case we include all tasks + // - individual task names + // - certain groups that we recognize, like "dns_external" for all the tasks + // related to external DNS propagation. "all" means "all tasks". + let selected_set: BTreeSet<_> = + args.tasks.iter().map(AsRef::as_ref).collect(); + let selected_all = selected_set.is_empty() || selected_set.contains("all"); + if !selected_all { + for s in &selected_set { + if !tasks.contains_key(*s) + && *s != "all" + && *s != "dns_external" + && *s != "dns_internal" + { + bail!( + "unknown task name: {:?} (known task names: all, \ + dns_external, dns_internal, {})", + s, + tasks.keys().join(", ") + ); + } + } + + tasks.retain(|k, _| { + selected_set.contains(k.as_str()) + || selected_set.contains("all") + || (selected_set.contains("dns_external") + && k.starts_with("dns_") + && k.ends_with("_external")) + || (selected_set.contains("dns_internal") + && k.starts_with("dns_") + && k.ends_with("_internal")) + }); + } + + // Some tasks should be grouped and printed together in a certain order, + // even though their names aren't alphabetical. Notably, the DNS tasks + // logically go from config -> servers -> propagation, so we want to print + // them in that order. So we pick these out first and then print anything + // else that we find in alphabetical order. 
for name in [ "dns_config_internal", "dns_servers_internal", @@ -559,7 +616,7 @@ async fn cmd_nexus_background_tasks_show( ] { if let Some(bgtask) = tasks.remove(name) { print_task(&bgtask); - } else { + } else if selected_all { eprintln!("warning: expected to find background task {:?}", name); } } diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 166936da9c..19c555ec96 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -632,6 +632,397 @@ task: "vpc_route_manager" started at (s ago) and ran for ms warning: unknown background task: "vpc_route_manager" (don't know how to interpret details: Object {}) +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "saga_recovery"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "saga_recovery" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + since Nexus started: + sagas recovered: 0 + sagas recovery errors: 0 + sagas observed started: 0 + sagas inferred finished: 0 + missing from SEC: 0 + bad state in SEC: 0 + last pass: + found sagas: 0 (in-progress, assigned to this Nexus) + recovered: 0 (successfully) + failed: 0 + skipped: 0 (already running) + removed: 0 (newly finished) + no recovered sagas + no saga recovery failures + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "blueprint_loader", "blueprint_executor"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "blueprint_loader" + configured period: every 1m s + currently 
executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: failed to read target blueprint: Internal Error: no target blueprint set + +task: "blueprint_executor" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: no blueprint + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "dns_internal"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 1 + +task: "dns_servers_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 1 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "dns_external"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an 
explicit signal + started at (s ago) and ran for ms + last generation found: 2 + +task: "dns_servers_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 2 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "all"] +termination: Exited(0) +--------------------------------------------- +stdout: +task: "dns_config_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 1 + +task: "dns_servers_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_internal" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 1 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +task: "dns_config_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last generation found: 2 + +task: "dns_servers_external" + configured period: every 1m + currently executing: no 
+ last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + servers found: 1 + + DNS_SERVER_ADDR + [::1]:REDACTED_PORT + +task: "dns_propagation_external" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + attempt to propagate generation: 2 + + DNS_SERVER_ADDR LAST_RESULT + [::1]:REDACTED_PORT success + + +task: "nat_v4_garbage_collector" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } + +task: "blueprint_loader" + configured period: every 1m s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: failed to read target blueprint: Internal Error: no target blueprint set + +task: "blueprint_executor" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: no blueprint + +task: "abandoned_vmm_reaper" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total abandoned VMMs found: 0 + VMM records deleted: 0 + VMM records already deleted by another Nexus: 0 + sled resource reservations deleted: 0 + +task: "bfd_manager" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: failed to resolve addresses for Dendrite 
services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } + +task: "crdb_node_id_collector" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: no blueprint + +task: "decommissioned_disk_cleaner" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "decommissioned_disk_cleaner" (don't know how to interpret details: Object {"deleted": Number(0), "error": Null, "error_count": Number(0), "found": Number(0), "not_ready_to_be_deleted": Number(0)}) + +task: "external_endpoints" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + external API endpoints: 2 ('*' below marks default) + + SILO_ID DNS_NAME + ..................... default-silo.sys.oxide-dev.test + * ..................... test-suite-silo.sys.oxide-dev.test + + warnings: 2 + warning: silo ..................... with DNS name "default-silo.sys.oxide-dev.test" has no usable certificates + warning: silo ..................... 
with DNS name "test-suite-silo.sys.oxide-dev.test" has no usable certificates + + TLS certificates: 0 + +task: "instance_updater" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total instances in need of updates: 0 + instances with destroyed active VMMs: 0 + instances with terminated active migrations: 0 + update sagas started: 0 + update sagas completed successfully: 0 + +task: "instance_watcher" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total instances checked: 0 + checks completed: 0 + successful checks: 0 + update sagas queued: 0 + failed checks: 0 + checks that could not be completed: 0 + stale instance metrics pruned: 0 + +task: "inventory_collection" + configured period: every 10m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last collection id: ..................... 
+ last collection started: + last collection done: + +task: "lookup_region_port" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total filled in ports: 0 + errors: 0 + +task: "metrics_producer_gc" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "metrics_producer_gc" (don't know how to interpret details: Object {"expiration": String(""), "pruned": Array []}) + +task: "phantom_disks" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + number of phantom disks deleted: 0 + number of phantom disk delete errors: 0 + +task: "physical_disk_adoption" + configured period: every s + currently executing: no + last completed activation: , triggered by a dependent task completing + started at (s ago) and ran for ms + last completion reported error: task disabled + +task: "region_replacement" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + number of region replacements started ok: 0 + number of region replacement start errors: 0 + +task: "region_replacement_driver" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + number of region replacement drive sagas started ok: 0 + number of region replacement finish sagas started ok: 0 + number of errors: 0 + +task: "saga_recovery" + configured period: every 10m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + since Nexus started: + sagas recovered: 0 + sagas recovery errors: 
0 + sagas observed started: 0 + sagas inferred finished: 0 + missing from SEC: 0 + bad state in SEC: 0 + last pass: + found sagas: 0 (in-progress, assigned to this Nexus) + recovered: 0 (successfully) + failed: 0 + skipped: 0 (already running) + removed: 0 (newly finished) + no recovered sagas + no saga recovery failures + +task: "service_firewall_rule_propagation" + configured period: every 5m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + +task: "service_zone_nat_tracker" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + last completion reported error: inventory collection is None + +task: "switch_port_config_manager" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {}) + +task: "v2p_manager" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "v2p_manager" (don't know how to interpret details: Object {}) + +task: "vpc_route_manager" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "vpc_route_manager" (don't know how to interpret details: Object {}) + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index d0258aeaed..45492c14ce 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ 
b/dev-tools/omdb/tests/test_all_output.rs @@ -80,6 +80,7 @@ async fn test_omdb_usage_errors() { &["mgs"], &["nexus"], &["nexus", "background-tasks"], + &["nexus", "background-tasks", "show", "--help"], &["nexus", "blueprints"], &["nexus", "sagas"], // Missing "--destructive" flag. The URL is bogus but just ensures that @@ -144,6 +145,19 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { &["mgs", "inventory"], &["nexus", "background-tasks", "doc"], &["nexus", "background-tasks", "show"], + // background tasks: test picking out specific names + &["nexus", "background-tasks", "show", "saga_recovery"], + &[ + "nexus", + "background-tasks", + "show", + "blueprint_loader", + "blueprint_executor", + ], + // background tasks: test recognized group names + &["nexus", "background-tasks", "show", "dns_internal"], + &["nexus", "background-tasks", "show", "dns_external"], + &["nexus", "background-tasks", "show", "all"], &["nexus", "sagas", "list"], &["--destructive", "nexus", "sagas", "demo-create"], &["nexus", "sagas", "list"], diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 1ee07410bf..55781136b6 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -491,6 +491,46 @@ Connection Options: Safety Options: -w, --destructive Allow potentially-destructive subcommands ============================================= +EXECUTING COMMAND: omdb ["nexus", "background-tasks", "show", "--help"] +termination: Exited(0) +--------------------------------------------- +stdout: +Print human-readable summary of the status of each background task + +Usage: omdb nexus background-tasks show [OPTIONS] [TASK_NAME]... + +Arguments: + [TASK_NAME]... + Names of background tasks to show (default: all) + + You can use any background task name here or one of the special strings "all", + "dns_external", or "dns_internal". 
+ +Options: + --log-level + log level filter + + [env: LOG_LEVEL=] + [default: warn] + + -h, --help + Print help (see a summary with '-h') + +Connection Options: + --nexus-internal-url + URL of the Nexus internal API + + [env: OMDB_NEXUS_URL=] + + --dns-server + [env: OMDB_DNS_SERVER=] + +Safety Options: + -w, --destructive + Allow potentially-destructive subcommands +--------------------------------------------- +stderr: +============================================= EXECUTING COMMAND: omdb ["nexus", "blueprints"] termination: Exited(2) --------------------------------------------- From 912038c1f8d46e24405d86491db1e29166716029 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 14 Aug 2024 23:12:31 -0700 Subject: [PATCH 06/51] fix auto-mismerge of #6294 and #6327 (#6344) --- dev-tools/omdb/tests/successes.out | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 19c555ec96..db1cb1da61 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -968,6 +968,13 @@ task: "region_replacement_driver" number of region replacement finish sagas started ok: 0 number of errors: 0 +task: "region_snapshot_replacement_start" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "region_snapshot_replacement_start" (don't know how to interpret details: Object {"errors": Array [], "requests_created_ok": Array [], "start_invoked_ok": Array []}) + task: "saga_recovery" configured period: every 10m currently executing: no From 13ae33bdb8c5a7e7345b53e79fa44d635eeffff0 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 15 Aug 2024 07:59:15 +0000 Subject: [PATCH 07/51] Update Rust crate serde_tokenstream to v0.2.2 (#6341) --- Cargo.lock | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f38eece4d5..097d3a3aaa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9171,9 +9171,9 @@ dependencies = [ [[package]] name = "serde_tokenstream" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8790a7c3fe883e443eaa2af6f705952bc5d6e8671a220b9335c8cae92c037e74" +checksum = "64060d864397305347a78851c51588fd283767e7e7589829e8121d65512340f1" dependencies = [ "proc-macro2", "quote", From 22e4de36e218b0a76b39088e3ba5004a2031d7f7 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 15 Aug 2024 09:30:08 +0000 Subject: [PATCH 08/51] Update Rust crate serde_json to 1.0.125 (#6345) --- Cargo.lock | 224 ++++++++++++++------------------------ Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 +- 3 files changed, 86 insertions(+), 144 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 097d3a3aaa..b6cadd5e69 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -59,7 +59,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", - "getrandom 0.2.14", + "getrandom", "once_cell", "version_check", "zerocopy 0.7.34", @@ -376,10 +376,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" dependencies = [ "futures-core", - "getrandom 0.2.14", + "getrandom", "instant", "pin-project-lite", - "rand 0.8.5", + "rand", "tokio", ] @@ -683,7 +683,7 @@ dependencies = [ "omicron-workspace-hack", "pq-sys", "proptest", - "rand 0.8.5", + "rand", "secrecy", "serde", "serde_with", @@ -1609,7 +1609,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" dependencies = [ "generic-array", - "rand_core 0.6.4", + 
"rand_core", "subtle", "zeroize", ] @@ -1621,7 +1621,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ "generic-array", - "rand_core 0.6.4", + "rand_core", "typenum", ] @@ -1676,7 +1676,7 @@ dependencies = [ "curve25519-dalek-derive", "digest", "fiat-crypto", - "rand_core 0.6.4", + "rand_core", "rustc_version 0.4.0", "subtle", "zeroize", @@ -1934,7 +1934,7 @@ dependencies = [ "dhcproto-macros", "hex", "ipnet", - "rand 0.8.5", + "rand", "thiserror", "trust-dns-proto", "url", @@ -2198,7 +2198,7 @@ dependencies = [ "progenitor", "progenitor-client", "quote", - "rand 0.8.5", + "rand", "regress", "reqwest", "rustfmt-wrapper", @@ -2332,7 +2332,7 @@ checksum = "4a3daa8e81a3963a60642bcc1f90a670680bd4a77535faa384e9d1c79d620871" dependencies = [ "curve25519-dalek", "ed25519", - "rand_core 0.6.4", + "rand_core", "serde", "sha2", "subtle", @@ -2360,7 +2360,7 @@ dependencies = [ "hkdf", "pem-rfc7468", "pkcs8", - "rand_core 0.6.4", + "rand_core", "sec1", "subtle", "zeroize", @@ -2418,7 +2418,7 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "oxide-client", - "rand 0.8.5", + "rand", "reqwest", "russh", "russh-keys", @@ -2589,7 +2589,7 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ded41244b729663b1e574f1b4fb731469f69f79c17667b5d776b16cda0479449" dependencies = [ - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -2905,7 +2905,7 @@ dependencies = [ "gateway-messages", "omicron-workspace-hack", "progenitor", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -3023,17 +3023,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "getrandom" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.9.0+wasi-snapshot-preview1", -] 
- [[package]] name = "getrandom" version = "0.2.14" @@ -3043,7 +3032,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "wasm-bindgen", ] @@ -3113,7 +3102,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" dependencies = [ "ff", - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -3603,7 +3592,7 @@ dependencies = [ "hyper 0.14.30", "mime_guess", "percent-encoding", - "rand 0.8.5", + "rand", "tokio", "url", "winapi", @@ -4254,7 +4243,7 @@ dependencies = [ "portpicker", "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=6dceb9ef69c217cb78a2018bbedafbc19f6ec1af)", "propolis-server-config", - "rand 0.8.5", + "rand", "regex", "reqwest", "ron 0.7.1", @@ -4669,7 +4658,7 @@ checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", "log", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "windows-sys 0.48.0", ] @@ -4682,7 +4671,7 @@ dependencies = [ "hermit-abi 0.3.9", "libc", "log", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "windows-sys 0.52.0", ] @@ -4744,7 +4733,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" dependencies = [ - "getrandom 0.2.14", + "getrandom", ] [[package]] @@ -4932,7 +4921,7 @@ dependencies = [ "oxnet", "parse-display", "pq-sys", - "rand 0.8.5", + "rand", "ref-cast", "schemars", "semver 1.0.23", @@ -5001,7 +4990,7 @@ dependencies = [ "pq-sys", "predicates", "pretty_assertions", - "rand 0.8.5", + "rand", "rcgen", "ref-cast", "regex", @@ -5035,7 +5024,7 @@ dependencies = [ "omicron-workspace-hack", "once_cell", "oxnet", - "rand 0.8.5", + "rand", "serde_json", ] @@ -5200,7 +5189,7 @@ dependencies = [ "omicron-workspace-hack", "oxnet", "proptest", - "rand 0.8.5", + "rand", "sled-agent-client", "slog", "static_assertions", 
@@ -5466,7 +5455,7 @@ checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" dependencies = [ "num-integer", "num-traits", - "rand 0.8.5", + "rand", ] [[package]] @@ -5481,7 +5470,7 @@ dependencies = [ "num-integer", "num-iter", "num-traits", - "rand 0.8.5", + "rand", "serde", "smallvec 1.13.2", "zeroize", @@ -5771,7 +5760,7 @@ dependencies = [ "progenitor", "progenitor-client", "proptest", - "rand 0.8.5", + "rand", "regress", "reqwest", "schemars", @@ -5989,7 +5978,7 @@ dependencies = [ "pretty_assertions", "progenitor-client", "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=24a74d0c76b6a63961ecef76acb1516b6e66c5c9)", - "rand 0.8.5", + "rand", "rcgen", "ref-cast", "regex", @@ -6136,7 +6125,7 @@ dependencies = [ "clap", "criterion", "omicron-workspace-hack", - "rand 0.8.5", + "rand", "rust-argon2", "schemars", "serde", @@ -6245,7 +6234,7 @@ dependencies = [ "pretty_assertions", "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=24a74d0c76b6a63961ecef76acb1516b6e66c5c9)", "propolis-mock-server", - "rand 0.8.5", + "rand", "rcgen", "reqwest", "schemars", @@ -6375,7 +6364,7 @@ dependencies = [ "futures-util", "gateway-messages", "generic-array", - "getrandom 0.2.14", + "getrandom", "group", "hashbrown 0.14.5", "hex", @@ -6692,7 +6681,7 @@ dependencies = [ "hyper 0.14.30", "omicron-workspace-hack", "progenitor", - "rand 0.8.5", + "rand", "regress", "reqwest", "serde", @@ -6787,7 +6776,7 @@ dependencies = [ "oximeter-api", "oximeter-client", "oximeter-db", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -6870,7 +6859,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "rand 0.8.5", + "rand", "rand_distr", "regex", "rstest", @@ -6900,7 +6889,7 @@ dependencies = [ "libc", "omicron-workspace-hack", "oximeter", - "rand 0.8.5", + "rand", "schemars", "serde", "slog", @@ -7015,7 +7004,7 @@ dependencies = [ "ecdsa", "elliptic-curve", "primeorder", - "rand_core 0.6.4", + "rand_core", 
"sha2", ] @@ -7150,7 +7139,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" dependencies = [ "base64ct", - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -7161,7 +7150,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166" dependencies = [ "base64ct", - "rand_core 0.6.4", + "rand_core", "subtle", ] @@ -7410,7 +7399,7 @@ checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" dependencies = [ "der", "pkcs5", - "rand_core 0.6.4", + "rand_core", "spki", ] @@ -7510,7 +7499,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be97d76faf1bfab666e1375477b23fde79eccf0276e9b63b92a39d676a889ba9" dependencies = [ - "rand 0.8.5", + "rand", ] [[package]] @@ -7537,7 +7526,7 @@ dependencies = [ "hmac", "md-5", "memchr", - "rand 0.8.5", + "rand", "sha2", "stringprep", ] @@ -7800,7 +7789,7 @@ dependencies = [ "base64 0.21.7", "futures", "progenitor", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -7821,7 +7810,7 @@ dependencies = [ "base64 0.21.7", "futures", "progenitor", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -7847,7 +7836,7 @@ dependencies = [ "hyper 0.14.30", "progenitor", "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=24a74d0c76b6a63961ecef76acb1516b6e66c5c9)", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", @@ -7904,8 +7893,8 @@ dependencies = [ "bitflags 2.6.0", "lazy_static", "num-traits", - "rand 0.8.5", - "rand_chacha 0.3.1", + "rand", + "rand_chacha", "rand_xorshift", "regex-syntax 0.8.4", "rusty-fork", @@ -7981,19 +7970,6 @@ dependencies = [ "nibble_vec", ] -[[package]] -name = "rand" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" -dependencies = [ - "getrandom 0.1.16", - "libc", - "rand_chacha 0.2.2", - "rand_core 0.5.1", - "rand_hc", -] - [[package]] name = "rand" version = "0.8.5" @@ -8001,18 +7977,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" -dependencies = [ - "ppv-lite86", - "rand_core 0.5.1", + "rand_chacha", + "rand_core", ] [[package]] @@ -8022,16 +7988,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" -dependencies = [ - "getrandom 0.1.16", + "rand_core", ] [[package]] @@ -8040,7 +7997,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.14", + "getrandom", ] [[package]] @@ -8050,16 +8007,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" dependencies = [ "num-traits", - "rand 0.8.5", -] - -[[package]] -name = "rand_hc" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -dependencies = [ - "rand_core 0.5.1", + "rand", ] [[package]] @@ -8068,7 +8016,7 
@@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a9febe641d2842ffc76ee962668a17578767c4e01735e4802b21ed9a24b2e4e" dependencies = [ - "rand_core 0.6.4", + "rand_core", ] [[package]] @@ -8077,7 +8025,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" dependencies = [ - "rand_core 0.6.4", + "rand_core", ] [[package]] @@ -8208,7 +8156,7 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" dependencies = [ - "getrandom 0.2.14", + "getrandom", "libredox", "thiserror", ] @@ -8416,7 +8364,7 @@ checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.14", + "getrandom", "libc", "spin 0.9.8", "untrusted 0.9.0", @@ -8470,7 +8418,7 @@ dependencies = [ "num-traits", "pkcs1", "pkcs8", - "rand_core 0.6.4", + "rand_core", "serde", "sha2", "signature", @@ -8561,8 +8509,8 @@ dependencies = [ "p384", "p521", "poly1305", - "rand 0.8.5", - "rand_core 0.6.4", + "rand", + "rand_core", "russh-cryptovec", "russh-keys", "sha1", @@ -8617,8 +8565,8 @@ dependencies = [ "pkcs1", "pkcs5", "pkcs8", - "rand 0.8.5", - "rand_core 0.6.4", + "rand", + "rand_core", "rsa", "russh-cryptovec", "sec1", @@ -8880,7 +8828,7 @@ dependencies = [ "openssl-sys", "pkg-config", "quick-xml", - "rand 0.8.5", + "rand", "serde", "thiserror", "url", @@ -9120,9 +9068,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.124" +version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d" +checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed" dependencies = [ "itoa", "memchr", @@ -9330,7 +9278,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ "digest", - "rand_core 0.6.4", + "rand_core", ] [[package]] @@ -9481,7 +9429,7 @@ dependencies = [ "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", - "rand 0.8.5", + "rand", "schemars", "serde", "sled-hardware-types", @@ -9525,7 +9473,7 @@ dependencies = [ "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", - "rand 0.8.5", + "rand", "schemars", "serde", "serde_json", @@ -9879,7 +9827,7 @@ dependencies = [ "p256", "p384", "p521", - "rand_core 0.6.4", + "rand_core", "rsa", "sec1", "sha2", @@ -10549,7 +10497,7 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "postgres-types", - "rand 0.8.5", + "rand", "socket2 0.5.7", "tokio", "tokio-util", @@ -10818,7 +10766,7 @@ dependencies = [ "futures-util", "lazy_static", "radix_trie", - "rand 0.8.5", + "rand", "thiserror", "time", "tokio", @@ -10842,7 +10790,7 @@ dependencies = [ "idna 0.2.3", "ipnet", "lazy_static", - "rand 0.8.5", + "rand", "smallvec 1.13.2", "thiserror", "tinyvec", @@ -10961,7 +10909,7 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "parse-size", - "rand 0.8.5", + "rand", "ring 0.17.8", "serde", "serde_json", @@ -10998,7 +10946,7 @@ dependencies = [ "http 0.2.12", "httparse", "log", - "rand 0.8.5", + "rand", "sha1", "thiserror", "url", @@ -11017,7 +10965,7 @@ dependencies = [ "http 1.1.0", "httparse", "log", - "rand 0.8.5", + "rand", "sha1", "thiserror", "url", @@ -11031,7 +10979,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ "cfg-if", - "rand 0.7.3", + "rand", "static_assertions", ] @@ -11047,8 +10995,8 @@ version = "0.1.0" dependencies = [ "newtype-uuid", "omicron-workspace-hack", - "rand 0.8.5", - "rand_core 0.6.4", + "rand", + "rand_core", "rand_seeder", "uuid", ] @@ 
-11232,7 +11180,7 @@ dependencies = [ "omicron-common", "omicron-test-utils", "omicron-workspace-hack", - "rand 0.8.5", + "rand", "sha2", "slog", "thiserror", @@ -11372,7 +11320,7 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ - "getrandom 0.2.14", + "getrandom", "serde", ] @@ -11440,9 +11388,9 @@ dependencies = [ "curve25519-dalek", "elliptic-curve", "hex", - "rand 0.8.5", - "rand_chacha 0.3.1", - "rand_core 0.6.4", + "rand", + "rand_chacha", + "rand_core", "serde", "subtle", "thiserror-no-std", @@ -11506,12 +11454,6 @@ dependencies = [ "try-lock", ] -[[package]] -name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -11787,7 +11729,7 @@ dependencies = [ "openapi-lint", "openapiv3", "oxnet", - "rand 0.8.5", + "rand", "reqwest", "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index a87c88eeac..a1ae9858ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -507,7 +507,7 @@ secrecy = "0.8.0" semver = { version = "1.0.23", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive", "rc" ] } serde_human_bytes = { git = "https://github.com/oxidecomputer/serde_human_bytes", branch = "main" } -serde_json = "1.0.124" +serde_json = "1.0.125" serde_path_to_error = "0.1.16" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 2e0520d82d..35c266cdf3 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -95,7 +95,7 @@ schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } serde = { 
version = "1.0.207", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.124", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.125", features = ["raw_value", "unbounded_depth"] } sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.5.0", features = ["bytes", "inline", "unicode"] } @@ -202,7 +202,7 @@ schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } serde = { version = "1.0.207", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.124", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.125", features = ["raw_value", "unbounded_depth"] } sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.5.0", features = ["bytes", "inline", "unicode"] } From 8b3e948dd8afab54d5fe0c821beb4af5a83b7701 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 15 Aug 2024 06:38:01 -0400 Subject: [PATCH 09/51] SP bump to pick up dumping fixes (#6338) --- tools/permslip_staging | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/permslip_staging b/tools/permslip_staging index d886cc4246..d2ddc45f20 100644 --- a/tools/permslip_staging +++ b/tools/permslip_staging @@ -1,5 +1,5 @@ -34cf117633f82cc8f665dc3b6c78dc2aff61ca87d2b2687290605080265dda30 manifest-gimlet-v1.0.23.toml +6ea87b554882860f1a9b1cf97b2f4a9c61fadf3d69e6ea1bdcd781d306d6ca9c manifest-gimlet-v1.0.24.toml 85553dd164933a9b9e4f22409abd1190b1d632d192b5f7527129acaa778a671a manifest-oxide-rot-1-v1.0.13.toml -db995edfe91959df3cb20ea8156f75b9dcff5ec5e77f98a28766617a8ed2e0c5 manifest-psc-v1.0.22.toml -26b6096a377edb3d7da50b1b499af104e6195bc7c7c6eb1b2751b32434d7ac9e manifest-sidecar-v1.0.23.toml +11bc0684155119f494a6e21810e4dc97b9efadb8154d570f67143dae98a45060 
manifest-psc-v1.0.23.toml +60205852109f1584d29e2b086eae5a72d7f61b2e1f64d958e6326312ed2b0d66 manifest-sidecar-v1.0.24.toml c0fecaefac7674138337f3bd4ce4ce5b884053dead5ec27b575701471631ea2f manifest-bootleby-v1.3.0.toml From b50fe3a792e0a1ef5f215acf7a6a644fbda8aff2 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Thu, 15 Aug 2024 12:28:45 -0400 Subject: [PATCH 10/51] [#5333 4/6] Region snapshot replacement GC (#6330) This commit adds a "snapshot replacement garbage collect" background task that will scan for region snapshot replacement requests in the ReplacementDone state and trigger a saga to delete the old snapshot volume populated in the snapshot replacement start saga. The "garbage collect" saga added here does nothing except change the state of the snapshot replacement request and invoke a volume delete sub-saga. At the end of this saga the record's state is changed to Running. --- dev-tools/omdb/src/bin/omdb/nexus.rs | 26 ++ dev-tools/omdb/tests/env.out | 12 + dev-tools/omdb/tests/successes.out | 20 ++ nexus-config/src/nexus_config.rs | 17 + nexus/examples/config-second.toml | 1 + nexus/examples/config.toml | 1 + nexus/src/app/background/init.rs | 23 +- nexus/src/app/background/tasks/mod.rs | 1 + ...on_snapshot_replacement_garbage_collect.rs | 265 ++++++++++++++ nexus/src/app/sagas/mod.rs | 4 + ...on_snapshot_replacement_garbage_collect.rs | 326 ++++++++++++++++++ nexus/tests/config.test.toml | 1 + nexus/types/src/internal_api/background.rs | 8 + smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + 15 files changed, 706 insertions(+), 1 deletion(-) create mode 100644 nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs create mode 100644 nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 67a4180dd2..0828cef892 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ 
b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -34,6 +34,7 @@ use nexus_saga_recovery::LastPass; use nexus_types::deployment::Blueprint; use nexus_types::internal_api::background::LookupRegionPortStatus; use nexus_types::internal_api::background::RegionReplacementDriverStatus; +use nexus_types::internal_api::background::RegionSnapshotReplacementGarbageCollectStatus; use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus; use nexus_types::inventory::BaseboardId; use omicron_uuid_kinds::CollectionUuid; @@ -1478,6 +1479,31 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { println!(" > {line}"); } + println!(" errors: {}", status.errors.len()); + for line in &status.errors { + println!(" > {line}"); + } + } + } + } else if name == "region_snapshot_replacement_garbage_collection" { + match serde_json::from_value::< + RegionSnapshotReplacementGarbageCollectStatus, + >(details.clone()) + { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + + Ok(status) => { + println!( + " total garbage collections requested: {}", + status.garbage_collect_requested.len(), + ); + for line in &status.garbage_collect_requested { + println!(" > {line}"); + } + println!(" errors: {}", status.errors.len()); for line in &status.errors { println!(" > {line}"); diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 59d9310b57..ec407cd123 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -127,6 +127,10 @@ task: "region_replacement_driver" drive region replacements forward to completion +task: "region_snapshot_replacement_garbage_collection" + clean up all region snapshot replacement step volumes + + task: "region_snapshot_replacement_start" detect if region snapshots need replacement and begin the process @@ -280,6 +284,10 @@ task: "region_replacement_driver" drive region replacements forward to completion +task: 
"region_snapshot_replacement_garbage_collection" + clean up all region snapshot replacement step volumes + + task: "region_snapshot_replacement_start" detect if region snapshots need replacement and begin the process @@ -420,6 +428,10 @@ task: "region_replacement_driver" drive region replacements forward to completion +task: "region_snapshot_replacement_garbage_collection" + clean up all region snapshot replacement step volumes + + task: "region_snapshot_replacement_start" detect if region snapshots need replacement and begin the process diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index db1cb1da61..41c5a15a1c 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -328,6 +328,10 @@ task: "region_replacement_driver" drive region replacements forward to completion +task: "region_snapshot_replacement_garbage_collection" + clean up all region snapshot replacement step volumes + + task: "region_snapshot_replacement_start" detect if region snapshots need replacement and begin the process @@ -570,6 +574,14 @@ task: "region_replacement_driver" number of region replacement finish sagas started ok: 0 number of errors: 0 +task: "region_snapshot_replacement_garbage_collection" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total garbage collections requested: 0 + errors: 0 + task: "region_snapshot_replacement_start" configured period: every s currently executing: no @@ -968,6 +980,14 @@ task: "region_replacement_driver" number of region replacement finish sagas started ok: 0 number of errors: 0 +task: "region_snapshot_replacement_garbage_collection" + configured period: every s + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total garbage collections requested: 0 + errors: 0 + task: 
"region_snapshot_replacement_start" configured period: every s currently executing: no diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index b222ebd23b..f6e60bb558 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -393,6 +393,9 @@ pub struct BackgroundTaskConfig { pub lookup_region_port: LookupRegionPortConfig, /// configuration for region snapshot replacement starter task pub region_snapshot_replacement_start: RegionSnapshotReplacementStartConfig, + /// configuration for region snapshot replacement garbage collection + pub region_snapshot_replacement_garbage_collection: + RegionSnapshotReplacementGarbageCollectionConfig, } #[serde_as] @@ -637,6 +640,14 @@ pub struct RegionSnapshotReplacementStartConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct RegionSnapshotReplacementGarbageCollectionConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct PackageConfig { @@ -885,6 +896,7 @@ mod test { saga_recovery.period_secs = 60 lookup_region_port.period_secs = 60 region_snapshot_replacement_start.period_secs = 30 + region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] type = "random" seed = 0 @@ -1051,6 +1063,10 @@ mod test { RegionSnapshotReplacementStartConfig { period_secs: Duration::from_secs(30), }, + region_snapshot_replacement_garbage_collection: + RegionSnapshotReplacementGarbageCollectionConfig { + period_secs: Duration::from_secs(30), + }, }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { @@ -1128,6 +1144,7 @@ mod test { saga_recovery.period_secs = 60 lookup_region_port.period_secs = 60 
region_snapshot_replacement_start.period_secs = 30 + region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] type = "random" "##, diff --git a/nexus/examples/config-second.toml b/nexus/examples/config-second.toml index 572de807d7..c87e1255b5 100644 --- a/nexus/examples/config-second.toml +++ b/nexus/examples/config-second.toml @@ -140,6 +140,7 @@ abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 3aebe35152..f844adccbe 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -126,6 +126,7 @@ abandoned_vmm_reaper.period_secs = 60 saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. 
diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index cc42a8f302..6bd805a491 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -108,6 +108,7 @@ use super::tasks::phantom_disks; use super::tasks::physical_disk_adoption; use super::tasks::region_replacement; use super::tasks::region_replacement_driver; +use super::tasks::region_snapshot_replacement_garbage_collect::*; use super::tasks::region_snapshot_replacement_start::*; use super::tasks::saga_recovery; use super::tasks::service_firewall_rules; @@ -163,6 +164,7 @@ pub struct BackgroundTasks { pub task_saga_recovery: Activator, pub task_lookup_region_port: Activator, pub task_region_snapshot_replacement_start: Activator, + pub task_region_snapshot_replacement_garbage_collection: Activator, // Handles to activate background tasks that do not get used by Nexus // at-large. These background tasks are implementation details as far as @@ -245,6 +247,8 @@ impl BackgroundTasksInitializer { task_saga_recovery: Activator::new(), task_lookup_region_port: Activator::new(), task_region_snapshot_replacement_start: Activator::new(), + task_region_snapshot_replacement_garbage_collection: Activator::new( + ), task_internal_dns_propagation: Activator::new(), task_external_dns_propagation: Activator::new(), @@ -307,6 +311,7 @@ impl BackgroundTasksInitializer { task_saga_recovery, task_lookup_region_port, task_region_snapshot_replacement_start, + task_region_snapshot_replacement_garbage_collection, // Add new background tasks here. Be sure to use this binding in a // call to `Driver::register()` below. That's what actually wires // up the Activator to the corresponding background task. 
@@ -739,7 +744,7 @@ impl BackgroundTasksInitializer { process", period: config.region_snapshot_replacement_start.period_secs, task_impl: Box::new(RegionSnapshotReplacementDetector::new( - datastore, + datastore.clone(), sagas.clone(), )), opctx: opctx.child(BTreeMap::new()), @@ -747,6 +752,22 @@ impl BackgroundTasksInitializer { activator: task_region_snapshot_replacement_start, }); + driver.register(TaskDefinition { + name: "region_snapshot_replacement_garbage_collection", + description: + "clean up all region snapshot replacement step volumes", + period: config + .region_snapshot_replacement_garbage_collection + .period_secs, + task_impl: Box::new(RegionSnapshotReplacementGarbageCollect::new( + datastore, + sagas.clone(), + )), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_region_snapshot_replacement_garbage_collection, + }); + driver } } diff --git a/nexus/src/app/background/tasks/mod.rs b/nexus/src/app/background/tasks/mod.rs index b0281afd9f..7ba68d0b80 100644 --- a/nexus/src/app/background/tasks/mod.rs +++ b/nexus/src/app/background/tasks/mod.rs @@ -25,6 +25,7 @@ pub mod phantom_disks; pub mod physical_disk_adoption; pub mod region_replacement; pub mod region_replacement_driver; +pub mod region_snapshot_replacement_garbage_collect; pub mod region_snapshot_replacement_start; pub mod saga_recovery; pub mod service_firewall_rules; diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs new file mode 100644 index 0000000000..4c66c166ff --- /dev/null +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs @@ -0,0 +1,265 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Background task for deleting volumes that stash a replaced region snapshot + +use crate::app::authn; +use crate::app::background::BackgroundTask; +use crate::app::saga::StartSaga; +use crate::app::sagas; +use crate::app::sagas::region_snapshot_replacement_garbage_collect::*; +use crate::app::sagas::NexusSaga; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::RegionSnapshotReplacement; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::internal_api::background::RegionSnapshotReplacementGarbageCollectStatus; +use serde_json::json; +use std::sync::Arc; + +pub struct RegionSnapshotReplacementGarbageCollect { + datastore: Arc<DataStore>, + sagas: Arc<dyn StartSaga>, +} + +impl RegionSnapshotReplacementGarbageCollect { + pub fn new(datastore: Arc<DataStore>, sagas: Arc<dyn StartSaga>) -> Self { + RegionSnapshotReplacementGarbageCollect { datastore, sagas } + } + + async fn send_garbage_collect_request( + &self, + opctx: &OpContext, + request: RegionSnapshotReplacement, + ) -> Result<(), omicron_common::api::external::Error> { + let Some(old_snapshot_volume_id) = request.old_snapshot_volume_id + else { + // This state is illegal! 
+ let s = format!( + "request {} old snapshot volume id is None!", + request.id, + ); + + return Err(omicron_common::api::external::Error::internal_error( + &s, + )); + }; + + let params = + sagas::region_snapshot_replacement_garbage_collect::Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + old_snapshot_volume_id, + request, + }; + + let saga_dag = + SagaRegionSnapshotReplacementGarbageCollect::prepare(&params)?; + self.sagas.saga_start(saga_dag).await + } + + async fn clean_up_region_snapshot_replacement_volumes( + &self, + opctx: &OpContext, + status: &mut RegionSnapshotReplacementGarbageCollectStatus, + ) { + let log = &opctx.log; + + let requests = match self + .datastore + .get_replacement_done_region_snapshot_replacements(opctx) + .await + { + Ok(requests) => requests, + + Err(e) => { + let s = format!("querying for requests to collect failed! {e}"); + error!(&log, "{s}"); + status.errors.push(s); + return; + } + }; + + for request in requests { + let request_id = request.id; + + let result = + self.send_garbage_collect_request(opctx, request.clone()).await; + + match result { + Ok(()) => { + let s = format!( + "region snapshot replacement garbage collect request \ + ok for {request_id}" + ); + + info!( + &log, + "{s}"; + "request.snapshot_id" => %request.old_snapshot_id, + "request.region_id" => %request.old_region_id, + "request.dataset_id" => %request.old_dataset_id, + ); + status.garbage_collect_requested.push(s); + } + + Err(e) => { + let s = format!( + "sending region snapshot replacement garbage collect \ + request failed: {e}", + ); + error!( + &log, + "{s}"; + "request.snapshot_id" => %request.old_snapshot_id, + "request.region_id" => %request.old_region_id, + "request.dataset_id" => %request.old_dataset_id, + ); + status.errors.push(s); + } + } + } + } +} + +impl BackgroundTask for RegionSnapshotReplacementGarbageCollect { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + 
async move { + let log = &opctx.log; + info!( + &log, + "region snapshot replacement garbage collect task started", + ); + + let mut status = + RegionSnapshotReplacementGarbageCollectStatus::default(); + + self.clean_up_region_snapshot_replacement_volumes( + opctx, + &mut status, + ) + .await; + + info!( + &log, + "region snapshot replacement garbage collect task done" + ); + + json!(status) + } + .boxed() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::app::background::init::test::NoopStartSaga; + use nexus_db_model::RegionSnapshotReplacement; + use nexus_db_model::RegionSnapshotReplacementState; + use nexus_test_utils_macros::nexus_test; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_garbage_collect_task( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let starter = Arc::new(NoopStartSaga::new()); + let mut task = RegionSnapshotReplacementGarbageCollect::new( + datastore.clone(), + starter.clone(), + ); + + // Noop test + let result: RegionSnapshotReplacementGarbageCollectStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + assert_eq!( + result, + RegionSnapshotReplacementGarbageCollectStatus::default() + ); + assert_eq!(starter.count_reset(), 0); + + // Add two region snapshot requests that need garbage collection + + let mut request = RegionSnapshotReplacement::new( + Uuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + ); + request.replacement_state = + RegionSnapshotReplacementState::ReplacementDone; + request.old_snapshot_volume_id = Some(Uuid::new_v4()); + + let request_1_id = request.id; + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request, + 
Uuid::new_v4(), + ) + .await + .unwrap(); + + let mut request = RegionSnapshotReplacement::new( + Uuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + ); + request.replacement_state = + RegionSnapshotReplacementState::ReplacementDone; + request.old_snapshot_volume_id = Some(Uuid::new_v4()); + + let request_2_id = request.id; + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request, + Uuid::new_v4(), + ) + .await + .unwrap(); + + // Activate the task - it should pick up the two requests + + let result: RegionSnapshotReplacementGarbageCollectStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + for error in &result.errors { + eprintln!("{error}"); + } + + assert_eq!(result.garbage_collect_requested.len(), 2); + + let s = format!( + "region snapshot replacement garbage collect request ok for \ + {request_1_id}" + ); + assert!(result.garbage_collect_requested.contains(&s)); + + let s = format!( + "region snapshot replacement garbage collect request ok for \ + {request_2_id}" + ); + assert!(result.garbage_collect_requested.contains(&s)); + + assert_eq!(result.errors.len(), 0); + + assert_eq!(starter.count_reset(), 2); + } +} diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 471118a5cb..926b983460 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -39,6 +39,7 @@ pub mod project_create; pub mod region_replacement_drive; pub mod region_replacement_finish; pub mod region_replacement_start; +pub mod region_snapshot_replacement_garbage_collect; pub mod region_snapshot_replacement_start; pub mod snapshot_create; pub mod snapshot_delete; @@ -194,6 +195,9 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); + ::register_actions( + &mut registry, + ); #[cfg(test)] ::register_actions(&mut registry); diff --git a/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs 
b/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs new file mode 100644 index 0000000000..e3c5143a68 --- /dev/null +++ b/nexus/src/app/sagas/region_snapshot_replacement_garbage_collect.rs @@ -0,0 +1,326 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Clean up the volume that stashes the target replaced during the region +//! snapshot replacement start saga. After that's done, change the region +//! snapshot replacement state to Running. This saga handles the following +//! region snapshot replacement request state transitions: +//! +//! ```text +//! ReplacementDone <-- +//! | +//! | | +//! v | +//! | +//! DeletingOldVolume -- +//! +//! | +//! v +//! +//! Running +//! ``` +//! +//! See the documentation for the "region snapshot replacement step" saga for +//! the next step(s) in the process. + +use super::{ + ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, + ACTION_GENERATE_ID, +}; +use crate::app::sagas::declare_saga_actions; +use crate::app::sagas::volume_delete; +use crate::app::{authn, db}; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// region snapshot replacement garbage collect saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub serialized_authn: authn::saga::Serialized, + /// The fake volume created for the snapshot that was replaced + // Note: this is only required in the params to build the volume-delete sub + // saga + pub old_snapshot_volume_id: Uuid, + pub request: db::model::RegionSnapshotReplacement, +} + +// region snapshot replacement garbage collect saga: actions + +declare_saga_actions! 
{ + region_snapshot_replacement_garbage_collect; + SET_SAGA_ID -> "unused_1" { + + rsrgs_set_saga_id + - rsrgs_set_saga_id_undo + } + UPDATE_REQUEST_RECORD -> "unused_2" { + + rsrgs_update_request_record + } +} + +// region snapshot replacement garbage collect saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionSnapshotReplacementGarbageCollect; +impl NexusSaga for SagaRegionSnapshotReplacementGarbageCollect { + const NAME: &'static str = "region-snapshot-replacement-garbage-collect"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_snapshot_replacement_garbage_collect_register_actions(registry); + } + + fn make_saga_dag( + params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "saga_id", + "GenerateSagaId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(set_saga_id_action()); + + let subsaga_params = volume_delete::Params { + serialized_authn: params.serialized_authn.clone(), + volume_id: params.old_snapshot_volume_id, + }; + + let subsaga_dag = { + let subsaga_builder = steno::DagBuilder::new(steno::SagaName::new( + volume_delete::SagaVolumeDelete::NAME, + )); + volume_delete::SagaVolumeDelete::make_saga_dag( + &subsaga_params, + subsaga_builder, + )? + }; + + builder.append(Node::constant( + "params_for_volume_delete_subsaga", + serde_json::to_value(&subsaga_params).map_err(|e| { + SagaInitError::SerializeError( + "params_for_volume_delete_subsaga".to_string(), + e, + ) + })?, + )); + + builder.append(Node::subsaga( + "volume_delete_subsaga_no_result", + subsaga_dag, + "params_for_volume_delete_subsaga", + )); + + builder.append(update_request_record_action()); + + Ok(builder.build()?) 
+ } +} + +// region snapshot replacement garbage collect saga: action implementations + +async fn rsrgs_set_saga_id( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::<Params>()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + &params.serialized_authn, + ); + + let saga_id = sagactx.lookup::<Uuid>("saga_id")?; + + // Change the request record here to an intermediate "deleting old volume" + // state to block out other sagas that will be triggered for the same + // request. + osagactx + .datastore() + .set_region_snapshot_replacement_deleting_old_volume( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn rsrgs_set_saga_id_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::<Params>()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + &params.serialized_authn, + ); + + let saga_id = sagactx.lookup::<Uuid>("saga_id")?; + + osagactx + .datastore() + .undo_set_region_snapshot_replacement_deleting_old_volume( + &opctx, + params.request.id, + saga_id, + ) + .await?; + + Ok(()) +} + +async fn rsrgs_update_request_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::<Params>()?; + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + &params.serialized_authn, + ); + + let saga_id = sagactx.lookup::<Uuid>("saga_id")?; + + // Now that the snapshot volume has been deleted, update the replacement + // request record to 'Running'. There is no undo step for this, it should + // succeed idempotently. 
+ + datastore + .set_region_snapshot_replacement_running( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +#[cfg(test)] +pub(crate) mod test { + use crate::app::sagas::region_snapshot_replacement_garbage_collect::{ + Params, SagaRegionSnapshotReplacementGarbageCollect, + }; + use nexus_db_model::RegionSnapshotReplacement; + use nexus_db_model::RegionSnapshotReplacementState; + use nexus_db_model::Volume; + use nexus_db_queries::authn::saga::Serialized; + use nexus_db_queries::context::OpContext; + use nexus_test_utils_macros::nexus_test; + use sled_agent_client::types::CrucibleOpts; + use sled_agent_client::types::VolumeConstructionRequest; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test(server = crate::Server)] + async fn test_region_snapshot_replacement_garbage_collect_saga( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + // Manually insert required records + let old_snapshot_volume_id = Uuid::new_v4(); + + let volume_construction_request = VolumeConstructionRequest::Volume { + id: old_snapshot_volume_id, + block_size: 0, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 0, + blocks_per_extent: 0, + extent_count: 0, + gen: 0, + opts: CrucibleOpts { + id: old_snapshot_volume_id, + target: vec![ + // XXX if you put something here, you'll need a + // synthetic dataset record + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: None, + }; + + let volume_data = + serde_json::to_string(&volume_construction_request).unwrap(); + + datastore + .volume_create(Volume::new(old_snapshot_volume_id, volume_data)) + 
.await + .unwrap(); + + let mut request = RegionSnapshotReplacement::new( + Uuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + ); + request.replacement_state = + RegionSnapshotReplacementState::ReplacementDone; + request.old_snapshot_volume_id = Some(old_snapshot_volume_id); + + datastore + .insert_region_snapshot_replacement_request_with_volume_id( + &opctx, + request.clone(), + Uuid::new_v4(), + ) + .await + .unwrap(); + + // Run the saga + let params = Params { + serialized_authn: Serialized::for_opctx(&opctx), + old_snapshot_volume_id, + request: request.clone(), + }; + + let _output = nexus + .sagas + .saga_execute::(params) + .await + .unwrap(); + + // Validate the state transition + let result = datastore + .get_region_snapshot_replacement_request_by_id(&opctx, request.id) + .await + .unwrap(); + + assert_eq!( + result.replacement_state, + RegionSnapshotReplacementState::Running + ); + + // Validate the Volume was deleted + assert!(datastore + .volume_get(old_snapshot_volume_id) + .await + .unwrap() + .is_none()); + } +} diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 35b55184b9..d9cbb5eb34 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -138,6 +138,7 @@ lookup_region_port.period_secs = 60 instance_updater.disable = true instance_updater.period_secs = 60 region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs index 2f8a411cf7..8e4b6b3013 100644 --- a/nexus/types/src/internal_api/background.rs +++ b/nexus/types/src/internal_api/background.rs @@ -28,3 +28,11 @@ pub struct RegionSnapshotReplacementStartStatus { pub start_invoked_ok: Vec, pub errors: Vec, } + +/// The status of a `region_snapshot_replacement_garbage_collect` 
background +/// task activation +#[derive(Serialize, Deserialize, Default, Debug, PartialEq, Eq)] +pub struct RegionSnapshotReplacementGarbageCollectStatus { + pub garbage_collect_requested: Vec<String>, + pub errors: Vec<String>, +} diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index 437615938f..2e3a8fe578 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -66,6 +66,7 @@ saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 instance_updater.period_secs = 30 region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] # by default, allocate across 3 distinct sleds diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 95dcca14ae..dbd61e953d 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -66,6 +66,7 @@ saga_recovery.period_secs = 600 lookup_region_port.period_secs = 60 instance_updater.period_secs = 30 region_snapshot_replacement_start.period_secs = 30 +region_snapshot_replacement_garbage_collection.period_secs = 30 [default_region_allocation_strategy] # by default, allocate without requirement for distinct sleds. From 4543057882bff644354e1c0cc897bbd4753510e7 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 15 Aug 2024 13:25:44 -0400 Subject: [PATCH 11/51] [common] Move policy constants to their own module (#6340) These constants really don't belong in `address.rs` as they are only tangentially related to addresses. Also add policy for replicated clickhouse. This will be used in a follow up PR. 
Fixes #6299 --- common/src/address.rs | 26 +------------ common/src/lib.rs | 1 + common/src/policy.rs | 40 ++++++++++++++++++++ nexus/reconfigurator/execution/src/dns.rs | 6 +-- nexus/reconfigurator/planning/src/system.rs | 3 +- nexus/reconfigurator/preparation/src/lib.rs | 7 ++-- nexus/src/app/deployment.rs | 6 +-- nexus/types/src/deployment/planning_input.rs | 18 +++++++++ sled-agent/src/rack_setup/plan/service.rs | 18 ++++++--- 9 files changed, 84 insertions(+), 41 deletions(-) create mode 100644 common/src/policy.rs diff --git a/common/src/address.rs b/common/src/address.rs index ba1193c7f0..c23f5c41ed 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -8,6 +8,7 @@ //! and Nexus, who need to agree upon addressing schemes. use crate::api::external::{self, Error}; +use crate::policy::{DNS_REDUNDANCY, MAX_DNS_REDUNDANCY}; use ipnetwork::Ipv6Network; use once_cell::sync::Lazy; use oxnet::{Ipv4Net, Ipv6Net}; @@ -25,31 +26,6 @@ pub const MAX_PORT: u16 = u16::MAX; /// minimum possible value for a tcp or udp port pub const MIN_PORT: u16 = u16::MIN; -/// The amount of redundancy for boundary NTP servers. -pub const BOUNDARY_NTP_REDUNDANCY: usize = 2; - -/// The amount of redundancy for Nexus services. -/// -/// This is used by both RSS (to distribute the initial set of services) and the -/// Reconfigurator (to know whether to add new Nexus zones) -pub const NEXUS_REDUNDANCY: usize = 3; - -/// The amount of redundancy for CockroachDb services. -/// -/// This is used by both RSS (to distribute the initial set of services) and the -/// Reconfigurator (to know whether to add new crdb zones) -pub const COCKROACHDB_REDUNDANCY: usize = 5; - -/// The amount of redundancy for internal DNS servers. -/// -/// Must be less than or equal to MAX_DNS_REDUNDANCY. -pub const DNS_REDUNDANCY: usize = 3; - -/// The maximum amount of redundancy for DNS servers. -/// -/// This determines the number of addresses which are reserved for DNS servers. 
-pub const MAX_DNS_REDUNDANCY: usize = 5; - pub const DNS_PORT: u16 = 53; pub const DNS_HTTP_PORT: u16 = 5353; pub const SLED_AGENT_PORT: u16 = 12345; diff --git a/common/src/lib.rs b/common/src/lib.rs index e4f53cbfab..6da32c56ba 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -26,6 +26,7 @@ pub mod backoff; pub mod cmd; pub mod disk; pub mod ledger; +pub mod policy; pub mod progenitor_operation_retry; pub mod update; pub mod vlan; diff --git a/common/src/policy.rs b/common/src/policy.rs new file mode 100644 index 0000000000..677dbfe2b9 --- /dev/null +++ b/common/src/policy.rs @@ -0,0 +1,40 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Fleet policy related functionality used by both Reconfigurator and RSS. + +/// The amount of redundancy for boundary NTP servers. +pub const BOUNDARY_NTP_REDUNDANCY: usize = 2; + +/// The amount of redundancy for Nexus services. +/// +/// This is used by both RSS (to distribute the initial set of services) and the +/// Reconfigurator (to know whether to add new Nexus zones) +pub const NEXUS_REDUNDANCY: usize = 3; + +/// The amount of redundancy for CockroachDb services. +/// +/// This is used by both RSS (to distribute the initial set of services) and the +/// Reconfigurator (to know whether to add new crdb zones) +pub const COCKROACHDB_REDUNDANCY: usize = 5; + +/// The amount of redundancy for internal DNS servers. +/// +/// Must be less than or equal to MAX_DNS_REDUNDANCY. +pub const DNS_REDUNDANCY: usize = 3; + +/// The maximum amount of redundancy for DNS servers. +/// +/// This determines the number of addresses which are reserved for DNS servers. 
+pub const MAX_DNS_REDUNDANCY: usize = 5; + +/// The amount of redundancy for clickhouse servers +/// +/// Clickhouse servers contain lazily replicated data +pub const CLICKHOUSE_SERVER_REDUNDANCY: usize = 3; + +/// The amount of redundancy for clickhouse keepers +/// +/// Keepers maintain strongly consistent metadata about data replication +pub const CLICKHOUSE_KEEPER_REDUNDANCY: usize = 5; diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 885ffa67d1..846d19ead3 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -502,13 +502,13 @@ mod test { use omicron_common::address::get_switch_zone_address; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; - use omicron_common::address::BOUNDARY_NTP_REDUNDANCY; - use omicron_common::address::COCKROACHDB_REDUNDANCY; - use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; use omicron_common::api::external::IdentityMetadataCreateParams; + use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY; + use omicron_common::policy::COCKROACHDB_REDUNDANCY; + use omicron_common::policy::NEXUS_REDUNDANCY; use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev::test_setup_log; use omicron_uuid_kinds::ExternalIpUuid; diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index f6989be9ef..7298db7a73 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -33,13 +33,13 @@ use nexus_types::inventory::SpType; use omicron_common::address::get_sled_address; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; -use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; use 
omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; use omicron_common::disk::DiskVariant; +use omicron_common::policy::NEXUS_REDUNDANCY; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; @@ -328,6 +328,7 @@ impl SystemDescription { target_cockroachdb_zone_count: self.target_cockroachdb_zone_count, target_cockroachdb_cluster_version: self .target_cockroachdb_cluster_version, + clickhouse_policy: None, }; let mut builder = PlanningInputBuilder::new( policy, diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index e0ba0f10ba..fc0e4638f8 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -33,13 +33,13 @@ use nexus_types::identity::Resource; use nexus_types::inventory::Collection; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; -use omicron_common::address::BOUNDARY_NTP_REDUNDANCY; -use omicron_common::address::COCKROACHDB_REDUNDANCY; -use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; use omicron_common::disk::DiskIdentity; +use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY; +use omicron_common::policy::COCKROACHDB_REDUNDANCY; +use omicron_common::policy::NEXUS_REDUNDANCY; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; @@ -82,6 +82,7 @@ impl PlanningInputFromDb<'_> { target_cockroachdb_zone_count: self.target_cockroachdb_zone_count, target_cockroachdb_cluster_version: self .target_cockroachdb_cluster_version, + clickhouse_policy: None, }; let mut builder = PlanningInputBuilder::new( policy, diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index e9095cc991..50ae332d3f 
100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -17,9 +17,6 @@ use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; -use omicron_common::address::BOUNDARY_NTP_REDUNDANCY; -use omicron_common::address::COCKROACHDB_REDUNDANCY; -use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; @@ -28,6 +25,9 @@ use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; +use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY; +use omicron_common::policy::COCKROACHDB_REDUNDANCY; +use omicron_common::policy::NEXUS_REDUNDANCY; use slog_error_chain::InlineErrorChain; use uuid::Uuid; diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 1af3636d0e..c6a61aac78 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -709,6 +709,23 @@ pub struct Policy { /// at present this is hardcoded based on the version of CockroachDB we /// presently ship and the tick-tock pattern described in RFD 469. pub target_cockroachdb_cluster_version: CockroachDbClusterVersion, + + /// Policy information for a replicated clickhouse setup + /// + /// If this policy is `None`, then we are using a single node clickhouse + /// setup. Eventually we will only allow multi-node setups and this will no + /// longer be an option. 
+ pub clickhouse_policy: Option, +} + +/// Policy for replicated clickhouse setups +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ClickhousePolicy { + /// Desired number of clickhouse servers + pub target_servers: usize, + + /// Desired number of clickhouse keepers + pub target_keepers: usize, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -761,6 +778,7 @@ impl PlanningInputBuilder { target_cockroachdb_zone_count: 0, target_cockroachdb_cluster_version: CockroachDbClusterVersion::POLICY, + clickhouse_policy: None, }, internal_dns_version: Generation::new(), external_dns_version: Generation::new(), diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index ff137f131f..c9ed0c2248 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -14,10 +14,8 @@ use nexus_sled_agent_shared::inventory::{ }; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet, - BOUNDARY_NTP_REDUNDANCY, COCKROACHDB_REDUNDANCY, DENDRITE_PORT, - DNS_HTTP_PORT, DNS_PORT, DNS_REDUNDANCY, MAX_DNS_REDUNDANCY, MGD_PORT, - MGS_PORT, NEXUS_REDUNDANCY, NTP_PORT, NUM_SOURCE_NAT_PORTS, - RSS_RESERVED_ADDRESSES, SLED_PREFIX, + DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, MGD_PORT, MGS_PORT, NTP_PORT, + NUM_SOURCE_NAT_PORTS, RSS_RESERVED_ADDRESSES, SLED_PREFIX, }; use omicron_common::api::external::{Generation, MacAddr, Vni}; use omicron_common::api::internal::shared::{ @@ -31,6 +29,10 @@ use omicron_common::disk::{ DiskVariant, OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig, }; use omicron_common::ledger::{self, Ledger, Ledgerable}; +use omicron_common::policy::{ + BOUNDARY_NTP_REDUNDANCY, COCKROACHDB_REDUNDANCY, DNS_REDUNDANCY, + MAX_DNS_REDUNDANCY, NEXUS_REDUNDANCY, +}; use omicron_uuid_kinds::{GenericUuid, OmicronZoneUuid, SledUuid, ZpoolUuid}; use rand::prelude::SliceRandom; use schemars::JsonSchema; @@ -54,11 +56,15 @@ use uuid::Uuid; 
const OXIMETER_COUNT: usize = 1; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // when Nexus provisions Clickhouse. -// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Set to 2 once we enable replicated ClickHouse +// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use +// omicron_common::policy::CLICKHOUSE_SERVER_REDUNDANCY once we enable +// replicated ClickHouse const CLICKHOUSE_COUNT: usize = 1; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // when Nexus provisions Clickhouse keeper. -// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Set to 3 once we enable replicated ClickHouse +// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use +// omicron_common::policy::CLICKHOUSE_KEEPER_REDUNDANCY once we enable +// replicated ClickHouse const CLICKHOUSE_KEEPER_COUNT: usize = 0; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove. // when Nexus provisions Crucible. From 64ef70cb973a9a1f0810c8e01b3009764ffb7531 Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 15 Aug 2024 10:34:46 -0700 Subject: [PATCH 12/51] [installinator] use camino-tempfile rather than tempfile (#6346) I was in this code and spotted this -- we can just use camino-tempfile here. 
--- Cargo.lock | 2 +- installinator/Cargo.toml | 3 +-- installinator/src/async_temp_file.rs | 12 ++++++------ installinator/src/write.rs | 4 ++-- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b6cadd5e69..544097d1e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3832,6 +3832,7 @@ dependencies = [ "buf-list", "bytes", "camino", + "camino-tempfile", "cancel-safe-futures", "clap", "display-error-chain", @@ -3861,7 +3862,6 @@ dependencies = [ "slog-envlogger", "slog-term", "smf", - "tempfile", "test-strategy", "thiserror", "tokio", diff --git a/installinator/Cargo.toml b/installinator/Cargo.toml index 00dfb6440b..0d59950a2a 100644 --- a/installinator/Cargo.toml +++ b/installinator/Cargo.toml @@ -13,6 +13,7 @@ async-trait.workspace = true buf-list.workspace = true bytes.workspace = true camino.workspace = true +camino-tempfile.workspace = true cancel-safe-futures.workspace = true clap.workspace = true display-error-chain.workspace = true @@ -37,7 +38,6 @@ slog-async.workspace = true slog-envlogger.workspace = true slog-term.workspace = true smf.workspace = true -tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["full"] } tufaceous-lib.workspace = true @@ -50,7 +50,6 @@ omicron-test-utils.workspace = true hex-literal.workspace = true partial-io.workspace = true proptest.workspace = true -tempfile.workspace = true test-strategy.workspace = true tokio = { workspace = true, features = ["test-util"] } tokio-stream.workspace = true diff --git a/installinator/src/async_temp_file.rs b/installinator/src/async_temp_file.rs index c884908ac8..168fffa2aa 100644 --- a/installinator/src/async_temp_file.rs +++ b/installinator/src/async_temp_file.rs @@ -3,13 +3,13 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use camino::Utf8PathBuf; +use camino_tempfile::NamedUtf8TempFile; +use camino_tempfile::Utf8PathPersistError; +use camino_tempfile::Utf8TempPath; use std::io; use std::pin::Pin; use std::task::Context; use std::task::Poll; -use tempfile::NamedTempFile; -use tempfile::PathPersistError; -use tempfile::TempPath; use tokio::fs::File; use tokio::io::AsyncWrite; @@ -18,7 +18,7 @@ pub(crate) struct AsyncNamedTempFile { // in our `persist()` method below. This allows us to drop the temp path // (deleting the temporary file) if we're dropped before `persist()` is // called. - temp_path: Option, + temp_path: Option, destination: Utf8PathBuf, inner: File, } @@ -41,7 +41,7 @@ impl AsyncNamedTempFile { .to_owned(); let temp_file = - tokio::task::spawn_blocking(|| NamedTempFile::new_in(parent)) + tokio::task::spawn_blocking(|| NamedUtf8TempFile::new_in(parent)) .await .unwrap()?; let temp_path = temp_file.into_temp_path(); @@ -62,7 +62,7 @@ impl AsyncNamedTempFile { tokio::task::spawn_blocking(move || temp_path.persist(&destination)) .await .unwrap() - .map_err(|PathPersistError { error, .. }| error) + .map_err(|Utf8PathPersistError { error, .. 
}| error) } } diff --git a/installinator/src/write.rs b/installinator/src/write.rs index fdc83cffa2..583c5a7b51 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -918,6 +918,7 @@ mod tests { use anyhow::Result; use bytes::{Buf, Bytes}; use camino::Utf8Path; + use camino_tempfile::tempdir; use futures::StreamExt; use installinator_common::{ Event, InstallinatorCompletionMetadata, InstallinatorComponent, @@ -934,7 +935,6 @@ mod tests { PartialAsyncWrite, PartialOp, }; use proptest::prelude::*; - use tempfile::tempdir; use test_strategy::proptest; use tokio::io::AsyncReadExt; use tokio::sync::Mutex; @@ -1032,7 +1032,7 @@ mod tests { ) -> Result<()> { let logctx = test_setup_log("test_write_artifact"); let tempdir = tempdir()?; - let tempdir_path: &Utf8Path = tempdir.path().try_into()?; + let tempdir_path = tempdir.path(); let destination_host = tempdir_path.join("test-host.bin"); let destination_control_plane = From 93aa572287ed927960ad138304d0e3055674f6f4 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Thu, 15 Aug 2024 15:40:53 -0400 Subject: [PATCH 13/51] Use the correct background task name (#6348) region_snapshot_replacement -> region_snapshot_replacement_start --- dev-tools/omdb/src/bin/omdb/nexus.rs | 2 +- dev-tools/omdb/tests/successes.out | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 0828cef892..ede2743404 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -1453,7 +1453,7 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { } } }; - } else if name == "region_snapshot_replacement" { + } else if name == "region_snapshot_replacement_start" { match serde_json::from_value::( details.clone(), ) { diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 41c5a15a1c..9d432525c3 100644 --- a/dev-tools/omdb/tests/successes.out 
+++ b/dev-tools/omdb/tests/successes.out @@ -587,7 +587,9 @@ task: "region_snapshot_replacement_start" currently executing: no last completed activation: , triggered by a periodic timer firing started at (s ago) and ran for ms -warning: unknown background task: "region_snapshot_replacement_start" (don't know how to interpret details: Object {"errors": Array [], "requests_created_ok": Array [], "start_invoked_ok": Array []}) + total requests created ok: 0 + total start saga invoked ok: 0 + errors: 0 task: "saga_recovery" configured period: every 10m @@ -993,7 +995,9 @@ task: "region_snapshot_replacement_start" currently executing: no last completed activation: , triggered by a periodic timer firing started at (s ago) and ran for ms -warning: unknown background task: "region_snapshot_replacement_start" (don't know how to interpret details: Object {"errors": Array [], "requests_created_ok": Array [], "start_invoked_ok": Array []}) + total requests created ok: 0 + total start saga invoked ok: 0 + errors: 0 task: "saga_recovery" configured period: every 10m From 5646755848e1f3ace5066ae5196c40e15d088133 Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 15 Aug 2024 13:22:39 -0700 Subject: [PATCH 14/51] [meta] remove profile override for rand_hc (#6351) rand_hc is no longer in our dependency graph. (Thanks @ahl for pointing this out!) 
--- Cargo.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a1ae9858ab..92c4ead65f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -734,8 +734,6 @@ opt-level = 3 opt-level = 3 [profile.dev.package.rand_core] opt-level = 3 -[profile.dev.package.rand_hc] -opt-level = 3 [profile.dev.package.rand_xorshift] opt-level = 3 [profile.dev.package.rsa] From 66ac7b328889e1921f84cc6b082765023daf722a Mon Sep 17 00:00:00 2001 From: Adam Leventhal Date: Thu, 15 Aug 2024 15:40:49 -0700 Subject: [PATCH 15/51] follow trust-dns to its new name: hickory (#5912) --- Cargo.lock | 204 ++++++++++++++----------- Cargo.toml | 8 +- clients/oxide-client/Cargo.toml | 2 +- clients/oxide-client/src/lib.rs | 19 +-- dns-server/Cargo.toml | 13 +- dns-server/src/bin/dns-server.rs | 2 +- dns-server/src/dns_server.rs | 33 ++-- dns-server/src/lib.rs | 18 ++- dns-server/src/storage.rs | 10 +- dns-server/tests/basic_test.rs | 24 +-- end-to-end-tests/Cargo.toml | 2 +- end-to-end-tests/src/helpers/ctx.rs | 2 +- internal-dns-cli/Cargo.toml | 2 +- internal-dns-cli/src/bin/dnswait.rs | 6 +- internal-dns/Cargo.toml | 2 +- internal-dns/src/resolver.rs | 46 ++++-- nexus/Cargo.toml | 4 +- nexus/src/app/external_dns.rs | 23 ++- nexus/test-utils/Cargo.toml | 2 +- nexus/test-utils/src/lib.rs | 18 +-- nexus/tests/integration_tests/silos.rs | 4 +- wicketd/Cargo.toml | 2 +- wicketd/src/preflight_check/uplink.rs | 38 ++--- workspace-hack/Cargo.toml | 4 +- 24 files changed, 268 insertions(+), 220 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 544097d1e5..5fd17fd158 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2106,6 +2106,10 @@ dependencies = [ "dns-service-client", "dropshot", "expectorate", + "hickory-client", + "hickory-proto", + "hickory-resolver", + "hickory-server", "http 0.2.12", "omicron-test-utils", "omicron-workspace-hack", @@ -2125,10 +2129,6 @@ dependencies = [ "thiserror", "tokio", "toml 0.8.19", - "trust-dns-client", - "trust-dns-proto", - "trust-dns-resolver", - 
"trust-dns-server", "uuid", ] @@ -2408,6 +2408,7 @@ dependencies = [ "clap", "colored", "dhcproto", + "hickory-resolver", "http 0.2.12", "humantime", "hyper 0.14.30", @@ -2428,7 +2429,6 @@ dependencies = [ "socket2 0.5.7", "tokio", "toml 0.8.19", - "trust-dns-resolver", "uuid", ] @@ -2450,6 +2450,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "enum-as-inner" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.74", +] + [[package]] name = "env_logger" version = "0.9.3" @@ -3318,6 +3330,90 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" +[[package]] +name = "hickory-client" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab9683b08d8f8957a857b0236455d80e1886eaa8c6178af556aa7871fb61b55" +dependencies = [ + "cfg-if", + "data-encoding", + "futures-channel", + "futures-util", + "hickory-proto", + "once_cell", + "radix_trie", + "rand", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "hickory-proto" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07698b8420e2f0d6447a436ba999ec85d8fbf2a398bbd737b82cac4a2e96e512" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner 0.6.0", + "futures-channel", + "futures-io", + "futures-util", + "idna 0.4.0", + "ipnet", + "once_cell", + "rand", + "thiserror", + "tinyvec", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "hickory-resolver" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28757f23aa75c98f254cf0405e6d8c25b831b32921b050a66692427679b1f243" +dependencies = [ + "cfg-if", + "futures-util", + "hickory-proto", + 
"ipconfig", + "lru-cache", + "once_cell", + "parking_lot 0.12.2", + "rand", + "resolv-conf", + "smallvec 1.13.2", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "hickory-server" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be0e43c556b9b3fdb6c7c71a9a32153a2275d02419e3de809e520bfcfe40c37" +dependencies = [ + "async-trait", + "bytes", + "cfg-if", + "enum-as-inner 0.6.0", + "futures-util", + "hickory-proto", + "serde", + "thiserror", + "time", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "highway" version = "1.2.0" @@ -3681,6 +3777,16 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "idna" version = "0.5.0" @@ -3947,6 +4053,7 @@ dependencies = [ "dropshot", "expectorate", "futures", + "hickory-resolver", "hyper 0.14.30", "omicron-common", "omicron-test-utils", @@ -3961,7 +4068,6 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "trust-dns-resolver", "uuid", ] @@ -3972,12 +4078,12 @@ dependencies = [ "anyhow", "clap", "dropshot", + "hickory-resolver", "internal-dns", "omicron-common", "omicron-workspace-hack", "slog", "tokio", - "trust-dns-resolver", ] [[package]] @@ -5292,6 +5398,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "headers", + "hickory-resolver", "http 0.2.12", "hyper 0.14.30", "illumos-utils", @@ -5318,7 +5425,6 @@ dependencies = [ "slog", "tokio", "tokio-util", - "trust-dns-resolver", "uuid", ] @@ -5920,6 +6026,7 @@ dependencies = [ "gateway-test-utils", "headers", "hex", + "hickory-resolver", "http 0.2.12", "httptest", "hubtools", @@ -6012,7 +6119,6 @@ dependencies = [ "tokio-postgres", "tokio-util", "tough", - "trust-dns-resolver", "tufaceous", "tufaceous-lib", 
"update-common", @@ -6368,6 +6474,7 @@ dependencies = [ "group", "hashbrown 0.14.5", "hex", + "hickory-proto", "hmac", "hyper 0.14.30", "indexmap 2.4.0", @@ -6431,7 +6538,6 @@ dependencies = [ "toml_edit 0.19.15", "toml_edit 0.22.20", "tracing", - "trust-dns-proto", "unicode-bidi", "unicode-normalization", "unicode-xid", @@ -6677,6 +6783,7 @@ dependencies = [ "base64 0.22.1", "chrono", "futures", + "hickory-resolver", "http 0.2.12", "hyper 0.14.30", "omicron-workspace-hack", @@ -6688,7 +6795,6 @@ dependencies = [ "serde_json", "thiserror", "tokio", - "trust-dns-resolver", "uuid", ] @@ -10573,15 +10679,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - [[package]] name = "toml" version = "0.7.8" @@ -10754,26 +10851,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "trust-dns-client" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c408c32e6a9dbb38037cece35740f2cf23c875d8ca134d33631cec83f74d3fe" -dependencies = [ - "cfg-if", - "data-encoding", - "futures-channel", - "futures-util", - "lazy_static", - "radix_trie", - "rand", - "thiserror", - "time", - "tokio", - "tracing", - "trust-dns-proto", -] - [[package]] name = "trust-dns-proto" version = "0.22.0" @@ -10783,7 +10860,7 @@ dependencies = [ "async-trait", "cfg-if", "data-encoding", - "enum-as-inner", + "enum-as-inner 0.5.1", "futures-channel", "futures-io", "futures-util", @@ -10794,53 +10871,10 @@ dependencies = [ "smallvec 1.13.2", "thiserror", "tinyvec", - "tokio", "tracing", "url", ] -[[package]] -name = "trust-dns-resolver" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aff21aa4dcefb0a1afbfac26deb0adc93888c7d295fb63ab273ef276ba2b7cfe" -dependencies = [ - "cfg-if", - "futures-util", - "ipconfig", 
- "lazy_static", - "lru-cache", - "parking_lot 0.12.2", - "resolv-conf", - "smallvec 1.13.2", - "thiserror", - "tokio", - "tracing", - "trust-dns-proto", -] - -[[package]] -name = "trust-dns-server" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99022f9befa6daec2a860be68ac28b1f0d9d7ccf441d8c5a695e35a58d88840d" -dependencies = [ - "async-trait", - "bytes", - "cfg-if", - "enum-as-inner", - "futures-executor", - "futures-util", - "serde", - "thiserror", - "time", - "tokio", - "toml 0.5.11", - "tracing", - "trust-dns-client", - "trust-dns-proto", -] - [[package]] name = "try-lock" version = "0.2.5" @@ -11707,6 +11741,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "hex", + "hickory-resolver", "http 0.2.12", "hubtools", "hyper 0.14.30", @@ -11746,7 +11781,6 @@ dependencies = [ "tokio-util", "toml 0.8.19", "tough", - "trust-dns-resolver", "tufaceous", "tufaceous-lib", "update-common", diff --git a/Cargo.toml b/Cargo.toml index 92c4ead65f..2bb189b6c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -362,6 +362,10 @@ headers = "0.3.9" heck = "0.5" hex = "0.4.3" hex-literal = "0.4.1" +hickory-client = "0.24.1" +hickory-proto = "0.24.1" +hickory-resolver = "0.24.1" +hickory-server = "0.24.1" highway = "1.2.0" hkdf = "0.12.4" http = "0.2.12" @@ -572,10 +576,6 @@ tokio-util = { version = "0.7.11", features = ["io", "io-util"] } toml = "0.8.19" toml_edit = "0.22.20" tough = { version = "0.17.1", features = [ "http" ] } -trust-dns-client = "0.22" -trust-dns-proto = "0.22" -trust-dns-resolver = "0.22" -trust-dns-server = "0.22" trybuild = "1.0.99" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } diff --git a/clients/oxide-client/Cargo.toml b/clients/oxide-client/Cargo.toml index f2adcacb1b..183640946f 100644 --- a/clients/oxide-client/Cargo.toml +++ b/clients/oxide-client/Cargo.toml @@ -12,6 +12,7 @@ anyhow.workspace = true base64.workspace = true chrono.workspace = true futures.workspace 
= true +hickory-resolver.workspace = true http.workspace = true hyper.workspace = true progenitor.workspace = true @@ -22,6 +23,5 @@ serde.workspace = true serde_json.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "net" ] } -trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/clients/oxide-client/src/lib.rs b/clients/oxide-client/src/lib.rs index 07a190c38e..249ea18146 100644 --- a/clients/oxide-client/src/lib.rs +++ b/clients/oxide-client/src/lib.rs @@ -7,13 +7,13 @@ use anyhow::anyhow; use anyhow::Context; use futures::FutureExt; +use hickory_resolver::config::{ + NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use hickory_resolver::TokioAsyncResolver; use std::net::SocketAddr; use std::sync::Arc; use thiserror::Error; -use trust_dns_resolver::config::{ - NameServerConfig, Protocol, ResolverConfig, ResolverOpts, -}; -use trust_dns_resolver::TokioAsyncResolver; progenitor::generate_api!( spec = "../../openapi/nexus.json", @@ -46,14 +46,15 @@ impl CustomDnsResolver { socket_addr: dns_addr, protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); + let mut resolver_opts = ResolverOpts::default(); + // Enable edns for potentially larger records + resolver_opts.edns0 = true; - let resolver = Arc::new( - TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) - .context("failed to create resolver")?, - ); + let resolver = + Arc::new(TokioAsyncResolver::tokio(resolver_config, resolver_opts)); Ok(CustomDnsResolver { dns_addr, resolver }) } diff --git a/dns-server/Cargo.toml b/dns-server/Cargo.toml index d11dabaf85..b4516b8b77 100644 --- a/dns-server/Cargo.toml +++ b/dns-server/Cargo.toml @@ -15,24 +15,24 @@ clap.workspace = true dns-server-api.workspace = true dns-service-client.workspace = true dropshot.workspace = true +hickory-client.workspace = true +hickory-proto.workspace 
= true +hickory-resolver.workspace = true +hickory-server.workspace = true http.workspace = true pretty-hex.workspace = true schemars.workspace = true serde.workspace = true serde_json.workspace = true sled.workspace = true -slog.workspace = true -slog-term.workspace = true slog-async.workspace = true slog-envlogger.workspace = true +slog-term.workspace = true +slog.workspace = true tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } toml.workspace = true -trust-dns-client.workspace = true -trust-dns-proto.workspace = true -trust-dns-resolver.workspace = true -trust-dns-server.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true @@ -44,4 +44,3 @@ openapiv3.workspace = true openapi-lint.workspace = true serde_json.workspace = true subprocess.workspace = true -trust-dns-resolver.workspace = true diff --git a/dns-server/src/bin/dns-server.rs b/dns-server/src/bin/dns-server.rs index 52a9c17c0d..9e8d098ee2 100644 --- a/dns-server/src/bin/dns-server.rs +++ b/dns-server/src/bin/dns-server.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! Executable that starts the HTTP-configurable DNS server used for both -//! internal DNS (RFD 248) and extenral DNS (RFD 357) for the Oxide system +//! 
internal DNS (RFD 248) and external DNS (RFD 357) for the Oxide system use anyhow::anyhow; use anyhow::Context; diff --git a/dns-server/src/dns_server.rs b/dns-server/src/dns_server.rs index 5c761f2aa3..4ecbe382c8 100644 --- a/dns-server/src/dns_server.rs +++ b/dns-server/src/dns_server.rs @@ -13,6 +13,19 @@ use crate::storage::Store; use anyhow::anyhow; use anyhow::Context; use dns_server_api::DnsRecord; +use hickory_proto::op::Header; +use hickory_proto::op::ResponseCode; +use hickory_proto::rr::rdata::SRV; +use hickory_proto::rr::RData; +use hickory_proto::rr::Record; +use hickory_proto::rr::RecordType; +use hickory_proto::serialize::binary::BinDecodable; +use hickory_proto::serialize::binary::BinDecoder; +use hickory_proto::serialize::binary::BinEncoder; +use hickory_resolver::Name; +use hickory_server::authority::MessageRequest; +use hickory_server::authority::MessageResponse; +use hickory_server::authority::MessageResponseBuilder; use pretty_hex::*; use serde::Deserialize; use slog::{debug, error, info, o, trace, Logger}; @@ -21,17 +34,6 @@ use std::str::FromStr; use std::sync::Arc; use thiserror::Error; use tokio::net::UdpSocket; -use trust_dns_proto::op::header::Header; -use trust_dns_proto::op::response_code::ResponseCode; -use trust_dns_proto::rr::rdata::SRV; -use trust_dns_proto::rr::record_data::RData; -use trust_dns_proto::rr::record_type::RecordType; -use trust_dns_proto::rr::{Name, Record}; -use trust_dns_proto::serialize::binary::{ - BinDecodable, BinDecoder, BinEncoder, -}; -use trust_dns_server::authority::MessageResponse; -use trust_dns_server::authority::{MessageRequest, MessageResponseBuilder}; use uuid::Uuid; /// Configuration related to the DNS server @@ -167,7 +169,10 @@ async fn handle_dns_packet(request: Request) { Err(error) => { let header = Header::response_from_request(mr.header()); let rb_servfail = MessageResponseBuilder::from_message_request(&mr); - error!(log, "failed to handle incoming DNS message: {:#}", error); + error!( + log, 
+ "failed to handle incoming DNS message: {:#?} {:#}", mr, error + ); match error { RequestError::NxDomain(_) => { let rb_nxdomain = @@ -222,7 +227,7 @@ fn dns_record_to_record( let mut a = Record::new(); a.set_name(name.clone()) .set_rr_type(RecordType::A) - .set_data(Some(RData::A(addr))); + .set_data(Some(RData::A(addr.into()))); Ok(a) } @@ -230,7 +235,7 @@ fn dns_record_to_record( let mut aaaa = Record::new(); aaaa.set_name(name.clone()) .set_rr_type(RecordType::AAAA) - .set_data(Some(RData::AAAA(addr))); + .set_data(Some(RData::AAAA(addr.into()))); Ok(aaaa) } diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index 424159e41d..8abd3b945e 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -47,13 +47,13 @@ pub mod http_server; pub mod storage; use anyhow::{anyhow, Context}; +use hickory_resolver::config::NameServerConfig; +use hickory_resolver::config::Protocol; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::TokioAsyncResolver; use slog::o; use std::net::SocketAddr; -use trust_dns_resolver::config::NameServerConfig; -use trust_dns_resolver::config::Protocol; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::TokioAsyncResolver; /// Starts both the HTTP and DNS servers over a given store. 
pub async fn start_servers( @@ -167,12 +167,14 @@ impl TransientServer { socket_addr: self.dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); + let mut resolver_opts = ResolverOpts::default(); + // Enable edns for potentially larger records + resolver_opts.edns0 = true; let resolver = - TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) - .context("creating DNS resolver")?; + TokioAsyncResolver::tokio(resolver_config, resolver_opts); Ok(resolver) } } diff --git a/dns-server/src/storage.rs b/dns-server/src/storage.rs index 85b2e79b8b..b3141f6751 100644 --- a/dns-server/src/storage.rs +++ b/dns-server/src/storage.rs @@ -95,6 +95,8 @@ use anyhow::{anyhow, Context}; use camino::Utf8PathBuf; use dns_server_api::{DnsConfig, DnsConfigParams, DnsConfigZone, DnsRecord}; +use hickory_proto::rr::LowerName; +use hickory_resolver::Name; use serde::{Deserialize, Serialize}; use sled::transaction::ConflictableTransactionError; use slog::{debug, error, info, o, warn}; @@ -104,8 +106,6 @@ use std::sync::atomic::Ordering; use std::sync::Arc; use thiserror::Error; use tokio::sync::Mutex; -use trust_dns_client::rr::LowerName; -use trust_dns_client::rr::Name; const KEY_CONFIG: &'static str = "config"; @@ -586,7 +586,7 @@ impl Store { /// If the returned set would have been empty, returns `QueryError::NoName`. 
pub(crate) fn query( &self, - mr: &trust_dns_server::authority::MessageRequest, + mr: &hickory_server::authority::MessageRequest, ) -> Result, QueryError> { let name = mr.query().name(); let orig_name = mr.query().original().name(); @@ -784,14 +784,14 @@ mod test { use dns_server_api::DnsConfigParams; use dns_server_api::DnsConfigZone; use dns_server_api::DnsRecord; + use hickory_proto::rr::LowerName; + use hickory_resolver::Name; use omicron_test_utils::dev::test_setup_log; use std::collections::BTreeSet; use std::collections::HashMap; use std::net::Ipv6Addr; use std::str::FromStr; use std::sync::Arc; - use trust_dns_client::rr::LowerName; - use trust_dns_client::rr::Name; /// As usual, `TestContext` groups the various pieces we need in a bunch of /// our tests and helps make sure they get cleaned up properly. diff --git a/dns-server/tests/basic_test.rs b/dns-server/tests/basic_test.rs index b3b7f37378..fa5bfea468 100644 --- a/dns-server/tests/basic_test.rs +++ b/dns-server/tests/basic_test.rs @@ -9,6 +9,12 @@ use dns_service_client::{ Client, }; use dropshot::{test_util::LogContext, HandlerTaskMode}; +use hickory_resolver::error::ResolveErrorKind; +use hickory_resolver::TokioAsyncResolver; +use hickory_resolver::{ + config::{NameServerConfig, Protocol, ResolverConfig, ResolverOpts}, + proto::op::ResponseCode, +}; use omicron_test_utils::dev::test_setup_log; use slog::o; use std::{ @@ -16,12 +22,6 @@ use std::{ net::Ipv6Addr, net::{IpAddr, Ipv4Addr}, }; -use trust_dns_resolver::error::ResolveErrorKind; -use trust_dns_resolver::TokioAsyncResolver; -use trust_dns_resolver::{ - config::{NameServerConfig, Protocol, ResolverConfig, ResolverOpts}, - proto::op::ResponseCode, -}; const TEST_ZONE: &'static str = "oxide.internal"; @@ -374,17 +374,19 @@ async fn init_client_server( ) .await?; - let mut rc = ResolverConfig::new(); - rc.add_name_server(NameServerConfig { + let mut resolver_config = ResolverConfig::new(); + resolver_config.add_name_server(NameServerConfig { 
socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); + let mut resolver_opts = ResolverOpts::default(); + // Enable edns for potentially larger records + resolver_opts.edns0 = true; - let resolver = - TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); + let resolver = TokioAsyncResolver::tokio(resolver_config, resolver_opts); let client = Client::new(&format!("http://{}", dropshot_server.local_addr()), log); diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index 781f3fb1c6..b2400f7603 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -26,7 +26,7 @@ serde_json.workspace = true sled-agent-types.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } toml.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true ispf.workspace = true diff --git a/end-to-end-tests/src/helpers/ctx.rs b/end-to-end-tests/src/helpers/ctx.rs index d9a2d7027a..5363557502 100644 --- a/end-to-end-tests/src/helpers/ctx.rs +++ b/end-to-end-tests/src/helpers/ctx.rs @@ -1,6 +1,7 @@ use crate::helpers::generate_name; use anyhow::{anyhow, Context as _, Result}; use chrono::Utc; +use hickory_resolver::error::ResolveErrorKind; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{Name, ProjectCreate}; use oxide_client::CustomDnsResolver; @@ -13,7 +14,6 @@ use std::net::IpAddr; use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; -use trust_dns_resolver::error::ResolveErrorKind; use uuid::Uuid; const RSS_CONFIG_STR: &str = include_str!(concat!( diff --git a/internal-dns-cli/Cargo.toml b/internal-dns-cli/Cargo.toml index dae0af0280..3e34c21622 100644 --- a/internal-dns-cli/Cargo.toml +++ b/internal-dns-cli/Cargo.toml @@ -11,9 +11,9 @@ 
workspace = true anyhow.workspace = true clap.workspace = true dropshot.workspace = true +hickory-resolver.workspace = true internal-dns.workspace = true omicron-common.workspace = true slog.workspace = true tokio.workspace = true -trust-dns-resolver.workspace = true omicron-workspace-hack.workspace = true diff --git a/internal-dns-cli/src/bin/dnswait.rs b/internal-dns-cli/src/bin/dnswait.rs index 9e003ed14f..8dbd675d64 100644 --- a/internal-dns-cli/src/bin/dnswait.rs +++ b/internal-dns-cli/src/bin/dnswait.rs @@ -65,10 +65,8 @@ async fn main() -> Result<()> { let resolver = if opt.nameserver_addresses.is_empty() { info!(&log, "using system configuration"); - let async_resolver = - trust_dns_resolver::AsyncResolver::tokio_from_system_conf() - .context("initializing resolver from system configuration")?; - Resolver::new_with_resolver(log.clone(), async_resolver) + Resolver::new_from_system_conf(log.clone()) + .context("initializing resolver from system configuration")? } else { let addrs = opt.nameserver_addresses; info!(&log, "using explicit nameservers"; "nameservers" => ?addrs); diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index c08cc012c1..c12035e2cb 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -18,7 +18,7 @@ omicron-uuid-kinds.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } slog.workspace = true thiserror.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index fdd5dce428..b3dadf16d2 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -2,24 +2,24 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+use hickory_resolver::config::{ + LookupIpStrategy, NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use hickory_resolver::lookup::SrvLookup; +use hickory_resolver::TokioAsyncResolver; use hyper::client::connect::dns::Name; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, }; use slog::{debug, error, info, trace}; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; -use trust_dns_resolver::config::{ - LookupIpStrategy, NameServerConfig, Protocol, ResolverConfig, ResolverOpts, -}; -use trust_dns_resolver::lookup::SrvLookup; -use trust_dns_resolver::TokioAsyncResolver; pub type DnsError = dns_service_client::Error; #[derive(Debug, Clone, thiserror::Error)] pub enum ResolveError { #[error(transparent)] - Resolve(#[from] trust_dns_resolver::error::ResolveError), + Resolve(#[from] hickory_resolver::error::ResolveError), #[error("Record not found for SRV key: {}", .0.dns_name())] NotFound(crate::ServiceName), @@ -52,6 +52,19 @@ impl reqwest::dns::Resolve for Resolver { } impl Resolver { + /// Construct a new DNS resolver from the system configuration. + pub fn new_from_system_conf( + log: slog::Logger, + ) -> Result { + let (rc, mut opts) = hickory_resolver::system_conf::read_system_conf()?; + // Enable edns for potentially larger records + opts.edns0 = true; + + let resolver = TokioAsyncResolver::tokio(rc, opts); + + Ok(Self { log, resolver }) + } + /// Construct a new DNS resolver from specific DNS server addresses. pub fn new_from_addrs( log: slog::Logger, @@ -66,18 +79,20 @@ impl Resolver { socket_addr, protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); } let mut opts = ResolverOpts::default(); + // Enable edns for potentially larger records + opts.edns0 = true; opts.use_hosts_file = false; opts.num_concurrent_reqs = dns_server_count; // The underlay is IPv6 only, so this helps avoid needless lookups of // the IPv4 variant. 
opts.ip_strategy = LookupIpStrategy::Ipv6Only; opts.negative_max_ttl = Some(std::time::Duration::from_secs(15)); - let resolver = TokioAsyncResolver::tokio(rc, opts)?; + let resolver = TokioAsyncResolver::tokio(rc, opts); Ok(Self { log, resolver }) } @@ -163,7 +178,7 @@ impl Resolver { .iter() .next() .ok_or_else(|| ResolveError::NotFound(srv))?; - Ok(*address) + Ok(address.0) } /// Returns the targets of the SRV records for a DNS name @@ -313,7 +328,7 @@ impl Resolver { // (1) it returns `IpAddr`'s rather than `SocketAddr`'s // (2) it doesn't actually return all the addresses from the Additional // section of the DNS server's response. - // See bluejekyll/trust-dns#1980 + // See hickory-dns/hickory-dns#1980 // // (1) is not a huge deal as we can try to match up the targets ourselves // to grab the port for creating a `SocketAddr` but (2) means we need to do @@ -350,10 +365,9 @@ impl Resolver { .await .into_iter() .flat_map(move |target| match target { - Ok((ips, port)) => Some( - ips.into_iter() - .map(move |ip| SocketAddrV6::new(ip, port, 0, 0)), - ), + Ok((ips, port)) => Some(ips.into_iter().map(move |aaaa| { + SocketAddrV6::new(aaaa.into(), port, 0, 0) + })), Err((target, err)) => { error!( log, @@ -511,7 +525,7 @@ mod test { assert!( matches!( dns_error.kind(), - trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { .. }, + hickory_resolver::error::ResolveErrorKind::NoRecordsFound { .. }, ), "Saw error: {dns_error}", ); @@ -664,7 +678,7 @@ mod test { error, ResolveError::Resolve(error) if matches!(error.kind(), - trust_dns_resolver::error::ResolveErrorKind::NoRecordsFound { .. } + hickory_resolver::error::ResolveErrorKind::NoRecordsFound { .. 
} ) ); diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 1128cd8f0f..8977507505 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -35,6 +35,7 @@ futures.workspace = true gateway-client.workspace = true headers.workspace = true hex.workspace = true +hickory-resolver.workspace = true http.workspace = true hyper.workspace = true illumos-utils.workspace = true @@ -87,7 +88,6 @@ tokio = { workspace = true, features = ["full"] } tokio-postgres = { workspace = true, features = ["with-serde_json-1"] } tokio-util = { workspace = true, features = ["codec"] } tough.workspace = true -trust-dns-resolver.workspace = true uuid.workspace = true nexus-auth.workspace = true @@ -143,7 +143,7 @@ sp-sim.workspace = true rustls.workspace = true subprocess.workspace = true term.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true tufaceous.workspace = true tufaceous-lib.workspace = true httptest.workspace = true diff --git a/nexus/src/app/external_dns.rs b/nexus/src/app/external_dns.rs index c6a8d833c2..4732146ce2 100644 --- a/nexus/src/app/external_dns.rs +++ b/nexus/src/app/external_dns.rs @@ -5,15 +5,15 @@ use std::net::IpAddr; use std::net::SocketAddr; +use hickory_resolver::config::NameServerConfig; +use hickory_resolver::config::Protocol; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::TokioAsyncResolver; use hyper::client::connect::dns::Name; use omicron_common::address::DNS_PORT; -use trust_dns_resolver::config::NameServerConfig; -use trust_dns_resolver::config::Protocol; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::TokioAsyncResolver; -/// Wrapper around trust-dns-resolver to provide name resolution +/// Wrapper around hickory-resolver to provide name resolution /// using a given set of DNS servers for use with reqwest. 
pub struct Resolver(TokioAsyncResolver); @@ -26,18 +26,17 @@ impl Resolver { socket_addr: SocketAddr::new(*addr, DNS_PORT), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); } let mut opts = ResolverOpts::default(); + // Enable edns for potentially larger records + opts.edns0 = true; opts.use_hosts_file = false; // Do as many requests in parallel as we have configured servers opts.num_concurrent_reqs = dns_servers.len(); - Resolver( - TokioAsyncResolver::tokio(rc, opts) - .expect("creating resovler shouldn't fail"), - ) + Resolver(TokioAsyncResolver::tokio(rc, opts)) } } @@ -48,7 +47,7 @@ impl reqwest::dns::Resolve for Resolver { let ips = resolver.lookup_ip(name.as_str()).await?; let addrs = ips .into_iter() - // trust-dns-resolver returns `IpAddr`s but reqwest wants + // hickory-resolver returns `IpAddr`s but reqwest wants // `SocketAddr`s (useful if you have a custom resolver that // returns a scoped IPv6 address). 
The port provided here // is ignored in favour of the scheme default (http/80, diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index a883bc83c5..50110ecaca 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -46,7 +46,7 @@ sled-agent-client.workspace = true slog.workspace = true tokio.workspace = true tokio-util.workspace = true -trust-dns-resolver.workspace = true +hickory-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 7c190974a1..3dcffb399b 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -17,6 +17,11 @@ use dropshot::HandlerTaskMode; use futures::future::BoxFuture; use futures::FutureExt; use gateway_test_utils::setup::GatewayTestContext; +use hickory_resolver::config::NameServerConfig; +use hickory_resolver::config::Protocol; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::TokioAsyncResolver; use nexus_config::Database; use nexus_config::DpdConfig; use nexus_config::InternalDns; @@ -73,11 +78,6 @@ use std::collections::HashMap; use std::fmt::Debug; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::time::Duration; -use trust_dns_resolver::config::NameServerConfig; -use trust_dns_resolver::config::Protocol; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::TokioAsyncResolver; use uuid::Uuid; pub use sim::TEST_HARDWARE_THREADS; @@ -1586,12 +1586,12 @@ pub async fn start_dns_server( socket_addr: dns_server.local_address(), protocol: Protocol::Udp, tls_dns_name: None, - trust_nx_responses: false, + trust_negative_responses: false, bind_addr: None, }); - let resolver = - TokioAsyncResolver::tokio(resolver_config, ResolverOpts::default()) - .context("creating DNS resolver")?; + let mut resolver_opts = 
ResolverOpts::default(); + resolver_opts.edns0 = true; + let resolver = TokioAsyncResolver::tokio(resolver_config, resolver_opts); Ok((dns_server, http_server, resolver)) } diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs index 2c861ff159..0de4d31395 100644 --- a/nexus/tests/integration_tests/silos.rs +++ b/nexus/tests/integration_tests/silos.rs @@ -37,6 +37,7 @@ use std::fmt::Write; use std::str::FromStr; use base64::Engine; +use hickory_resolver::error::ResolveErrorKind; use http::method::Method; use http::StatusCode; use httptest::{matchers::*, responders::*, Expectation, Server}; @@ -44,7 +45,6 @@ use nexus_types::external_api::shared::{FleetRole, SiloRole}; use std::convert::Infallible; use std::net::Ipv4Addr; use std::time::Duration; -use trust_dns_resolver::error::ResolveErrorKind; use uuid::Uuid; type ControlPlaneTestContext = @@ -2164,7 +2164,7 @@ pub async fn verify_silo_dns_name( .await { Ok(result) => { - let addrs: Vec<_> = result.iter().collect(); + let addrs: Vec<_> = result.iter().map(|a| &a.0).collect(); if addrs.is_empty() { false } else { diff --git a/wicketd/Cargo.toml b/wicketd/Cargo.toml index 324ae01b42..6e2c27a97e 100644 --- a/wicketd/Cargo.toml +++ b/wicketd/Cargo.toml @@ -25,6 +25,7 @@ flume.workspace = true futures.workspace = true gateway-messages.workspace = true hex.workspace = true +hickory-resolver.workspace = true http.workspace = true hubtools.workspace = true hyper.workspace = true @@ -46,7 +47,6 @@ tokio-stream.workspace = true tokio-util.workspace = true toml.workspace = true tough.workspace = true -trust-dns-resolver.workspace = true uuid.workspace = true bootstrap-agent-client.workspace = true diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index 36a4f61779..fb0914e836 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ b/wicketd/src/preflight_check/uplink.rs @@ -14,6 +14,11 @@ use dpd_client::types::PortSpeed as DpdPortSpeed; use 
dpd_client::Client as DpdClient; use dpd_client::ClientState as DpdClientState; use either::Either; +use hickory_resolver::config::NameServerConfigGroup; +use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::ResolverOpts; +use hickory_resolver::error::ResolveErrorKind; +use hickory_resolver::TokioAsyncResolver; use illumos_utils::zone::SVCCFG; use illumos_utils::PFEXEC; use omicron_common::address::DENDRITE_PORT; @@ -35,12 +40,6 @@ use std::time::Duration; use std::time::Instant; use tokio::process::Command; use tokio::sync::mpsc; -use trust_dns_resolver::config::NameServerConfigGroup; -use trust_dns_resolver::config::ResolverConfig; -use trust_dns_resolver::config::ResolverOpts; -use trust_dns_resolver::error::ResolveError; -use trust_dns_resolver::error::ResolveErrorKind; -use trust_dns_resolver::TokioAsyncResolver; use wicket_common::preflight_check::EventBuffer; use wicket_common::preflight_check::StepContext; use wicket_common::preflight_check::StepProgress; @@ -930,16 +929,7 @@ impl DnsLookupStep { }; 'dns_servers: for &dns_ip in dns_servers { - let resolver = match self.build_resolver(dns_ip) { - Ok(resolver) => resolver, - Err(err) => { - self.warnings.push(format!( - "failed to create resolver for {dns_ip}: {}", - DisplayErrorChain::new(&err) - )); - continue; - } - }; + let resolver = self.build_resolver(dns_ip); // Attempt to resolve any NTP servers that aren't IP addresses. 
for &ntp_name in &ntp_names_to_resolve { @@ -1052,14 +1042,18 @@ impl DnsLookupStep { ( "A", resolver.ipv4_lookup(name).await.map(|records| { - Either::Left(records.into_iter().map(IpAddr::V4)) + Either::Left( + records.into_iter().map(|x| IpAddr::V4(x.into())), + ) }), ) } else { ( "AAAA", resolver.ipv6_lookup(name).await.map(|records| { - Either::Right(records.into_iter().map(IpAddr::V6)) + Either::Right( + records.into_iter().map(|x| IpAddr::V6(x.into())), + ) }), ) }; @@ -1175,12 +1169,12 @@ impl DnsLookupStep { /// /// If building it fails, we'll append to our internal `warnings` and return /// `None`. - fn build_resolver( - &mut self, - dns_ip: IpAddr, - ) -> Result { + fn build_resolver(&mut self, dns_ip: IpAddr) -> TokioAsyncResolver { let mut options = ResolverOpts::default(); + // Enable edns for potentially larger records + options.edns0 = true; + // We will retry ourselves; we don't want the resolver // retrying internally too. options.attempts = 1; diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 35c266cdf3..5edfbccf93 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -60,6 +60,7 @@ getrandom = { version = "0.2.14", default-features = false, features = ["js", "r group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } +hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.30", features = ["full"] } indexmap = { version = "2.4.0", features = ["serde"] } @@ -113,7 +114,6 @@ tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } toml = { version = "0.7.8" } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.20", features = ["serde"] } tracing = { version = "0.1.40", features = ["log"] } -trust-dns-proto = { version = "0.22.0" } 
unicode-bidi = { version = "0.3.15" } unicode-normalization = { version = "0.1.23" } usdt = { version = "0.5.0" } @@ -167,6 +167,7 @@ getrandom = { version = "0.2.14", default-features = false, features = ["js", "r group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } +hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.30", features = ["full"] } indexmap = { version = "2.4.0", features = ["serde"] } @@ -222,7 +223,6 @@ tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } toml = { version = "0.7.8" } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.20", features = ["serde"] } tracing = { version = "0.1.40", features = ["log"] } -trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.15" } unicode-normalization = { version = "0.1.23" } unicode-xid = { version = "0.2.4" } From 8ae0833152dd4b01ad7426f5c931f1e74f99cbd4 Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 16 Aug 2024 03:40:56 +0000 Subject: [PATCH 16/51] [internal-dns] remove lookup_ipv6 in favor of lookup_socket_v6 (#6320) `lookup_ipv6` is both buggy and easy to misuse: * it sends an AAAA query for a domain which should have a SRV record - this works only because https://github.com/oxidecomputer/omicron/issues/4051 means the SRV record is incorrectly returned, along with the actually-desired AAAA for the SRV's target in Additionals * it looks up an IPv6 address from a SRV record *but ignores the port*. in places `lookup_ipv6` was used, it was paired consistently with the hardcoded port NEXUS_INTERNAL_PORT and matched what should be in the resolved SRV record. 
if we, for example, wanted to move Nexus' port (or start a test Nexus on an atypical port), the authoritative port number in the SRV response would be ignored for the hardcoded port. let's just use the port that we told DNS we're at! we may still want a bare IPv6 address for a service if we're going to test network reachability, for example, but we're not doing that with this function today. this is all distinct from helpers like `lookup_all_ipv6`. if we need a service's IPv6 address to use with an alternate port to access a different API, we *probably* should have a distinct SRV record for that lookup to use instead? I've found three instances of this: * wicket assumes the techport proxy is on the same IP as Nexus' API, but that isn't necessarily true * we assume the CRDB admin service listens on the same IP as CRDB itself, but that doesn't have to be true * we look up addresses for MGS via `ServiceName::Dendrite`, but there's a `ServiceName::ManagementGatewayService`, so either that's a typo or can be made to have its own SRV records there are some uses of `lookup_all_ipv6` that make a lot of sense still, where we're discovering the rack's network and _really_ do not care about the port that Dendrite happens to be on. --- internal-dns/src/resolver.rs | 73 ++++++++++++++------------------- oximeter/collector/src/agent.rs | 8 +--- oximeter/collector/src/lib.rs | 17 ++++---- 3 files changed, 40 insertions(+), 58 deletions(-) diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index b3dadf16d2..5d3832a417 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -160,27 +160,6 @@ impl Resolver { self.resolver.clear_cache(); } - /// Looks up a single [`Ipv6Addr`] based on the SRV name. - /// Returns an error if the record does not exist. - // TODO: There are lots of ways this API can expand: Caching, - // actually respecting TTL, looking up ports, etc.
- // - // For now, however, it serves as a very simple "get everyone using DNS" - // API that can be improved upon later. - pub async fn lookup_ipv6( - &self, - srv: crate::ServiceName, - ) -> Result { - let name = srv.srv_name(); - debug!(self.log, "lookup_ipv6 srv"; "dns_name" => &name); - let response = self.resolver.ipv6_lookup(&name).await?; - let address = response - .iter() - .next() - .ok_or_else(|| ResolveError::NotFound(srv))?; - Ok(address.0) - } - /// Returns the targets of the SRV records for a DNS name /// /// The returned values are generally other DNS names that themselves would @@ -235,6 +214,12 @@ impl Resolver { // TODO-robustness: any callers of this should probably be using // all the targets for a given SRV and not just the first one // we get, see [`Resolver::lookup_all_socket_v6`]. + // + // TODO: There are lots of ways this API can expand: Caching, + // actually respecting TTL, looking up ports, etc. + // + // For now, however, it serves as a very simple "get everyone using DNS" + // API that can be improved upon later. 
pub async fn lookup_socket_v6( &self, service: crate::ServiceName, @@ -549,11 +534,11 @@ mod test { dns_server.update(&dns_config).await.unwrap(); let resolver = dns_server.resolver().unwrap(); - let found_ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip,); + assert_eq!(found_addr.ip(), &ip,); dns_server.cleanup_successful(); logctx.cleanup_successful(); @@ -631,11 +616,13 @@ mod test { // Look up Cockroach let resolver = dns_server.resolver().unwrap(); - let ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let resolved_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert!(cockroach_addrs.iter().any(|addr| addr.ip() == &ip)); + assert!(cockroach_addrs + .iter() + .any(|addr| addr.ip() == resolved_addr.ip())); // Look up all the Cockroach addresses. let mut ips = @@ -649,18 +636,18 @@ mod test { ); // Look up Clickhouse - let ip = resolver - .lookup_ipv6(ServiceName::Clickhouse) + let addr = resolver + .lookup_socket_v6(ServiceName::Clickhouse) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, clickhouse_addr.ip()); + assert_eq!(addr.ip(), clickhouse_addr.ip()); // Look up Backend Service - let ip = resolver - .lookup_ipv6(srv_backend) + let addr = resolver + .lookup_socket_v6(srv_backend) .await .expect("Should have been able to look up IP address"); - assert_eq!(&ip, crucible_addr.ip()); + assert_eq!(addr.ip(), crucible_addr.ip()); // If we deploy a new generation that removes all records, then we don't // find anything any more. @@ -671,7 +658,7 @@ mod test { // If we remove the records for all services, we won't find them any // more. 
(e.g., there's no hidden caching going on) let error = resolver - .lookup_ipv6(ServiceName::Cockroach) + .lookup_socket_v6(ServiceName::Cockroach) .await .expect_err("unexpectedly found records"); assert_matches!( @@ -708,11 +695,11 @@ mod test { dns_builder.service_backend_zone(srv_crdb, &zone, 12345).unwrap(); let dns_config = dns_builder.build_full_config_for_initial_generation(); dns_server.update(&dns_config).await.unwrap(); - let found_ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip1); + assert_eq!(found_addr.ip(), &ip1); // If we insert the same record with a new address, it should be // updated. @@ -726,11 +713,11 @@ mod test { dns_builder.build_full_config_for_initial_generation(); dns_config.generation += 1; dns_server.update(&dns_config).await.unwrap(); - let found_ip = resolver - .lookup_ipv6(ServiceName::Cockroach) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Cockroach) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip2); + assert_eq!(found_addr.ip(), &ip2); dns_server.cleanup_successful(); logctx.cleanup_successful(); @@ -861,11 +848,11 @@ mod test { dns_server.update(&dns_config).await.unwrap(); // Confirm that we can access this record manually. - let found_ip = resolver - .lookup_ipv6(ServiceName::Nexus) + let found_addr = resolver + .lookup_socket_v6(ServiceName::Nexus) .await .expect("Should have been able to look up IP address"); - assert_eq!(found_ip, ip); + assert_eq!(found_addr.ip(), &ip); // Confirm that the progenitor client can access this record too. 
let value = client.test_endpoint().await.unwrap(); diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index 5da9a1dfa8..8271b2e068 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -18,7 +18,6 @@ use internal_dns::resolver::Resolver; use internal_dns::ServiceName; use nexus_client::types::IdSortMode; use omicron_common::address::CLICKHOUSE_PORT; -use omicron_common::address::NEXUS_INTERNAL_PORT; use omicron_common::backoff; use omicron_common::backoff::BackoffError; use oximeter::types::ProducerResults; @@ -816,7 +815,7 @@ async fn refresh_producer_list(agent: OximeterAgent, resolver: Resolver) { async fn resolve_nexus_with_backoff( log: &Logger, resolver: &Resolver, -) -> SocketAddr { +) -> SocketAddrV6 { let log_failure = |error, delay| { warn!( log, @@ -827,12 +826,9 @@ async fn resolve_nexus_with_backoff( }; let do_lookup = || async { resolver - .lookup_ipv6(ServiceName::Nexus) + .lookup_socket_v6(ServiceName::Nexus) .await .map_err(|e| BackoffError::transient(e.to_string())) - .map(|ip| { - SocketAddr::V6(SocketAddrV6::new(ip, NEXUS_INTERNAL_PORT, 0, 0)) - }) }; backoff::retry_notify( backoff::retry_policy_internal_service(), diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 02bf9152f4..7dd423d074 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -14,7 +14,6 @@ use dropshot::HttpServerStarter; use internal_dns::resolver::ResolveError; use internal_dns::resolver::Resolver; use internal_dns::ServiceName; -use omicron_common::address::NEXUS_INTERNAL_PORT; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::backoff; use omicron_common::FileKv; @@ -251,14 +250,14 @@ impl Oximeter { let nexus_address = if let Some(address) = config.nexus_address { address } else { - SocketAddr::V6(SocketAddrV6::new( - resolver.lookup_ipv6(ServiceName::Nexus).await.map_err( - |e| backoff::BackoffError::transient(e.to_string()), - 
)?, - NEXUS_INTERNAL_PORT, - 0, - 0, - )) + SocketAddr::V6( + resolver + .lookup_socket_v6(ServiceName::Nexus) + .await + .map_err(|e| { + backoff::BackoffError::transient(e.to_string()) + })?, + ) }; let client = nexus_client::Client::new( &format!("http://{nexus_address}"), From d7d4beaf0dbfa82c6ae0da10a6ce43b3c5a89142 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 04:47:25 +0000 Subject: [PATCH 17/51] Update Rust crate tokio to 1.39.2 (#6249) --- Cargo.lock | 15 +++++++-------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 20 ++++++++++---------- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5fd17fd158..f561aed2ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6488,7 +6488,7 @@ dependencies = [ "log", "managed", "memchr", - "mio 0.8.11", + "mio 1.0.2", "nix 0.28.0", "nom", "num-bigint-dig", @@ -10546,28 +10546,27 @@ dependencies = [ [[package]] name = "tokio" -version = "1.38.1" +version = "1.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df" +checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1" dependencies = [ "backtrace", "bytes", "libc", - "mio 0.8.11", - "num_cpus", + "mio 1.0.2", "parking_lot 0.12.2", "pin-project-lite", "signal-hook-registry", "socket2 0.5.7", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 2bb189b6c0..1b62af959d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -568,7 +568,7 @@ textwrap = "0.16.1" 
test-strategy = "0.3.1" thiserror = "1.0" tofino = { git = "https://github.com/oxidecomputer/tofino", branch = "main" } -tokio = "1.38.1" +tokio = "1.39.2" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.15" tokio-tungstenite = "0.20" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 5edfbccf93..7983c38052 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -107,7 +107,7 @@ string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.74", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", "parsing"] } -tokio = { version = "1.38.1", features = ["full", "test-util"] } +tokio = { version = "1.39.2", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } @@ -216,7 +216,7 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extr syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.74", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.18", default-features = false, features = ["formatting", "parsing"] } -tokio = { version = "1.38.1", features = ["full", "test-util"] } +tokio = { version = "1.39.2", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } @@ -235,7 +235,7 @@ zeroize = { version = "1.7.0", features = 
["std", "zeroize_derive"] } [target.x86_64-unknown-linux-gnu.dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } linux-raw-sys = { version = "0.4.13", default-features = false, features = ["elf", "errno", "general", "ioctl", "no_std", "std", "system"] } -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } @@ -244,35 +244,35 @@ signal-hook-mio = { version = "0.2.4", default-features = false, features = ["su [target.x86_64-unknown-linux-gnu.build-dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } linux-raw-sys = { version = "0.4.13", default-features = false, features = ["elf", "errno", "general", "ioctl", "no_std", "std", "system"] } -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.x86_64-apple-darwin.dependencies] -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.x86_64-apple-darwin.build-dependencies] -mio = { 
version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.aarch64-apple-darwin.dependencies] -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.aarch64-apple-darwin.build-dependencies] -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } @@ -280,7 +280,7 @@ signal-hook-mio = { version = "0.2.4", default-features = false, features = ["su [target.x86_64-unknown-illumos.dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } @@ -290,7 +290,7 @@ toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", featu 
[target.x86_64-unknown-illumos.build-dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } -mio = { version = "0.8.11", features = ["net", "os-ext"] } +mio = { version = "1.0.2", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } From 3c585f2180f149c532e268eaaed17329fdc935e4 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 06:22:35 +0000 Subject: [PATCH 18/51] Update Rust crate camino to v1.1.8 (#6357) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f561aed2ae..a62b4e2983 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -815,9 +815,9 @@ dependencies = [ [[package]] name = "camino" -version = "1.1.7" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0ec6b951b160caa93cc0c7b209e5a3bff7aae9062213451ac99493cd844c239" +checksum = "3054fea8a20d8ff3968d5b22cc27501d2b08dc4decdb31b184323f00c5ef23bb" dependencies = [ "serde", ] From a7885d1da2e2f60c2cc648d9b80d25c1216fde3b Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 10:41:12 -0700 Subject: [PATCH 19/51] Update Rust crate clap to v4.5.16 (#6360) --- Cargo.lock | 4 ++-- workspace-hack/Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a62b4e2983..849c82f1f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1074,9 +1074,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.15" +version = "4.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11d8838454fda655dafd3accb2b6e2bea645b9e4078abe84a22ceb947235c5cc" +checksum = 
"ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" dependencies = [ "clap_builder", "clap_derive", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 7983c38052..5dc3bc11e7 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -33,7 +33,7 @@ byteorder = { version = "1.5.0" } bytes = { version = "1.7.1", features = ["serde"] } chrono = { version = "0.4.38", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } -clap = { version = "4.5.15", features = ["cargo", "derive", "env", "wrap_help"] } +clap = { version = "4.5.16", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.15", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } console = { version = "0.15.8" } const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } @@ -140,7 +140,7 @@ bytes = { version = "1.7.1", features = ["serde"] } cc = { version = "1.0.97", default-features = false, features = ["parallel"] } chrono = { version = "0.4.38", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } -clap = { version = "4.5.15", features = ["cargo", "derive", "env", "wrap_help"] } +clap = { version = "4.5.16", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.15", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } console = { version = "0.15.8" } const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } From 921ec6d58ca1e0f5797924050e585bb7803618db Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 16 Aug 2024 15:04:19 -0700 Subject: [PATCH 20/51] remove redundant background task log entries (#6323) --- nexus/src/app/background/tasks/lookup_region_port.rs | 3 --- 
nexus/src/app/background/tasks/phantom_disks.rs | 4 +--- .../app/background/tasks/physical_disk_adoption.rs | 3 --- nexus/src/app/background/tasks/region_replacement.rs | 3 --- .../app/background/tasks/region_replacement_driver.rs | 5 ----- .../region_snapshot_replacement_garbage_collect.rs | 11 ----------- .../tasks/region_snapshot_replacement_start.rs | 5 ----- 7 files changed, 1 insertion(+), 33 deletions(-) diff --git a/nexus/src/app/background/tasks/lookup_region_port.rs b/nexus/src/app/background/tasks/lookup_region_port.rs index fbfc5c5af2..df501fe6b1 100644 --- a/nexus/src/app/background/tasks/lookup_region_port.rs +++ b/nexus/src/app/background/tasks/lookup_region_port.rs @@ -53,7 +53,6 @@ impl BackgroundTask for LookupRegionPort { ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; - info!(&log, "lookup region port task started"); let mut status = LookupRegionPortStatus::default(); @@ -147,8 +146,6 @@ impl BackgroundTask for LookupRegionPort { } } - info!(&log, "lookup region port task done"); - json!(status) } .boxed() diff --git a/nexus/src/app/background/tasks/phantom_disks.rs b/nexus/src/app/background/tasks/phantom_disks.rs index 4b0d8bec38..7f3fceab1c 100644 --- a/nexus/src/app/background/tasks/phantom_disks.rs +++ b/nexus/src/app/background/tasks/phantom_disks.rs @@ -43,7 +43,6 @@ impl BackgroundTask for PhantomDiskDetector { ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; - warn!(&log, "phantom disk task started"); let phantom_disks = match self.datastore.find_phantom_disks().await { @@ -83,14 +82,13 @@ impl BackgroundTask for PhantomDiskDetector { } else { info!( &log, - "phandom disk {} un-deleted andset to faulted ok", + "phandom disk {} un-deleted and set to faulted ok", disk.id(), ); phantom_disk_deleted_ok += 1; } } - warn!(&log, "phantom disk task done"); json!({ "phantom_disk_deleted_ok": phantom_disk_deleted_ok, "phantom_disk_deleted_err": phantom_disk_deleted_err, diff --git 
a/nexus/src/app/background/tasks/physical_disk_adoption.rs b/nexus/src/app/background/tasks/physical_disk_adoption.rs index f3b9e8ac62..b1eceed0b6 100644 --- a/nexus/src/app/background/tasks/physical_disk_adoption.rs +++ b/nexus/src/app/background/tasks/physical_disk_adoption.rs @@ -96,8 +96,6 @@ impl BackgroundTask for PhysicalDiskAdoption { } let mut disks_added = 0; - let log = &opctx.log; - warn!(&log, "physical disk adoption task started"); let collection_id = *self.rx_inventory_collection.borrow(); let Some(collection_id) = collection_id else { @@ -171,7 +169,6 @@ impl BackgroundTask for PhysicalDiskAdoption { ); } - warn!(&log, "physical disk adoption task done"); json!({ "physical_disks_added": disks_added, }) diff --git a/nexus/src/app/background/tasks/region_replacement.rs b/nexus/src/app/background/tasks/region_replacement.rs index f852f21734..ba0e7f86fb 100644 --- a/nexus/src/app/background/tasks/region_replacement.rs +++ b/nexus/src/app/background/tasks/region_replacement.rs @@ -61,7 +61,6 @@ impl BackgroundTask for RegionReplacementDetector { ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; - warn!(&log, "region replacement task started"); let mut ok = 0; let mut err = 0; @@ -182,8 +181,6 @@ impl BackgroundTask for RegionReplacementDetector { } } - warn!(&log, "region replacement task done"); - json!({ "region_replacement_started_ok": ok, "region_replacement_started_err": err, diff --git a/nexus/src/app/background/tasks/region_replacement_driver.rs b/nexus/src/app/background/tasks/region_replacement_driver.rs index 284ed2c368..02db86eab3 100644 --- a/nexus/src/app/background/tasks/region_replacement_driver.rs +++ b/nexus/src/app/background/tasks/region_replacement_driver.rs @@ -227,16 +227,11 @@ impl BackgroundTask for RegionReplacementDriver { opctx: &'a OpContext, ) -> BoxFuture<'a, serde_json::Value> { async { - let log = &opctx.log; - info!(&log, "region replacement driver task started"); - let mut status = 
RegionReplacementDriverStatus::default(); self.drive_running_replacements_forward(opctx, &mut status).await; self.complete_done_replacements(opctx, &mut status).await; - info!(&log, "region replacement driver task done"); - json!(status) } .boxed() diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs index 4c66c166ff..77dc87c060 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs @@ -129,12 +129,6 @@ impl BackgroundTask for RegionSnapshotReplacementGarbageCollect { opctx: &'a OpContext, ) -> BoxFuture<'a, serde_json::Value> { async move { - let log = &opctx.log; - info!( - &log, - "region snapshot replacement garbage collect task started", - ); - let mut status = RegionSnapshotReplacementGarbageCollectStatus::default(); @@ -144,11 +138,6 @@ impl BackgroundTask for RegionSnapshotReplacementGarbageCollect { ) .await; - info!( - &log, - "region snapshot replacement garbage collect task done" - ); - json!(status) } .boxed() diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs index 9bc66d48c8..1fdc17690d 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs @@ -232,9 +232,6 @@ impl BackgroundTask for RegionSnapshotReplacementDetector { opctx: &'a OpContext, ) -> BoxFuture<'a, serde_json::Value> { async { - let log = &opctx.log; - info!(&log, "region snapshot replacement start task started"); - let mut status = RegionSnapshotReplacementStartStatus::default(); self.create_requests_for_region_snapshots_on_expunged_disks( @@ -249,8 +246,6 @@ impl BackgroundTask for RegionSnapshotReplacementDetector { ) .await; - info!(&log, "region snapshot 
replacement start task done"); - json!(status) } .boxed() From 6bb3c13e79488efccd39b6daa8f9def6a727616f Mon Sep 17 00:00:00 2001 From: David Crespo Date: Fri, 16 Aug 2024 17:09:48 -0500 Subject: [PATCH 21/51] Bump web console (vpc routers + routes, instance polling, edit quotas) (#6366) https://github.com/oxidecomputer/console/compare/17ae890c...33b7a505 * [33b7a505](https://github.com/oxidecomputer/console/commit/33b7a505) oxidecomputer/console#2360 * [1a2cb52d](https://github.com/oxidecomputer/console/commit/1a2cb52d) oxidecomputer/console#2369 * [9e831174](https://github.com/oxidecomputer/console/commit/9e831174) oxidecomputer/console#2374 * [e30f2eb8](https://github.com/oxidecomputer/console/commit/e30f2eb8) oxidecomputer/console#2373 * [bb53f1b2](https://github.com/oxidecomputer/console/commit/bb53f1b2) oxidecomputer/console#2371 * [29398e74](https://github.com/oxidecomputer/console/commit/29398e74) oxidecomputer/console#2343 * [68e2dc89](https://github.com/oxidecomputer/console/commit/68e2dc89) oxidecomputer/console#2359 * [11e29ed8](https://github.com/oxidecomputer/console/commit/11e29ed8) bump omicron to latest main * [b6ed3757](https://github.com/oxidecomputer/console/commit/b6ed3757) oxidecomputer/console#2370 * [af6c1f4a](https://github.com/oxidecomputer/console/commit/af6c1f4a) oxidecomputer/console#2368 * [60ef745c](https://github.com/oxidecomputer/console/commit/60ef745c) disallow unreachable code in ts config, fix one case of it * [3a6f815a](https://github.com/oxidecomputer/console/commit/3a6f815a) oxidecomputer/console#2364 * [80b3f2f3](https://github.com/oxidecomputer/console/commit/80b3f2f3) oxidecomputer/console#2366 * [dab60d9d](https://github.com/oxidecomputer/console/commit/dab60d9d) oxidecomputer/console#2358 * [8e3314f1](https://github.com/oxidecomputer/console/commit/8e3314f1) oxidecomputer/console#2362 * [9b5cdfa0](https://github.com/oxidecomputer/console/commit/9b5cdfa0) bump TS generator for bugfix (just adds whitespace) * 
[07b6c151](https://github.com/oxidecomputer/console/commit/07b6c151) oxidecomputer/console#2349 * [d32fddc2](https://github.com/oxidecomputer/console/commit/d32fddc2) Revert "Focus confirm button instead of cancel in modals (oxidecomputer/console#2340)" * [84a1501e](https://github.com/oxidecomputer/console/commit/84a1501e) oxidecomputer/console#2341 * [6615cb6b](https://github.com/oxidecomputer/console/commit/6615cb6b) oxidecomputer/console#2340 * [e48b0096](https://github.com/oxidecomputer/console/commit/e48b0096) delete unused vscode tasks * [22a6c50f](https://github.com/oxidecomputer/console/commit/22a6c50f) tighten TypeValueCell spacing * [4eacb3d7](https://github.com/oxidecomputer/console/commit/4eacb3d7) oxidecomputer/console#2338 * [f278a747](https://github.com/oxidecomputer/console/commit/f278a747) oxidecomputer/console#2332 * [016ad1b4](https://github.com/oxidecomputer/console/commit/016ad1b4) oxidecomputer/console#2337 * [2d1a22a2](https://github.com/oxidecomputer/console/commit/2d1a22a2) oxidecomputer/console#2336 * [be0f087f](https://github.com/oxidecomputer/console/commit/be0f087f) oxidecomputer/console#2329 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 4f67064733..994d30396b 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="17ae890c68a5277fbefe773694e790a8f1b178b4" -SHA2="273a31ba14546305bfafeb9aedb2d9a7530328a0359cda363380c9ca3240b948" +COMMIT="33b7a505a222b258a155636e8ee79c7ee3c132d2" +SHA2="f9089e18d52d7a54149b364a0b3ae4efba421c13eca6f7752a23b74dc3fa1a8e" From c86ff799803a918d858b744478af3100de7d927c Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 16 Aug 2024 15:56:32 -0700 Subject: [PATCH 22/51] clear `OMDB_` environment variables when running omdb tests (#6368) --- dev-tools/omdb/tests/test_all_output.rs | 24 ++++++++++++++++++++++++ nexus/test-utils/src/lib.rs | 1 + 2 files changed, 25 insertions(+) 
diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 45492c14ce..1afee71122 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -56,6 +56,7 @@ fn assert_oximeter_list_producers_output( #[tokio::test] async fn test_omdb_usage_errors() { + clear_omdb_env(); let cmd_path = path_to_executable(CMD_OMDB); let mut output = String::new(); let invocations: &[&[&'static str]] = &[ @@ -111,6 +112,8 @@ async fn test_omdb_usage_errors() { #[nexus_test] async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { + clear_omdb_env(); + let gwtestctx = gateway_test_utils::setup::test_setup( "test_omdb_success_case", gateway_messages::SpPort::One, @@ -271,6 +274,8 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { /// that's covered by the success tests above. #[nexus_test] async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { + clear_omdb_env(); + let cmd_path = path_to_executable(CMD_OMDB); let postgres_url = cptestctx.database.listen_url().to_string(); let nexus_internal_url = @@ -504,3 +509,22 @@ async fn do_run_extra( write!(output, "=============================================\n").unwrap(); } + +// We're testing behavior that can be affected by OMDB-related environment +// variables. Clear all of them from the current process so that all child +// processes don't have them. OMDB environment variables can affect even the +// help output provided by clap. See clap-rs/clap#5673 for an example. +fn clear_omdb_env() { + // Rust documents that it's not safe to manipulate the environment in a + // multi-threaded process outside of Windows because it's possible that + // other threads are reading or writing the environment and most systems do + // not support this. On illumos, the underlying interfaces are broadly + // thread-safe. 
Further, Omicron only supports running tests under `cargo + // nextest`, in which case there are no threads running concurrently here + // that may be reading or modifying the environment. + for (env_var, _) in std::env::vars().filter(|(k, _)| k.starts_with("OMDB_")) + { + eprintln!("removing {:?} from environment", env_var); + std::env::remove_var(env_var); + } +} diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 3dcffb399b..ea46f2d017 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -4,6 +4,7 @@ //! Integration testing facilities for Nexus +#[cfg(feature = "omicron-dev")] use anyhow::Context; use anyhow::Result; use camino::Utf8Path; From f334531ecf2c570fa1be931fcb518911bc2b6e1c Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 16 Aug 2024 16:19:35 -0700 Subject: [PATCH 23/51] Make OxQL UUID parsing case-insensitive (#6359) Fixes #6358 --- oximeter/db/src/oxql/ast/grammar.rs | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/oximeter/db/src/oxql/ast/grammar.rs b/oximeter/db/src/oxql/ast/grammar.rs index a7585402b6..cbca4470f9 100644 --- a/oximeter/db/src/oxql/ast/grammar.rs +++ b/oximeter/db/src/oxql/ast/grammar.rs @@ -189,11 +189,11 @@ peg::parser! { rule dashed_uuid_literal() -> Uuid = s:$( "\"" - ['a'..='f' | '0'..='9']*<8> "-" - ['a'..='f' | '0'..='9']*<4> "-" - ['a'..='f' | '0'..='9']*<4> "-" - ['a'..='f' | '0'..='9']*<4> "-" - ['a'..='f' | '0'..='9']*<12> + ['a'..='f' | 'A'..='F' | '0'..='9']*<8> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<4> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<4> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<4> "-" + ['a'..='f' | 'A'..='F' | '0'..='9']*<12> "\"" ) {? let Some(middle) = s.get(1..37) else { @@ -202,7 +202,7 @@ peg::parser! { middle.parse().or(Err("invalid UUID literal")) } rule undashed_uuid_literal() -> Uuid - = s:$("\"" ['a'..='f' | '0'..='9']*<32> "\"") {? + = s:$("\"" ['a'..='f' | 'A'..='F' | '0'..='9']*<32> "\"") {? 
let Some(middle) = s.get(1..33) else { return Err("invalid UUID literal"); }; @@ -734,6 +734,15 @@ mod tests { .is_err()); } + #[test] + fn test_uuid_literal_is_case_insensitive() { + const ID: Uuid = uuid::uuid!("880D82A1-102F-4699-BE1A-7E2A6A469E8E"); + let as_str = format!("\"{ID}\""); + let as_lower = as_str.to_lowercase(); + assert_eq!(query_parser::uuid_literal_impl(&as_str).unwrap(), ID,); + assert_eq!(query_parser::uuid_literal_impl(&as_lower).unwrap(), ID,); + } + #[test] fn test_integer_literal() { assert_eq!(query_parser::integer_literal_impl("1").unwrap(), 1); From c28455a0625704a9b9af874cbb40202fe5429166 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Fri, 16 Aug 2024 16:22:35 -0700 Subject: [PATCH 24/51] [oximeter] Use `fmt::Display` for TOML errors (#6365) --- oximeter/timeseries-macro/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/oximeter/timeseries-macro/src/lib.rs b/oximeter/timeseries-macro/src/lib.rs index 317a8533a4..0c70e73445 100644 --- a/oximeter/timeseries-macro/src/lib.rs +++ b/oximeter/timeseries-macro/src/lib.rs @@ -59,9 +59,8 @@ pub fn use_timeseries( Err(e) => { let msg = format!( "Failed to generate timeseries types \ - from '{}': {:?}", + from '{}': {e}", path.display(), - e, ); return syn::Error::new(token.span(), msg) .into_compile_error() From 8e4ac4cf1a1f255f8bde4532de42fa38102b44e2 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 16 Aug 2024 16:37:00 -0700 Subject: [PATCH 25/51] re-assign sagas from expunged Nexus instances (#6215) --- nexus/db-queries/src/db/datastore/saga.rs | 249 ++++++++++++++++-- nexus/db-queries/src/db/sec_store.rs | 9 +- .../execution/src/cockroachdb.rs | 3 +- nexus/reconfigurator/execution/src/dns.rs | 10 +- nexus/reconfigurator/execution/src/lib.rs | 61 +++-- nexus/reconfigurator/execution/src/sagas.rs | 71 +++++ nexus/src/app/background/init.rs | 3 +- .../background/tasks/blueprint_execution.rs | 34 ++- 8 files changed, 376 insertions(+), 64 deletions(-) 
create mode 100644 nexus/reconfigurator/execution/src/sagas.rs diff --git a/nexus/db-queries/src/db/datastore/saga.rs b/nexus/db-queries/src/db/datastore/saga.rs index 939929e665..0b626804e1 100644 --- a/nexus/db-queries/src/db/datastore/saga.rs +++ b/nexus/db-queries/src/db/datastore/saga.rs @@ -9,7 +9,6 @@ use super::SQL_BATCH_SIZE; use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::model::Generation; use crate::db::pagination::paginated; use crate::db::pagination::paginated_multicolumn; use crate::db::pagination::Paginator; @@ -17,10 +16,12 @@ use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; +use nexus_auth::authz; use nexus_auth::context::OpContext; use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use std::ops::Add; impl DataStore { pub async fn saga_create( @@ -80,21 +81,15 @@ impl DataStore { /// now, we're implementing saga adoption only in cases where the original /// SEC/Nexus has been expunged.) /// - /// However, in the future, it may be possible for multiple SECs to try and - /// update the same saga, and overwrite each other's state. For example, - /// one SEC might try and update the state to Running while the other one - /// updates it to Done. That case would have to be carefully considered and - /// tested here, probably using the (currently unused) - /// `current_adopt_generation` field to enable optimistic concurrency. - /// - /// To reiterate, we are *not* considering the case where several SECs try - /// to update the same saga. That will be a future enhancement. + /// It's conceivable that multiple SECs do try to udpate the same saga + /// concurrently. That would be a bug. 
This is noticed and prevented by + /// making this query conditional on current_sec and failing with a conflict + /// if the current SEC has changed. pub async fn saga_update_state( &self, saga_id: steno::SagaId, new_state: steno::SagaCachedState, current_sec: db::saga_types::SecId, - current_adopt_generation: Generation, ) -> Result<(), Error> { use db::schema::saga::dsl; @@ -102,7 +97,6 @@ impl DataStore { let result = diesel::update(dsl::saga) .filter(dsl::id.eq(saga_id)) .filter(dsl::current_sec.eq(current_sec)) - .filter(dsl::adopt_generation.eq(current_adopt_generation)) .set(dsl::saga_state.eq(db::saga_types::SagaCachedState(new_state))) .check_if_exists::(saga_id) .execute_and_check(&*self.pool_connection_unauthorized().await?) @@ -119,20 +113,19 @@ impl DataStore { match result.status { UpdateStatus::Updated => Ok(()), - UpdateStatus::NotUpdatedButExists => Err(Error::invalid_request( - format!( - "failed to update saga {:?} with state {:?}: preconditions not met: \ - expected current_sec = {:?}, adopt_generation = {:?}, \ - but found current_sec = {:?}, adopt_generation = {:?}, state = {:?}", + UpdateStatus::NotUpdatedButExists => { + Err(Error::invalid_request(format!( + "failed to update saga {:?} with state {:?}:\ + preconditions not met: \ + expected current_sec = {:?}, \ + but found current_sec = {:?}, state = {:?}", saga_id, new_state, current_sec, - current_adopt_generation, result.found.current_sec, - result.found.adopt_generation, result.found.saga_state, - ) - )), + ))) + } } } @@ -207,16 +200,75 @@ impl DataStore { Ok(events) } + + /// Updates all sagas that are currently assigned to any of the SEC ids in + /// `sec_ids`, assigning them to `new_sec_id` instead. + /// + /// Generally, an SEC id corresponds to a Nexus id. This change causes the + /// Nexus instance `new_sec_id` to discover these sagas and resume executing + /// them the next time it performs saga recovery (which is normally on + /// startup and periodically). 
Generally, `new_sec_id` is the _current_ + /// Nexus instance and the caller should activate the saga recovery + /// background task after calling this function to immediately resume the + /// newly-assigned sagas. + /// + /// **Warning:** This operation is only safe if the other SECs `sec_ids` are + /// not currently running. If those SECs are still running, then two (or + /// more) SECs may wind up running the same saga concurrently. This would + /// likely violate implicit assumptions made by various saga actions, + /// leading to hard-to-debug errors and state corruption. + pub async fn sagas_reassign_sec( + &self, + opctx: &OpContext, + sec_ids: &[db::saga_types::SecId], + new_sec_id: db::saga_types::SecId, + ) -> Result { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + + let now = chrono::Utc::now(); + let conn = self.pool_connection_authorized(opctx).await?; + + // It would be more robust to do this in batches. However, Diesel does + // not appear to support the UPDATE ... LIMIT syntax using the normal + // builder. In practice, it's extremely unlikely we'd have so many + // in-progress sagas that this would be a problem. 
+ use db::schema::saga::dsl; + diesel::update( + dsl::saga + .filter(dsl::current_sec.is_not_null()) + .filter( + dsl::current_sec.eq_any( + sec_ids.into_iter().cloned().collect::>(), + ), + ) + .filter(dsl::saga_state.ne(db::saga_types::SagaCachedState( + steno::SagaCachedState::Done, + ))), + ) + .set(( + dsl::current_sec.eq(Some(new_sec_id)), + dsl::adopt_generation.eq(dsl::adopt_generation.add(1)), + dsl::adopt_time.eq(now), + )) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } #[cfg(test)] mod test { use super::*; use crate::db::datastore::test_utils::datastore_test; + use async_bb8_diesel::AsyncConnection; + use async_bb8_diesel::AsyncSimpleConnection; + use db::queries::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_db_model::{SagaNodeEvent, SecId}; use nexus_test_utils::db::test_setup_database; + use omicron_common::api::external::Generation; use omicron_test_utils::dev; use rand::seq::SliceRandom; + use std::collections::BTreeSet; use uuid::Uuid; // Tests pagination in listing sagas that are candidates for recovery @@ -440,7 +492,6 @@ mod test { node_cx.saga_id, steno::SagaCachedState::Running, node_cx.sec_id, - db::model::Generation::new(), ) .await .expect("updating state to Running again"); @@ -451,7 +502,6 @@ mod test { node_cx.saga_id, steno::SagaCachedState::Done, node_cx.sec_id, - db::model::Generation::new(), ) .await .expect("updating state to Done"); @@ -463,7 +513,6 @@ mod test { node_cx.saga_id, steno::SagaCachedState::Done, node_cx.sec_id, - db::model::Generation::new(), ) .await .expect("updating state to Done again"); @@ -509,4 +558,156 @@ mod test { SagaNodeEvent::new(event, self.sec_id) } } + + #[tokio::test] + async fn test_saga_reassignment() { + // Test setup + let logctx = dev::test_setup_log("test_saga_reassignment"); + let mut db = test_setup_database(&logctx.log).await; + let (_, datastore) = datastore_test(&logctx, &db).await; + let opctx = OpContext::for_tests(logctx.log.clone(), 
datastore.clone()); + + // Populate the database with a few different sagas: + // + // - assigned to SEC A: done, running, and unwinding + // - assigned to SEC B: done, running, and unwinding + // - assigned to SEC C: done, running, and unwinding + // - assigned to SEC D: done, running, and unwinding + // + // Then we'll reassign SECs B's and C's sagas to SEC A and check exactly + // which sagas were changed by this. This exercises: + // - that we don't touch A's sagas (the one we're assigning *to*) + // - that we do touch both B's and C's sagas (the ones we're assigning + // *from*) + // - that we don't touch D's sagas (some other SEC) + // - that we don't touch any "done" sagas + // - that we do touch both running and unwinding sagas + let mut sagas_to_insert = Vec::new(); + let sec_a = SecId(Uuid::new_v4()); + let sec_b = SecId(Uuid::new_v4()); + let sec_c = SecId(Uuid::new_v4()); + let sec_d = SecId(Uuid::new_v4()); + + for sec_id in [sec_a, sec_b, sec_c, sec_d] { + for state in [ + steno::SagaCachedState::Running, + steno::SagaCachedState::Unwinding, + steno::SagaCachedState::Done, + ] { + let params = steno::SagaCreateParams { + id: steno::SagaId(Uuid::new_v4()), + name: steno::SagaName::new("tewst saga"), + dag: serde_json::value::Value::Null, + state, + }; + + sagas_to_insert + .push(db::model::saga_types::Saga::new(sec_id, params)); + } + } + println!("sagas to insert: {:?}", sagas_to_insert); + + // These two sets are complements, but we write out the conditions to + // double-check that we've got it right. 
+ let sagas_affected: BTreeSet<_> = sagas_to_insert + .iter() + .filter_map(|saga| { + ((saga.creator == sec_b || saga.creator == sec_c) + && (saga.saga_state.0 == steno::SagaCachedState::Running + || saga.saga_state.0 + == steno::SagaCachedState::Unwinding)) + .then(|| saga.id) + }) + .collect(); + let sagas_unaffected: BTreeSet<_> = sagas_to_insert + .iter() + .filter_map(|saga| { + (saga.creator == sec_a + || saga.creator == sec_d + || saga.saga_state.0 == steno::SagaCachedState::Done) + .then(|| saga.id) + }) + .collect(); + println!("sagas affected: {:?}", sagas_affected); + println!("sagas UNaffected: {:?}", sagas_unaffected); + assert_eq!(sagas_affected.intersection(&sagas_unaffected).count(), 0); + assert_eq!( + sagas_affected.len() + sagas_unaffected.len(), + sagas_to_insert.len() + ); + + // Insert the sagas. + let count = { + use db::schema::saga::dsl; + let conn = datastore.pool_connection_for_tests().await.unwrap(); + diesel::insert_into(dsl::saga) + .values(sagas_to_insert) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .expect("successful insertion") + }; + assert_eq!(count, sagas_affected.len() + sagas_unaffected.len()); + + // Reassign uncompleted sagas from SECs B and C to SEC A. + let nreassigned = datastore + .sagas_reassign_sec(&opctx, &[sec_b, sec_c], sec_a) + .await + .expect("failed to re-assign sagas"); + + // Fetch all the sagas and check their states. 
+ let all_sagas: Vec<_> = datastore + .pool_connection_for_tests() + .await + .unwrap() + .transaction_async(|conn| async move { + use db::schema::saga::dsl; + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?; + dsl::saga + .select(nexus_db_model::Saga::as_select()) + .load_async(&conn) + .await + }) + .await + .unwrap(); + + for saga in all_sagas { + println!("checking saga: {:?}", saga); + let current_sec = saga.current_sec.unwrap(); + if sagas_affected.contains(&saga.id) { + assert!(saga.creator == sec_b || saga.creator == sec_c); + assert_eq!(current_sec, sec_a); + assert_eq!(*saga.adopt_generation, Generation::from(2)); + assert!( + saga.saga_state.0 == steno::SagaCachedState::Running + || saga.saga_state.0 + == steno::SagaCachedState::Unwinding + ); + } else if sagas_unaffected.contains(&saga.id) { + assert_eq!(current_sec, saga.creator); + assert_eq!(*saga.adopt_generation, Generation::from(1)); + // Its SEC and state could be anything since we've deliberately + // included sagas with various states and SECs that should not + // be affected by the reassignment. + } else { + println!( + "ignoring saga that was not created by this test: {:?}", + saga + ); + } + } + + assert_eq!(nreassigned, sagas_affected.len()); + + // If we do it again, we should make no changes. + let nreassigned = datastore + .sagas_reassign_sec(&opctx, &[sec_b, sec_c], sec_a) + .await + .expect("failed to re-assign sagas"); + assert_eq!(nreassigned, 0); + + // Test cleanup + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/db-queries/src/db/sec_store.rs b/nexus/db-queries/src/db/sec_store.rs index 0dcc3aa717..920ff3aee1 100644 --- a/nexus/db-queries/src/db/sec_store.rs +++ b/nexus/db-queries/src/db/sec_store.rs @@ -4,7 +4,7 @@ //! 
Implementation of [`steno::SecStore`] backed by Omicron's database -use crate::db::{self, model::Generation}; +use crate::db; use anyhow::Context; use async_trait::async_trait; use dropshot::HttpError; @@ -102,12 +102,7 @@ impl steno::SecStore for CockroachDbSecStore { &log, || { self.datastore - .saga_update_state( - id, - update, - self.sec_id, - Generation::new(), - ) + .saga_update_state(id, update, self.sec_id) .map_err(backoff::BackoffError::transient) }, "updating saga state", diff --git a/nexus/reconfigurator/execution/src/cockroachdb.rs b/nexus/reconfigurator/execution/src/cockroachdb.rs index 498944598d..277f5f91c4 100644 --- a/nexus/reconfigurator/execution/src/cockroachdb.rs +++ b/nexus/reconfigurator/execution/src/cockroachdb.rs @@ -39,6 +39,7 @@ mod test { use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::CockroachDbClusterVersion; use std::sync::Arc; + use uuid::Uuid; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; @@ -101,7 +102,7 @@ mod test { datastore, resolver, &blueprint, - "test-suite", + Uuid::new_v4(), &overrides, ) .await diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 846d19ead3..4395944b25 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -1250,7 +1250,7 @@ mod test { datastore, resolver, &blueprint, - "test-suite", + Uuid::new_v4(), &overrides, ) .await @@ -1390,7 +1390,7 @@ mod test { datastore, resolver, &blueprint2, - "test-suite", + Uuid::new_v4(), &overrides, ) .await @@ -1464,7 +1464,7 @@ mod test { datastore, resolver, &blueprint2, - "test-suite", + Uuid::new_v4(), &overrides, ) .await @@ -1500,7 +1500,7 @@ mod test { datastore, resolver, &blueprint2, - "test-suite", + Uuid::new_v4(), &overrides, ) .await @@ -1594,7 +1594,7 @@ mod test { datastore, resolver, &blueprint, - "test-suite", + Uuid::new_v4(), &overrides, ) .await diff --git 
a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index bb525b1b8b..8606187762 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -24,6 +24,7 @@ use slog::info; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::net::SocketAddrV6; +use uuid::Uuid; mod cockroachdb; mod datasets; @@ -31,6 +32,7 @@ mod dns; mod omicron_physical_disks; mod omicron_zones; mod overridables; +mod sagas; mod sled_state; pub use dns::blueprint_external_dns_config; @@ -73,38 +75,32 @@ impl From for Sled { /// /// The assumption is that callers are running this periodically or in a loop to /// deal with transient errors or changes in the underlying system state. -pub async fn realize_blueprint( +pub async fn realize_blueprint( opctx: &OpContext, datastore: &DataStore, resolver: &Resolver, blueprint: &Blueprint, - nexus_label: S, -) -> Result<(), Vec> -where - String: From, -{ + nexus_id: Uuid, +) -> Result> { realize_blueprint_with_overrides( opctx, datastore, resolver, blueprint, - nexus_label, + nexus_id, &Default::default(), ) .await } -pub async fn realize_blueprint_with_overrides( +pub async fn realize_blueprint_with_overrides( opctx: &OpContext, datastore: &DataStore, resolver: &Resolver, blueprint: &Blueprint, - nexus_label: S, + nexus_id: Uuid, overrides: &Overridables, -) -> Result<(), Vec> -where - String: From, -{ +) -> Result> { let opctx = opctx.child(BTreeMap::from([( "comment".to_string(), blueprint.comment.clone(), @@ -182,7 +178,7 @@ where dns::deploy_dns( &opctx, datastore, - String::from(nexus_label), + nexus_id.to_string(), blueprint, &sleds_by_id, overrides, @@ -215,14 +211,43 @@ where omicron_physical_disks::decommission_expunged_disks(&opctx, datastore) .await?; + // From this point on, we'll assume that any errors that we encounter do + // *not* require stopping execution. We'll just accumulate them and return + // them all at the end. 
+ // + // TODO We should probably do this with more of the errors above, too. + let mut errors = Vec::new(); + + // For any expunged Nexus zones, re-assign in-progress sagas to some other + // Nexus. If this fails for some reason, it doesn't affect anything else. + let sec_id = nexus_db_model::SecId(nexus_id); + let reassigned = sagas::reassign_sagas_from_expunged( + &opctx, datastore, blueprint, sec_id, + ) + .await + .context("failed to re-assign sagas"); + let needs_saga_recovery = match reassigned { + Ok(needs_recovery) => needs_recovery, + Err(error) => { + errors.push(error); + false + } + }; + // This is likely to error if any cluster upgrades are in progress (which // can take some time), so it should remain at the end so that other parts // of the blueprint can progress normally. - cockroachdb::ensure_settings(&opctx, datastore, blueprint) - .await - .map_err(|err| vec![err])?; + if let Err(error) = + cockroachdb::ensure_settings(&opctx, datastore, blueprint).await + { + errors.push(error); + } - Ok(()) + if errors.is_empty() { + Ok(needs_saga_recovery) + } else { + Err(errors) + } } #[cfg(test)] diff --git a/nexus/reconfigurator/execution/src/sagas.rs b/nexus/reconfigurator/execution/src/sagas.rs new file mode 100644 index 0000000000..458328ef00 --- /dev/null +++ b/nexus/reconfigurator/execution/src/sagas.rs @@ -0,0 +1,71 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Re-assign sagas from expunged Nexus zones + +use nexus_db_model::SecId; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintZoneFilter; +use omicron_common::api::external::Error; +use omicron_uuid_kinds::GenericUuid; +use slog::{debug, info, warn}; + +/// For each expunged Nexus zone, re-assign sagas owned by that Nexus to the +/// specified nexus (`nexus_id`). +pub(crate) async fn reassign_sagas_from_expunged( + opctx: &OpContext, + datastore: &DataStore, + blueprint: &Blueprint, + nexus_id: SecId, +) -> Result { + let log = &opctx.log; + + // Identify any Nexus zones that have been expunged and need to have sagas + // re-assigned. + // + // TODO: Currently, we take any expunged Nexus instances and attempt to + // assign all their sagas to ourselves. Per RFD 289, we can only re-assign + // sagas between two instances of Nexus that are at the same version. Right + // now this can't happen so there's nothing to do here to ensure that + // constraint. However, once we support allowing the control plane to be + // online _during_ an upgrade, there may be multiple different Nexus + // instances running at the same time. At that point, we will need to make + // sure that we only ever try to assign ourselves sagas from other Nexus + // instances that we know are running the same version as ourselves. 
+ let nexus_zone_ids: Vec<_> = blueprint + .all_omicron_zones(BlueprintZoneFilter::Expunged) + .filter_map(|(_, z)| { + z.zone_type + .is_nexus() + .then(|| nexus_db_model::SecId(z.id.into_untyped_uuid())) + }) + .collect(); + + debug!(log, "re-assign sagas: found Nexus instances"; + "nexus_zone_ids" => ?nexus_zone_ids); + + let result = + datastore.sagas_reassign_sec(opctx, &nexus_zone_ids, nexus_id).await; + + match result { + Ok(count) => { + info!(log, "re-assigned sagas"; + "nexus_zone_ids" => ?nexus_zone_ids, + "count" => count, + ); + + Ok(count != 0) + } + Err(error) => { + warn!(log, "failed to re-assign sagas"; + "nexus_zone_ids" => ?nexus_zone_ids, + &error, + ); + + Err(error) + } + } +} diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 6bd805a491..37c276fa07 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -448,7 +448,8 @@ impl BackgroundTasksInitializer { datastore.clone(), resolver.clone(), rx_blueprint.clone(), - nexus_id.to_string(), + nexus_id, + task_saga_recovery.clone(), ); let rx_blueprint_exec = blueprint_executor.watcher(); driver.register(TaskDefinition { diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index ee780812ae..b430270ec9 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -4,7 +4,7 @@ //! 
Background task for realizing a plan blueprint -use crate::app::background::BackgroundTask; +use crate::app::background::{Activator, BackgroundTask}; use futures::future::BoxFuture; use futures::FutureExt; use internal_dns::resolver::Resolver; @@ -14,6 +14,7 @@ use nexus_types::deployment::{Blueprint, BlueprintTarget}; use serde_json::json; use std::sync::Arc; use tokio::sync::watch; +use uuid::Uuid; /// Background task that takes a [`Blueprint`] and realizes the change to /// the state of the system based on the `Blueprint`. @@ -21,8 +22,9 @@ pub struct BlueprintExecutor { datastore: Arc, resolver: Resolver, rx_blueprint: watch::Receiver>>, - nexus_label: String, + nexus_id: Uuid, tx: watch::Sender, + saga_recovery: Activator, } impl BlueprintExecutor { @@ -32,10 +34,18 @@ impl BlueprintExecutor { rx_blueprint: watch::Receiver< Option>, >, - nexus_label: String, + nexus_id: Uuid, + saga_recovery: Activator, ) -> BlueprintExecutor { let (tx, _) = watch::channel(0); - BlueprintExecutor { datastore, resolver, rx_blueprint, nexus_label, tx } + BlueprintExecutor { + datastore, + resolver, + rx_blueprint, + nexus_id, + tx, + saga_recovery, + } } pub fn watcher(&self) -> watch::Receiver { @@ -81,16 +91,23 @@ impl BlueprintExecutor { &self.datastore, &self.resolver, blueprint, - &self.nexus_label, + self.nexus_id, ) .await; // Trigger anybody waiting for this to finish. self.tx.send_modify(|count| *count = *count + 1); + // If executing the blueprint requires activating the saga recovery + // background task, do that now. 
+ info!(&opctx.log, "activating saga recovery task"); + if let Ok(true) = result { + self.saga_recovery.activate(); + } + // Return the result as a `serde_json::Value` match result { - Ok(()) => json!({}), + Ok(_) => json!({}), Err(errors) => { let errors: Vec<_> = errors.into_iter().map(|e| format!("{:#}", e)).collect(); @@ -115,7 +132,7 @@ impl BackgroundTask for BlueprintExecutor { #[cfg(test)] mod test { use super::BlueprintExecutor; - use crate::app::background::BackgroundTask; + use crate::app::background::{Activator, BackgroundTask}; use httptest::matchers::{all_of, request}; use httptest::responders::status_code; use httptest::Expectation; @@ -261,7 +278,8 @@ mod test { datastore.clone(), resolver.clone(), blueprint_rx, - String::from("test-suite"), + Uuid::new_v4(), + Activator::new(), ); // Now we're ready. From b927049862e2161f08fae2480dadc1b0a4572b26 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 16 Aug 2024 17:10:24 -0700 Subject: [PATCH 26/51] add issue templates for flaky tests (#4833) --- .../test-flake-from-buildomat.md | 65 +++++++++++++++++++ .../test-flake-from-local-failure.md | 42 ++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/test-flake-from-buildomat.md create mode 100644 .github/ISSUE_TEMPLATE/test-flake-from-local-failure.md diff --git a/.github/ISSUE_TEMPLATE/test-flake-from-buildomat.md b/.github/ISSUE_TEMPLATE/test-flake-from-buildomat.md new file mode 100644 index 0000000000..eb1ac2c6e9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/test-flake-from-buildomat.md @@ -0,0 +1,65 @@ +--- +name: Test flake from buildomat +about: Report a test failure from a CI run either on "main" or on a PR where you don't + think the PR changes caused the failure +title: 'test failed in CI: NAME_OF_TEST' +labels: Test Flake +assignees: '' + +--- + + + +This test failed on a CI run on **"main" (or pull request XXX)**: + + Link here to the GitHub page showing the test failure. 
+ If it's from a PR, this might look like: + https://github.com/oxidecomputer/omicron/pull/4588/checks?check_run_id=19198066410 + It could also be a link to a failure on "main", which would look like: + https://github.com/oxidecomputer/omicron/runs/20589829185 + This is useful because it shows which commit failed and all the surrounding context. + +Log showing the specific test failure: + + + Link here to the specific line of output from the buildomat log showing the failure: + https://buildomat.eng.oxide.computer/wg/0/details/01HGH32FQYKZJNX9J62HNABKPA/31C5jyox8tyHUIuDDevKkXlDZCyNw143z4nOq8wLl3xtjKzT/01HGH32V3P0HH6B56S46AJAT63#S4455 + This is useful because it shows all the details about the test failure. + +Excerpt from the log showing the failure: + +``` +Paste here an excerpt from the log. +This is redundant with the log above but helps people searching for the error message +or test name. It also works if the link above becomes unavailable. +Here's an example: + +------ + +failures: + integration_tests::updates::test_update_races + +test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 4 filtered out; finished in 4.84s + + +--- STDERR: wicketd::mod integration_tests::updates::test_update_races --- +log file: /var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log +note: configured to log to "/var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log" +hint: Generated a random key: +hint: +hint: ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554 +hint: +hint: To modify this repository, you will need this key. Use the -k/--key +hint: command line flag or the TUFACEOUS_KEY environment variable: +hint: +hint: export TUFACEOUS_KEY=ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554 +hint: +hint: To prevent this default behavior, use --no-generate-key. 
+thread 'integration_tests::updates::test_update_races' panicked at wicketd/tests/integration_tests/updates.rs:482:41: +at least one event +stack backtrace: +... +``` diff --git a/.github/ISSUE_TEMPLATE/test-flake-from-local-failure.md b/.github/ISSUE_TEMPLATE/test-flake-from-local-failure.md new file mode 100644 index 0000000000..e963c83926 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/test-flake-from-local-failure.md @@ -0,0 +1,42 @@ +--- +name: Test flake from local failure +about: Report a test failure that happened locally (not CI) that you believe is not + related to local changes +title: 'test failure: TEST_NAME' +labels: Test Flake +assignees: '' + +--- + +On branch **BRANCH** commit **COMMIT**, I saw this test failure: + +``` +Include the trimmed, relevant output from `cargo nextest`. Here's an example: + +------- +failures: + integration_tests::updates::test_update_races + +test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 4 filtered out; finished in 4.84s + + +--- STDERR: wicketd::mod integration_tests::updates::test_update_races --- +log file: /var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log +note: configured to log to "/var/tmp/omicron_tmp/mod-ae2eb84a30e4213e-test_artifact_upload_while_updating.14133.0.log" +hint: Generated a random key: +hint: +hint: ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554 +hint: +hint: To modify this repository, you will need this key. Use the -k/--key +hint: command line flag or the TUFACEOUS_KEY environment variable: +hint: +hint: export TUFACEOUS_KEY=ed25519:826a8f799d4cc767158c990a60f721215bfd71f8f94fa88ba1960037bd6e5554 +hint: +hint: To prevent this default behavior, use --no-generate-key. +thread 'integration_tests::updates::test_update_races' panicked at wicketd/tests/integration_tests/updates.rs:482:41: +at least one event +stack backtrace: +... 
+``` + +**NOTE: Consider attaching any log files produced by the test.** From fac7e2780043fa60f35c7fc8c673af0d3dc7842d Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 16 Aug 2024 20:05:38 -0700 Subject: [PATCH 27/51] [oximeter] split oximeter-impl into -types, -schema and -test-utils (#6355) The Nexus external API uses several types from oximeter, and in the interest of keeping the upcoming nexus-external-api crate's graph small, it makes sense to split it into three crates. The crates are: * **oximeter-types**: Core type definitions (there are a lot of them). * **oximeter-schema**: Library for working with schemas. * **oximeter-test-utils**: Test utilities. The names match the other services in omicron, e.g. `sled-agent-types` and `nexus-test-utils`. --- Cargo.lock | 97 +++-- Cargo.toml | 12 +- oximeter/db/Cargo.toml | 1 + oximeter/db/src/client/mod.rs | 27 +- oximeter/db/src/model.rs | 9 +- oximeter/db/tests/integration_test.rs | 13 +- oximeter/impl/src/test_util.rs | 130 ------- oximeter/oximeter/Cargo.toml | 3 +- oximeter/oximeter/src/lib.rs | 9 +- oximeter/schema/Cargo.toml | 24 ++ .../src/bin/oximeter-schema.rs | 6 +- .../src/schema => schema/src}/codegen.rs | 349 +++++++++--------- .../{impl/src/schema => schema/src}/ir.rs | 20 +- oximeter/schema/src/lib.rs | 12 + oximeter/test-utils/Cargo.toml | 15 + oximeter/test-utils/src/lib.rs | 295 +++++++++++++++ oximeter/timeseries-macro/Cargo.toml | 3 +- oximeter/timeseries-macro/src/lib.rs | 4 +- oximeter/{impl => types}/Cargo.toml | 12 +- oximeter/{impl => types}/benches/quantile.rs | 2 +- oximeter/{impl => types}/src/histogram.rs | 8 +- oximeter/{impl => types}/src/lib.rs | 7 +- oximeter/{impl => types}/src/quantile.rs | 16 +- .../src/schema/mod.rs => types/src/schema.rs} | 125 ------- oximeter/{impl => types}/src/traits.rs | 22 +- oximeter/{impl => types}/src/types.rs | 32 +- .../{impl => types}/tests/fail/failures.rs | 0 .../tests/fail/failures.stderr | 0 .../{impl => types}/tests/test_compilation.rs | 0 29 
files changed, 663 insertions(+), 590 deletions(-) delete mode 100644 oximeter/impl/src/test_util.rs create mode 100644 oximeter/schema/Cargo.toml rename oximeter/{oximeter => schema}/src/bin/oximeter-schema.rs (93%) rename oximeter/{impl/src/schema => schema/src}/codegen.rs (73%) rename oximeter/{impl/src/schema => schema/src}/ir.rs (99%) create mode 100644 oximeter/schema/src/lib.rs create mode 100644 oximeter/test-utils/Cargo.toml create mode 100644 oximeter/test-utils/src/lib.rs rename oximeter/{impl => types}/Cargo.toml (78%) rename oximeter/{impl => types}/benches/quantile.rs (97%) rename oximeter/{impl => types}/src/histogram.rs (99%) rename oximeter/{impl => types}/src/lib.rs (92%) rename oximeter/{impl => types}/src/quantile.rs (97%) rename oximeter/{impl/src/schema/mod.rs => types/src/schema.rs} (75%) rename oximeter/{impl => types}/src/traits.rs (96%) rename oximeter/{impl => types}/src/types.rs (97%) rename oximeter/{impl => types}/tests/fail/failures.rs (100%) rename oximeter/{impl => types}/tests/fail/failures.stderr (100%) rename oximeter/{impl => types}/tests/test_compilation.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 849c82f1f0..3b62f3001a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6821,9 +6821,10 @@ dependencies = [ "chrono", "clap", "omicron-workspace-hack", - "oximeter-impl", "oximeter-macro-impl", + "oximeter-schema", "oximeter-timeseries-macro", + "oximeter-types", "prettyplease", "syn 2.0.74", "toml 0.8.19", @@ -6925,6 +6926,7 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "oximeter", + "oximeter-test-utils", "peg", "reedline", "regex", @@ -6948,39 +6950,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "oximeter-impl" -version = "0.1.0" -dependencies = [ - "approx", - "bytes", - "chrono", - "criterion", - "float-ord", - "heck 0.5.0", - "num", - "omicron-common", - "omicron-workspace-hack", - "oximeter-macro-impl", - "prettyplease", - "proc-macro2", - "quote", - "rand", - "rand_distr", - "regex", - 
"rstest", - "schemars", - "serde", - "serde_json", - "slog-error-chain", - "strum", - "syn 2.0.74", - "thiserror", - "toml 0.8.19", - "trybuild", - "uuid", -] - [[package]] name = "oximeter-instruments" version = "0.1.0" @@ -7041,17 +7010,75 @@ dependencies = [ "uuid", ] +[[package]] +name = "oximeter-schema" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "clap", + "heck 0.5.0", + "omicron-workspace-hack", + "oximeter-types", + "prettyplease", + "proc-macro2", + "quote", + "schemars", + "serde", + "slog-error-chain", + "syn 2.0.74", + "toml 0.8.19", +] + +[[package]] +name = "oximeter-test-utils" +version = "0.1.0" +dependencies = [ + "chrono", + "omicron-workspace-hack", + "oximeter-macro-impl", + "oximeter-types", + "uuid", +] + [[package]] name = "oximeter-timeseries-macro" version = "0.1.0" dependencies = [ "omicron-workspace-hack", - "oximeter-impl", + "oximeter-schema", + "oximeter-types", "proc-macro2", "quote", "syn 2.0.74", ] +[[package]] +name = "oximeter-types" +version = "0.1.0" +dependencies = [ + "approx", + "bytes", + "chrono", + "criterion", + "float-ord", + "num", + "omicron-common", + "omicron-workspace-hack", + "oximeter-macro-impl", + "rand", + "rand_distr", + "regex", + "rstest", + "schemars", + "serde", + "serde_json", + "strum", + "thiserror", + "trybuild", + "uuid", +] + [[package]] name = "oxlog" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 1b62af959d..ea687936e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,12 +77,14 @@ members = [ "oximeter/api", "oximeter/collector", "oximeter/db", - "oximeter/impl", "oximeter/instruments", "oximeter/oximeter-macro-impl", "oximeter/oximeter", "oximeter/producer", + "oximeter/schema", + "oximeter/test-utils", "oximeter/timeseries-macro", + "oximeter/types", "package", "passwords", "rpaths", @@ -191,12 +193,14 @@ default-members = [ "oximeter/api", "oximeter/collector", "oximeter/db", - "oximeter/impl", "oximeter/instruments", "oximeter/oximeter-macro-impl", 
"oximeter/oximeter", "oximeter/producer", + "oximeter/schema", + "oximeter/test-utils", "oximeter/timeseries-macro", + "oximeter/types", "package", "passwords", "rpaths", @@ -459,11 +463,13 @@ oximeter-api = { path = "oximeter/api" } oximeter-client = { path = "clients/oximeter-client" } oximeter-db = { path = "oximeter/db/", default-features = false } oximeter-collector = { path = "oximeter/collector" } -oximeter-impl = { path = "oximeter/impl" } oximeter-instruments = { path = "oximeter/instruments" } oximeter-macro-impl = { path = "oximeter/oximeter-macro-impl" } oximeter-producer = { path = "oximeter/producer" } +oximeter-schema = { path = "oximeter/schema" } +oximeter-test-utils = { path = "oximeter/test-utils" } oximeter-timeseries-macro = { path = "oximeter/timeseries-macro" } +oximeter-types = { path = "oximeter/types" } p256 = "0.13" parse-display = "0.10.0" partial-io = { version = "0.5.4", features = ["proptest1", "tokio1"] } diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index e3cf089cb5..6a7cedbc22 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -89,6 +89,7 @@ expectorate.workspace = true indexmap.workspace = true itertools.workspace = true omicron-test-utils.workspace = true +oximeter-test-utils.workspace = true slog-dtrace.workspace = true sqlformat.workspace = true sqlparser.workspace = true diff --git a/oximeter/db/src/client/mod.rs b/oximeter/db/src/client/mod.rs index 30ae4b68d2..176e1bd5f8 100644 --- a/oximeter/db/src/client/mod.rs +++ b/oximeter/db/src/client/mod.rs @@ -1191,7 +1191,6 @@ mod tests { }; use omicron_test_utils::dev::test_setup_log; use oximeter::histogram::Histogram; - use oximeter::test_util; use oximeter::types::MissingDatum; use oximeter::Datum; use oximeter::FieldValue; @@ -1723,7 +1722,7 @@ mod tests { let samples = { let mut s = Vec::with_capacity(8); for _ in 0..s.capacity() { - s.push(test_util::make_hist_sample()) + s.push(oximeter_test_utils::make_hist_sample()) } s }; @@ -1762,7 
+1761,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); client.insert_samples(&[sample]).await.unwrap(); let bad_name = name_mismatch::TestTarget { @@ -1770,7 +1769,7 @@ mod tests { name2: "second_name".into(), num: 2, }; - let metric = test_util::TestMetric { + let metric = oximeter_test_utils::TestMetric { id: uuid::Uuid::new_v4(), good: true, datum: 1, @@ -1792,7 +1791,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); // Verify that this sample is considered new, i.e., we return rows to update the timeseries // schema table. @@ -1867,7 +1866,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let sample = samples.first().unwrap(); @@ -1956,7 +1955,7 @@ mod tests { // we'd like to exercise the logic of ClickHouse's replacing merge tree engine. 
let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; async fn assert_table_count( @@ -2631,7 +2630,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let original_schema = client.schema.lock().await.clone(); @@ -2656,7 +2655,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let limit = 100u32.try_into().unwrap(); @@ -2691,7 +2690,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = test_util::generate_test_samples(2, 2, 2, 2); + let samples = oximeter_test_utils::generate_test_samples(2, 2, 2, 2); client.insert_samples(&samples).await?; let limit = 7u32.try_into().unwrap(); @@ -3364,7 +3363,7 @@ mod tests { // The values here don't matter much, we just want to check that // the database data hasn't been dropped. assert_eq!(0, get_schema_count(&client).await); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); client.insert_samples(&[sample.clone()]).await.unwrap(); assert_eq!(1, get_schema_count(&client).await); @@ -3438,7 +3437,7 @@ mod tests { // The values here don't matter much, we just want to check that // the database data gets dropped later. 
assert_eq!(0, get_schema_count(&client).await); - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); client.insert_samples(&[sample.clone()]).await.unwrap(); assert_eq!(1, get_schema_count(&client).await); @@ -3464,7 +3463,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = [test_util::make_sample()]; + let samples = [oximeter_test_utils::make_sample()]; client.insert_samples(&samples).await.unwrap(); // Get the count of schema directly from the DB, which should have just @@ -3549,7 +3548,7 @@ mod tests { let client = Client::new(address, &log); db_type.init_db(&client).await.unwrap(); - let samples = [test_util::make_sample()]; + let samples = [oximeter_test_utils::make_sample()]; // We're using the components of the `insert_samples()` method here, // which has been refactored explicitly for this test. We need to insert diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index f27df4ed49..3e34ad10e3 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -1880,7 +1880,6 @@ mod tests { use super::*; use chrono::Timelike; use oximeter::histogram::Record; - use oximeter::test_util; use oximeter::Datum; #[test] @@ -1983,7 +1982,7 @@ mod tests { #[test] fn test_unroll_from_source() { - let sample = test_util::make_sample(); + let sample = oximeter_test_utils::make_sample(); let out = unroll_from_source(&sample); assert_eq!(out["oximeter.fields_string"].len(), 2); assert_eq!(out["oximeter.fields_i64"].len(), 1); @@ -2003,8 +2002,8 @@ mod tests { // datum. 
#[test] fn test_unroll_missing_measurement_row() { - let sample = test_util::make_sample(); - let missing_sample = test_util::make_missing_sample(); + let sample = oximeter_test_utils::make_sample(); + let missing_sample = oximeter_test_utils::make_missing_sample(); let (table_name, row) = unroll_measurement_row(&sample); let (missing_table_name, missing_row) = unroll_measurement_row(&missing_sample); @@ -2022,7 +2021,7 @@ mod tests { #[test] fn test_unroll_measurement_row() { - let sample = test_util::make_hist_sample(); + let sample = oximeter_test_utils::make_hist_sample(); let (table_name, row) = unroll_measurement_row(&sample); assert_eq!(table_name, "oximeter.measurements_histogramf64"); let unpacked: HistogramF64MeasurementRow = diff --git a/oximeter/db/tests/integration_test.rs b/oximeter/db/tests/integration_test.rs index 732683c414..f5d81d51d1 100644 --- a/oximeter/db/tests/integration_test.rs +++ b/oximeter/db/tests/integration_test.rs @@ -10,7 +10,6 @@ use clickward::{ use dropshot::test_util::log_prefix_for_test; use omicron_test_utils::dev::poll; use omicron_test_utils::dev::test_setup_log; -use oximeter::test_util; use oximeter_db::{Client, DbWrite, OxqlResult, Sample, TestDbWrite}; use slog::{debug, info, Logger}; use std::collections::BTreeSet; @@ -199,7 +198,7 @@ async fn test_cluster() -> anyhow::Result<()> { // Let's write some samples to our first replica and wait for them to show // up on replica 2. 
let start = tokio::time::Instant::now(); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -261,7 +260,7 @@ async fn test_cluster() -> anyhow::Result<()> { info!(log, "successfully stopped server 1"); // Generate some new samples and insert them at replica3 - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -298,7 +297,7 @@ async fn test_cluster() -> anyhow::Result<()> { .expect("failed to get samples from client1"); // We still have a quorum (2 of 3 keepers), so we should be able to insert - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -321,7 +320,7 @@ async fn test_cluster() -> anyhow::Result<()> { .expect("failed to get samples from client1"); info!(log, "Attempting to insert samples without keeper quorum"); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -350,7 +349,7 @@ async fn test_cluster() -> anyhow::Result<()> { ) .await .expect("failed to sync keepers"); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, @@ -370,7 +369,7 @@ async fn test_cluster() -> anyhow::Result<()> { ) .await .expect("failed to sync keepers"); - let samples = test_util::generate_test_samples( + let samples = oximeter_test_utils::generate_test_samples( input.n_projects, input.n_instances, input.n_cpus, diff --git a/oximeter/impl/src/test_util.rs b/oximeter/impl/src/test_util.rs deleted file mode 100644 index c2ac7b34bd..0000000000 --- a/oximeter/impl/src/test_util.rs +++ /dev/null @@ -1,130 +0,0 @@ -// This Source Code Form 
is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Utilities for testing the oximeter crate. -// Copyright 2024 Oxide Computer Company - -use crate::histogram; -use crate::histogram::{Histogram, Record}; -use crate::types::{Cumulative, Sample}; -use uuid::Uuid; - -#[derive(oximeter::Target)] -pub struct TestTarget { - pub name1: String, - pub name2: String, - pub num: i64, -} - -impl Default for TestTarget { - fn default() -> Self { - TestTarget { - name1: "first_name".into(), - name2: "second_name".into(), - num: 0, - } - } -} - -#[derive(oximeter::Metric)] -pub struct TestMetric { - pub id: Uuid, - pub good: bool, - pub datum: i64, -} - -#[derive(oximeter::Metric)] -pub struct TestCumulativeMetric { - pub id: Uuid, - pub good: bool, - pub datum: Cumulative, -} - -#[derive(oximeter::Metric)] -pub struct TestHistogram { - pub id: Uuid, - pub good: bool, - pub datum: Histogram, -} - -const ID: Uuid = uuid::uuid!("e00ced4d-39d1-446a-ae85-a67f05c9750b"); - -pub fn make_sample() -> Sample { - let target = TestTarget::default(); - let metric = TestMetric { id: ID, good: true, datum: 1 }; - Sample::new(&target, &metric).unwrap() -} - -pub fn make_missing_sample() -> Sample { - let target = TestTarget::default(); - let metric = TestMetric { id: ID, good: true, datum: 1 }; - Sample::new_missing(&target, &metric).unwrap() -} - -pub fn make_hist_sample() -> Sample { - let target = TestTarget::default(); - let mut hist = histogram::Histogram::new(&[0.0, 5.0, 10.0]).unwrap(); - hist.sample(1.0).unwrap(); - hist.sample(2.0).unwrap(); - hist.sample(6.0).unwrap(); - let metric = TestHistogram { id: ID, good: true, datum: hist }; - Sample::new(&target, &metric).unwrap() -} - -/// A target identifying a single virtual machine instance -#[derive(Debug, Clone, Copy, oximeter::Target)] -pub struct VirtualMachine { - pub project_id: Uuid, - pub 
instance_id: Uuid, -} - -/// A metric recording the total time a vCPU is busy, by its ID -#[derive(Debug, Clone, Copy, oximeter::Metric)] -pub struct CpuBusy { - cpu_id: i64, - datum: Cumulative, -} - -pub fn generate_test_samples( - n_projects: usize, - n_instances: usize, - n_cpus: usize, - n_samples: usize, -) -> Vec { - let n_timeseries = n_projects * n_instances * n_cpus; - let mut samples = Vec::with_capacity(n_samples * n_timeseries); - for _ in 0..n_projects { - let project_id = Uuid::new_v4(); - for _ in 0..n_instances { - let vm = VirtualMachine { project_id, instance_id: Uuid::new_v4() }; - for cpu in 0..n_cpus { - for sample in 0..n_samples { - let cpu_busy = CpuBusy { - cpu_id: cpu as _, - datum: Cumulative::new(sample as f64), - }; - let sample = Sample::new(&vm, &cpu_busy).unwrap(); - samples.push(sample); - } - } - } - } - samples -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_gen_test_samples() { - let (n_projects, n_instances, n_cpus, n_samples) = (2, 2, 2, 2); - let samples = - generate_test_samples(n_projects, n_instances, n_cpus, n_samples); - assert_eq!( - samples.len(), - n_projects * n_instances * n_cpus * n_samples - ); - } -} diff --git a/oximeter/oximeter/Cargo.toml b/oximeter/oximeter/Cargo.toml index c04d1bd3ae..63b370bee6 100644 --- a/oximeter/oximeter/Cargo.toml +++ b/oximeter/oximeter/Cargo.toml @@ -13,9 +13,10 @@ anyhow.workspace = true clap.workspace = true chrono.workspace = true omicron-workspace-hack.workspace = true -oximeter-impl.workspace = true oximeter-macro-impl.workspace = true +oximeter-schema.workspace = true oximeter-timeseries-macro.workspace = true +oximeter-types.workspace = true prettyplease.workspace = true syn.workspace = true toml.workspace = true diff --git a/oximeter/oximeter/src/lib.rs b/oximeter/oximeter/src/lib.rs index 5ec6a49e5c..913318b8a8 100644 --- a/oximeter/oximeter/src/lib.rs +++ b/oximeter/oximeter/src/lib.rs @@ -185,14 +185,15 @@ //! 
`Producer`s may be registered with the same `ProducerServer`, each with potentially different //! sampling intervals. -pub use oximeter_impl::*; +pub use oximeter_macro_impl::{Metric, Target}; pub use oximeter_timeseries_macro::use_timeseries; +pub use oximeter_types::*; #[cfg(test)] mod test { - use oximeter_impl::schema::ir::load_schema; - use oximeter_impl::schema::{FieldSource, SCHEMA_DIRECTORY}; - use oximeter_impl::TimeseriesSchema; + use oximeter_schema::ir::load_schema; + use oximeter_types::schema::{FieldSource, SCHEMA_DIRECTORY}; + use oximeter_types::TimeseriesSchema; use std::collections::BTreeMap; use std::fs; diff --git a/oximeter/schema/Cargo.toml b/oximeter/schema/Cargo.toml new file mode 100644 index 0000000000..fe2e28705a --- /dev/null +++ b/oximeter/schema/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "oximeter-schema" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +chrono.workspace = true +clap.workspace = true +heck.workspace = true +omicron-workspace-hack.workspace = true +oximeter-types.workspace = true +prettyplease.workspace = true +proc-macro2.workspace = true +quote.workspace = true +schemars.workspace = true +serde.workspace = true +slog-error-chain.workspace = true +syn.workspace = true +toml.workspace = true diff --git a/oximeter/oximeter/src/bin/oximeter-schema.rs b/oximeter/schema/src/bin/oximeter-schema.rs similarity index 93% rename from oximeter/oximeter/src/bin/oximeter-schema.rs rename to oximeter/schema/src/bin/oximeter-schema.rs index 14fb31b1e8..5595a28639 100644 --- a/oximeter/oximeter/src/bin/oximeter-schema.rs +++ b/oximeter/schema/src/bin/oximeter-schema.rs @@ -9,7 +9,7 @@ use anyhow::Context as _; use clap::Parser; use clap::Subcommand; -use oximeter::schema::ir::TimeseriesDefinition; +use oximeter_schema::ir::TimeseriesDefinition; use std::num::NonZeroU8; use std::path::PathBuf; @@ -56,7 +56,7 @@ fn main() -> anyhow::Result<()> { 
println!("{def:#?}"); } Cmd::Schema { timeseries, version } => { - let schema = oximeter_impl::schema::ir::load_schema(&contents)?; + let schema = oximeter_schema::ir::load_schema(&contents)?; match (timeseries, version) { (None, None) => { for each in schema.into_iter() { @@ -87,7 +87,7 @@ fn main() -> anyhow::Result<()> { } } Cmd::Emit => { - let code = oximeter::schema::codegen::use_timeseries(&contents)?; + let code = oximeter_schema::codegen::use_timeseries(&contents)?; let formatted = prettyplease::unparse(&syn::parse_file(&format!("{code}"))?); println!("{formatted}"); diff --git a/oximeter/impl/src/schema/codegen.rs b/oximeter/schema/src/codegen.rs similarity index 73% rename from oximeter/impl/src/schema/codegen.rs rename to oximeter/schema/src/codegen.rs index 4778cf4970..0429cf0534 100644 --- a/oximeter/impl/src/schema/codegen.rs +++ b/oximeter/schema/src/codegen.rs @@ -6,18 +6,18 @@ //! Generate Rust types and code from oximeter schema definitions. -use crate::schema::ir::find_schema_version; -use crate::schema::ir::load_schema; -use crate::schema::AuthzScope; -use crate::schema::FieldSource; -use crate::schema::Units; -use crate::DatumType; -use crate::FieldSchema; -use crate::FieldType; -use crate::MetricsError; -use crate::TimeseriesSchema; +use crate::ir::find_schema_version; +use crate::ir::load_schema; use chrono::prelude::DateTime; use chrono::prelude::Utc; +use oximeter_types::AuthzScope; +use oximeter_types::DatumType; +use oximeter_types::FieldSchema; +use oximeter_types::FieldSource; +use oximeter_types::FieldType; +use oximeter_types::MetricsError; +use oximeter_types::TimeseriesSchema; +use oximeter_types::Units; use proc_macro2::TokenStream; use quote::quote; @@ -34,7 +34,7 @@ pub fn use_timeseries(contents: &str) -> Result { let latest = find_schema_version(schema.iter().cloned(), None); let mod_name = quote::format_ident!("{}", latest[0].target_name()); let types = emit_schema_types(latest); - let func = 
emit_schema_function(schema.into_iter()); + let func = emit_schema_function(schema.iter()); Ok(quote! { pub mod #mod_name { #types @@ -43,9 +43,10 @@ pub fn use_timeseries(contents: &str) -> Result { }) } -fn emit_schema_function( - list: impl Iterator, +fn emit_schema_function<'a>( + list: impl Iterator, ) -> TokenStream { + let list = list.map(quote_timeseries_schema); quote! { pub fn timeseries_schema() -> Vec<::oximeter::schema::TimeseriesSchema> { vec![ @@ -310,66 +311,63 @@ fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream { // This is used so that we can emit a function that will return the same data as // we parse from the TOML file with the timeseries definition, as a way to // export the definitions without needing that actual file at runtime. -impl quote::ToTokens for DatumType { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - DatumType::Bool => quote! { ::oximeter::DatumType::Bool }, - DatumType::I8 => quote! { ::oximeter::DatumType::I8 }, - DatumType::U8 => quote! { ::oximeter::DatumType::U8 }, - DatumType::I16 => quote! { ::oximeter::DatumType::I16 }, - DatumType::U16 => quote! { ::oximeter::DatumType::U16 }, - DatumType::I32 => quote! { ::oximeter::DatumType::I32 }, - DatumType::U32 => quote! { ::oximeter::DatumType::U32 }, - DatumType::I64 => quote! { ::oximeter::DatumType::I64 }, - DatumType::U64 => quote! { ::oximeter::DatumType::U64 }, - DatumType::F32 => quote! { ::oximeter::DatumType::F32 }, - DatumType::F64 => quote! { ::oximeter::DatumType::F64 }, - DatumType::String => quote! { ::oximeter::DatumType::String }, - DatumType::Bytes => quote! { ::oximeter::DatumType::Bytes }, - DatumType::CumulativeI64 => { - quote! { ::oximeter::DatumType::CumulativeI64 } - } - DatumType::CumulativeU64 => { - quote! { ::oximeter::DatumType::CumulativeU64 } - } - DatumType::CumulativeF32 => { - quote! { ::oximeter::DatumType::CumulativeF32 } - } - DatumType::CumulativeF64 => { - quote! 
{ ::oximeter::DatumType::CumulativeF64 } - } - DatumType::HistogramI8 => { - quote! { ::oximeter::DatumType::HistogramI8 } - } - DatumType::HistogramU8 => { - quote! { ::oximeter::DatumType::HistogramU8 } - } - DatumType::HistogramI16 => { - quote! { ::oximeter::DatumType::HistogramI16 } - } - DatumType::HistogramU16 => { - quote! { ::oximeter::DatumType::HistogramU16 } - } - DatumType::HistogramI32 => { - quote! { ::oximeter::DatumType::HistogramI32 } - } - DatumType::HistogramU32 => { - quote! { ::oximeter::DatumType::HistogramU32 } - } - DatumType::HistogramI64 => { - quote! { ::oximeter::DatumType::HistogramI64 } - } - DatumType::HistogramU64 => { - quote! { ::oximeter::DatumType::HistogramU64 } - } - DatumType::HistogramF32 => { - quote! { ::oximeter::DatumType::HistogramF32 } - } - DatumType::HistogramF64 => { - quote! { ::oximeter::DatumType::HistogramF64 } - } - }; - toks.to_tokens(tokens); +fn quote_datum_type(datum_type: DatumType) -> TokenStream { + match datum_type { + DatumType::Bool => quote! { ::oximeter::DatumType::Bool }, + DatumType::I8 => quote! { ::oximeter::DatumType::I8 }, + DatumType::U8 => quote! { ::oximeter::DatumType::U8 }, + DatumType::I16 => quote! { ::oximeter::DatumType::I16 }, + DatumType::U16 => quote! { ::oximeter::DatumType::U16 }, + DatumType::I32 => quote! { ::oximeter::DatumType::I32 }, + DatumType::U32 => quote! { ::oximeter::DatumType::U32 }, + DatumType::I64 => quote! { ::oximeter::DatumType::I64 }, + DatumType::U64 => quote! { ::oximeter::DatumType::U64 }, + DatumType::F32 => quote! { ::oximeter::DatumType::F32 }, + DatumType::F64 => quote! { ::oximeter::DatumType::F64 }, + DatumType::String => quote! { ::oximeter::DatumType::String }, + DatumType::Bytes => quote! { ::oximeter::DatumType::Bytes }, + DatumType::CumulativeI64 => { + quote! { ::oximeter::DatumType::CumulativeI64 } + } + DatumType::CumulativeU64 => { + quote! { ::oximeter::DatumType::CumulativeU64 } + } + DatumType::CumulativeF32 => { + quote! 
{ ::oximeter::DatumType::CumulativeF32 } + } + DatumType::CumulativeF64 => { + quote! { ::oximeter::DatumType::CumulativeF64 } + } + DatumType::HistogramI8 => { + quote! { ::oximeter::DatumType::HistogramI8 } + } + DatumType::HistogramU8 => { + quote! { ::oximeter::DatumType::HistogramU8 } + } + DatumType::HistogramI16 => { + quote! { ::oximeter::DatumType::HistogramI16 } + } + DatumType::HistogramU16 => { + quote! { ::oximeter::DatumType::HistogramU16 } + } + DatumType::HistogramI32 => { + quote! { ::oximeter::DatumType::HistogramI32 } + } + DatumType::HistogramU32 => { + quote! { ::oximeter::DatumType::HistogramU32 } + } + DatumType::HistogramI64 => { + quote! { ::oximeter::DatumType::HistogramI64 } + } + DatumType::HistogramU64 => { + quote! { ::oximeter::DatumType::HistogramU64 } + } + DatumType::HistogramF32 => { + quote! { ::oximeter::DatumType::HistogramF32 } + } + DatumType::HistogramF64 => { + quote! { ::oximeter::DatumType::HistogramF64 } + } } } @@ -452,55 +450,46 @@ fn emit_rust_type_for_field(field_type: FieldType) -> TokenStream { } } -impl quote::ToTokens for FieldSource { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - FieldSource::Target => { - quote! { ::oximeter::schema::FieldSource::Target } - } - FieldSource::Metric => { - quote! { ::oximeter::schema::FieldSource::Metric } - } - }; - toks.to_tokens(tokens); +fn quote_field_source(source: FieldSource) -> TokenStream { + match source { + FieldSource::Target => { + quote! { ::oximeter::schema::FieldSource::Target } + } + FieldSource::Metric => { + quote! { ::oximeter::schema::FieldSource::Metric } + } } } -impl quote::ToTokens for FieldType { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - FieldType::String => quote! { ::oximeter::FieldType::String }, - FieldType::I8 => quote! { ::oximeter::FieldType::I8 }, - FieldType::U8 => quote! { ::oximeter::FieldType::U8 }, - FieldType::I16 => quote! 
{ ::oximeter::FieldType::I16 }, - FieldType::U16 => quote! { ::oximeter::FieldType::U16 }, - FieldType::I32 => quote! { ::oximeter::FieldType::I32 }, - FieldType::U32 => quote! { ::oximeter::FieldType::U32 }, - FieldType::I64 => quote! { ::oximeter::FieldType::I64 }, - FieldType::U64 => quote! { ::oximeter::FieldType::U64 }, - FieldType::IpAddr => quote! { ::oximeter::FieldType::IpAddr }, - FieldType::Uuid => quote! { ::oximeter::FieldType::Uuid }, - FieldType::Bool => quote! { ::oximeter::FieldType::Bool }, - }; - toks.to_tokens(tokens); +fn quote_field_type(field_type: FieldType) -> TokenStream { + match field_type { + FieldType::String => quote! { ::oximeter::FieldType::String }, + FieldType::I8 => quote! { ::oximeter::FieldType::I8 }, + FieldType::U8 => quote! { ::oximeter::FieldType::U8 }, + FieldType::I16 => quote! { ::oximeter::FieldType::I16 }, + FieldType::U16 => quote! { ::oximeter::FieldType::U16 }, + FieldType::I32 => quote! { ::oximeter::FieldType::I32 }, + FieldType::U32 => quote! { ::oximeter::FieldType::U32 }, + FieldType::I64 => quote! { ::oximeter::FieldType::I64 }, + FieldType::U64 => quote! { ::oximeter::FieldType::U64 }, + FieldType::IpAddr => quote! { ::oximeter::FieldType::IpAddr }, + FieldType::Uuid => quote! { ::oximeter::FieldType::Uuid }, + FieldType::Bool => quote! { ::oximeter::FieldType::Bool }, } } -impl quote::ToTokens for AuthzScope { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - AuthzScope::Fleet => { - quote! { ::oximeter::schema::AuthzScope::Fleet } - } - AuthzScope::Silo => quote! { ::oximeter::schema::AuthzScope::Silo }, - AuthzScope::Project => { - quote! { ::oximeter::schema::AuthzScope::Project } - } - AuthzScope::ViewableToAll => { - quote! { ::oximeter::schema::AuthzScope::ViewableToAll } - } - }; - toks.to_tokens(tokens); +fn quote_authz_scope(authz_scope: AuthzScope) -> TokenStream { + match authz_scope { + AuthzScope::Fleet => { + quote! 
{ ::oximeter::schema::AuthzScope::Fleet } + } + AuthzScope::Silo => quote! { ::oximeter::schema::AuthzScope::Silo }, + AuthzScope::Project => { + quote! { ::oximeter::schema::AuthzScope::Project } + } + AuthzScope::ViewableToAll => { + quote! { ::oximeter::schema::AuthzScope::ViewableToAll } + } } } @@ -512,85 +501,79 @@ fn quote_creation_time(created: DateTime) -> TokenStream { } } -impl quote::ToTokens for Units { - fn to_tokens(&self, tokens: &mut TokenStream) { - let toks = match self { - Units::None => quote! { ::oximeter::schema::Units::None }, - Units::Count => quote! { ::oximeter::schema::Units::Count }, - Units::Bytes => quote! { ::oximeter::schema::Units::Bytes }, - Units::Seconds => quote! { ::oximeter::schema::Units::Seconds }, - Units::Nanoseconds => { - quote! { ::oximeter::schema::Units::Nanoseconds } - } - Units::Amps => quote! { ::oximeter::schema::Units::Amps }, - Units::Volts => quote! { ::oximeter::schema::Units::Volts }, - Units::DegreesCelcius => { - quote! { ::oximeter::schema::Units::DegreesCelcius } - } - Units::Rpm => quote! { ::oximeter::schema::Units::Rpm }, - }; - toks.to_tokens(tokens); +fn quote_units(units: Units) -> TokenStream { + match units { + Units::None => quote! { ::oximeter::schema::Units::None }, + Units::Count => quote! { ::oximeter::schema::Units::Count }, + Units::Bytes => quote! { ::oximeter::schema::Units::Bytes }, + Units::Seconds => quote! { ::oximeter::schema::Units::Seconds }, + Units::Nanoseconds => { + quote! { ::oximeter::schema::Units::Nanoseconds } + } + Units::Amps => quote! { ::oximeter::schema::Units::Amps }, + Units::Volts => quote! { ::oximeter::schema::Units::Volts }, + Units::DegreesCelcius => { + quote! { ::oximeter::schema::Units::DegreesCelcius } + } + Units::Rpm => quote! 
{ ::oximeter::schema::Units::Rpm }, } } -impl quote::ToTokens for FieldSchema { - fn to_tokens(&self, tokens: &mut TokenStream) { - let name = self.name.as_str(); - let field_type = self.field_type; - let source = self.source; - let description = self.description.as_str(); - let toks = quote! { - ::oximeter::FieldSchema { - name: String::from(#name), - field_type: #field_type, - source: #source, - description: String::from(#description), - } - }; - toks.to_tokens(tokens); +fn quote_field_schema(field_schema: &FieldSchema) -> TokenStream { + let name = field_schema.name.as_str(); + let field_type = quote_field_type(field_schema.field_type); + let source = quote_field_source(field_schema.source); + let description = field_schema.description.as_str(); + quote! { + ::oximeter::FieldSchema { + name: String::from(#name), + field_type: #field_type, + source: #source, + description: String::from(#description), + } } } -impl quote::ToTokens for TimeseriesSchema { - fn to_tokens(&self, tokens: &mut TokenStream) { - let field_schema = &self.field_schema; - let timeseries_name = self.timeseries_name.to_string(); - let target_description = self.description.target.as_str(); - let metric_description = self.description.metric.as_str(); - let authz_scope = self.authz_scope; - let units = self.units; - let datum_type = self.datum_type; - let ver = self.version.get(); - let version = quote! { ::core::num::NonZeroU8::new(#ver).unwrap() }; - let created = quote_creation_time(self.created); - let toks = quote! 
{ - ::oximeter::schema::TimeseriesSchema { - timeseries_name: - <::oximeter::TimeseriesName as ::std::convert::TryFrom<&str>>::try_from( - #timeseries_name - ).unwrap(), - description: ::oximeter::schema::TimeseriesDescription { - target: String::from(#target_description), - metric: String::from(#metric_description), - }, - authz_scope: #authz_scope, - units: #units, - field_schema: ::std::collections::BTreeSet::from([ - #(#field_schema),* - ]), - datum_type: #datum_type, - version: #version, - created: #created, - } - }; - toks.to_tokens(tokens); +fn quote_timeseries_schema( + timeseries_schema: &TimeseriesSchema, +) -> TokenStream { + let field_schema = + timeseries_schema.field_schema.iter().map(quote_field_schema); + let timeseries_name = timeseries_schema.timeseries_name.to_string(); + let target_description = timeseries_schema.description.target.as_str(); + let metric_description = timeseries_schema.description.metric.as_str(); + let authz_scope = quote_authz_scope(timeseries_schema.authz_scope); + let units = quote_units(timeseries_schema.units); + let datum_type = quote_datum_type(timeseries_schema.datum_type); + let ver = timeseries_schema.version.get(); + let version = quote! { ::core::num::NonZeroU8::new(#ver).unwrap() }; + let created = quote_creation_time(timeseries_schema.created); + quote! 
{ + ::oximeter::schema::TimeseriesSchema { + timeseries_name: + <::oximeter::TimeseriesName as ::std::convert::TryFrom<&str>>::try_from( + #timeseries_name + ).unwrap(), + description: ::oximeter::schema::TimeseriesDescription { + target: String::from(#target_description), + metric: String::from(#metric_description), + }, + authz_scope: #authz_scope, + units: #units, + field_schema: ::std::collections::BTreeSet::from([ + #(#field_schema),* + ]), + datum_type: #datum_type, + version: #version, + created: #created, + } } } #[cfg(test)] mod tests { use super::*; - use crate::schema::TimeseriesDescription; + use oximeter_types::TimeseriesDescription; use std::{collections::BTreeSet, num::NonZeroU8}; #[test] diff --git a/oximeter/impl/src/schema/ir.rs b/oximeter/schema/src/ir.rs similarity index 99% rename from oximeter/impl/src/schema/ir.rs rename to oximeter/schema/src/ir.rs index f7a209294f..370236000a 100644 --- a/oximeter/impl/src/schema/ir.rs +++ b/oximeter/schema/src/ir.rs @@ -11,17 +11,17 @@ //! inspected or used to generate code that contains the equivalent Rust types //! and trait implementations. 
-use crate::schema::AuthzScope; -use crate::schema::DatumType; -use crate::schema::FieldSource; -use crate::schema::FieldType; -use crate::schema::TimeseriesDescription; -use crate::schema::Units; -use crate::FieldSchema; -use crate::MetricsError; -use crate::TimeseriesName; -use crate::TimeseriesSchema; use chrono::Utc; +use oximeter_types::AuthzScope; +use oximeter_types::DatumType; +use oximeter_types::FieldSchema; +use oximeter_types::FieldSource; +use oximeter_types::FieldType; +use oximeter_types::MetricsError; +use oximeter_types::TimeseriesDescription; +use oximeter_types::TimeseriesName; +use oximeter_types::TimeseriesSchema; +use oximeter_types::Units; use serde::Deserialize; use std::collections::btree_map::Entry; use std::collections::BTreeMap; diff --git a/oximeter/schema/src/lib.rs b/oximeter/schema/src/lib.rs new file mode 100644 index 0000000000..b1ce73a940 --- /dev/null +++ b/oximeter/schema/src/lib.rs @@ -0,0 +1,12 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2024 Oxide Computer Company + +//! Tools for working with schemas for fields and timeseries. +//! +//! The actual schema type definitions are in [`oximeter_types::schema`]. 
+ +pub mod codegen; +pub mod ir; diff --git a/oximeter/test-utils/Cargo.toml b/oximeter/test-utils/Cargo.toml new file mode 100644 index 0000000000..f463e74aca --- /dev/null +++ b/oximeter/test-utils/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "oximeter-test-utils" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +chrono.workspace = true +omicron-workspace-hack.workspace = true +oximeter-macro-impl.workspace = true +oximeter-types.workspace = true +uuid.workspace = true diff --git a/oximeter/test-utils/src/lib.rs b/oximeter/test-utils/src/lib.rs new file mode 100644 index 0000000000..04c49add65 --- /dev/null +++ b/oximeter/test-utils/src/lib.rs @@ -0,0 +1,295 @@ +// Copyright 2024 Oxide Computer Company + +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Utilities for testing the oximeter crate. + +// Export the current crate as `oximeter`. The macros defined in `oximeter-macro-impl` generate +// code referring to symbols like `oximeter::traits::Target`. In consumers of this crate, that's +// fine, but internally there _is_ no crate named `oximeter`, it's just `self` or `crate`. +// +// See https://github.com/rust-lang/rust/pull/55275 for the PR introducing this fix, which links to +// lots of related issues and discussion. 
+extern crate self as oximeter; + +use oximeter_macro_impl::{Metric, Target}; +use oximeter_types::histogram; +use oximeter_types::histogram::{Histogram, Record}; +use oximeter_types::traits; +use oximeter_types::types::{ + Cumulative, Datum, DatumType, FieldType, FieldValue, Measurement, Sample, +}; +use oximeter_types::{Metric, Target}; +use uuid::Uuid; + +#[derive(Target)] +pub struct TestTarget { + pub name1: String, + pub name2: String, + pub num: i64, +} + +impl Default for TestTarget { + fn default() -> Self { + TestTarget { + name1: "first_name".into(), + name2: "second_name".into(), + num: 0, + } + } +} + +#[derive(Metric)] +pub struct TestMetric { + pub id: Uuid, + pub good: bool, + pub datum: i64, +} + +#[derive(Metric)] +pub struct TestCumulativeMetric { + pub id: Uuid, + pub good: bool, + pub datum: Cumulative, +} + +#[derive(Metric)] +pub struct TestHistogram { + pub id: Uuid, + pub good: bool, + pub datum: Histogram, +} + +const ID: Uuid = uuid::uuid!("e00ced4d-39d1-446a-ae85-a67f05c9750b"); + +pub fn make_sample() -> Sample { + let target = TestTarget::default(); + let metric = TestMetric { id: ID, good: true, datum: 1 }; + Sample::new(&target, &metric).unwrap() +} + +pub fn make_missing_sample() -> Sample { + let target = TestTarget::default(); + let metric = TestMetric { id: ID, good: true, datum: 1 }; + Sample::new_missing(&target, &metric).unwrap() +} + +pub fn make_hist_sample() -> Sample { + let target = TestTarget::default(); + let mut hist = histogram::Histogram::new(&[0.0, 5.0, 10.0]).unwrap(); + hist.sample(1.0).unwrap(); + hist.sample(2.0).unwrap(); + hist.sample(6.0).unwrap(); + let metric = TestHistogram { id: ID, good: true, datum: hist }; + Sample::new(&target, &metric).unwrap() +} + +/// A target identifying a single virtual machine instance +#[derive(Debug, Clone, Copy, oximeter::Target)] +pub struct VirtualMachine { + pub project_id: Uuid, + pub instance_id: Uuid, +} + +/// A metric recording the total time a vCPU is busy, by its 
ID +#[derive(Debug, Clone, Copy, oximeter::Metric)] +pub struct CpuBusy { + cpu_id: i64, + datum: Cumulative, +} + +pub fn generate_test_samples( + n_projects: usize, + n_instances: usize, + n_cpus: usize, + n_samples: usize, +) -> Vec { + let n_timeseries = n_projects * n_instances * n_cpus; + let mut samples = Vec::with_capacity(n_samples * n_timeseries); + for _ in 0..n_projects { + let project_id = Uuid::new_v4(); + for _ in 0..n_instances { + let vm = VirtualMachine { project_id, instance_id: Uuid::new_v4() }; + for cpu in 0..n_cpus { + for sample in 0..n_samples { + let cpu_busy = CpuBusy { + cpu_id: cpu as _, + datum: Cumulative::new(sample as f64), + }; + let sample = Sample::new(&vm, &cpu_busy).unwrap(); + samples.push(sample); + } + } + } + } + samples +} + +#[cfg(test)] +mod tests { + use chrono::Utc; + use oximeter_types::{ + schema::{ + default_schema_version, AuthzScope, FieldSchema, FieldSource, + TimeseriesSchema, Units, + }, + TimeseriesName, + }; + + use super::*; + + #[test] + fn test_gen_test_samples() { + let (n_projects, n_instances, n_cpus, n_samples) = (2, 2, 2, 2); + let samples = + generate_test_samples(n_projects, n_instances, n_cpus, n_samples); + assert_eq!( + samples.len(), + n_projects * n_instances * n_cpus * n_samples + ); + } + + #[test] + fn test_sample_struct() { + let t = TestTarget::default(); + let m = TestMetric { id: Uuid::new_v4(), good: true, datum: 1i64 }; + let sample = Sample::new(&t, &m).unwrap(); + assert_eq!( + sample.timeseries_name, + format!("{}:{}", t.name(), m.name()) + ); + assert!(sample.measurement.start_time().is_none()); + assert_eq!(sample.measurement.datum(), &Datum::from(1i64)); + + let m = TestCumulativeMetric { + id: Uuid::new_v4(), + good: true, + datum: 1i64.into(), + }; + let sample = Sample::new(&t, &m).unwrap(); + assert!(sample.measurement.start_time().is_some()); + } + + #[derive(Target)] + struct MyTarget { + id: Uuid, + name: String, + } + + const ID: Uuid = 
uuid::uuid!("ca565ef4-65dc-4ab0-8622-7be43ed72105"); + + impl Default for MyTarget { + fn default() -> Self { + Self { id: ID, name: String::from("name") } + } + } + + #[derive(Metric)] + struct MyMetric { + happy: bool, + datum: u64, + } + + impl Default for MyMetric { + fn default() -> Self { + Self { happy: true, datum: 0 } + } + } + + #[test] + fn test_timeseries_schema_from_parts() { + let target = MyTarget::default(); + let metric = MyMetric::default(); + let schema = TimeseriesSchema::new(&target, &metric).unwrap(); + + assert_eq!(schema.timeseries_name, "my_target:my_metric"); + let f = schema.schema_for_field("id").unwrap(); + assert_eq!(f.name, "id"); + assert_eq!(f.field_type, FieldType::Uuid); + assert_eq!(f.source, FieldSource::Target); + + let f = schema.schema_for_field("name").unwrap(); + assert_eq!(f.name, "name"); + assert_eq!(f.field_type, FieldType::String); + assert_eq!(f.source, FieldSource::Target); + + let f = schema.schema_for_field("happy").unwrap(); + assert_eq!(f.name, "happy"); + assert_eq!(f.field_type, FieldType::Bool); + assert_eq!(f.source, FieldSource::Metric); + assert_eq!(schema.datum_type, DatumType::U64); + } + + #[test] + fn test_timeseries_schema_from_sample() { + let target = MyTarget::default(); + let metric = MyMetric::default(); + let sample = Sample::new(&target, &metric).unwrap(); + let schema = TimeseriesSchema::new(&target, &metric).unwrap(); + let schema_from_sample = TimeseriesSchema::from(&sample); + assert_eq!(schema, schema_from_sample); + } + + // Test that we correctly order field across a target and metric. + // + // In an earlier commit, we switched from storing fields in an unordered Vec + // to using a BTree{Map,Set} to ensure ordering by name. However, the + // `TimeseriesSchema` type stored all its fields by chaining the sorted + // fields from the target and metric, without then sorting _across_ them. 
+ // + // This was exacerbated by the error reporting, where we did in fact sort + // all fields across the target and metric, making it difficult to tell how + // the derived schema was different, if at all. + // + // This test generates a sample with a schema where the target and metric + // fields are sorted within them, but not across them. We check that the + // derived schema are actually equal, which means we've imposed that + // ordering when deriving the schema. + #[test] + fn test_schema_field_ordering_across_target_metric() { + let target_field = FieldSchema { + name: String::from("later"), + field_type: FieldType::U64, + source: FieldSource::Target, + description: String::new(), + }; + let metric_field = FieldSchema { + name: String::from("earlier"), + field_type: FieldType::U64, + source: FieldSource::Metric, + description: String::new(), + }; + let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); + let datum_type = DatumType::U64; + let field_schema = + [target_field.clone(), metric_field.clone()].into_iter().collect(); + let expected_schema = TimeseriesSchema { + timeseries_name, + description: Default::default(), + field_schema, + datum_type, + version: default_schema_version(), + authz_scope: AuthzScope::Fleet, + units: Units::Count, + created: Utc::now(), + }; + + #[derive(oximeter::Target)] + struct Foo { + later: u64, + } + #[derive(oximeter::Metric)] + struct Bar { + earlier: u64, + datum: u64, + } + + let target = Foo { later: 1 }; + let metric = Bar { earlier: 2, datum: 10 }; + let sample = Sample::new(&target, &metric).unwrap(); + let derived_schema = TimeseriesSchema::from(&sample); + assert_eq!(derived_schema, expected_schema); + } +} diff --git a/oximeter/timeseries-macro/Cargo.toml b/oximeter/timeseries-macro/Cargo.toml index db591aed06..2fb8b8f312 100644 --- a/oximeter/timeseries-macro/Cargo.toml +++ b/oximeter/timeseries-macro/Cargo.toml @@ -8,7 +8,8 @@ proc-macro = true [dependencies] omicron-workspace-hack.workspace = 
true -oximeter-impl.workspace = true +oximeter-schema.workspace = true +oximeter-types.workspace = true proc-macro2.workspace = true quote.workspace = true syn.workspace = true diff --git a/oximeter/timeseries-macro/src/lib.rs b/oximeter/timeseries-macro/src/lib.rs index 0c70e73445..12ec2cc417 100644 --- a/oximeter/timeseries-macro/src/lib.rs +++ b/oximeter/timeseries-macro/src/lib.rs @@ -8,7 +8,7 @@ extern crate proc_macro; -use oximeter_impl::schema::SCHEMA_DIRECTORY; +use oximeter_types::schema::SCHEMA_DIRECTORY; /// Generate code to use the timeseries from one target. /// @@ -45,7 +45,7 @@ pub fn use_timeseries( .into(); } }; - match oximeter_impl::schema::codegen::use_timeseries(&contents) { + match oximeter_schema::codegen::use_timeseries(&contents) { Ok(toks) => { let path_ = path.display().to_string(); return quote::quote! { diff --git a/oximeter/impl/Cargo.toml b/oximeter/types/Cargo.toml similarity index 78% rename from oximeter/impl/Cargo.toml rename to oximeter/types/Cargo.toml index 91277d9d47..6d6bbc07e6 100644 --- a/oximeter/impl/Cargo.toml +++ b/oximeter/types/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "oximeter-impl" +name = "oximeter-types" version = "0.1.0" edition = "2021" license = "MPL-2.0" @@ -11,22 +11,13 @@ workspace = true bytes = { workspace = true, features = [ "serde" ] } chrono.workspace = true float-ord.workspace = true -heck.workspace = true num.workspace = true omicron-common.workspace = true omicron-workspace-hack.workspace = true -oximeter-macro-impl.workspace = true -prettyplease.workspace = true -proc-macro2.workspace = true -quote.workspace = true regex.workspace = true schemars = { workspace = true, features = [ "uuid1", "bytes", "chrono" ] } serde.workspace = true -serde_json.workspace = true -slog-error-chain.workspace = true strum.workspace = true -syn.workspace = true -toml.workspace = true thiserror.workspace = true uuid.workspace = true @@ -34,6 +25,7 @@ uuid.workspace = true approx.workspace = true # For benchmark 
criterion.workspace = true +oximeter-macro-impl.workspace = true rand = { workspace = true, features = ["std_rng"] } rand_distr.workspace = true rstest.workspace = true diff --git a/oximeter/impl/benches/quantile.rs b/oximeter/types/benches/quantile.rs similarity index 97% rename from oximeter/impl/benches/quantile.rs rename to oximeter/types/benches/quantile.rs index 4540ba8f6a..b88cb211e6 100644 --- a/oximeter/impl/benches/quantile.rs +++ b/oximeter/types/benches/quantile.rs @@ -8,7 +8,7 @@ // Copyright 2024 Oxide Computer Company use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; -use oximeter_impl::Quantile; +use oximeter_types::Quantile; use rand_distr::{Distribution, Normal}; /// Emulates baseline code in a Python implementation of the P² diff --git a/oximeter/impl/src/histogram.rs b/oximeter/types/src/histogram.rs similarity index 99% rename from oximeter/impl/src/histogram.rs rename to oximeter/types/src/histogram.rs index 40df0a1b41..0b85727ee0 100644 --- a/oximeter/impl/src/histogram.rs +++ b/oximeter/types/src/histogram.rs @@ -523,9 +523,9 @@ where /// Example /// ------- /// ```rust - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::histogram::Histogram; /// /// let hist = Histogram::with_bins(&[(0..10).into(), (10..100).into()]).unwrap(); @@ -922,9 +922,9 @@ where /// ------- /// /// ```rust - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. 
- /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::histogram::{Histogram, BinRange}; /// use std::ops::{RangeBounds, Bound}; /// diff --git a/oximeter/impl/src/lib.rs b/oximeter/types/src/lib.rs similarity index 92% rename from oximeter/impl/src/lib.rs rename to oximeter/types/src/lib.rs index 5acbeb9422..7a1a480f8d 100644 --- a/oximeter/impl/src/lib.rs +++ b/oximeter/types/src/lib.rs @@ -4,8 +4,6 @@ // Copyright 2024 Oxide Computer Company -pub use oximeter_macro_impl::*; - // Export the current crate as `oximeter`. The macros defined in `oximeter-macro-impl` generate // code referring to symbols like `oximeter::traits::Target`. In consumers of this crate, that's // fine, but internally there _is_ no crate named `oximeter`, it's just `self` or `crate`. @@ -17,15 +15,18 @@ extern crate self as oximeter; pub mod histogram; pub mod quantile; pub mod schema; -pub mod test_util; pub mod traits; pub mod types; pub use quantile::Quantile; pub use quantile::QuantileError; +pub use schema::AuthzScope; pub use schema::FieldSchema; +pub use schema::FieldSource; +pub use schema::TimeseriesDescription; pub use schema::TimeseriesName; pub use schema::TimeseriesSchema; +pub use schema::Units; pub use traits::Metric; pub use traits::Producer; pub use traits::Target; diff --git a/oximeter/impl/src/quantile.rs b/oximeter/types/src/quantile.rs similarity index 97% rename from oximeter/impl/src/quantile.rs rename to oximeter/types/src/quantile.rs index fafe9c9ece..40777217e5 100644 --- a/oximeter/impl/src/quantile.rs +++ b/oximeter/types/src/quantile.rs @@ -78,9 +78,9 @@ impl Quantile { /// # Examples /// /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. 
- /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let q = Quantile::new(0.5).unwrap(); /// @@ -116,9 +116,9 @@ impl Quantile { /// /// # Examples /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let q = Quantile::from_parts( /// 0.5, @@ -200,9 +200,9 @@ impl Quantile { /// # Examples /// /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let mut q = Quantile::new(0.5).unwrap(); /// for o in 1..=100 { @@ -243,9 +243,9 @@ impl Quantile { /// # Examples /// /// ``` - /// # // Rename the impl crate so the doctests can refer to the public + /// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. - /// # use oximeter_impl as oximeter; + /// # use oximeter_types as oximeter; /// use oximeter::Quantile; /// let mut q = Quantile::new(0.9).unwrap(); /// q.append(10).unwrap(); diff --git a/oximeter/impl/src/schema/mod.rs b/oximeter/types/src/schema.rs similarity index 75% rename from oximeter/impl/src/schema/mod.rs rename to oximeter/types/src/schema.rs index 250604d7be..2efd5265ff 100644 --- a/oximeter/impl/src/schema/mod.rs +++ b/oximeter/types/src/schema.rs @@ -6,9 +6,6 @@ //! Tools for working with schema for fields and timeseries. 
-pub mod codegen; -pub mod ir; - use crate::types::DatumType; use crate::types::FieldType; use crate::types::MetricsError; @@ -402,7 +399,6 @@ pub enum AuthzScope { mod tests { use super::*; use std::convert::TryFrom; - use uuid::Uuid; #[test] fn test_timeseries_name() { @@ -426,127 +422,6 @@ mod tests { assert!(TimeseriesName::try_from("x.a:b").is_err()); } - #[derive(Target)] - struct MyTarget { - id: Uuid, - name: String, - } - - const ID: Uuid = uuid::uuid!("ca565ef4-65dc-4ab0-8622-7be43ed72105"); - - impl Default for MyTarget { - fn default() -> Self { - Self { id: ID, name: String::from("name") } - } - } - - #[derive(Metric)] - struct MyMetric { - happy: bool, - datum: u64, - } - - impl Default for MyMetric { - fn default() -> Self { - Self { happy: true, datum: 0 } - } - } - - #[test] - fn test_timeseries_schema_from_parts() { - let target = MyTarget::default(); - let metric = MyMetric::default(); - let schema = TimeseriesSchema::new(&target, &metric).unwrap(); - - assert_eq!(schema.timeseries_name, "my_target:my_metric"); - let f = schema.schema_for_field("id").unwrap(); - assert_eq!(f.name, "id"); - assert_eq!(f.field_type, FieldType::Uuid); - assert_eq!(f.source, FieldSource::Target); - - let f = schema.schema_for_field("name").unwrap(); - assert_eq!(f.name, "name"); - assert_eq!(f.field_type, FieldType::String); - assert_eq!(f.source, FieldSource::Target); - - let f = schema.schema_for_field("happy").unwrap(); - assert_eq!(f.name, "happy"); - assert_eq!(f.field_type, FieldType::Bool); - assert_eq!(f.source, FieldSource::Metric); - assert_eq!(schema.datum_type, DatumType::U64); - } - - #[test] - fn test_timeseries_schema_from_sample() { - let target = MyTarget::default(); - let metric = MyMetric::default(); - let sample = Sample::new(&target, &metric).unwrap(); - let schema = TimeseriesSchema::new(&target, &metric).unwrap(); - let schema_from_sample = TimeseriesSchema::from(&sample); - assert_eq!(schema, schema_from_sample); - } - - // Test that we 
correctly order field across a target and metric. - // - // In an earlier commit, we switched from storing fields in an unordered Vec - // to using a BTree{Map,Set} to ensure ordering by name. However, the - // `TimeseriesSchema` type stored all its fields by chaining the sorted - // fields from the target and metric, without then sorting _across_ them. - // - // This was exacerbated by the error reporting, where we did in fact sort - // all fields across the target and metric, making it difficult to tell how - // the derived schema was different, if at all. - // - // This test generates a sample with a schema where the target and metric - // fields are sorted within them, but not across them. We check that the - // derived schema are actually equal, which means we've imposed that - // ordering when deriving the schema. - #[test] - fn test_schema_field_ordering_across_target_metric() { - let target_field = FieldSchema { - name: String::from("later"), - field_type: FieldType::U64, - source: FieldSource::Target, - description: String::new(), - }; - let metric_field = FieldSchema { - name: String::from("earlier"), - field_type: FieldType::U64, - source: FieldSource::Metric, - description: String::new(), - }; - let timeseries_name: TimeseriesName = "foo:bar".parse().unwrap(); - let datum_type = DatumType::U64; - let field_schema = - [target_field.clone(), metric_field.clone()].into_iter().collect(); - let expected_schema = TimeseriesSchema { - timeseries_name, - description: Default::default(), - field_schema, - datum_type, - version: default_schema_version(), - authz_scope: AuthzScope::Fleet, - units: Units::Count, - created: Utc::now(), - }; - - #[derive(oximeter::Target)] - struct Foo { - later: u64, - } - #[derive(oximeter::Metric)] - struct Bar { - earlier: u64, - datum: u64, - } - - let target = Foo { later: 1 }; - let metric = Bar { earlier: 2, datum: 10 }; - let sample = Sample::new(&target, &metric).unwrap(); - let derived_schema = 
TimeseriesSchema::from(&sample); - assert_eq!(derived_schema, expected_schema); - } - #[test] fn test_field_schema_ordering() { let mut fields = BTreeSet::new(); diff --git a/oximeter/impl/src/traits.rs b/oximeter/types/src/traits.rs similarity index 96% rename from oximeter/impl/src/traits.rs rename to oximeter/types/src/traits.rs index 16baa4f619..91ecca817d 100644 --- a/oximeter/impl/src/traits.rs +++ b/oximeter/types/src/traits.rs @@ -45,9 +45,9 @@ use std::ops::AddAssign; /// -------- /// /// ```rust -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// use oximeter::{traits::Target, types::FieldType}; /// use uuid::Uuid; @@ -75,9 +75,9 @@ use std::ops::AddAssign; /// supported types. /// /// ```compile_fail -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// #[derive(oximeter::Target)] /// struct Bad { @@ -160,9 +160,9 @@ pub trait Target { /// Example /// ------- /// ```rust -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// use chrono::Utc; /// use oximeter::Metric; @@ -185,9 +185,9 @@ pub trait Target { /// an unsupported type. 
/// /// ```compile_fail -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// #[derive(Metric)] /// pub struct BadType { @@ -364,9 +364,9 @@ pub use crate::histogram::HistogramSupport; /// Example /// ------- /// ```rust -/// # // Rename the impl crate so the doctests can refer to the public +/// # // Rename the types crate so the doctests can refer to the public /// # // `oximeter` crate, not the private impl. -/// # extern crate oximeter_impl as oximeter; +/// # extern crate oximeter_types as oximeter; /// # use oximeter_macro_impl::*; /// use oximeter::{Datum, MetricsError, Metric, Producer, Target}; /// use oximeter::types::{Measurement, Sample, Cumulative}; @@ -464,6 +464,8 @@ pub trait Producer: Send + Sync + std::fmt::Debug + 'static { #[cfg(test)] mod tests { + use oximeter_macro_impl::{Metric, Target}; + use crate::types; use crate::{ Datum, DatumType, FieldType, FieldValue, Metric, MetricsError, diff --git a/oximeter/impl/src/types.rs b/oximeter/types/src/types.rs similarity index 97% rename from oximeter/impl/src/types.rs rename to oximeter/types/src/types.rs index 370557f7f7..60260e3649 100644 --- a/oximeter/impl/src/types.rs +++ b/oximeter/types/src/types.rs @@ -850,7 +850,7 @@ pub struct Sample { /// The version of the timeseries this sample belongs to // // TODO-cleanup: This should be removed once schema are tracked in CRDB. 
- #[serde(default = "::oximeter::schema::default_schema_version")] + #[serde(default = "crate::schema::default_schema_version")] pub timeseries_version: NonZeroU8, // Target name and fields @@ -1104,15 +1104,10 @@ mod tests { use super::Measurement; use super::MetricsError; use super::Sample; - use crate::test_util; - use crate::types; - use crate::Metric; - use crate::Target; use bytes::Bytes; use std::collections::BTreeMap; use std::net::Ipv4Addr; use std::net::Ipv6Addr; - use uuid::Uuid; #[test] fn test_cumulative_i64() { @@ -1176,31 +1171,6 @@ mod tests { assert!(measurement.timestamp() >= measurement.start_time().unwrap()); } - #[test] - fn test_sample_struct() { - let t = test_util::TestTarget::default(); - let m = test_util::TestMetric { - id: Uuid::new_v4(), - good: true, - datum: 1i64, - }; - let sample = types::Sample::new(&t, &m).unwrap(); - assert_eq!( - sample.timeseries_name, - format!("{}:{}", t.name(), m.name()) - ); - assert!(sample.measurement.start_time().is_none()); - assert_eq!(sample.measurement.datum(), &Datum::from(1i64)); - - let m = test_util::TestCumulativeMetric { - id: Uuid::new_v4(), - good: true, - datum: 1i64.into(), - }; - let sample = types::Sample::new(&t, &m).unwrap(); - assert!(sample.measurement.start_time().is_some()); - } - #[rstest::rstest] #[case::as_string("some string", FieldValue::String("some string".into()))] #[case::as_i8("2", FieldValue::I8(2))] diff --git a/oximeter/impl/tests/fail/failures.rs b/oximeter/types/tests/fail/failures.rs similarity index 100% rename from oximeter/impl/tests/fail/failures.rs rename to oximeter/types/tests/fail/failures.rs diff --git a/oximeter/impl/tests/fail/failures.stderr b/oximeter/types/tests/fail/failures.stderr similarity index 100% rename from oximeter/impl/tests/fail/failures.stderr rename to oximeter/types/tests/fail/failures.stderr diff --git a/oximeter/impl/tests/test_compilation.rs b/oximeter/types/tests/test_compilation.rs similarity index 100% rename from 
oximeter/impl/tests/test_compilation.rs rename to oximeter/types/tests/test_compilation.rs From dae480a84551638886a4c66c1d29d2f8d6669a9d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 17 Aug 2024 05:36:18 +0000 Subject: [PATCH 28/51] Update Rust crate libc to 0.2.156 (#6375) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3b62f3001a..bad1af8f6b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4307,9 +4307,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "a5f43f184355eefb8d17fc948dbecf6c13be3c141f20d834ae842193a448c72a" [[package]] name = "libdlpi-sys" diff --git a/Cargo.toml b/Cargo.toml index ea687936e3..cac3395a7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -396,7 +396,7 @@ ipnetwork = { version = "0.20", features = ["schemars"] } ispf = { git = "https://github.com/oxidecomputer/ispf" } key-manager = { path = "key-manager" } kstat-rs = "0.2.4" -libc = "0.2.155" +libc = "0.2.156" libfalcon = { git = "https://github.com/oxidecomputer/falcon", rev = "e69694a1f7cc9fe31fab27f321017280531fb5f7" } libnvme = { git = "https://github.com/oxidecomputer/libnvme", rev = "dd5bb221d327a1bc9287961718c3c10d6bd37da0" } linear-map = "1.2.0" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 5dc3bc11e7..854179a4db 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -69,7 +69,7 @@ itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", 
version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.155", features = ["extra_traits"] } +libc = { version = "0.2.156", features = ["extra_traits"] } log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.7.2" } @@ -176,7 +176,7 @@ itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.155", features = ["extra_traits"] } +libc = { version = "0.2.156", features = ["extra_traits"] } log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.7.2" } From f69ea9d4ec7f71ccf7babc53e94ee175674bfe06 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 17 Aug 2024 01:41:19 -0700 Subject: [PATCH 29/51] Update Rust crate rstest to 0.22.0 (#6347) --- Cargo.lock | 31 ++++++++++++++++++++++++++----- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bad1af8f6b..3f084cd822 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5673,7 +5673,7 @@ version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 1.3.1", "proc-macro2", "quote", "syn 1.0.109", @@ -7786,6 +7786,15 @@ dependencies = [ "toml_edit 0.19.15", ] +[[package]] +name = "proc-macro-crate" 
+version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +dependencies = [ + "toml_edit 0.21.1", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -8562,9 +8571,9 @@ dependencies = [ [[package]] name = "rstest" -version = "0.19.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d5316d2a1479eeef1ea21e7f9ddc67c191d497abc8fc3ba2467857abbb68330" +checksum = "7b423f0e62bdd61734b67cd21ff50871dfaeb9cc74f869dcd6af974fbcb19936" dependencies = [ "futures", "futures-timer", @@ -8574,12 +8583,13 @@ dependencies = [ [[package]] name = "rstest_macros" -version = "0.19.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04a9df72cc1f67020b0d63ad9bfe4a323e459ea7eb68e03bd9824db49f9a4c25" +checksum = "c5e1711e7d14f74b12a58411c542185ef7fb7f2e7f8ee6e2940a883628522b42" dependencies = [ "cfg-if", "glob", + "proc-macro-crate 3.1.0", "proc-macro2", "quote", "regex", @@ -10751,6 +10761,17 @@ dependencies = [ "winnow 0.5.40", ] +[[package]] +name = "toml_edit" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap 2.4.0", + "toml_datetime", + "winnow 0.5.40", +] + [[package]] name = "toml_edit" version = "0.22.20" diff --git a/Cargo.toml b/Cargo.toml index cac3395a7d..8cb1b667d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -506,7 +506,7 @@ regress = "0.9.1" reqwest = { version = "0.11", default-features = false } ring = "0.17.8" rpassword = "7.3.1" -rstest = "0.19.0" +rstest = "0.22.0" rustfmt-wrapper = "0.2" rustls = "0.22.2" rustls-pemfile = "2.1.3" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 854179a4db..b42f8093fb 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -112,6 +112,7 @@ 
tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serd tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } toml = { version = "0.7.8" } +toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.20", features = ["serde"] } tracing = { version = "0.1.40", features = ["log"] } unicode-bidi = { version = "0.3.15" } @@ -221,6 +222,7 @@ tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serd tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } toml = { version = "0.7.8" } +toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.20", features = ["serde"] } tracing = { version = "0.1.40", features = ["log"] } unicode-bidi = { version = "0.3.15" } @@ -285,7 +287,6 @@ nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signa once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } -toml_datetime = { version = "0.6.8", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } [target.x86_64-unknown-illumos.build-dependencies] @@ -295,7 +296,6 @@ nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signa once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } -toml_datetime = { version = "0.6.8", default-features = 
false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } ### END HAKARI SECTION From f1e28fd27f4060580ff0a8945a0923c25864ee09 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 17 Aug 2024 10:12:45 +0000 Subject: [PATCH 30/51] Update Rust crate serde to v1.0.208 (#6376) --- Cargo.lock | 8 ++++---- workspace-hack/Cargo.toml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3f084cd822..d0ac123970 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9142,9 +9142,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.207" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2" +checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" dependencies = [ "serde_derive", ] @@ -9180,9 +9180,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.207" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e" +checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index b42f8093fb..973b87c00a 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -95,7 +95,7 @@ rsa = { version = "0.9.6", features = ["serde", "sha2"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } -serde = { version = "1.0.207", features = ["alloc", "derive", "rc"] } +serde = { version = "1.0.208", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.125", features = ["raw_value", 
"unbounded_depth"] } sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } @@ -203,7 +203,7 @@ rsa = { version = "0.9.6", features = ["serde", "sha2"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } -serde = { version = "1.0.207", features = ["alloc", "derive", "rc"] } +serde = { version = "1.0.208", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.125", features = ["raw_value", "unbounded_depth"] } sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } From 8fcd26f86398ed7732b02ed8ab584bdb489c40a7 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 17 Aug 2024 10:13:39 +0000 Subject: [PATCH 31/51] Update Rust crate serde_with to 3.9.0 (#6377) --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d0ac123970..4ce34b9f84 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9286,9 +9286,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.8.3" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e73139bc5ec2d45e6c5fd85be5a46949c1c39a4c18e56915f5eb4c12f975e377" +checksum = "69cecfa94848272156ea67b2b1a53f20fc7bc638c4a46d2f8abde08f05f4b857" dependencies = [ "base64 0.22.1", "chrono", @@ -9304,9 +9304,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.8.3" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b80d3d6b56b64335c0180e5ffde23b3c5e08c14c585b51a15bd0e95393f46703" +checksum = "a8fee4991ef4f274617a51ad4af30519438dacb2f56ac773b08a1922ff743350" dependencies = [ "darling", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index 8cb1b667d7..a2b4363455 100644 --- a/Cargo.toml 
+++ b/Cargo.toml @@ -521,7 +521,7 @@ serde_json = "1.0.125" serde_path_to_error = "0.1.16" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" -serde_with = "3.8.3" +serde_with = "3.9.0" sha2 = "0.10.8" sha3 = "0.10.8" shell-words = "1.1.0" From b1b5572a44c9b636c49e897f1173e4f8d4f7b235 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 17 Aug 2024 10:11:22 -0700 Subject: [PATCH 32/51] Update Rust crate similar to 2.6.0 (#6378) --- Cargo.lock | 7 +++---- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 10 ++++------ 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4ce34b9f84..51bf324679 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6439,7 +6439,6 @@ dependencies = [ "bit-vec", "bitflags 1.3.2", "bitflags 2.6.0", - "bstr 0.2.17", "bstr 1.9.1", "byteorder", "bytes", @@ -9435,11 +9434,11 @@ dependencies = [ [[package]] name = "similar" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42c91313f1d05da9b26f267f931cf178d4aba455b4c4622dd7355eb80c6640" +checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" dependencies = [ - "bstr 0.2.17", + "bstr 1.9.1", "unicode-segmentation", ] diff --git a/Cargo.toml b/Cargo.toml index a2b4363455..a86d986e80 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -528,7 +528,7 @@ shell-words = "1.1.0" signal-hook = "0.3" signal-hook-tokio = { version = "0.3", features = [ "futures-v0_3" ] } sigpipe = "0.1.3" -similar = { version = "2.5.0", features = ["bytes"] } +similar = { version = "2.6.0", features = ["bytes"] } similar-asserts = "1.5.0" # Don't change sled's version on accident; sled's on-disk format is not yet # stable and requires manual migrations. 
In the limit this won't matter because diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 973b87c00a..fb1b94ae0f 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -27,8 +27,7 @@ bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.6.0", default-features = false, features = ["serde", "std"] } -bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } -bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.1" } +bstr = { version = "1.9.1" } byteorder = { version = "1.5.0" } bytes = { version = "1.7.1", features = ["serde"] } chrono = { version = "0.4.38", features = ["serde"] } @@ -99,7 +98,7 @@ serde = { version = "1.0.208", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.125", features = ["raw_value", "unbounded_depth"] } sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } -similar = { version = "2.5.0", features = ["bytes", "inline", "unicode"] } +similar = { version = "2.6.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } smallvec = { version = "1.13.2", default-features = false, features = ["const_new"] } spin = { version = "0.9.8" } @@ -134,8 +133,7 @@ bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.6.0", default-features = false, features = ["serde", "std"] } -bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } -bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.1" } +bstr = { version = "1.9.1" } byteorder = { version = "1.5.0" } bytes = { version = "1.7.1", features = ["serde"] } cc = { 
version = "1.0.97", default-features = false, features = ["parallel"] } @@ -207,7 +205,7 @@ serde = { version = "1.0.208", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.125", features = ["raw_value", "unbounded_depth"] } sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.8", features = ["oid"] } -similar = { version = "2.5.0", features = ["bytes", "inline", "unicode"] } +similar = { version = "2.6.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } smallvec = { version = "1.13.2", default-features = false, features = ["const_new"] } spin = { version = "0.9.8" } From 24c129b40579d9f51e664f9cea114705f16444d8 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sun, 18 Aug 2024 04:33:07 +0000 Subject: [PATCH 33/51] Update taiki-e/install-action digest to 2d7ff60 (#6381) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`4f13fb6` -> `2d7ff60`](https://togithub.com/taiki-e/install-action/compare/4f13fb6...2d7ff60) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index ddc4ffc021..4b4d09ba35 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@4f13fb62448d53782828736cd5b0fd395b5f0c06 # v2 + uses: taiki-e/install-action@2d7ff60c815c5236dc38fd3909d97d6d605315d2 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 4e8c731a6848a73eff1c449f6785228563dec016 Mon Sep 17 00:00:00 2001 From: Rain Date: Sat, 17 Aug 2024 21:47:19 -0700 Subject: [PATCH 34/51] [2/6] [oxql] move core types to a new oxql-types crate (#6364) Means that the Nexus external API crate doesn't have to pull in oximeter-db. I did have to expose a few mutators, but my general reasoning is that because these types derive `Deserialize`, it's always possible to make these changes by serializing into JSON and working with them there. 
--- Cargo.lock | 16 + Cargo.toml | 3 + nexus/Cargo.toml | 1 + nexus/src/app/metrics.rs | 4 +- nexus/src/external_api/http_entrypoints.rs | 2 +- nexus/tests/integration_tests/metrics.rs | 6 +- openapi/nexus.json | 14 +- oximeter/db/Cargo.toml | 1 + oximeter/db/src/client/mod.rs | 4 +- oximeter/db/src/client/oxql.rs | 30 +- oximeter/db/src/lib.rs | 3 +- oximeter/db/src/model.rs | 2 +- oximeter/db/src/oxql/ast/table_ops/align.rs | 35 +- oximeter/db/src/oxql/ast/table_ops/filter.rs | 47 +- .../db/src/oxql/ast/table_ops/group_by.rs | 73 ++- oximeter/db/src/oxql/ast/table_ops/join.rs | 292 +--------- oximeter/db/src/oxql/ast/table_ops/limit.rs | 108 ++-- oximeter/db/src/oxql/ast/table_ops/mod.rs | 2 +- oximeter/db/src/oxql/mod.rs | 4 - oximeter/db/src/oxql/query/mod.rs | 10 - oximeter/db/src/query.rs | 5 +- oximeter/db/src/shells/oxql.rs | 3 +- oximeter/oxql-types/Cargo.toml | 18 + oximeter/oxql-types/src/lib.rs | 23 + .../{db/src/oxql => oxql-types/src}/point.rs | 525 ++++++++++++++---- .../{db/src/oxql => oxql-types/src}/table.rs | 89 ++- oximeter/types/src/schema.rs | 2 + 27 files changed, 710 insertions(+), 612 deletions(-) create mode 100644 oximeter/oxql-types/Cargo.toml create mode 100644 oximeter/oxql-types/src/lib.rs rename oximeter/{db/src/oxql => oxql-types/src}/point.rs (82%) rename oximeter/{db/src/oxql => oxql-types/src}/table.rs (75%) diff --git a/Cargo.lock b/Cargo.lock index 51bf324679..2943a4f2c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6077,6 +6077,7 @@ dependencies = [ "oximeter-instruments", "oximeter-producer", "oxnet", + "oxql-types", "parse-display", "paste", "pem", @@ -6926,6 +6927,7 @@ dependencies = [ "omicron-workspace-hack", "oximeter", "oximeter-test-utils", + "oxql-types", "peg", "reedline", "regex", @@ -7102,6 +7104,20 @@ dependencies = [ "serde_json", ] +[[package]] +name = "oxql-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "highway", + "num", + "omicron-workspace-hack", + "oximeter-types", + "schemars", + 
"serde", +] + [[package]] name = "p256" version = "0.13.2" diff --git a/Cargo.toml b/Cargo.toml index a86d986e80..83aea83ddf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,7 @@ members = [ "oximeter/instruments", "oximeter/oximeter-macro-impl", "oximeter/oximeter", + "oximeter/oxql-types", "oximeter/producer", "oximeter/schema", "oximeter/test-utils", @@ -196,6 +197,7 @@ default-members = [ "oximeter/instruments", "oximeter/oximeter-macro-impl", "oximeter/oximeter", + "oximeter/oxql-types", "oximeter/producer", "oximeter/schema", "oximeter/test-utils", @@ -470,6 +472,7 @@ oximeter-schema = { path = "oximeter/schema" } oximeter-test-utils = { path = "oximeter/test-utils" } oximeter-timeseries-macro = { path = "oximeter/timeseries-macro" } oximeter-types = { path = "oximeter/types" } +oxql-types = { path = "oximeter/oxql-types" } p256 = "0.13" parse-display = "0.10.0" partial-io = { version = "0.5.4", features = ["proptest1", "tokio1"] } diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 8977507505..5b181c7fa0 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -57,6 +57,7 @@ openssl.workspace = true oximeter-client.workspace = true oximeter-db = { workspace = true, default-features = false, features = [ "oxql" ] } oxnet.workspace = true +oxql-types.workspace = true parse-display.workspace = true paste.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
diff --git a/nexus/src/app/metrics.rs b/nexus/src/app/metrics.rs index 3728a3bdc1..3a6e7e27be 100644 --- a/nexus/src/app/metrics.rs +++ b/nexus/src/app/metrics.rs @@ -14,7 +14,7 @@ use nexus_db_queries::{ }; use omicron_common::api::external::{Error, InternalContext}; use oximeter_db::{ - oxql, Measurement, TimeseriesSchema, TimeseriesSchemaPaginationParams, + Measurement, TimeseriesSchema, TimeseriesSchemaPaginationParams, }; use std::num::NonZeroU32; @@ -138,7 +138,7 @@ impl super::Nexus { &self, opctx: &OpContext, query: impl AsRef, - ) -> Result, Error> { + ) -> Result, Error> { // Must be a fleet user to list timeseries schema. // // TODO-security: We need to figure out how to implement proper security diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 8e8b63229b..df522f18ab 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -6386,7 +6386,7 @@ async fn timeseries_schema_list( async fn timeseries_query( rqctx: RequestContext, body: TypedBody, -) -> Result>, HttpError> { +) -> Result>, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.context.nexus; diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index 9cfa0350e8..e24de2a3ad 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -284,7 +284,7 @@ async fn test_timeseries_schema_list( pub async fn timeseries_query( cptestctx: &ControlPlaneTestContext, query: impl ToString, -) -> Vec { +) -> Vec { // first, make sure the latest timeseries have been collected. 
cptestctx.oximeter.force_collect().await; @@ -429,11 +429,11 @@ async fn test_instance_watcher_metrics( #[track_caller] fn count_state( - table: &oximeter_db::oxql::Table, + table: &oxql_types::Table, instance_id: InstanceUuid, state: &'static str, ) -> i64 { - use oximeter_db::oxql::point::ValueArray; + use oxql_types::point::ValueArray; let uuid = FieldValue::Uuid(instance_id.into_untyped_uuid()); let state = FieldValue::String(state.into()); let mut timeserieses = table.timeseries().filter(|ts| { diff --git a/openapi/nexus.json b/openapi/nexus.json index 27e2870b6e..a0cbfa2f63 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -20131,10 +20131,20 @@ "type": "object", "properties": { "metric_type": { - "$ref": "#/components/schemas/MetricType" + "description": "The type of this metric.", + "allOf": [ + { + "$ref": "#/components/schemas/MetricType" + } + ] }, "values": { - "$ref": "#/components/schemas/ValueArray" + "description": "The data values.", + "allOf": [ + { + "$ref": "#/components/schemas/ValueArray" + } + ] } }, "required": [ diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index 6a7cedbc22..2a9c615da2 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -24,6 +24,7 @@ num.workspace = true omicron-common.workspace = true omicron-workspace-hack.workspace = true oximeter.workspace = true +oxql-types.workspace = true regex.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/oximeter/db/src/client/mod.rs b/oximeter/db/src/client/mod.rs index 176e1bd5f8..c2b07ebaa6 100644 --- a/oximeter/db/src/client/mod.rs +++ b/oximeter/db/src/client/mod.rs @@ -22,8 +22,6 @@ use crate::Error; use crate::Metric; use crate::Target; use crate::Timeseries; -use crate::TimeseriesKey; -use crate::TimeseriesName; use crate::TimeseriesPageSelector; use crate::TimeseriesScanParams; use crate::TimeseriesSchema; @@ -31,7 +29,9 @@ use dropshot::EmptyScanParams; use dropshot::PaginationOrder; use dropshot::ResultsPage; 
use dropshot::WhichPage; +use oximeter::schema::TimeseriesKey; use oximeter::types::Sample; +use oximeter::TimeseriesName; use regex::Regex; use regex::RegexBuilder; use slog::debug; diff --git a/oximeter/db/src/client/oxql.rs b/oximeter/db/src/client/oxql.rs index 29586b8189..4005fa873e 100644 --- a/oximeter/db/src/client/oxql.rs +++ b/oximeter/db/src/client/oxql.rs @@ -18,7 +18,7 @@ use crate::query::field_table_name; use crate::Error; use crate::Metric; use crate::Target; -use crate::TimeseriesKey; +use oximeter::schema::TimeseriesKey; use oximeter::TimeseriesSchema; use slog::debug; use slog::trace; @@ -68,7 +68,7 @@ pub struct OxqlResult { pub query_summaries: Vec, /// The list of OxQL tables returned from the query. - pub tables: Vec, + pub tables: Vec, } /// The maximum number of data values fetched from the database for an OxQL @@ -479,7 +479,9 @@ impl Client { query_id, total_duration: query_start.elapsed(), query_summaries, - tables: vec![oxql::Table::new(schema.timeseries_name.as_str())], + tables: vec![oxql_types::Table::new( + schema.timeseries_name.as_str(), + )], }; return Ok(result); } @@ -503,7 +505,7 @@ impl Client { // At this point, let's construct a set of tables and run the results // through the transformation pipeline. - let mut tables = vec![oxql::Table::from_timeseries( + let mut tables = vec![oxql_types::Table::from_timeseries( schema.timeseries_name.as_str(), timeseries_by_key.into_values(), )?]; @@ -553,7 +555,7 @@ impl Client { limit: Option, total_rows_fetched: &mut u64, ) -> Result< - (Vec, BTreeMap), + (Vec, BTreeMap), Error, > { // We'll create timeseries for each key on the fly. To enable computing @@ -624,25 +626,25 @@ impl Client { for (key, measurements) in measurements_by_key.into_iter() { // Constuct a new timeseries, from the target/metric info. 
let (target, metric) = info.get(&key).unwrap(); - let mut timeseries = oxql::Timeseries::new( + let mut timeseries = oxql_types::Timeseries::new( target .fields .iter() .chain(metric.fields.iter()) .map(|field| (field.name.clone(), field.value.clone())), - oxql::point::DataType::try_from(schema.datum_type)?, + oxql_types::point::DataType::try_from(schema.datum_type)?, if schema.datum_type.is_cumulative() { - oxql::point::MetricType::Delta + oxql_types::point::MetricType::Delta } else { - oxql::point::MetricType::Gauge + oxql_types::point::MetricType::Gauge }, )?; // Covert its oximeter measurements into OxQL data types. let points = if schema.datum_type.is_cumulative() { - oxql::point::Points::delta_from_cumulative(&measurements)? + oxql_types::point::Points::delta_from_cumulative(&measurements)? } else { - oxql::point::Points::gauge_from_gauge(&measurements)? + oxql_types::point::Points::gauge_from_gauge(&measurements)? }; timeseries.points = points; debug!( @@ -1108,10 +1110,7 @@ fn update_total_rows_and_check( mod tests { use super::ConsistentKeyGroup; use crate::client::oxql::chunk_consistent_key_groups_impl; - use crate::{ - oxql::{point::Points, Table, Timeseries}, - Client, DbWrite, - }; + use crate::{Client, DbWrite}; use crate::{Metric, Target}; use chrono::{DateTime, Utc}; use dropshot::test_util::LogContext; @@ -1119,6 +1118,7 @@ mod tests { use omicron_test_utils::dev::test_setup_log; use oximeter::{types::Cumulative, FieldValue}; use oximeter::{DatumType, Sample}; + use oxql_types::{point::Points, Table, Timeseries}; use std::collections::BTreeMap; use std::time::Duration; diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs index 9ad382c97d..5d56d802c9 100644 --- a/oximeter/db/src/lib.rs +++ b/oximeter/db/src/lib.rs @@ -14,6 +14,7 @@ use dropshot::EmptyScanParams; use dropshot::PaginationParams; pub use oximeter::schema::FieldSchema; pub use oximeter::schema::FieldSource; +use oximeter::schema::TimeseriesKey; pub use 
oximeter::schema::TimeseriesName; pub use oximeter::schema::TimeseriesSchema; pub use oximeter::DatumType; @@ -267,8 +268,6 @@ pub async fn make_client( Ok(client) } -pub(crate) type TimeseriesKey = u64; - // TODO-cleanup: Add the timeseries version in to the computation of the key. // This will require a full drop of the database, since we're changing the // sorting key and the timeseries key on each past sample. See diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 3e34ad10e3..986bf00225 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -11,13 +11,13 @@ use crate::FieldSchema; use crate::FieldSource; use crate::Metric; use crate::Target; -use crate::TimeseriesKey; use crate::TimeseriesSchema; use bytes::Bytes; use chrono::DateTime; use chrono::Utc; use num::traits::Zero; use oximeter::histogram::Histogram; +use oximeter::schema::TimeseriesKey; use oximeter::traits; use oximeter::types::Cumulative; use oximeter::types::Datum; diff --git a/oximeter/db/src/oxql/ast/table_ops/align.rs b/oximeter/db/src/oxql/ast/table_ops/align.rs index cf54ebc312..b0cd7d80f1 100644 --- a/oximeter/db/src/oxql/ast/table_ops/align.rs +++ b/oximeter/db/src/oxql/ast/table_ops/align.rs @@ -6,19 +6,19 @@ // Copyright 2024 Oxide Computer Company -use crate::oxql::point::DataType; -use crate::oxql::point::MetricType; -use crate::oxql::point::Points; -use crate::oxql::point::ValueArray; -use crate::oxql::point::Values; -use crate::oxql::query::Alignment; -use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; use anyhow::Context; +use anyhow::Error; use chrono::DateTime; use chrono::TimeDelta; use chrono::Utc; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use oxql_types::point::Points; +use oxql_types::point::ValueArray; +use oxql_types::point::Values; +use oxql_types::Alignment; +use oxql_types::Table; +use oxql_types::Timeseries; use std::time::Duration; // The maximum factor by which an alignment 
operation may upsample data. @@ -144,7 +144,7 @@ fn align_mean_within( "Alignment by mean requires a gauge or delta metric, not {}", metric_type, ); - verify_max_upsampling_ratio(&points.timestamps, &period)?; + verify_max_upsampling_ratio(points.timestamps(), &period)?; // Always convert the output to doubles, when computing the mean. The // output is always a gauge, so we do not need the start times of the @@ -179,7 +179,7 @@ fn align_mean_within( // - Compute the mean of those. let period_ = TimeDelta::from_std(*period).context("time delta out of range")?; - let first_timestamp = points.timestamps[0]; + let first_timestamp = points.timestamps()[0]; let mut ix: u32 = 0; loop { // Compute the next output timestamp, by shifting the query end time @@ -220,15 +220,15 @@ fn align_mean_within( // entries. let output_value = if matches!(metric_type, MetricType::Gauge) { mean_gauge_value_in_window( - &points.timestamps, + points.timestamps(), &input_points, window_start, output_time, ) } else { mean_delta_value_in_window( - points.start_times.as_ref().unwrap(), - &points.timestamps, + points.start_times().unwrap(), + points.timestamps(), &input_points, window_start, output_time, @@ -255,10 +255,9 @@ fn align_mean_within( ValueArray::Double(output_values.into_iter().rev().collect()); let timestamps = output_timestamps.into_iter().rev().collect(); let values = Values { values, metric_type: MetricType::Gauge }; - new_timeseries.points = - Points { start_times: None, timestamps, values: vec![values] }; - new_timeseries.alignment = - Some(Alignment { end_time: *query_end, period: *period }); + new_timeseries.points = Points::new(None, timestamps, vec![values]); + new_timeseries + .set_alignment(Alignment { end_time: *query_end, period: *period }); output_table.insert(new_timeseries).unwrap(); } Ok(output_table) diff --git a/oximeter/db/src/oxql/ast/table_ops/filter.rs b/oximeter/db/src/oxql/ast/table_ops/filter.rs index b6fc533e4d..ad398da983 100644 --- 
a/oximeter/db/src/oxql/ast/table_ops/filter.rs +++ b/oximeter/db/src/oxql/ast/table_ops/filter.rs @@ -12,18 +12,18 @@ use crate::oxql::ast::literal::Literal; use crate::oxql::ast::logical_op::LogicalOp; use crate::oxql::ast::table_ops::limit::Limit; use crate::oxql::ast::table_ops::limit::LimitKind; -use crate::oxql::point::DataType; -use crate::oxql::point::MetricType; -use crate::oxql::point::Points; -use crate::oxql::point::ValueArray; use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; use crate::shells::special_idents; use chrono::DateTime; use chrono::Utc; use oximeter::FieldType; use oximeter::FieldValue; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use oxql_types::point::Points; +use oxql_types::point::ValueArray; +use oxql_types::Table; +use oxql_types::Timeseries; use regex::Regex; use std::collections::BTreeSet; use std::fmt; @@ -340,16 +340,13 @@ impl Filter { // Apply the filter to the data points as well. let points = self.filter_points(&input.points)?; - // Similar to above, if the filter removes all data points in - // the timeseries, let's remove the timeseries altogether. - if points.is_empty() { - continue; + if let Some(new_timeseries) = input.copy_with_points(points) { + timeseries.push(new_timeseries); + } else { + // None means that the filter removed all data points in + // the timeseries. In that case, we remove the timeseries + // altogether. 
} - timeseries.push(Timeseries { - fields: input.fields.clone(), - points, - alignment: input.alignment, - }) } output_tables.push(Table::from_timeseries( table.name(), @@ -823,7 +820,7 @@ impl SimpleFilter { ) -> Result, Error> { let ident = self.ident.as_str(); if ident == "timestamp" { - self.filter_points_by_timestamp(negated, &points.timestamps) + self.filter_points_by_timestamp(negated, points.timestamps()) } else if ident == "datum" { anyhow::ensure!( points.dimensionality() == 1, @@ -1151,15 +1148,15 @@ impl SimpleFilter { mod tests { use crate::oxql::ast::grammar::query_parser; use crate::oxql::ast::logical_op::LogicalOp; - use crate::oxql::point::DataType; - use crate::oxql::point::MetricType; - use crate::oxql::point::Points; - use crate::oxql::point::ValueArray; - use crate::oxql::point::Values; - use crate::oxql::Table; - use crate::oxql::Timeseries; use chrono::Utc; use oximeter::FieldValue; + use oxql_types::point::DataType; + use oxql_types::point::MetricType; + use oxql_types::point::Points; + use oxql_types::point::ValueArray; + use oxql_types::point::Values; + use oxql_types::Table; + use oxql_types::Timeseries; use std::time::Duration; use uuid::Uuid; @@ -1172,7 +1169,7 @@ mod tests { values: ValueArray::Double(vec![Some(0.0), Some(2.0)]), metric_type: MetricType::Gauge, }]; - let points = Points { start_times, timestamps, values }; + let points = Points::new(start_times, timestamps, values); // This filter should remove the first point based on its timestamp. 
let t = Utc::now() + Duration::from_secs(10); @@ -1205,7 +1202,7 @@ mod tests { values: ValueArray::Double(vec![Some(0.0), Some(2.0)]), metric_type: MetricType::Gauge, }]; - let points = Points { start_times, timestamps, values }; + let points = Points::new(start_times, timestamps, values); let filter = query_parser::filter("filter datum < \"something\"").unwrap(); diff --git a/oximeter/db/src/oxql/ast/table_ops/group_by.rs b/oximeter/db/src/oxql/ast/table_ops/group_by.rs index f40572d762..c48804a788 100644 --- a/oximeter/db/src/oxql/ast/table_ops/group_by.rs +++ b/oximeter/db/src/oxql/ast/table_ops/group_by.rs @@ -10,13 +10,13 @@ use chrono::DateTime; use chrono::Utc; use crate::oxql::ast::ident::Ident; -use crate::oxql::point::DataType; -use crate::oxql::point::MetricType; -use crate::oxql::point::ValueArray; -use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; -use crate::TimeseriesKey; +use anyhow::Error; +use oximeter::schema::TimeseriesKey; +use oxql_types::point::DataType; +use oxql_types::point::MetricType; +use oxql_types::point::ValueArray; +use oxql_types::Table; +use oxql_types::Timeseries; use std::collections::btree_map::Entry; use std::collections::BTreeMap; @@ -98,7 +98,7 @@ impl GroupBy { ValueArray::Double(new_values), ValueArray::Double(existing_values), ) => { - let new_timestamps = &dropped.points.timestamps; + let new_timestamps = dropped.points.timestamps(); // We will be merging the new data with the // existing, but borrow-checking limits the degree @@ -106,7 +106,7 @@ impl GroupBy { // entry in the output table. Instead, aggregate // everything into a copy of the expected data. let mut timestamps = - existing.points.timestamps.clone(); + existing.points.timestamps().to_owned(); let mut values = existing_values.clone(); // Merge in the new values, so long as they actually @@ -152,10 +152,7 @@ impl GroupBy { // Replace the existing output timeseries's // timestamps and data arrays. 
- std::mem::swap( - &mut existing.points.timestamps, - &mut timestamps, - ); + existing.points.set_timestamps(timestamps); existing .points .values_mut(0) @@ -166,7 +163,7 @@ impl GroupBy { ValueArray::Integer(new_values), ValueArray::Integer(existing_values), ) => { - let new_timestamps = &dropped.points.timestamps; + let new_timestamps = dropped.points.timestamps(); // We will be merging the new data with the // existing, but borrow-checking limits the degree @@ -174,7 +171,7 @@ impl GroupBy { // entry in the output table. Instead, aggregate // everything into a copy of the expected data. let mut timestamps = - existing.points.timestamps.clone(); + existing.points.timestamps().to_owned(); let mut values = existing_values.clone(); // Merge in the new values, so long as they actually @@ -220,10 +217,7 @@ impl GroupBy { // Replace the existing output timeseries's // timestamps and data arrays. - std::mem::swap( - &mut existing.points.timestamps, - &mut timestamps, - ); + existing.points.set_timestamps(timestamps); existing .points .values_mut(0) @@ -286,14 +280,15 @@ impl GroupBy { else { unreachable!(); }; - let new_timestamps = &new_points.timestamps; + let new_timestamps = new_points.timestamps(); // We will be merging the new data with the // existing, but borrow-checking limits the degree // to which we can easily do this on the `existing` // entry in the output table. Instead, aggregate // everything into a copy of the expected data. - let mut timestamps = existing.points.timestamps.clone(); + let mut timestamps = + existing.points.timestamps().to_owned(); let mut values = existing .points .values(0) @@ -360,10 +355,7 @@ impl GroupBy { // Replace the existing output timeseries's // timestamps and data arrays. - std::mem::swap( - &mut existing.points.timestamps, - &mut timestamps, - ); + existing.points.set_timestamps(timestamps); existing .points .values_mut(0) @@ -388,7 +380,7 @@ impl GroupBy { // _zero_ for any where the values are none. 
let counts = new_timeseries .points - .timestamps + .timestamps() .iter() .zip(values) .map(|(timestamp, maybe_value)| { @@ -434,16 +426,16 @@ pub enum Reducer { #[cfg(test)] mod tests { use super::{GroupBy, Reducer}; - use crate::oxql::{ - ast::{ - ident::Ident, - table_ops::align::{Align, AlignmentMethod}, - }, - point::{DataType, MetricType, ValueArray}, - Table, Timeseries, + use crate::oxql::ast::{ + ident::Ident, + table_ops::align::{Align, AlignmentMethod}, }; use chrono::{DateTime, Utc}; use oximeter::FieldValue; + use oxql_types::{ + point::{DataType, MetricType, ValueArray}, + Table, Timeseries, + }; use std::{collections::BTreeMap, time::Duration}; // Which timeseries the second data point is missing from. @@ -495,8 +487,8 @@ mod tests { MetricType::Gauge, ) .unwrap(); - ts0.points.start_times = None; - ts0.points.timestamps.clone_from(×tamps); + ts0.points.clear_start_times(); + ts0.points.set_timestamps(timestamps.clone()); *ts0.points.values_mut(0).unwrap() = ValueArray::Double(vec![ Some(1.0), if matches!( @@ -527,7 +519,7 @@ mod tests { MetricType::Gauge, ) .unwrap(); - ts1.points.start_times = None; + ts1.points.clear_start_times(); // Non-overlapping in this test setup means that we just shift one // value from this array backward in time by one additional second. @@ -538,7 +530,7 @@ mod tests { // // When reducing, t0 is never changed, and t1-t2 are always reduced // together, if the values are present. 
- ts1.points.timestamps = if cfg.overlapping_times { + let new_timestamps = if cfg.overlapping_times { timestamps.clone() } else { let mut new_timestamps = timestamps.clone(); @@ -546,6 +538,7 @@ mod tests { timestamps.insert(0, new_timestamps[0]); new_timestamps }; + ts1.points.set_timestamps(new_timestamps); *ts1.points.values_mut(0).unwrap() = ValueArray::Double(vec![ Some(2.0), if matches!(cfg.missing_value, MissingValue::Both) { @@ -604,11 +597,13 @@ mod tests { let points = &grouped_timeseries.points; assert_eq!(points.dimensionality(), 1, "Points should still be 1D"); assert_eq!( - points.start_times, None, + points.start_times(), + None, "Points should not have start times" ); assert_eq!( - points.timestamps, test.timestamps, + points.timestamps(), + test.timestamps, "Points do not have correct timestamps" ); diff --git a/oximeter/db/src/oxql/ast/table_ops/join.rs b/oximeter/db/src/oxql/ast/table_ops/join.rs index 3c150a4acf..2893f6cf3e 100644 --- a/oximeter/db/src/oxql/ast/table_ops/join.rs +++ b/oximeter/db/src/oxql/ast/table_ops/join.rs @@ -6,12 +6,10 @@ // Copyright 2024 Oxide Computer Company -use crate::oxql::point::MetricType; -use crate::oxql::point::Points; -use crate::oxql::point::Values; -use crate::oxql::Error; -use crate::oxql::Table; use anyhow::Context; +use anyhow::Error; +use oxql_types::point::MetricType; +use oxql_types::Table; /// An AST node for a natural inner join. #[derive(Clone, Copy, Debug, PartialEq)] @@ -80,10 +78,8 @@ impl Join { // 1. They have the same alignment, and // 2. We merge the timepoints rather than simply creating a // ragged array of points. - timeseries.points = inner_join_point_arrays( - ×eries.points, - &next_timeseries.points, - )?; + timeseries.points = + timeseries.points.inner_join(&next_timeseries.points)?; } // We'll also update the name, to indicate the joined data. out.name.push(','); @@ -93,101 +89,6 @@ impl Join { } } -// Given two arrays of points, stack them together at matching timepoints. 
-// -// For time points in either which do not have a corresponding point in the -// other, the entire time point is elided. -fn inner_join_point_arrays( - left: &Points, - right: &Points, -) -> Result { - // Create an output array with roughly the right capacity, and double the - // number of dimensions. We're trying to stack output value arrays together - // along the dimension axis. - let data_types = - left.data_types().chain(right.data_types()).collect::>(); - let metric_types = - left.metric_types().chain(right.metric_types()).collect::>(); - let mut out = Points::with_capacity( - left.len().max(right.len()), - data_types.iter().copied(), - metric_types.iter().copied(), - )?; - - // Iterate through each array until one is exhausted. We're only inserting - // values from both arrays where the timestamps actually match, since this - // is an inner join. We may want to insert missing values where timestamps - // do not match on either side, when we support an outer join of some kind. - let n_left_dim = left.values.len(); - let mut left_ix = 0; - let mut right_ix = 0; - while left_ix < left.len() && right_ix < right.len() { - let left_timestamp = left.timestamps[left_ix]; - let right_timestamp = right.timestamps[right_ix]; - if left_timestamp == right_timestamp { - out.timestamps.push(left_timestamp); - push_concrete_values( - &mut out.values[..n_left_dim], - &left.values, - left_ix, - ); - push_concrete_values( - &mut out.values[n_left_dim..], - &right.values, - right_ix, - ); - left_ix += 1; - right_ix += 1; - } else if left_timestamp < right_timestamp { - left_ix += 1; - } else { - right_ix += 1; - } - } - Ok(out) -} - -// Push the `i`th value from each dimension of `from` onto `to`. 
-fn push_concrete_values(to: &mut [Values], from: &[Values], i: usize) { - assert_eq!(to.len(), from.len()); - for (output, input) in to.iter_mut().zip(from.iter()) { - let input_array = &input.values; - let output_array = &mut output.values; - assert_eq!(input_array.data_type(), output_array.data_type()); - if let Ok(ints) = input_array.as_integer() { - output_array.as_integer_mut().unwrap().push(ints[i]); - continue; - } - if let Ok(doubles) = input_array.as_double() { - output_array.as_double_mut().unwrap().push(doubles[i]); - continue; - } - if let Ok(bools) = input_array.as_boolean() { - output_array.as_boolean_mut().unwrap().push(bools[i]); - continue; - } - if let Ok(strings) = input_array.as_string() { - output_array.as_string_mut().unwrap().push(strings[i].clone()); - continue; - } - if let Ok(dists) = input_array.as_integer_distribution() { - output_array - .as_integer_distribution_mut() - .unwrap() - .push(dists[i].clone()); - continue; - } - if let Ok(dists) = input_array.as_double_distribution() { - output_array - .as_double_distribution_mut() - .unwrap() - .push(dists[i].clone()); - continue; - } - unreachable!(); - } -} - // Return an error if any metric types are not suitable for joining. 
fn ensure_all_metric_types( mut metric_types: impl ExactSizeIterator, @@ -200,186 +101,3 @@ fn ensure_all_metric_types( ); Ok(()) } - -#[cfg(test)] -mod tests { - use super::*; - use crate::oxql::point::DataType; - use crate::oxql::point::Datum; - use crate::oxql::point::ValueArray; - use chrono::Utc; - use std::time::Duration; - - #[test] - fn test_push_concrete_values() { - let mut points = Points::with_capacity( - 2, - [DataType::Integer, DataType::Double].into_iter(), - [MetricType::Gauge, MetricType::Gauge].into_iter(), - ) - .unwrap(); - - // Push a concrete value for the integer dimension - let from_ints = vec![Values { - values: ValueArray::Integer(vec![Some(1)]), - metric_type: MetricType::Gauge, - }]; - push_concrete_values(&mut points.values[..1], &from_ints, 0); - - // And another for the double dimension. - let from_doubles = vec![Values { - values: ValueArray::Double(vec![Some(2.0)]), - metric_type: MetricType::Gauge, - }]; - push_concrete_values(&mut points.values[1..], &from_doubles, 0); - - assert_eq!( - points.dimensionality(), - 2, - "Points should have 2 dimensions", - ); - let ints = points.values[0].values.as_integer().unwrap(); - assert_eq!( - ints.len(), - 1, - "Should have pushed one point in the first dimension" - ); - assert_eq!( - ints[0], - Some(1), - "Should have pushed 1 onto the first dimension" - ); - let doubles = points.values[1].values.as_double().unwrap(); - assert_eq!( - doubles.len(), - 1, - "Should have pushed one point in the second dimension" - ); - assert_eq!( - doubles[0], - Some(2.0), - "Should have pushed 2.0 onto the second dimension" - ); - } - - #[test] - fn test_join_point_arrays() { - let now = Utc::now(); - - // Create a set of integer points to join with. - // - // This will have two timestamps, one of which will match the points - // below that are merged in. 
- let int_points = Points { - start_times: None, - timestamps: vec![ - now - Duration::from_secs(3), - now - Duration::from_secs(2), - now, - ], - values: vec![Values { - values: ValueArray::Integer(vec![Some(1), Some(2), Some(3)]), - metric_type: MetricType::Gauge, - }], - }; - - // Create an additional set of double points. - // - // This also has two timepoints, one of which matches with the above, - // and one of which does not. - let double_points = Points { - start_times: None, - timestamps: vec![ - now - Duration::from_secs(3), - now - Duration::from_secs(1), - now, - ], - values: vec![Values { - values: ValueArray::Double(vec![ - Some(4.0), - Some(5.0), - Some(6.0), - ]), - metric_type: MetricType::Gauge, - }], - }; - - // Merge the arrays. - let merged = - inner_join_point_arrays(&int_points, &double_points).unwrap(); - - // Basic checks that we merged in the right values and have the right - // types and dimensions. - assert_eq!( - merged.dimensionality(), - 2, - "Should have appended the dimensions from each input array" - ); - assert_eq!(merged.len(), 2, "Should have merged two common points",); - assert_eq!( - merged.data_types().collect::>(), - &[DataType::Integer, DataType::Double], - "Should have combined the data types of the input arrays" - ); - assert_eq!( - merged.metric_types().collect::>(), - &[MetricType::Gauge, MetricType::Gauge], - "Should have combined the metric types of the input arrays" - ); - - // Check the actual values of the array. - let mut points = merged.iter_points(); - - // The first and last timepoint overlapped between the two arrays, so we - // should have both of them as concrete samples. 
- let pt = points.next().unwrap(); - assert_eq!(pt.start_time, None, "Gauges don't have a start time"); - assert_eq!( - *pt.timestamp, int_points.timestamps[0], - "Should have taken the first input timestamp from both arrays", - ); - assert_eq!( - *pt.timestamp, double_points.timestamps[0], - "Should have taken the first input timestamp from both arrays", - ); - let values = pt.values; - assert_eq!(values.len(), 2, "Should have 2 dimensions"); - assert_eq!( - &values[0], - &(Datum::Integer(Some(&1)), MetricType::Gauge), - "Should have pulled value from first integer array." - ); - assert_eq!( - &values[1], - &(Datum::Double(Some(&4.0)), MetricType::Gauge), - "Should have pulled value from second double array." - ); - - // And the next point - let pt = points.next().unwrap(); - assert_eq!(pt.start_time, None, "Gauges don't have a start time"); - assert_eq!( - *pt.timestamp, int_points.timestamps[2], - "Should have taken the input timestamp from both arrays", - ); - assert_eq!( - *pt.timestamp, double_points.timestamps[2], - "Should have taken the input timestamp from both arrays", - ); - let values = pt.values; - assert_eq!(values.len(), 2, "Should have 2 dimensions"); - assert_eq!( - &values[0], - &(Datum::Integer(Some(&3)), MetricType::Gauge), - "Should have pulled value from first integer array." - ); - assert_eq!( - &values[1], - &(Datum::Double(Some(&6.0)), MetricType::Gauge), - "Should have pulled value from second double array." - ); - - // And there should be no other values. 
- assert!(points.next().is_none(), "There should be no more points"); - } -} diff --git a/oximeter/db/src/oxql/ast/table_ops/limit.rs b/oximeter/db/src/oxql/ast/table_ops/limit.rs index 0205868f5c..89afb31a7c 100644 --- a/oximeter/db/src/oxql/ast/table_ops/limit.rs +++ b/oximeter/db/src/oxql/ast/table_ops/limit.rs @@ -6,12 +6,8 @@ // Copyright 2024 Oxide Computer Company -use crate::oxql::point::Points; -use crate::oxql::point::ValueArray; -use crate::oxql::point::Values; -use crate::oxql::Error; -use crate::oxql::Table; -use crate::oxql::Timeseries; +use anyhow::Error; +use oxql_types::Table; use std::num::NonZeroUsize; /// The kind of limiting operation @@ -65,58 +61,7 @@ impl Limit { } }; - // Slice the various data arrays. - let start_times = input_points - .start_times - .as_ref() - .map(|s| s[start..end].to_vec()); - let timestamps = - input_points.timestamps[start..end].to_vec(); - let values = input_points - .values - .iter() - .map(|vals| { - let values = match &vals.values { - ValueArray::Integer(inner) => { - ValueArray::Integer( - inner[start..end].to_vec(), - ) - } - ValueArray::Double(inner) => { - ValueArray::Double( - inner[start..end].to_vec(), - ) - } - ValueArray::Boolean(inner) => { - ValueArray::Boolean( - inner[start..end].to_vec(), - ) - } - ValueArray::String(inner) => { - ValueArray::String( - inner[start..end].to_vec(), - ) - } - ValueArray::IntegerDistribution(inner) => { - ValueArray::IntegerDistribution( - inner[start..end].to_vec(), - ) - } - ValueArray::DoubleDistribution(inner) => { - ValueArray::DoubleDistribution( - inner[start..end].to_vec(), - ) - } - }; - Values { values, metric_type: vals.metric_type } - }) - .collect(); - let points = Points { start_times, timestamps, values }; - Timeseries { - fields: timeseries.fields.clone(), - points, - alignment: timeseries.alignment, - } + timeseries.limit(start, end) }); Table::from_timeseries(table.name(), timeseries) }) @@ -127,9 +72,12 @@ impl Limit { #[cfg(test)] mod tests { use 
super::*; - use crate::oxql::point::{DataType, MetricType}; use chrono::Utc; use oximeter::FieldValue; + use oxql_types::{ + point::{DataType, MetricType}, + Timeseries, + }; use std::{collections::BTreeMap, time::Duration}; fn test_tables() -> Vec { @@ -150,12 +98,14 @@ mod tests { MetricType::Gauge, ) .unwrap(); - timeseries.points.timestamps.clone_from(×tamps); - timeseries.points.values[0].values.as_integer_mut().unwrap().extend([ - Some(1), - Some(2), - Some(3), - ]); + timeseries.points.set_timestamps(timestamps.clone()); + timeseries + .points + .values_mut(0) + .unwrap() + .as_integer_mut() + .unwrap() + .extend([Some(1), Some(2), Some(3)]); let table1 = Table::from_timeseries("first", std::iter::once(timeseries)) .unwrap(); @@ -166,12 +116,14 @@ mod tests { MetricType::Gauge, ) .unwrap(); - timeseries.points.timestamps.clone_from(×tamps); - timeseries.points.values[0].values.as_integer_mut().unwrap().extend([ - Some(4), - Some(5), - Some(6), - ]); + timeseries.points.set_timestamps(timestamps.clone()); + timeseries + .points + .values_mut(0) + .unwrap() + .as_integer_mut() + .unwrap() + .extend([Some(4), Some(5), Some(6)]); let table2 = Table::from_timeseries("second", std::iter::once(timeseries)) .unwrap(); @@ -223,7 +175,8 @@ mod tests { "Limited table should have the same fields" ); assert_eq!( - timeseries.alignment, limited_timeseries.alignment, + timeseries.alignment(), + limited_timeseries.alignment(), "Limited timeseries should have the same alignment" ); assert_eq!( @@ -237,14 +190,15 @@ mod tests { // These depend on the limit operation. 
let points = ×eries.points; let limited_points = &limited_timeseries.points; - assert_eq!(points.start_times, limited_points.start_times); + assert_eq!(points.start_times(), limited_points.start_times()); assert_eq!( - points.timestamps[start..end], - limited_points.timestamps + &points.timestamps()[start..end], + limited_points.timestamps() ); assert_eq!( - limited_points.values[0].values.as_integer().unwrap(), - &points.values[0].values.as_integer().unwrap()[start..end], + limited_points.values(0).unwrap().as_integer().unwrap(), + &points.values(0).unwrap().as_integer().unwrap() + [start..end], "Points should be limited to [{start}..{end}]", ); } diff --git a/oximeter/db/src/oxql/ast/table_ops/mod.rs b/oximeter/db/src/oxql/ast/table_ops/mod.rs index 46f5106a08..8b8d4cbe1b 100644 --- a/oximeter/db/src/oxql/ast/table_ops/mod.rs +++ b/oximeter/db/src/oxql/ast/table_ops/mod.rs @@ -20,10 +20,10 @@ use self::join::Join; use self::limit::Limit; use crate::oxql::ast::Query; use crate::oxql::Error; -use crate::oxql::Table; use chrono::DateTime; use chrono::Utc; use oximeter::TimeseriesName; +use oxql_types::Table; /// A basic table operation, the atoms of an OxQL query. #[derive(Clone, Debug, PartialEq)] diff --git a/oximeter/db/src/oxql/mod.rs b/oximeter/db/src/oxql/mod.rs index 3961fae1cc..fcdfb783c5 100644 --- a/oximeter/db/src/oxql/mod.rs +++ b/oximeter/db/src/oxql/mod.rs @@ -10,13 +10,9 @@ use peg::error::ParseError as PegError; use peg::str::LineCol; pub mod ast; -pub mod point; pub mod query; -pub mod table; pub use self::query::Query; -pub use self::table::Table; -pub use self::table::Timeseries; pub use anyhow::Error; /// Format a PEG parsing error into a nice anyhow error. 
diff --git a/oximeter/db/src/oxql/query/mod.rs b/oximeter/db/src/oxql/query/mod.rs index e1fada9f2a..46c9bbc92c 100644 --- a/oximeter/db/src/oxql/query/mod.rs +++ b/oximeter/db/src/oxql/query/mod.rs @@ -23,7 +23,6 @@ use crate::oxql::Error; use crate::TimeseriesName; use chrono::DateTime; use chrono::Utc; -use std::time::Duration; /// A parsed OxQL query. #[derive(Clone, Debug, PartialEq)] @@ -391,15 +390,6 @@ fn restrict_filter_idents( } } -/// Describes the time alignment for an OxQL query. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct Alignment { - /// The end time of the query, which the temporal reference point. - pub end_time: DateTime, - /// The alignment period, the interval on which values are produced. - pub period: Duration, -} - #[cfg(test)] mod tests { use super::Filter; diff --git a/oximeter/db/src/query.rs b/oximeter/db/src/query.rs index ceabf00888..556ced0437 100644 --- a/oximeter/db/src/query.rs +++ b/oximeter/db/src/query.rs @@ -6,11 +6,12 @@ // Copyright 2021 Oxide Computer Company use crate::{ - Error, FieldSchema, FieldSource, TimeseriesKey, TimeseriesSchema, - DATABASE_NAME, DATABASE_SELECT_FORMAT, + Error, FieldSchema, FieldSource, TimeseriesSchema, DATABASE_NAME, + DATABASE_SELECT_FORMAT, }; use chrono::{DateTime, Utc}; use dropshot::PaginationOrder; +use oximeter::schema::TimeseriesKey; use oximeter::types::{DatumType, FieldType, FieldValue}; use oximeter::{Metric, Target}; use regex::Regex; diff --git a/oximeter/db/src/shells/oxql.rs b/oximeter/db/src/shells/oxql.rs index 0f23ea7d64..f46d08c0cf 100644 --- a/oximeter/db/src/shells/oxql.rs +++ b/oximeter/db/src/shells/oxql.rs @@ -7,9 +7,10 @@ // Copyright 2024 Oxide Computer use super::{list_timeseries, prepare_columns}; -use crate::{make_client, oxql::Table, Client, OxqlResult}; +use crate::{make_client, Client, OxqlResult}; use clap::Args; use crossterm::style::Stylize; +use oxql_types::Table; use reedline::DefaultPrompt; use reedline::DefaultPromptSegment; use reedline::Reedline; 
diff --git a/oximeter/oxql-types/Cargo.toml b/oximeter/oxql-types/Cargo.toml new file mode 100644 index 0000000000..da7c7bcd1c --- /dev/null +++ b/oximeter/oxql-types/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "oxql-types" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +chrono.workspace = true +highway.workspace = true +num.workspace = true +omicron-workspace-hack.workspace = true +oximeter-types.workspace = true +schemars.workspace = true +serde.workspace = true diff --git a/oximeter/oxql-types/src/lib.rs b/oximeter/oxql-types/src/lib.rs new file mode 100644 index 0000000000..00468705a9 --- /dev/null +++ b/oximeter/oxql-types/src/lib.rs @@ -0,0 +1,23 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Core types for OxQL. + +use chrono::{DateTime, Utc}; +use std::time::Duration; + +pub mod point; +pub mod table; + +pub use self::table::Table; +pub use self::table::Timeseries; + +/// Describes the time alignment for an OxQL query. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Alignment { + /// The end time of the query, which the temporal reference point. + pub end_time: DateTime, + /// The alignment period, the interval on which values are produced. 
+ pub period: Duration, +} diff --git a/oximeter/db/src/oxql/point.rs b/oximeter/oxql-types/src/point.rs similarity index 82% rename from oximeter/db/src/oxql/point.rs rename to oximeter/oxql-types/src/point.rs index e04193e8b8..6e3c7143dc 100644 --- a/oximeter/db/src/oxql/point.rs +++ b/oximeter/oxql-types/src/point.rs @@ -6,15 +6,15 @@ // Copyright 2024 Oxide Computer Company -use super::Error; use anyhow::Context; +use anyhow::Error; use chrono::DateTime; use chrono::Utc; use num::ToPrimitive; -use oximeter::traits::HistogramSupport; -use oximeter::DatumType; -use oximeter::Measurement; -use oximeter::Quantile; +use oximeter_types::traits::HistogramSupport; +use oximeter_types::DatumType; +use oximeter_types::Measurement; +use oximeter_types::Quantile; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -131,32 +131,32 @@ impl CumulativeDatum { // not cumulative. fn from_cumulative(meas: &Measurement) -> Result { let datum = match meas.datum() { - oximeter::Datum::CumulativeI64(val) => { + oximeter_types::Datum::CumulativeI64(val) => { CumulativeDatum::Integer(val.value()) } - oximeter::Datum::CumulativeU64(val) => { + oximeter_types::Datum::CumulativeU64(val) => { let int = val .value() .try_into() .context("Overflow converting u64 to i64")?; CumulativeDatum::Integer(int) } - oximeter::Datum::CumulativeF32(val) => { + oximeter_types::Datum::CumulativeF32(val) => { CumulativeDatum::Double(val.value().into()) } - oximeter::Datum::CumulativeF64(val) => { + oximeter_types::Datum::CumulativeF64(val) => { CumulativeDatum::Double(val.value()) } - oximeter::Datum::HistogramI8(hist) => hist.into(), - oximeter::Datum::HistogramU8(hist) => hist.into(), - oximeter::Datum::HistogramI16(hist) => hist.into(), - oximeter::Datum::HistogramU16(hist) => hist.into(), - oximeter::Datum::HistogramI32(hist) => hist.into(), - oximeter::Datum::HistogramU32(hist) => hist.into(), - oximeter::Datum::HistogramI64(hist) => hist.into(), - 
oximeter::Datum::HistogramU64(hist) => hist.try_into()?, - oximeter::Datum::HistogramF32(hist) => hist.into(), - oximeter::Datum::HistogramF64(hist) => hist.into(), + oximeter_types::Datum::HistogramI8(hist) => hist.into(), + oximeter_types::Datum::HistogramU8(hist) => hist.into(), + oximeter_types::Datum::HistogramI16(hist) => hist.into(), + oximeter_types::Datum::HistogramU16(hist) => hist.into(), + oximeter_types::Datum::HistogramI32(hist) => hist.into(), + oximeter_types::Datum::HistogramU32(hist) => hist.into(), + oximeter_types::Datum::HistogramI64(hist) => hist.into(), + oximeter_types::Datum::HistogramU64(hist) => hist.try_into()?, + oximeter_types::Datum::HistogramF32(hist) => hist.into(), + oximeter_types::Datum::HistogramF64(hist) => hist.into(), other => anyhow::bail!( "Input datum of type {} is not cumulative", other.datum_type(), @@ -169,10 +169,10 @@ impl CumulativeDatum { /// A single list of values, for one dimension of a timeseries. #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] pub struct Values { - // The data values. - pub(super) values: ValueArray, - // The type of this metric. - pub(super) metric_type: MetricType, + /// The data values. + pub values: ValueArray, + /// The type of this metric. + pub metric_type: MetricType, } impl Values { @@ -285,14 +285,23 @@ impl<'a> fmt::Display for Datum<'a> { #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] pub struct Points { // The start time points for cumulative or delta metrics. - pub(super) start_times: Option>>, + pub(crate) start_times: Option>>, // The timestamp of each value. - pub(super) timestamps: Vec>, + pub(crate) timestamps: Vec>, // The array of data values, one for each dimension. - pub(super) values: Vec, + pub(crate) values: Vec, } impl Points { + /// Construct a new `Points` with the provided data. 
+ pub fn new( + start_times: Option>>, + timestamps: Vec>, + values: Vec, + ) -> Self { + Self { start_times, timestamps, values } + } + /// Construct an empty array of points to hold data of the provided type. pub fn empty(data_type: DataType, metric_type: MetricType) -> Self { Self::with_capacity( @@ -303,8 +312,28 @@ impl Points { .unwrap() } - // Return a mutable reference to the value array of the specified dimension, if any. - pub(super) fn values_mut(&mut self, dim: usize) -> Option<&mut ValueArray> { + /// Return the start times of the points, if any. + pub fn start_times(&self) -> Option<&[DateTime]> { + self.start_times.as_deref() + } + + /// Clear the start times of the points. + pub fn clear_start_times(&mut self) { + self.start_times = None; + } + + /// Return the timestamps of the points. + pub fn timestamps(&self) -> &[DateTime] { + &self.timestamps + } + + pub fn set_timestamps(&mut self, timestamps: Vec>) { + self.timestamps = timestamps; + } + + /// Return a mutable reference to the value array of the specified + /// dimension, if any. + pub fn values_mut(&mut self, dim: usize) -> Option<&mut ValueArray> { self.values.get_mut(dim).map(|val| &mut val.values) } @@ -563,8 +592,8 @@ impl Points { }) } - // Filter points in self to those where `to_keep` is true. - pub(crate) fn filter(&self, to_keep: Vec) -> Result { + /// Filter points in self to those where `to_keep` is true. + pub fn filter(&self, to_keep: Vec) -> Result { anyhow::ensure!( to_keep.len() == self.len(), "Filter array must be the same length as self", @@ -646,8 +675,8 @@ impl Points { Ok(out) } - // Return a new set of points, with the values casted to the provided types. - pub(crate) fn cast(&self, types: &[DataType]) -> Result { + /// Return a new set of points, with the values casted to the provided types. 
+ pub fn cast(&self, types: &[DataType]) -> Result { anyhow::ensure!( types.len() == self.dimensionality(), "Cannot cast to {} types, the data has dimensionality {}", @@ -863,12 +892,104 @@ impl Points { Ok(Self { start_times, timestamps, values: new_values }) } + /// Given two arrays of points, stack them together at matching timepoints. + /// + /// For time points in either which do not have a corresponding point in + /// the other, the entire time point is elided. + pub fn inner_join(&self, right: &Points) -> Result { + // Create an output array with roughly the right capacity, and double the + // number of dimensions. We're trying to stack output value arrays together + // along the dimension axis. + let data_types = + self.data_types().chain(right.data_types()).collect::>(); + let metric_types = + self.metric_types().chain(right.metric_types()).collect::>(); + let mut out = Points::with_capacity( + self.len().max(right.len()), + data_types.iter().copied(), + metric_types.iter().copied(), + )?; + + // Iterate through each array until one is exhausted. We're only inserting + // values from both arrays where the timestamps actually match, since this + // is an inner join. We may want to insert missing values where timestamps + // do not match on either side, when we support an outer join of some kind. 
+ let n_left_dim = self.dimensionality(); + let mut left_ix = 0; + let mut right_ix = 0; + while left_ix < self.len() && right_ix < right.len() { + let left_timestamp = self.timestamps()[left_ix]; + let right_timestamp = right.timestamps()[right_ix]; + if left_timestamp == right_timestamp { + out.timestamps.push(left_timestamp); + push_concrete_values( + &mut out.values[..n_left_dim], + &self.values, + left_ix, + ); + push_concrete_values( + &mut out.values[n_left_dim..], + &right.values, + right_ix, + ); + left_ix += 1; + right_ix += 1; + } else if left_timestamp < right_timestamp { + left_ix += 1; + } else { + right_ix += 1; + } + } + Ok(out) + } + /// Return true if self contains no data points. pub fn is_empty(&self) -> bool { self.len() == 0 } } +// Push the `i`th value from each dimension of `from` onto `to`. +fn push_concrete_values(to: &mut [Values], from: &[Values], i: usize) { + assert_eq!(to.len(), from.len()); + for (output, input) in to.iter_mut().zip(from.iter()) { + let input_array = &input.values; + let output_array = &mut output.values; + assert_eq!(input_array.data_type(), output_array.data_type()); + if let Ok(ints) = input_array.as_integer() { + output_array.as_integer_mut().unwrap().push(ints[i]); + continue; + } + if let Ok(doubles) = input_array.as_double() { + output_array.as_double_mut().unwrap().push(doubles[i]); + continue; + } + if let Ok(bools) = input_array.as_boolean() { + output_array.as_boolean_mut().unwrap().push(bools[i]); + continue; + } + if let Ok(strings) = input_array.as_string() { + output_array.as_string_mut().unwrap().push(strings[i].clone()); + continue; + } + if let Ok(dists) = input_array.as_integer_distribution() { + output_array + .as_integer_distribution_mut() + .unwrap() + .push(dists[i].clone()); + continue; + } + if let Ok(dists) = input_array.as_double_distribution() { + output_array + .as_double_distribution_mut() + .unwrap() + .push(dists[i].clone()); + continue; + } + unreachable!(); + } +} + /// List of data 
values for one timeseries. /// /// Each element is an option, where `None` represents a missing sample. @@ -900,8 +1021,8 @@ impl ValueArray { } } - // Return the data type in self. - pub(super) fn data_type(&self) -> DataType { + /// Return the data type in self. + pub fn data_type(&self) -> DataType { match self { ValueArray::Integer(_) => DataType::Integer, ValueArray::Double(_) => DataType::Double, @@ -947,10 +1068,8 @@ impl ValueArray { Ok(inner) } - // Access the inner array of integers, if possible. - pub(super) fn as_integer_mut( - &mut self, - ) -> Result<&mut Vec>, Error> { + /// Access the inner array of integers, if possible. + pub fn as_integer_mut(&mut self) -> Result<&mut Vec>, Error> { let ValueArray::Integer(inner) = self else { anyhow::bail!( "Cannot access value array as integer type, it has type {}", @@ -1107,91 +1226,97 @@ impl ValueArray { // Push a value directly from a datum, without modification. fn push_value_from_datum( &mut self, - datum: &oximeter::Datum, + datum: &oximeter_types::Datum, ) -> Result<(), Error> { match datum { - oximeter::Datum::Bool(b) => self.as_boolean_mut()?.push(Some(*b)), - oximeter::Datum::I8(i) => { + oximeter_types::Datum::Bool(b) => { + self.as_boolean_mut()?.push(Some(*b)) + } + oximeter_types::Datum::I8(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::U8(i) => { + oximeter_types::Datum::U8(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::I16(i) => { + oximeter_types::Datum::I16(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::U16(i) => { + oximeter_types::Datum::U16(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::I32(i) => { + oximeter_types::Datum::I32(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::U32(i) => { + oximeter_types::Datum::U32(i) => { self.as_integer_mut()?.push(Some(i64::from(*i))) } - oximeter::Datum::I64(i) => self.as_integer_mut()?.push(Some(*i)), 
- oximeter::Datum::U64(i) => { + oximeter_types::Datum::I64(i) => { + self.as_integer_mut()?.push(Some(*i)) + } + oximeter_types::Datum::U64(i) => { let i = i.to_i64().context("Failed to convert u64 datum to i64")?; self.as_integer_mut()?.push(Some(i)); } - oximeter::Datum::F32(f) => { + oximeter_types::Datum::F32(f) => { self.as_double_mut()?.push(Some(f64::from(*f))) } - oximeter::Datum::F64(f) => self.as_double_mut()?.push(Some(*f)), - oximeter::Datum::String(s) => { + oximeter_types::Datum::F64(f) => { + self.as_double_mut()?.push(Some(*f)) + } + oximeter_types::Datum::String(s) => { self.as_string_mut()?.push(Some(s.clone())) } - oximeter::Datum::Bytes(_) => { + oximeter_types::Datum::Bytes(_) => { anyhow::bail!("Bytes data types are not yet supported") } - oximeter::Datum::CumulativeI64(c) => { + oximeter_types::Datum::CumulativeI64(c) => { self.as_integer_mut()?.push(Some(c.value())) } - oximeter::Datum::CumulativeU64(c) => { + oximeter_types::Datum::CumulativeU64(c) => { let c = c .value() .to_i64() .context("Failed to convert u64 datum to i64")?; self.as_integer_mut()?.push(Some(c)); } - oximeter::Datum::CumulativeF32(c) => { + oximeter_types::Datum::CumulativeF32(c) => { self.as_double_mut()?.push(Some(f64::from(c.value()))) } - oximeter::Datum::CumulativeF64(c) => { + oximeter_types::Datum::CumulativeF64(c) => { self.as_double_mut()?.push(Some(c.value())) } - oximeter::Datum::HistogramI8(h) => self + oximeter_types::Datum::HistogramI8(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU8(h) => self + oximeter_types::Datum::HistogramU8(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramI16(h) => self + oximeter_types::Datum::HistogramI16(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU16(h) => self + oximeter_types::Datum::HistogramU16(h) => self .as_integer_distribution_mut()? 
.push(Some(Distribution::from(h))), - oximeter::Datum::HistogramI32(h) => self + oximeter_types::Datum::HistogramI32(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU32(h) => self + oximeter_types::Datum::HistogramU32(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramI64(h) => self + oximeter_types::Datum::HistogramI64(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramU64(h) => self + oximeter_types::Datum::HistogramU64(h) => self .as_integer_distribution_mut()? .push(Some(Distribution::try_from(h)?)), - oximeter::Datum::HistogramF32(h) => self + oximeter_types::Datum::HistogramF32(h) => self .as_double_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::HistogramF64(h) => self + oximeter_types::Datum::HistogramF64(h) => self .as_double_distribution_mut()? .push(Some(Distribution::from(h))), - oximeter::Datum::Missing(missing) => { + oximeter_types::Datum::Missing(missing) => { self.push_missing(missing.datum_type())? 
} } @@ -1216,7 +1341,7 @@ impl ValueArray { fn push_diff_from_last_to_datum( &mut self, last_datum: &Option, - new_datum: &oximeter::Datum, + new_datum: &oximeter_types::Datum, data_type: DataType, ) -> Result<(), Error> { match (last_datum.as_ref(), new_datum.is_missing()) { @@ -1253,49 +1378,49 @@ impl ValueArray { match (last_datum, new_datum) { ( CumulativeDatum::Integer(last), - oximeter::Datum::I8(new), + oximeter_types::Datum::I8(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::U8(new), + oximeter_types::Datum::U8(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::I16(new), + oximeter_types::Datum::I16(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::U16(new), + oximeter_types::Datum::U16(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::I32(new), + oximeter_types::Datum::I32(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::U32(new), + oximeter_types::Datum::U32(new), ) => { let new = i64::from(*new); self.as_integer_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::I64(new), + oximeter_types::Datum::I64(new), ) => { let diff = new .checked_sub(*last) @@ -1304,7 +1429,7 @@ impl ValueArray { } ( CumulativeDatum::Integer(last), - oximeter::Datum::U64(new), + oximeter_types::Datum::U64(new), ) => { let new = new .to_i64() @@ -1316,20 +1441,20 @@ impl ValueArray { } ( CumulativeDatum::Double(last), - oximeter::Datum::F32(new), + oximeter_types::Datum::F32(new), ) => { self.as_double_mut()? 
.push(Some(f64::from(*new) - last)); } ( CumulativeDatum::Double(last), - oximeter::Datum::F64(new), + oximeter_types::Datum::F64(new), ) => { self.as_double_mut()?.push(Some(new - last)); } ( CumulativeDatum::Integer(last), - oximeter::Datum::CumulativeI64(new), + oximeter_types::Datum::CumulativeI64(new), ) => { let new = new.value(); let diff = new @@ -1339,7 +1464,7 @@ impl ValueArray { } ( CumulativeDatum::Integer(last), - oximeter::Datum::CumulativeU64(new), + oximeter_types::Datum::CumulativeU64(new), ) => { let new = new .value() @@ -1352,20 +1477,20 @@ impl ValueArray { } ( CumulativeDatum::Double(last), - oximeter::Datum::CumulativeF32(new), + oximeter_types::Datum::CumulativeF32(new), ) => { self.as_double_mut()? .push(Some(f64::from(new.value()) - last)); } ( CumulativeDatum::Double(last), - oximeter::Datum::CumulativeF64(new), + oximeter_types::Datum::CumulativeF64(new), ) => { self.as_double_mut()?.push(Some(new.value() - last)); } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI8(new), + oximeter_types::Datum::HistogramI8(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1373,7 +1498,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU8(new), + oximeter_types::Datum::HistogramU8(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1381,7 +1506,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI16(new), + oximeter_types::Datum::HistogramI16(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1389,7 +1514,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU16(new), + oximeter_types::Datum::HistogramU16(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? 
@@ -1397,7 +1522,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI32(new), + oximeter_types::Datum::HistogramI32(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1405,7 +1530,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU32(new), + oximeter_types::Datum::HistogramU32(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1413,7 +1538,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramI64(new), + oximeter_types::Datum::HistogramI64(new), ) => { let new = Distribution::from(new); self.as_integer_distribution_mut()? @@ -1421,7 +1546,7 @@ impl ValueArray { } ( CumulativeDatum::IntegerDistribution(last), - oximeter::Datum::HistogramU64(new), + oximeter_types::Datum::HistogramU64(new), ) => { let new = Distribution::try_from(new)?; self.as_integer_distribution_mut()? @@ -1429,7 +1554,7 @@ impl ValueArray { } ( CumulativeDatum::DoubleDistribution(last), - oximeter::Datum::HistogramF32(new), + oximeter_types::Datum::HistogramF32(new), ) => { let new = Distribution::::from(new); self.as_double_distribution_mut()? @@ -1437,7 +1562,7 @@ impl ValueArray { } ( CumulativeDatum::DoubleDistribution(last), - oximeter::Datum::HistogramF64(new), + oximeter_types::Datum::HistogramF64(new), ) => { let new = Distribution::::from(new); self.as_double_distribution_mut()? @@ -1486,8 +1611,8 @@ impl ValueArray { } } - // Swap the value in self with other, asserting they're the same type. - pub(crate) fn swap(&mut self, mut values: ValueArray) { + /// Swap the value in self with other, asserting they're the same type. + pub fn swap(&mut self, mut values: ValueArray) { use std::mem::swap; match (self, &mut values) { (ValueArray::Integer(x), ValueArray::Integer(y)) => swap(x, y), @@ -1733,8 +1858,10 @@ where macro_rules! 
i64_dist_from { ($t:ty) => { - impl From<&oximeter::histogram::Histogram<$t>> for Distribution { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for Distribution + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { let (bins, counts) = hist.bins_and_counts(); Self { bins: bins.into_iter().map(i64::from).collect(), @@ -1750,8 +1877,10 @@ macro_rules! i64_dist_from { } } - impl From<&oximeter::histogram::Histogram<$t>> for CumulativeDatum { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for CumulativeDatum + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { CumulativeDatum::IntegerDistribution(hist.into()) } } @@ -1766,10 +1895,10 @@ i64_dist_from!(i32); i64_dist_from!(u32); i64_dist_from!(i64); -impl TryFrom<&oximeter::histogram::Histogram> for Distribution { +impl TryFrom<&oximeter_types::histogram::Histogram> for Distribution { type Error = Error; fn try_from( - hist: &oximeter::histogram::Histogram, + hist: &oximeter_types::histogram::Histogram, ) -> Result { let (bins, counts) = hist.bins_and_counts(); let bins = bins @@ -1791,10 +1920,10 @@ impl TryFrom<&oximeter::histogram::Histogram> for Distribution { } } -impl TryFrom<&oximeter::histogram::Histogram> for CumulativeDatum { +impl TryFrom<&oximeter_types::histogram::Histogram> for CumulativeDatum { type Error = Error; fn try_from( - hist: &oximeter::histogram::Histogram, + hist: &oximeter_types::histogram::Histogram, ) -> Result { hist.try_into().map(CumulativeDatum::IntegerDistribution) } @@ -1802,8 +1931,10 @@ impl TryFrom<&oximeter::histogram::Histogram> for CumulativeDatum { macro_rules! 
f64_dist_from { ($t:ty) => { - impl From<&oximeter::histogram::Histogram<$t>> for Distribution { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for Distribution + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { let (bins, counts) = hist.bins_and_counts(); Self { bins: bins.into_iter().map(f64::from).collect(), @@ -1819,8 +1950,10 @@ macro_rules! f64_dist_from { } } - impl From<&oximeter::histogram::Histogram<$t>> for CumulativeDatum { - fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + impl From<&oximeter_types::histogram::Histogram<$t>> + for CumulativeDatum + { + fn from(hist: &oximeter_types::histogram::Histogram<$t>) -> Self { CumulativeDatum::DoubleDistribution(hist.into()) } } @@ -1833,9 +1966,9 @@ f64_dist_from!(f64); #[cfg(test)] mod tests { use super::{Distribution, MetricType, Points, Values}; - use crate::oxql::point::{DataType, ValueArray}; + use crate::point::{push_concrete_values, DataType, Datum, ValueArray}; use chrono::{DateTime, Utc}; - use oximeter::{ + use oximeter_types::{ histogram::Record, types::Cumulative, Measurement, Quantile, }; use std::time::Duration; @@ -1939,12 +2072,12 @@ mod tests { let now = Utc::now(); let current1 = now + Duration::from_secs(1); let mut hist1 = - oximeter::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); + oximeter_types::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); hist1.sample(1).unwrap(); hist1.set_start_time(current1); let current2 = now + Duration::from_secs(2); let mut hist2 = - oximeter::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); + oximeter_types::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); hist2.sample(5).unwrap(); hist2.sample(10).unwrap(); hist2.sample(15).unwrap(); @@ -2273,4 +2406,176 @@ mod tests { .cast(&[DataType::DoubleDistribution, DataType::DoubleDistribution]) .is_err()); } + + #[test] + fn test_push_concrete_values() { + let mut points = 
Points::with_capacity( + 2, + [DataType::Integer, DataType::Double].into_iter(), + [MetricType::Gauge, MetricType::Gauge].into_iter(), + ) + .unwrap(); + + // Push a concrete value for the integer dimension + let from_ints = vec![Values { + values: ValueArray::Integer(vec![Some(1)]), + metric_type: MetricType::Gauge, + }]; + push_concrete_values(&mut points.values[..1], &from_ints, 0); + + // And another for the double dimension. + let from_doubles = vec![Values { + values: ValueArray::Double(vec![Some(2.0)]), + metric_type: MetricType::Gauge, + }]; + push_concrete_values(&mut points.values[1..], &from_doubles, 0); + + assert_eq!( + points.dimensionality(), + 2, + "Points should have 2 dimensions", + ); + let ints = points.values[0].values.as_integer().unwrap(); + assert_eq!( + ints.len(), + 1, + "Should have pushed one point in the first dimension" + ); + assert_eq!( + ints[0], + Some(1), + "Should have pushed 1 onto the first dimension" + ); + let doubles = points.values[1].values.as_double().unwrap(); + assert_eq!( + doubles.len(), + 1, + "Should have pushed one point in the second dimension" + ); + assert_eq!( + doubles[0], + Some(2.0), + "Should have pushed 2.0 onto the second dimension" + ); + } + + #[test] + fn test_join_point_arrays() { + let now = Utc::now(); + + // Create a set of integer points to join with. + // + // This will have two timestamps, one of which will match the points + // below that are merged in. + let int_points = Points { + start_times: None, + timestamps: vec![ + now - Duration::from_secs(3), + now - Duration::from_secs(2), + now, + ], + values: vec![Values { + values: ValueArray::Integer(vec![Some(1), Some(2), Some(3)]), + metric_type: MetricType::Gauge, + }], + }; + + // Create an additional set of double points. + // + // This also has two timepoints, one of which matches with the above, + // and one of which does not. 
+ let double_points = Points { + start_times: None, + timestamps: vec![ + now - Duration::from_secs(3), + now - Duration::from_secs(1), + now, + ], + values: vec![Values { + values: ValueArray::Double(vec![ + Some(4.0), + Some(5.0), + Some(6.0), + ]), + metric_type: MetricType::Gauge, + }], + }; + + // Merge the arrays. + let merged = int_points.inner_join(&double_points).unwrap(); + + // Basic checks that we merged in the right values and have the right + // types and dimensions. + assert_eq!( + merged.dimensionality(), + 2, + "Should have appended the dimensions from each input array" + ); + assert_eq!(merged.len(), 2, "Should have merged two common points",); + assert_eq!( + merged.data_types().collect::>(), + &[DataType::Integer, DataType::Double], + "Should have combined the data types of the input arrays" + ); + assert_eq!( + merged.metric_types().collect::>(), + &[MetricType::Gauge, MetricType::Gauge], + "Should have combined the metric types of the input arrays" + ); + + // Check the actual values of the array. + let mut points = merged.iter_points(); + + // The first and last timepoint overlapped between the two arrays, so we + // should have both of them as concrete samples. + let pt = points.next().unwrap(); + assert_eq!(pt.start_time, None, "Gauges don't have a start time"); + assert_eq!( + *pt.timestamp, int_points.timestamps[0], + "Should have taken the first input timestamp from both arrays", + ); + assert_eq!( + *pt.timestamp, double_points.timestamps[0], + "Should have taken the first input timestamp from both arrays", + ); + let values = pt.values; + assert_eq!(values.len(), 2, "Should have 2 dimensions"); + assert_eq!( + &values[0], + &(Datum::Integer(Some(&1)), MetricType::Gauge), + "Should have pulled value from first integer array." + ); + assert_eq!( + &values[1], + &(Datum::Double(Some(&4.0)), MetricType::Gauge), + "Should have pulled value from second double array." 
+ ); + + // And the next point + let pt = points.next().unwrap(); + assert_eq!(pt.start_time, None, "Gauges don't have a start time"); + assert_eq!( + *pt.timestamp, int_points.timestamps[2], + "Should have taken the input timestamp from both arrays", + ); + assert_eq!( + *pt.timestamp, double_points.timestamps[2], + "Should have taken the input timestamp from both arrays", + ); + let values = pt.values; + assert_eq!(values.len(), 2, "Should have 2 dimensions"); + assert_eq!( + &values[0], + &(Datum::Integer(Some(&3)), MetricType::Gauge), + "Should have pulled value from first integer array." + ); + assert_eq!( + &values[1], + &(Datum::Double(Some(&6.0)), MetricType::Gauge), + "Should have pulled value from second double array." + ); + + // And there should be no other values. + assert!(points.next().is_none(), "There should be no more points"); + } } diff --git a/oximeter/db/src/oxql/table.rs b/oximeter/oxql-types/src/table.rs similarity index 75% rename from oximeter/db/src/oxql/table.rs rename to oximeter/oxql-types/src/table.rs index 2cd141d2fa..f37992942f 100644 --- a/oximeter/db/src/oxql/table.rs +++ b/oximeter/oxql-types/src/table.rs @@ -6,14 +6,16 @@ // Copyright 2024 Oxide Computer Company -use super::point::DataType; -use super::point::MetricType; -use super::point::Points; -use super::query::Alignment; -use super::Error; -use crate::TimeseriesKey; +use crate::point::DataType; +use crate::point::MetricType; +use crate::point::Points; +use crate::point::ValueArray; +use crate::point::Values; +use crate::Alignment; +use anyhow::Error; use highway::HighwayHasher; -use oximeter::FieldValue; +use oximeter_types::schema::TimeseriesKey; +use oximeter_types::FieldValue; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -67,10 +69,20 @@ impl Timeseries { hasher.finish() } + /// Return the alignment of this timeseries, if any. + pub fn alignment(&self) -> Option { + self.alignment + } + + /// Set the alignment of this timeseries. 
+ pub fn set_alignment(&mut self, alignment: Alignment) { + self.alignment = Some(alignment); + } + /// Return a copy of the timeseries, keeping only the provided fields. /// /// An error is returned if the timeseries does not contain those fields. - pub(crate) fn copy_with_fields( + pub fn copy_with_fields( &self, kept_fields: &[&str], ) -> Result { @@ -88,6 +100,20 @@ impl Timeseries { }) } + /// Return a copy of the timeseries, keeping only the provided points. + /// + /// Returns `None` if `kept_points` is empty. + pub fn copy_with_points(&self, kept_points: Points) -> Option { + if kept_points.is_empty() { + return None; + } + Some(Self { + fields: self.fields.clone(), + points: kept_points, + alignment: self.alignment, + }) + } + // Return `true` if the schema in `other` matches that of `self`. fn matches_schema(&self, other: &Timeseries) -> bool { if self.fields.len() != other.fields.len() { @@ -125,7 +151,7 @@ impl Timeseries { /// This returns an error if the points cannot be so cast, or the /// dimensionality of the types requested differs from the dimensionality of /// the points themselves. - pub(crate) fn cast(&self, types: &[DataType]) -> Result { + pub fn cast(&self, types: &[DataType]) -> Result { let fields = self.fields.clone(); Ok(Self { fields, @@ -133,6 +159,49 @@ impl Timeseries { alignment: self.alignment, }) } + + /// Return a new timeseries, with the points limited to the provided range. + pub fn limit(&self, start: usize, end: usize) -> Self { + let input_points = &self.points; + + // Slice the various data arrays. 
+ let start_times = + input_points.start_times().map(|s| s[start..end].to_vec()); + let timestamps = input_points.timestamps()[start..end].to_vec(); + let values = input_points + .values + .iter() + .map(|vals| { + let values = match &vals.values { + ValueArray::Integer(inner) => { + ValueArray::Integer(inner[start..end].to_vec()) + } + ValueArray::Double(inner) => { + ValueArray::Double(inner[start..end].to_vec()) + } + ValueArray::Boolean(inner) => { + ValueArray::Boolean(inner[start..end].to_vec()) + } + ValueArray::String(inner) => { + ValueArray::String(inner[start..end].to_vec()) + } + ValueArray::IntegerDistribution(inner) => { + ValueArray::IntegerDistribution( + inner[start..end].to_vec(), + ) + } + ValueArray::DoubleDistribution(inner) => { + ValueArray::DoubleDistribution( + inner[start..end].to_vec(), + ) + } + }; + Values { values, metric_type: vals.metric_type } + }) + .collect(); + let points = Points::new(start_times, timestamps, values); + Self { fields: self.fields.clone(), points, alignment: self.alignment } + } } /// A table represents one or more timeseries with the same schema. @@ -146,7 +215,7 @@ pub struct Table { // // This starts as the name of the timeseries schema the data is derived // from, but can be modified as operations are done. - pub(super) name: String, + pub name: String, // The set of timeseries in the table, ordered by key. timeseries: BTreeMap, } diff --git a/oximeter/types/src/schema.rs b/oximeter/types/src/schema.rs index 2efd5265ff..80aaa6f101 100644 --- a/oximeter/types/src/schema.rs +++ b/oximeter/types/src/schema.rs @@ -28,6 +28,8 @@ use std::num::NonZeroU8; pub const SCHEMA_DIRECTORY: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../oximeter/schema"); +pub type TimeseriesKey = u64; + /// The name and type information for a field of a timeseries schema. 
#[derive( Clone, From a63e7840eabaaf3e4105dc4af4685c262409a4d0 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Sat, 17 Aug 2024 23:35:26 -0700 Subject: [PATCH 35/51] Make OxQL query response type into an object (#6374) - Fixes #6371 --- Cargo.lock | 1 + nexus/src/external_api/http_entrypoints.rs | 4 ++-- nexus/tests/integration_tests/metrics.rs | 13 ++++++++----- nexus/types/Cargo.toml | 1 + nexus/types/src/external_api/views.rs | 9 +++++++++ openapi/nexus.json | 22 +++++++++++++++++----- 6 files changed, 38 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2943a4f2c2..040ecd9e98 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5461,6 +5461,7 @@ dependencies = [ "omicron-workspace-hack", "openssl", "oxnet", + "oxql-types", "parse-display", "proptest", "schemars", diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index df522f18ab..5b80c973e3 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -6386,7 +6386,7 @@ async fn timeseries_schema_list( async fn timeseries_query( rqctx: RequestContext, body: TypedBody, -) -> Result>, HttpError> { +) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.context.nexus; @@ -6395,7 +6395,7 @@ async fn timeseries_query( nexus .timeseries_query(&opctx, &query) .await - .map(HttpResponseOk) + .map(|tables| HttpResponseOk(views::OxqlQueryResult { tables })) .map_err(HttpError::from) }; apictx diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index e24de2a3ad..3b808984ae 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -19,6 +19,7 @@ use nexus_test_utils::resource_helpers::{ }; use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::views::OxqlQueryResult; use 
omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use oximeter::types::Datum; @@ -307,12 +308,14 @@ pub async fn timeseries_query( .unwrap_or_else(|e| { panic!("timeseries query failed: {e:?}\nquery: {query}") }); - rsp.parsed_body().unwrap_or_else(|e| { - panic!( - "could not parse timeseries query response: {e:?}\n\ + rsp.parsed_body::() + .unwrap_or_else(|e| { + panic!( + "could not parse timeseries query response: {e:?}\n\ query: {query}\nresponse: {rsp:#?}" - ); - }) + ); + }) + .tables } #[nexus_test] diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index a4418d2a74..8dd6292d5c 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -19,6 +19,7 @@ humantime.workspace = true ipnetwork.workspace = true omicron-uuid-kinds.workspace = true openssl.workspace = true +oxql-types.workspace = true oxnet.workspace = true parse-display.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index e241f849ee..58c2e560ab 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -971,3 +971,12 @@ pub struct AllowList { /// The allowlist of IPs or subnets. pub allowed_ips: ExternalAllowedSourceIps, } + +// OxQL QUERIES + +/// The result of a successful OxQL query. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct OxqlQueryResult { + /// Tables resulting from the query, each containing timeseries. 
+ pub tables: Vec, +} diff --git a/openapi/nexus.json b/openapi/nexus.json index a0cbfa2f63..c29cb8a95c 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -8026,11 +8026,7 @@ "content": { "application/json": { "schema": { - "title": "Array_of_Table", - "type": "array", - "items": { - "$ref": "#/components/schemas/Table" - } + "$ref": "#/components/schemas/OxqlQueryResult" } } } @@ -16501,6 +16497,22 @@ } ] }, + "OxqlQueryResult": { + "description": "The result of a successful OxQL query.", + "type": "object", + "properties": { + "tables": { + "description": "Tables resulting from the query, each containing timeseries.", + "type": "array", + "items": { + "$ref": "#/components/schemas/Table" + } + } + }, + "required": [ + "tables" + ] + }, "Password": { "title": "A password used to authenticate a user", "description": "Passwords may be subject to additional constraints.", From ede17c7a26017d29c2fccc1f0183a945793dd693 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sun, 18 Aug 2024 16:42:00 -0700 Subject: [PATCH 36/51] Update Rust crate camino to v1.1.9 (#6382) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 040ecd9e98..f3aafb9b7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -815,9 +815,9 @@ dependencies = [ [[package]] name = "camino" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3054fea8a20d8ff3968d5b22cc27501d2b08dc4decdb31b184323f00c5ef23bb" +checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" dependencies = [ "serde", ] From b449abb736c10313df1d5e0c8f126970b2b968e5 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 04:32:11 +0000 Subject: [PATCH 37/51] Update taiki-e/install-action digest to 37129d5 (#6384) MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`2d7ff60` -> `37129d5`](https://togithub.com/taiki-e/install-action/compare/2d7ff60...37129d5) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 4b4d09ba35..63752880d6 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@2d7ff60c815c5236dc38fd3909d97d6d605315d2 # v2 + uses: taiki-e/install-action@37129d5de13e9122cce55a7a5e7e49981cef514c # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 5eb0284d3733ad1ae8633332458bcaa26c3b5c29 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Mon, 19 Aug 2024 09:05:01 -0700 Subject: [PATCH 38/51] releng: allow building os image from local sources (#6126) --- dev-tools/releng/src/main.rs | 73 ++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/dev-tools/releng/src/main.rs b/dev-tools/releng/src/main.rs index ee649e79b2..264eec2503 100644 --- 
a/dev-tools/releng/src/main.rs +++ b/dev-tools/releng/src/main.rs @@ -143,6 +143,10 @@ struct Args { /// Path to a pre-built omicron-package binary (skips building if set) #[clap(long, env = "OMICRON_PACKAGE")] omicron_package_bin: Option, + + /// Build the helios OS image from local sources. + #[clap(long)] + helios_local: bool, } impl Args { @@ -286,7 +290,7 @@ async fn main() -> Result<()> { logger, "helios checkout at {0} is out-of-date; run \ `git pull -C {0}`, or run omicron-releng with \ - --ignore-helios-origin or --helios-path", + --ignore-helios-origin or --helios-dir", shell_words::quote(args.helios_dir.as_str()) ); preflight_ok = false; @@ -496,39 +500,42 @@ async fn main() -> Result<()> { Utc::now().format("%Y-%m-%d %H:%M") ); - // helios-build experiment-image - jobs.push_command( - format!("{}-image", target), - Command::new("ptime") - .arg("-m") - .arg(args.helios_dir.join("helios-build")) - .arg("experiment-image") - .arg("-o") // output directory for image - .arg(args.output_dir.join(format!("os-{}", target))) + let mut image_cmd = Command::new("ptime") + .arg("-m") + .arg(args.helios_dir.join("helios-build")) + .arg("experiment-image") + .arg("-o") // output directory for image + .arg(args.output_dir.join(format!("os-{}", target))) + .arg("-F") // pass extra image builder features + .arg(format!("optever={}", opte_version.trim())) + .arg("-P") // include all files from extra proto area + .arg(proto_dir.join("root")) + .arg("-N") // image name + .arg(image_name) + .arg("-s") // tempdir name suffix + .arg(target.as_str()) + .args(target.image_build_args()) + .current_dir(&args.helios_dir) + .env( + "IMAGE_DATASET", + match target { + Target::Host => &args.host_dataset, + Target::Recovery => &args.recovery_dataset, + }, + ) + .env_remove("CARGO") + .env_remove("RUSTUP_TOOLCHAIN"); + + if !args.helios_local { + image_cmd = image_cmd .arg("-p") // use an external package repository - .arg(format!("helios-dev={}", HELIOS_REPO)) - .arg("-F") // pass 
extra image builder features - .arg(format!("optever={}", opte_version.trim())) - .arg("-P") // include all files from extra proto area - .arg(proto_dir.join("root")) - .arg("-N") // image name - .arg(image_name) - .arg("-s") // tempdir name suffix - .arg(target.as_str()) - .args(target.image_build_args()) - .current_dir(&args.helios_dir) - .env( - "IMAGE_DATASET", - match target { - Target::Host => &args.host_dataset, - Target::Recovery => &args.recovery_dataset, - }, - ) - .env_remove("CARGO") - .env_remove("RUSTUP_TOOLCHAIN"), - ) - .after("helios-setup") - .after(format!("{}-proto", target)); + .arg(format!("helios-dev={HELIOS_REPO}")) + } + + // helios-build experiment-image + jobs.push_command(format!("{}-image", target), image_cmd) + .after("helios-setup") + .after(format!("{}-proto", target)); } // Build the recovery target after we build the host target. Only one // of these will build at a time since Cargo locks its target directory; From b29947e9efd4fc79864a85d1a6776acfd2d94ea8 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Mon, 19 Aug 2024 09:25:39 -0700 Subject: [PATCH 39/51] Slim down the ClickHouse database (#6352) - Add TTLs to all field tables, by using a materialized column with the time each record is inserted. ClickHouse will retain the latest timestamp, so when we stop inserting, the TTL clock will start counting down on those timeseries records. - Update Dropshot dependency. - Add operation ID to HTTP service timeseries, remove other fields. Expunge the old timeseries too. - Remove unnecessary stringifying of URIs in latency tracking. 
--- Cargo.lock | 7 +- ...ast_updated_column_to_fields_i64_local.sql | 1 + ...ast_updated_column_on_fields_i64_local.sql | 1 + .../10/02_add_ttl_to_fields_i64_local.sql | 1 + ...st_updated_column_to_fields_uuid_local.sql | 1 + ...st_updated_column_on_fields_uuid_local.sql | 1 + .../10/05_add_ttl_to_fields_uuid_local.sql | 1 + ...st_updated_column_to_fields_bool_local.sql | 1 + ...st_updated_column_on_fields_bool_local.sql | 1 + .../10/08_add_ttl_to_fields_bool_local.sql | 1 + ..._updated_column_to_fields_ipaddr_local.sql | 1 + ..._updated_column_on_fields_ipaddr_local.sql | 1 + .../10/11_add_ttl_to_fields_ipaddr_local.sql | 1 + ..._updated_column_to_fields_string_local.sql | 1 + ..._updated_column_on_fields_string_local.sql | 1 + .../10/14_add_ttl_to_fields_string_local.sql | 1 + ...last_updated_column_to_fields_i8_local.sql | 1 + ...last_updated_column_on_fields_i8_local.sql | 1 + .../10/17_add_ttl_to_fields_i8_local.sql | 1 + ...last_updated_column_to_fields_u8_local.sql | 1 + ...last_updated_column_on_fields_u8_local.sql | 1 + .../10/20_add_ttl_to_fields_u8_local.sql | 1 + ...ast_updated_column_to_fields_i16_local.sql | 1 + ...ast_updated_column_on_fields_i16_local.sql | 1 + .../10/23_add_ttl_to_fields_i16_local.sql | 1 + ...ast_updated_column_to_fields_u16_local.sql | 1 + ...ast_updated_column_on_fields_u16_local.sql | 1 + .../10/26_add_ttl_to_fields_u16_local.sql | 1 + ...ast_updated_column_to_fields_i32_local.sql | 1 + ...ast_updated_column_on_fields_i32_local.sql | 1 + .../10/29_add_ttl_to_fields_i32_local.sql | 1 + ...ast_updated_column_to_fields_u32_local.sql | 1 + ...ast_updated_column_on_fields_u32_local.sql | 1 + .../10/32_add_ttl_to_fields_u32_local.sql | 1 + ...ast_updated_column_to_fields_u64_local.sql | 1 + ...ast_updated_column_on_fields_u64_local.sql | 1 + .../10/35_add_ttl_to_fields_u64_local.sql | 1 + .../replicated/10/timeseries-to-delete.txt | 1 + oximeter/db/schema/replicated/db-init-1.sql | 12 +- oximeter/db/schema/replicated/db-init-2.sql | 
60 ++++++--- ...add_last_updated_column_to_fields_bool.sql | 1 + ...ize_last_updated_column_on_fields_bool.sql | 1 + .../10/02_add_ttl_to_fields_bool.sql | 1 + ...3_add_last_updated_column_to_fields_i8.sql | 1 + ...alize_last_updated_column_on_fields_i8.sql | 1 + .../10/05_add_ttl_to_fields_i8.sql | 1 + ...6_add_last_updated_column_to_fields_u8.sql | 1 + ...alize_last_updated_column_on_fields_u8.sql | 1 + .../10/08_add_ttl_to_fields_u8.sql | 1 + ..._add_last_updated_column_to_fields_i16.sql | 1 + ...lize_last_updated_column_on_fields_i16.sql | 1 + .../10/11_add_ttl_to_fields_i16.sql | 1 + ..._add_last_updated_column_to_fields_u16.sql | 1 + ...lize_last_updated_column_on_fields_u16.sql | 1 + .../10/14_add_ttl_to_fields_u16.sql | 1 + ..._add_last_updated_column_to_fields_i32.sql | 1 + ...lize_last_updated_column_on_fields_i32.sql | 1 + .../10/17_add_ttl_to_fields_i32.sql | 1 + ..._add_last_updated_column_to_fields_u32.sql | 1 + ...lize_last_updated_column_on_fields_u32.sql | 1 + .../10/20_add_ttl_to_fields_u32.sql | 1 + ..._add_last_updated_column_to_fields_i64.sql | 1 + ...lize_last_updated_column_on_fields_i64.sql | 1 + .../10/23_add_ttl_to_fields_i64.sql | 1 + ..._add_last_updated_column_to_fields_u64.sql | 1 + ...lize_last_updated_column_on_fields_u64.sql | 1 + .../10/26_add_ttl_to_fields_u64.sql | 1 + ...d_last_updated_column_to_fields_ipaddr.sql | 1 + ...e_last_updated_column_on_fields_ipaddr.sql | 1 + .../10/29_add_ttl_to_fields_ipaddr.sql | 1 + ...d_last_updated_column_to_fields_string.sql | 1 + ...e_last_updated_column_on_fields_string.sql | 1 + .../10/32_add_ttl_to_fields_string.sql | 1 + ...add_last_updated_column_to_fields_uuid.sql | 1 + ...ize_last_updated_column_on_fields_uuid.sql | 1 + .../10/35_add_ttl_to_fields_uuid.sql | 1 + .../single-node/10/timeseries-to-delete.txt | 1 + oximeter/db/schema/single-node/db-init.sql | 80 ++++++++---- oximeter/db/src/model.rs | 2 +- oximeter/instruments/src/http.rs | 119 ++++++++---------- 
oximeter/oximeter/schema/http-service.toml | 15 +-- workspace-hack/Cargo.toml | 2 + 82 files changed, 244 insertions(+), 127 deletions(-) create mode 100644 oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql create mode 100644 oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql create mode 100644 oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql create mode 100644 oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql create mode 100644 oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql create mode 100644 oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql create mode 100644 oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql create mode 100644 oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql create mode 100644 oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql create mode 100644 oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql create mode 100644 oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql create mode 100644 oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql create mode 100644 oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql create mode 100644 oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql create mode 100644 oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql create mode 100644 oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql create mode 100644 oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql create mode 100644 oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql create mode 100644 
oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql create mode 100644 oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql create mode 100644 oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql create mode 100644 oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql create mode 100644 oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql create mode 100644 oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql create mode 100644 oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql create mode 100644 oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql create mode 100644 oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql create mode 100644 oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql create mode 100644 oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql create mode 100644 oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql create mode 100644 oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql create mode 100644 oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql create mode 100644 oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql create mode 100644 oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql create mode 100644 oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql create mode 100644 oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql create mode 100644 oximeter/db/schema/replicated/10/timeseries-to-delete.txt create mode 100644 oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql create mode 100644 
oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql create mode 100644 oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql create mode 100644 oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql create mode 100644 oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql create mode 100644 oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql create mode 100644 oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql create mode 100644 oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql create mode 100644 oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql create mode 100644 oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql create mode 100644 oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql create mode 100644 oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql create mode 100644 oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql create mode 100644 oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql create mode 100644 oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql create mode 100644 oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql create mode 100644 oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql create mode 100644 oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql create mode 100644 oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql create mode 100644 oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql create mode 100644 oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql create mode 100644 oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql create mode 100644 
oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql create mode 100644 oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql create mode 100644 oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql create mode 100644 oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql create mode 100644 oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql create mode 100644 oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql create mode 100644 oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql create mode 100644 oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql create mode 100644 oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql create mode 100644 oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql create mode 100644 oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql create mode 100644 oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql create mode 100644 oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql create mode 100644 oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql create mode 100644 oximeter/db/schema/single-node/10/timeseries-to-delete.txt diff --git a/Cargo.lock b/Cargo.lock index f3aafb9b7d..874b33134f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2213,7 +2213,7 @@ dependencies = [ [[package]] name = "dropshot" version = "0.10.2-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#52d900a470b8f08eddf021813470b2a9194f2cc0" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#06c8dab40e28d313f8bb0e15e1027eeace3bce89" dependencies = [ "async-stream", "async-trait", @@ -2259,7 +2259,7 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.10.2-dev" -source = 
"git+https://github.com/oxidecomputer/dropshot?branch=main#52d900a470b8f08eddf021813470b2a9194f2cc0" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#06c8dab40e28d313f8bb0e15e1027eeace3bce89" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -3616,7 +3616,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.7", + "socket2 0.4.10", "tokio", "tower-service", "tracing", @@ -6523,6 +6523,7 @@ dependencies = [ "similar", "slog", "smallvec 1.13.2", + "socket2 0.5.7", "spin 0.9.8", "string_cache", "subtle", diff --git a/oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql b/oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql new file mode 100644 index 0000000000..04158b36ce --- /dev/null +++ b/oximeter/db/schema/replicated/10/00_add_last_updated_column_to_fields_i64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql b/oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql new file mode 100644 index 0000000000..2e35dd2793 --- /dev/null +++ b/oximeter/db/schema/replicated/10/01_materialize_last_updated_column_on_fields_i64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql b/oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql new file mode 100644 index 0000000000..25e5303e5a --- /dev/null +++ b/oximeter/db/schema/replicated/10/02_add_ttl_to_fields_i64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git 
a/oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql b/oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql new file mode 100644 index 0000000000..f26fdedbb6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/03_add_last_updated_column_to_fields_uuid_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql b/oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql new file mode 100644 index 0000000000..1bc623f418 --- /dev/null +++ b/oximeter/db/schema/replicated/10/04_materialize_last_updated_column_on_fields_uuid_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql b/oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql new file mode 100644 index 0000000000..b98bba1e88 --- /dev/null +++ b/oximeter/db/schema/replicated/10/05_add_ttl_to_fields_uuid_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql b/oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql new file mode 100644 index 0000000000..bf3c16dde5 --- /dev/null +++ b/oximeter/db/schema/replicated/10/06_add_last_updated_column_to_fields_bool_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql 
b/oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql new file mode 100644 index 0000000000..3ddb0eec84 --- /dev/null +++ b/oximeter/db/schema/replicated/10/07_materialize_last_updated_column_on_fields_bool_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql b/oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql new file mode 100644 index 0000000000..58d599cf49 --- /dev/null +++ b/oximeter/db/schema/replicated/10/08_add_ttl_to_fields_bool_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql b/oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql new file mode 100644 index 0000000000..94696b7b06 --- /dev/null +++ b/oximeter/db/schema/replicated/10/09_add_last_updated_column_to_fields_ipaddr_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql b/oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql new file mode 100644 index 0000000000..f621033d56 --- /dev/null +++ b/oximeter/db/schema/replicated/10/10_materialize_last_updated_column_on_fields_ipaddr_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql b/oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql new file mode 100644 index 0000000000..4a01da9e74 --- 
/dev/null +++ b/oximeter/db/schema/replicated/10/11_add_ttl_to_fields_ipaddr_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql b/oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql new file mode 100644 index 0000000000..173d803437 --- /dev/null +++ b/oximeter/db/schema/replicated/10/12_add_last_updated_column_to_fields_string_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql b/oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql new file mode 100644 index 0000000000..d9fcc84eba --- /dev/null +++ b/oximeter/db/schema/replicated/10/13_materialize_last_updated_column_on_fields_string_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql b/oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql new file mode 100644 index 0000000000..8c9aecca9d --- /dev/null +++ b/oximeter/db/schema/replicated/10/14_add_ttl_to_fields_string_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql b/oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql new file mode 100644 index 0000000000..8d071424f6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/15_add_last_updated_column_to_fields_i8_local.sql @@ -0,0 +1 @@ +ALTER TABLE 
oximeter.fields_i8_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql b/oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql new file mode 100644 index 0000000000..ac5fa948ae --- /dev/null +++ b/oximeter/db/schema/replicated/10/16_materialize_last_updated_column_on_fields_i8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql b/oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql new file mode 100644 index 0000000000..3caa1b93f6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/17_add_ttl_to_fields_i8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql b/oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql new file mode 100644 index 0000000000..ed6978c7e6 --- /dev/null +++ b/oximeter/db/schema/replicated/10/18_add_last_updated_column_to_fields_u8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql b/oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql new file mode 100644 index 0000000000..81ce8626a7 --- /dev/null +++ b/oximeter/db/schema/replicated/10/19_materialize_last_updated_column_on_fields_u8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git 
a/oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql b/oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql new file mode 100644 index 0000000000..2a7c757dc8 --- /dev/null +++ b/oximeter/db/schema/replicated/10/20_add_ttl_to_fields_u8_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql b/oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql new file mode 100644 index 0000000000..cbe0b08fe4 --- /dev/null +++ b/oximeter/db/schema/replicated/10/21_add_last_updated_column_to_fields_i16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql b/oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql new file mode 100644 index 0000000000..d4854807b7 --- /dev/null +++ b/oximeter/db/schema/replicated/10/22_materialize_last_updated_column_on_fields_i16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql b/oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql new file mode 100644 index 0000000000..c84b634a00 --- /dev/null +++ b/oximeter/db/schema/replicated/10/23_add_ttl_to_fields_i16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql b/oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql new file mode 100644 index 
0000000000..60c28c0047 --- /dev/null +++ b/oximeter/db/schema/replicated/10/24_add_last_updated_column_to_fields_u16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql b/oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql new file mode 100644 index 0000000000..b38cdda831 --- /dev/null +++ b/oximeter/db/schema/replicated/10/25_materialize_last_updated_column_on_fields_u16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql b/oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql new file mode 100644 index 0000000000..cd533ffd8f --- /dev/null +++ b/oximeter/db/schema/replicated/10/26_add_ttl_to_fields_u16_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql b/oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql new file mode 100644 index 0000000000..1ea7093d8f --- /dev/null +++ b/oximeter/db/schema/replicated/10/27_add_last_updated_column_to_fields_i32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql b/oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql new file mode 100644 index 0000000000..f9f6464729 --- /dev/null +++ 
b/oximeter/db/schema/replicated/10/28_materialize_last_updated_column_on_fields_i32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql b/oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql new file mode 100644 index 0000000000..7c37ee9b21 --- /dev/null +++ b/oximeter/db/schema/replicated/10/29_add_ttl_to_fields_i32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql b/oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql new file mode 100644 index 0000000000..b15eab9387 --- /dev/null +++ b/oximeter/db/schema/replicated/10/30_add_last_updated_column_to_fields_u32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql b/oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql new file mode 100644 index 0000000000..caa96ab5eb --- /dev/null +++ b/oximeter/db/schema/replicated/10/31_materialize_last_updated_column_on_fields_u32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql b/oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql new file mode 100644 index 0000000000..25af5ee660 --- /dev/null +++ b/oximeter/db/schema/replicated/10/32_add_ttl_to_fields_u32_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + 
INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql b/oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql new file mode 100644 index 0000000000..e85bd845d4 --- /dev/null +++ b/oximeter/db/schema/replicated/10/33_add_last_updated_column_to_fields_u64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64_local ON CLUSTER oximeter_cluster ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql b/oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql new file mode 100644 index 0000000000..d287a02c6f --- /dev/null +++ b/oximeter/db/schema/replicated/10/34_materialize_last_updated_column_on_fields_u64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64_local ON CLUSTER oximeter_cluster MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql b/oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql new file mode 100644 index 0000000000..02eb09c300 --- /dev/null +++ b/oximeter/db/schema/replicated/10/35_add_ttl_to_fields_u64_local.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64_local ON CLUSTER oximeter_cluster MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/10/timeseries-to-delete.txt b/oximeter/db/schema/replicated/10/timeseries-to-delete.txt new file mode 100644 index 0000000000..40b90e05ff --- /dev/null +++ b/oximeter/db/schema/replicated/10/timeseries-to-delete.txt @@ -0,0 +1 @@ +http_service:request_latency_histogram diff --git a/oximeter/db/schema/replicated/db-init-1.sql b/oximeter/db/schema/replicated/db-init-1.sql index 176e5b64f7..4eac2b4e37 100644 --- a/oximeter/db/schema/replicated/db-init-1.sql +++ b/oximeter/db/schema/replicated/db-init-1.sql @@ -78,10 +78,12 @@ CREATE TABLE IF NOT EXISTS 
oximeter.fields_i64_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int64 + field_value Int64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i64_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ON CLUSTER oximeter_cluster AS oximeter.fields_i64_local @@ -93,10 +95,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_uuid_local ON CLUSTER oximeter_cluste timeseries_name String, timeseries_key UInt64, field_name String, - field_value UUID + field_value UUID, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_uuid_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ON CLUSTER oximeter_cluster AS oximeter.fields_uuid_local diff --git a/oximeter/db/schema/replicated/db-init-2.sql b/oximeter/db/schema/replicated/db-init-2.sql index ae0431ec84..51e64e20e0 100644 --- a/oximeter/db/schema/replicated/db-init-2.sql +++ b/oximeter/db/schema/replicated/db-init-2.sql @@ -595,10 +595,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool_local ON CLUSTER oximeter_cluste timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_bool_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 
DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_bool ON CLUSTER oximeter_cluster AS oximeter.fields_bool_local @@ -609,10 +611,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr_local ON CLUSTER oximeter_clus timeseries_name String, timeseries_key UInt64, field_name String, - field_value IPv6 + field_value IPv6, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_ipaddr_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ON CLUSTER oximeter_cluster AS oximeter.fields_ipaddr_local @@ -623,10 +627,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_string_local ON CLUSTER oximeter_clus timeseries_name String, timeseries_key UInt64, field_name String, - field_value String + field_value String, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_string_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_string ON CLUSTER oximeter_cluster AS oximeter.fields_string_local @@ -637,10 +643,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i8_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int8 + field_value Int8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i8_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ON CLUSTER 
oximeter_cluster AS oximeter.fields_i8_local @@ -651,10 +659,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u8_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u8_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ON CLUSTER oximeter_cluster AS oximeter.fields_u8_local @@ -665,10 +675,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i16_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int16 + field_value Int16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i16_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ON CLUSTER oximeter_cluster AS oximeter.fields_i16_local @@ -679,10 +691,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u16_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt16 + field_value UInt16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u16_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ON CLUSTER oximeter_cluster AS oximeter.fields_u16_local @@ -693,10 +707,12 @@ CREATE TABLE IF 
NOT EXISTS oximeter.fields_i32_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int32 + field_value Int32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_i32_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ON CLUSTER oximeter_cluster AS oximeter.fields_i32_local @@ -707,10 +723,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u32_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt32 + field_value UInt32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u32_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ON CLUSTER oximeter_cluster AS oximeter.fields_u32_local @@ -721,10 +739,12 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u64_local ON CLUSTER oximeter_cluster timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt64 + field_value UInt64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_u64_local', '{replica}') -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ON CLUSTER oximeter_cluster AS oximeter.fields_u64_local diff --git a/oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql 
b/oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql new file mode 100644 index 0000000000..86f46a43bf --- /dev/null +++ b/oximeter/db/schema/single-node/10/00_add_last_updated_column_to_fields_bool.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql b/oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql new file mode 100644 index 0000000000..6ebec2d506 --- /dev/null +++ b/oximeter/db/schema/single-node/10/01_materialize_last_updated_column_on_fields_bool.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql b/oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql new file mode 100644 index 0000000000..cc07b8cd1d --- /dev/null +++ b/oximeter/db/schema/single-node/10/02_add_ttl_to_fields_bool.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql b/oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql new file mode 100644 index 0000000000..884b5ffed6 --- /dev/null +++ b/oximeter/db/schema/single-node/10/03_add_last_updated_column_to_fields_i8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql b/oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql new file mode 100644 index 0000000000..ef569d80c3 --- /dev/null +++ b/oximeter/db/schema/single-node/10/04_materialize_last_updated_column_on_fields_i8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8 MATERIALIZE COLUMN 
last_updated_at; diff --git a/oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql b/oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql new file mode 100644 index 0000000000..adfc3dd1a4 --- /dev/null +++ b/oximeter/db/schema/single-node/10/05_add_ttl_to_fields_i8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i8 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql b/oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql new file mode 100644 index 0000000000..0f4e43ce2c --- /dev/null +++ b/oximeter/db/schema/single-node/10/06_add_last_updated_column_to_fields_u8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql b/oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql new file mode 100644 index 0000000000..8dcbb32bb2 --- /dev/null +++ b/oximeter/db/schema/single-node/10/07_materialize_last_updated_column_on_fields_u8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql b/oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql new file mode 100644 index 0000000000..11a83bde7a --- /dev/null +++ b/oximeter/db/schema/single-node/10/08_add_ttl_to_fields_u8.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u8 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql b/oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql new file mode 100644 index 0000000000..d27f38f94f --- /dev/null +++ b/oximeter/db/schema/single-node/10/09_add_last_updated_column_to_fields_i16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16 ADD COLUMN IF NOT EXISTS 
last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql b/oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql new file mode 100644 index 0000000000..cd60a2a1e9 --- /dev/null +++ b/oximeter/db/schema/single-node/10/10_materialize_last_updated_column_on_fields_i16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql b/oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql new file mode 100644 index 0000000000..5b1b2fcfb6 --- /dev/null +++ b/oximeter/db/schema/single-node/10/11_add_ttl_to_fields_i16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i16 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql b/oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql new file mode 100644 index 0000000000..a71753f95d --- /dev/null +++ b/oximeter/db/schema/single-node/10/12_add_last_updated_column_to_fields_u16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql b/oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql new file mode 100644 index 0000000000..c8dbfb494e --- /dev/null +++ b/oximeter/db/schema/single-node/10/13_materialize_last_updated_column_on_fields_u16.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u16 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql b/oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql new file mode 100644 index 0000000000..30da688c8c --- /dev/null +++ b/oximeter/db/schema/single-node/10/14_add_ttl_to_fields_u16.sql @@ -0,0 +1 @@ 
+ALTER TABLE oximeter.fields_u16 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql b/oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql new file mode 100644 index 0000000000..eb0f377e2d --- /dev/null +++ b/oximeter/db/schema/single-node/10/15_add_last_updated_column_to_fields_i32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql b/oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql new file mode 100644 index 0000000000..9cd4fa05c8 --- /dev/null +++ b/oximeter/db/schema/single-node/10/16_materialize_last_updated_column_on_fields_i32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql b/oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql new file mode 100644 index 0000000000..5230634097 --- /dev/null +++ b/oximeter/db/schema/single-node/10/17_add_ttl_to_fields_i32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i32 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql b/oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql new file mode 100644 index 0000000000..9d967784e9 --- /dev/null +++ b/oximeter/db/schema/single-node/10/18_add_last_updated_column_to_fields_u32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql b/oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql new file mode 100644 index 0000000000..f625138b59 --- 
/dev/null +++ b/oximeter/db/schema/single-node/10/19_materialize_last_updated_column_on_fields_u32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql b/oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql new file mode 100644 index 0000000000..fc80ce7102 --- /dev/null +++ b/oximeter/db/schema/single-node/10/20_add_ttl_to_fields_u32.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u32 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql b/oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql new file mode 100644 index 0000000000..26256d3924 --- /dev/null +++ b/oximeter/db/schema/single-node/10/21_add_last_updated_column_to_fields_i64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql b/oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql new file mode 100644 index 0000000000..a81294e535 --- /dev/null +++ b/oximeter/db/schema/single-node/10/22_materialize_last_updated_column_on_fields_i64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql b/oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql new file mode 100644 index 0000000000..43ca166755 --- /dev/null +++ b/oximeter/db/schema/single-node/10/23_add_ttl_to_fields_i64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_i64 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql b/oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql new file mode 100644 index 
0000000000..46074c79ce --- /dev/null +++ b/oximeter/db/schema/single-node/10/24_add_last_updated_column_to_fields_u64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64 ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql b/oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql new file mode 100644 index 0000000000..a68d449de7 --- /dev/null +++ b/oximeter/db/schema/single-node/10/25_materialize_last_updated_column_on_fields_u64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64 MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql b/oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql new file mode 100644 index 0000000000..48afb51bf1 --- /dev/null +++ b/oximeter/db/schema/single-node/10/26_add_ttl_to_fields_u64.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_u64 MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql b/oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql new file mode 100644 index 0000000000..d3c6be9072 --- /dev/null +++ b/oximeter/db/schema/single-node/10/27_add_last_updated_column_to_fields_ipaddr.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql b/oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql new file mode 100644 index 0000000000..5bdffd4b2e --- /dev/null +++ b/oximeter/db/schema/single-node/10/28_materialize_last_updated_column_on_fields_ipaddr.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr MATERIALIZE COLUMN last_updated_at; diff --git 
a/oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql b/oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql new file mode 100644 index 0000000000..4551db90cd --- /dev/null +++ b/oximeter/db/schema/single-node/10/29_add_ttl_to_fields_ipaddr.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_ipaddr MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql b/oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql new file mode 100644 index 0000000000..024c5f8f94 --- /dev/null +++ b/oximeter/db/schema/single-node/10/30_add_last_updated_column_to_fields_string.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql b/oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql new file mode 100644 index 0000000000..67d3b7a596 --- /dev/null +++ b/oximeter/db/schema/single-node/10/31_materialize_last_updated_column_on_fields_string.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql b/oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql new file mode 100644 index 0000000000..c5272df459 --- /dev/null +++ b/oximeter/db/schema/single-node/10/32_add_ttl_to_fields_string.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_string MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql b/oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql new file mode 100644 index 0000000000..8d01b382fe --- /dev/null +++ b/oximeter/db/schema/single-node/10/33_add_last_updated_column_to_fields_uuid.sql @@ -0,0 +1 @@ +ALTER TABLE 
oximeter.fields_uuid ADD COLUMN IF NOT EXISTS last_updated_at DateTime MATERIALIZED now(); diff --git a/oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql b/oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql new file mode 100644 index 0000000000..06fbd94d02 --- /dev/null +++ b/oximeter/db/schema/single-node/10/34_materialize_last_updated_column_on_fields_uuid.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid MATERIALIZE COLUMN last_updated_at; diff --git a/oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql b/oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql new file mode 100644 index 0000000000..481055d4f5 --- /dev/null +++ b/oximeter/db/schema/single-node/10/35_add_ttl_to_fields_uuid.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_uuid MODIFY TTL last_updated_at + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/10/timeseries-to-delete.txt b/oximeter/db/schema/single-node/10/timeseries-to-delete.txt new file mode 100644 index 0000000000..40b90e05ff --- /dev/null +++ b/oximeter/db/schema/single-node/10/timeseries-to-delete.txt @@ -0,0 +1 @@ +http_service:request_latency_histogram diff --git a/oximeter/db/schema/single-node/db-init.sql b/oximeter/db/schema/single-node/db-init.sql index 38e9d0b70c..184951feeb 100644 --- a/oximeter/db/schema/single-node/db-init.sql +++ b/oximeter/db/schema/single-node/db-init.sql @@ -504,126 +504,158 @@ TTL toDateTime(timestamp) + INTERVAL 30 DAY; * timeseries name and then key, since it would improve lookups where one * already has the key. Realistically though, these tables are quite small and * so performance benefits will be low in absolute terms. + * + * TTL: We use a materialized column to expire old field table records. This + * column is generated automatically by the database whenever a new row is + * inserted. It cannot be inserted directly, nor is it returned in a `SELECT *` + * query. 
Since these tables are `ReplacingMergeTree`s, that means the last + * record will remain during a deduplication, which will have the last + * timestamp. ClickHouse will then expire old data for us, similar to the + * measurement tables. */ CREATE TABLE IF NOT EXISTS oximeter.fields_bool ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int8 + field_value Int8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8 + field_value UInt8, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int16 + field_value Int16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ( timeseries_name String, timeseries_key UInt64, field_name 
String, - field_value UInt16 + field_value UInt16, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int32 + field_value Int32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt32 + field_value UInt32, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value Int64 + field_value Int64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt64 + field_value UInt64, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS 
oximeter.fields_ipaddr ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value IPv6 + field_value IPv6, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_string ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value String + field_value String, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ( timeseries_name String, timeseries_key UInt64, field_name String, - field_value UUID + field_value UUID, + last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() -ORDER BY (timeseries_name, field_name, field_value, timeseries_key); +ORDER BY (timeseries_name, field_name, field_value, timeseries_key) +TTL last_updated_at + INTERVAL 30 DAY; /* The timeseries schema table stores the extracted schema for the samples * oximeter collects. diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 986bf00225..7608f81e45 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -45,7 +45,7 @@ use uuid::Uuid; /// - [`crate::Client::initialize_db_with_version`] /// - [`crate::Client::ensure_schema`] /// - The `clickhouse-schema-updater` binary in this crate -pub const OXIMETER_VERSION: u64 = 9; +pub const OXIMETER_VERSION: u64 = 10; // Wrapper type to represent a boolean in the database. 
// diff --git a/oximeter/instruments/src/http.rs b/oximeter/instruments/src/http.rs index 6a0a35ce63..2eef327d02 100644 --- a/oximeter/instruments/src/http.rs +++ b/oximeter/instruments/src/http.rs @@ -6,17 +6,14 @@ // Copyright 2024 Oxide Computer Company -use dropshot::{ - HttpError, HttpResponse, RequestContext, RequestInfo, ServerContext, -}; +use dropshot::{HttpError, HttpResponse, RequestContext, ServerContext}; use futures::Future; use http::StatusCode; -use http::Uri; use oximeter::{ histogram::Histogram, histogram::Record, MetricsError, Producer, Sample, }; -use std::borrow::Cow; -use std::collections::BTreeMap; +use std::collections::HashMap; +use std::hash::{DefaultHasher, Hash as _, Hasher}; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; @@ -24,28 +21,18 @@ oximeter::use_timeseries!("http-service.toml"); pub use http_service::HttpService; pub use http_service::RequestLatencyHistogram; -// Return the route portion of the request, normalized to include a single -// leading slash and no trailing slashes. -fn normalized_uri_path(uri: &Uri) -> Cow<'static, str> { - Cow::Owned(format!( - "/{}", - uri.path().trim_end_matches('/').trim_start_matches('/') - )) -} - impl RequestLatencyHistogram { /// Build a new `RequestLatencyHistogram` with a specified histogram. /// /// Latencies are expressed in seconds. pub fn new( - request: &RequestInfo, + operation_id: &str, status_code: StatusCode, histogram: Histogram, ) -> Self { Self { - route: normalized_uri_path(request.uri()), - method: request.method().to_string().into(), - status_code: status_code.as_u16().into(), + operation_id: operation_id.to_string().into(), + status_code: status_code.as_u16(), datum: histogram, } } @@ -59,25 +46,27 @@ impl RequestLatencyHistogram { /// /// Latencies are expressed as seconds. 
pub fn with_latency_decades( - request: &RequestInfo, + operation_id: &str, status_code: StatusCode, start_decade: i16, end_decade: i16, ) -> Result { Ok(Self::new( - request, + operation_id, status_code, Histogram::span_decades(start_decade, end_decade)?, )) } - fn key_for(request: &RequestInfo, status_code: StatusCode) -> String { - format!( - "{}:{}:{}", - normalized_uri_path(request.uri()), - request.method(), - status_code.as_u16() - ) + /// Return a key used to ID this histogram. + /// + /// This is a quick way to look up the histogram tracking any particular + /// request and response. + fn key_for(operation_id: &str, status_code: StatusCode) -> u64 { + let mut hasher = DefaultHasher::new(); + operation_id.hash(&mut hasher); + status_code.hash(&mut hasher); + hasher.finish() } } @@ -92,8 +81,19 @@ impl RequestLatencyHistogram { /// The `LatencyTracker` can be used to produce metric data collected by `oximeter`. #[derive(Debug, Clone)] pub struct LatencyTracker { + /// The HTTP service target for which we're tracking request histograms. pub service: HttpService, - latencies: Arc>>, + /// The latency histogram for each request. + /// + /// The map here use a hash of the request fields (operation and status + /// code) as the key to each histogram. It's a bit redundant to then store + /// that in a hashmap, but this lets us avoid creating a new + /// `RequestLatencyHistogram` when handling a request that we already have + /// one for. Instead, we use this key to get the existing entry. + latencies: Arc>>, + /// The histogram used to track each request. + /// + /// We store it here to clone as we see new requests. histogram: Histogram, } @@ -104,7 +104,7 @@ impl LatencyTracker { pub fn new(service: HttpService, histogram: Histogram) -> Self { Self { service, - latencies: Arc::new(Mutex::new(BTreeMap::new())), + latencies: Arc::new(Mutex::new(HashMap::new())), histogram, } } @@ -129,15 +129,15 @@ impl LatencyTracker { /// to which the other arguments belong. 
(One is created if it does not exist.) pub fn update( &self, - request: &RequestInfo, + operation_id: &str, status_code: StatusCode, latency: Duration, ) -> Result<(), MetricsError> { - let key = RequestLatencyHistogram::key_for(request, status_code); + let key = RequestLatencyHistogram::key_for(operation_id, status_code); let mut latencies = self.latencies.lock().unwrap(); let entry = latencies.entry(key).or_insert_with(|| { RequestLatencyHistogram::new( - request, + operation_id, status_code, self.histogram.clone(), ) @@ -170,14 +170,14 @@ impl LatencyTracker { Ok(response) => response.status_code(), Err(ref e) => e.status_code, }; - if let Err(e) = self.update(&context.request, status_code, latency) { + if let Err(e) = self.update(&context.operation_id, status_code, latency) + { slog::error!( &context.log, "error instrumenting dropshot handler"; "error" => ?e, "status_code" => status_code.as_u16(), - "method" => %context.request.method(), - "uri" => %context.request.uri(), + "operation_id" => &context.operation_id, "remote_addr" => context.request.remote_addr(), "latency" => ?latency, ); @@ -220,41 +220,24 @@ mod tests { HttpService { name: "my-service".into(), id: ID.parse().unwrap() }; let hist = Histogram::new(&[0.0, 1.0]).unwrap(); let tracker = LatencyTracker::new(service, hist); - let request = http::request::Builder::new() - .method(http::Method::GET) - .uri("/some/uri") - .body(()) + let status_code0 = StatusCode::OK; + let status_code1 = StatusCode::NOT_FOUND; + let operation_id = "some_operation_id"; + tracker + .update(operation_id, status_code0, Duration::from_secs_f64(0.5)) .unwrap(); - let status_code = StatusCode::OK; tracker - .update( - &RequestInfo::new(&request, "0.0.0.0:0".parse().unwrap()), - status_code, - Duration::from_secs_f64(0.5), - ) + .update(operation_id, status_code1, Duration::from_secs_f64(0.5)) .unwrap(); - - let key = "/some/uri:GET:200"; - let actual_hist = tracker.latencies.lock().unwrap()[key].datum.clone(); - 
assert_eq!(actual_hist.n_samples(), 1); - let bins = actual_hist.iter().collect::>(); - assert_eq!(bins[1].count, 1); - } - - #[test] - fn test_normalize_uri_path() { - const EXPECTED: &str = "/foo/bar"; - const TESTS: &[&str] = &[ - "/foo/bar", - "/foo/bar/", - "//foo/bar", - "//foo/bar/", - "/foo/bar//", - "////foo/bar/////", - ]; - for test in TESTS.iter() { - println!("{test}"); - assert_eq!(normalized_uri_path(&test.parse().unwrap()), EXPECTED); + let key0 = RequestLatencyHistogram::key_for(operation_id, status_code0); + let key1 = RequestLatencyHistogram::key_for(operation_id, status_code1); + let latencies = tracker.latencies.lock().unwrap(); + assert_eq!(latencies.len(), 2); + for key in [key0, key1] { + let actual_hist = &latencies[&key].datum; + assert_eq!(actual_hist.n_samples(), 1); + let bins = actual_hist.iter().collect::>(); + assert_eq!(bins[1].count, 1); } } } diff --git a/oximeter/oximeter/schema/http-service.toml b/oximeter/oximeter/schema/http-service.toml index 9098110656..5270f6942c 100644 --- a/oximeter/oximeter/schema/http-service.toml +++ b/oximeter/oximeter/schema/http-service.toml @@ -14,7 +14,7 @@ description = "Duration for the server to handle a request" units = "seconds" datum_type = "histogram_f64" versions = [ - { added_in = 1, fields = [ "route", "method", "status_code" ] } + { added_in = 1, fields = [ "operation_id", "status_code" ] } ] [fields.name] @@ -25,14 +25,15 @@ description = "The name of the HTTP server, or program running it" type = "uuid" description = "UUID of the HTTP server" -[fields.route] +[fields.operation_id] type = "string" -description = "HTTP route in the request" +description = """\ +The identifier for the HTTP operation.\ -[fields.method] -type = "string" -description = "HTTP method in the request" +In most cases, this the OpenAPI `operationId` field that uniquely identifies the +endpoint the request is targeted to and the HTTP method used. 
+""" [fields.status_code] -type = "i64" +type = "u16" description = "HTTP status code in the server's response" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index fb1b94ae0f..1c58626d2d 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -101,6 +101,7 @@ sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.6.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } smallvec = { version = "1.13.2", default-features = false, features = ["const_new"] } +socket2 = { version = "0.5.7", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } @@ -208,6 +209,7 @@ sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.6.0", features = ["bytes", "inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } smallvec = { version = "1.13.2", default-features = false, features = ["const_new"] } +socket2 = { version = "0.5.7", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } From 845298e32594018b579353c1487f9c6456efffea Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Mon, 19 Aug 2024 09:45:55 -0700 Subject: [PATCH 40/51] Support hex integer literals in OxQL (#6380) --- oximeter/db/src/oxql/ast/grammar.rs | 47 ++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/oximeter/db/src/oxql/ast/grammar.rs b/oximeter/db/src/oxql/ast/grammar.rs index cbca4470f9..62182ec553 100644 --- a/oximeter/db/src/oxql/ast/grammar.rs +++ b/oximeter/db/src/oxql/ast/grammar.rs @@ -279,11 +279,27 @@ peg::parser! 
{ pub rule string_literal() -> Literal = s:string_literal_impl() { Literal::String(s) } + pub(super) rule hex_integer_literal_impl() -> i128 + = n:$("0x" ['0'..='9' | 'a'..='f' | 'A'..='F']+ !['.']) + {? + let Some((maybe_sign, digits)) = n.split_once("0x") else { + return Err("hex literals should start with '0x'"); + }; + i128::from_str_radix(digits, 16).map_err(|_| "invalid hex literal") + } + + pub(super) rule dec_integer_literal_impl() -> i128 + = n:$(['0'..='9']+ !['e' | 'E' | '.']) + {? + n.parse().map_err(|_| "integer literal") + } + pub(super) rule integer_literal_impl() -> i128 - = n:$("-"? ['0'..='9']+ !['e' | 'E' | '.']) + = maybe_sign:$("-"?) n:(hex_integer_literal_impl() / dec_integer_literal_impl()) {? - let Ok(x) = n.parse() else { - return Err("integer literal"); + let sign = if maybe_sign == "-" { -1 } else { 1 }; + let Some(x) = n.checked_mul(sign) else { + return Err("negative overflow"); }; if x < i128::from(i64::MIN) { Err("negative overflow") @@ -747,13 +763,36 @@ mod tests { fn test_integer_literal() { assert_eq!(query_parser::integer_literal_impl("1").unwrap(), 1); assert_eq!(query_parser::integer_literal_impl("-1").unwrap(), -1); - assert_eq!(query_parser::integer_literal_impl("-1").unwrap(), -1); assert!(query_parser::integer_literal_impl("-1.0").is_err()); assert!(query_parser::integer_literal_impl("-1.").is_err()); assert!(query_parser::integer_literal_impl("1e3").is_err()); } + #[test] + fn test_hex_integer_literal() { + assert_eq!(query_parser::integer_literal_impl("0x1").unwrap(), 1); + assert_eq!(query_parser::integer_literal_impl("-0x1").unwrap(), -1); + assert_eq!(query_parser::integer_literal_impl("-0xa").unwrap(), -0xa); + assert_eq!( + query_parser::integer_literal_impl("0xfeed").unwrap(), + 0xfeed + ); + assert_eq!( + query_parser::integer_literal_impl("0xFEED").unwrap(), + 0xfeed + ); + + // Out of range in either direction + assert!(query_parser::integer_literal_impl("0xFFFFFFFFFFFFFFFFFFFF") + .is_err()); + 
assert!(query_parser::integer_literal_impl("-0xFFFFFFFFFFFFFFFFFFFF") + .is_err()); + + assert!(query_parser::integer_literal_impl("-0x1.0").is_err()); + assert!(query_parser::integer_literal_impl("-0x1.").is_err()); + } + #[test] fn test_double_literal() { assert_eq!(query_parser::double_literal_impl("1.0").unwrap(), 1.0); From 54b387871e17579f9ad30166d4b7522ddc18ba31 Mon Sep 17 00:00:00 2001 From: Rain Date: Mon, 19 Aug 2024 10:36:31 -0700 Subject: [PATCH 41/51] [sled-agent] make sled-agent-sim implement the sled-agent API trait (#6339) `sled-agent-sim` is another implementation of the API, so it can be backed by the same trait, reducing the likelihood of confusion. I did have to add stub implementations for unimplemented methods, though -- I think that's okay. (I'd like to address this via the `api_description` macro at some point.) --- sled-agent/src/sim/http_entrypoints.rs | 894 +++++++++++++------------ 1 file changed, 455 insertions(+), 439 deletions(-) diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index c219a747ce..e93bebad98 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -5,18 +5,27 @@ //! 
HTTP entrypoint functions for the sled agent's exposed API use super::collection::PokeMode; +use camino::Utf8PathBuf; +use dropshot::endpoint; use dropshot::ApiDescription; +use dropshot::FreeformBody; use dropshot::HttpError; +use dropshot::HttpResponseCreated; +use dropshot::HttpResponseDeleted; +use dropshot::HttpResponseHeaders; use dropshot::HttpResponseOk; use dropshot::HttpResponseUpdatedNoContent; use dropshot::Path; +use dropshot::Query; use dropshot::RequestContext; +use dropshot::StreamingBody; use dropshot::TypedBody; -use dropshot::{endpoint, ApiDescriptionRegisterError}; +use nexus_sled_agent_shared::inventory::SledRole; use nexus_sled_agent_shared::inventory::{Inventory, OmicronZonesConfig}; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::UpdateArtifactId; +use omicron_common::api::internal::shared::SledIdentifiers; use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, @@ -24,8 +33,12 @@ use omicron_common::api::internal::shared::{ use omicron_common::disk::DisksManagementResult; use omicron_common::disk::OmicronPhysicalDisksConfig; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use sled_agent_api::*; +use sled_agent_types::boot_disk::BootDiskOsWriteStatus; +use sled_agent_types::boot_disk::BootDiskPathParams; +use sled_agent_types::boot_disk::BootDiskUpdatePathParams; +use sled_agent_types::boot_disk::BootDiskWriteStartQueryParams; +use sled_agent_types::bootstore::BootstoreStatus; use sled_agent_types::disk::DiskEnsureBody; use sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::firewall_rules::VpcFirewallRulesEnsureBody; @@ -35,8 +48,14 @@ use sled_agent_types::instance::InstancePutStateBody; use 
sled_agent_types::instance::InstancePutStateResponse; use sled_agent_types::instance::InstanceUnregisterResponse; use sled_agent_types::sled::AddSledRequest; +use sled_agent_types::time_sync::TimeSync; +use sled_agent_types::zone_bundle::BundleUtilization; +use sled_agent_types::zone_bundle::CleanupContext; +use sled_agent_types::zone_bundle::CleanupCount; +use sled_agent_types::zone_bundle::ZoneBundleId; +use sled_agent_types::zone_bundle::ZoneBundleMetadata; +use std::collections::BTreeMap; use std::sync::Arc; -use uuid::Uuid; use super::sled_agent::SledAgent; @@ -44,510 +63,507 @@ type SledApiDescription = ApiDescription>; /// Returns a description of the sled agent API pub fn api() -> SledApiDescription { - fn register_endpoints( - api: &mut SledApiDescription, - ) -> Result<(), ApiDescriptionRegisterError> { - api.register(instance_put_state)?; - api.register(instance_get_state)?; - api.register(instance_register)?; - api.register(instance_unregister)?; - api.register(instance_put_external_ip)?; - api.register(instance_delete_external_ip)?; + fn register_endpoints() -> Result { + let mut api = sled_agent_api::sled_agent_api_mod::api_description::< + SledAgentSimImpl, + >()?; api.register(instance_poke_post)?; api.register(instance_poke_single_step_post)?; api.register(instance_post_sim_migration_source)?; - api.register(disk_put)?; api.register(disk_poke_post)?; - api.register(update_artifact)?; - api.register(instance_issue_disk_snapshot_request)?; - api.register(vpc_firewall_rules_put)?; - api.register(set_v2p)?; - api.register(del_v2p)?; - api.register(list_v2p)?; - api.register(uplink_ensure)?; - api.register(read_network_bootstore_config)?; - api.register(write_network_bootstore_config)?; - api.register(inventory)?; - api.register(omicron_physical_disks_get)?; - api.register(omicron_physical_disks_put)?; - api.register(omicron_zones_get)?; - api.register(omicron_zones_put)?; - api.register(sled_add)?; - api.register(list_vpc_routes)?; - 
api.register(set_vpc_routes)?; - - Ok(()) - } - - let mut api = SledApiDescription::new(); - if let Err(err) = register_endpoints(&mut api) { - panic!("failed to register entrypoints: {}", err); - } - api -} -/// Path parameters for Instance requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct InstancePathParam { - instance_id: InstanceUuid, -} + Ok(api) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}", -}] -async fn instance_register( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_register( - instance_id, - body_args.propolis_id, - body_args.hardware, - body_args.instance_runtime, - body_args.vmm_runtime, - body_args.metadata, + register_endpoints().expect("failed to register entrypoints") +} + +enum SledAgentSimImpl {} + +impl SledAgentApi for SledAgentSimImpl { + type Context = Arc; + + async fn instance_register( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.instance_register( + instance_id, + body_args.propolis_id, + body_args.hardware, + body_args.instance_runtime, + body_args.vmm_runtime, + body_args.metadata, + ) + .await?, + )) + } + + async fn instance_unregister( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + Ok(HttpResponseOk(sa.instance_unregister(instance_id).await?)) + } + + async fn instance_put_state( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = 
body.into_inner(); + Ok(HttpResponseOk( + sa.instance_ensure_state(instance_id, body_args.state).await?, + )) + } + + async fn instance_get_state( + rqctx: RequestContext, + path_params: Path, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) + } + + async fn instance_put_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_put_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn instance_delete_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_delete_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn disk_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let disk_id = path_params.into_inner().disk_id; + let body_args = body.into_inner(); + Ok(HttpResponseOk( + sa.disk_ensure( + disk_id, + body_args.initial_runtime.clone(), + body_args.target.clone(), + ) + .await?, + )) + } + + async fn update_artifact( + rqctx: RequestContext, + artifact: TypedBody, + ) -> Result { + let sa = rqctx.context(); + sa.updates() + .download_artifact( + artifact.into_inner(), + rqctx.context().nexus_client.as_ref(), + ) + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; + Ok(HttpResponseUpdatedNoContent()) + } + + async fn instance_issue_disk_snapshot_request( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result< + HttpResponseOk, + HttpError, + > { + let sa = rqctx.context(); + let 
path_params = path_params.into_inner(); + let body = body.into_inner(); + + sa.instance_issue_disk_snapshot_request( + InstanceUuid::from_untyped_uuid(path_params.instance_id), + path_params.disk_id, + body.snapshot_id, ) - .await?, - )) -} + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; -#[endpoint { - method = DELETE, - path = "/instances/{instance_id}", -}] -async fn instance_unregister( - rqctx: RequestContext>, - path_params: Path, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_unregister(instance_id).await?)) -} + Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { + snapshot_id: body.snapshot_id, + })) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}/state", -}] -async fn instance_put_state( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.instance_ensure_state(instance_id, body_args.state).await?, - )) -} + async fn vpc_firewall_rules_put( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, + ) -> Result { + let _sa = rqctx.context(); + let _vpc_id = path_params.into_inner().vpc_id; + let _body_args = body.into_inner(); -#[endpoint { - method = GET, - path = "/instances/{instance_id}/state", -}] -async fn instance_get_state( - rqctx: RequestContext>, - path_params: Path, -) -> Result, HttpError> { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - Ok(HttpResponseOk(sa.instance_get_state(instance_id).await?)) -} + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = PUT, - path = "/instances/{instance_id}/external-ip", -}] -async fn instance_put_external_ip( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = 
rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - sa.instance_put_external_ip(instance_id, &body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + async fn set_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); -#[endpoint { - method = DELETE, - path = "/instances/{instance_id}/external-ip", -}] -async fn instance_delete_external_ip( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - let body_args = body.into_inner(); - sa.instance_delete_external_ip(instance_id, &body_args).await?; - Ok(HttpResponseUpdatedNoContent()) -} + sa.set_virtual_nic_host(&body_args) + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; -#[endpoint { - method = POST, - path = "/instances/{instance_id}/poke", -}] -async fn instance_poke_post( - rqctx: RequestContext>, - path_params: Path, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_poke(instance_id, PokeMode::Drain).await; - Ok(HttpResponseUpdatedNoContent()) -} + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = POST, - path = "/instances/{instance_id}/poke-single-step", -}] -async fn instance_poke_single_step_post( - rqctx: RequestContext>, - path_params: Path, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_poke(instance_id, PokeMode::SingleStep).await; - Ok(HttpResponseUpdatedNoContent()) -} + async fn del_v2p( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); -#[endpoint { - method = POST, - path = "/instances/{instance_id}/sim-migration-source", -}] -async fn instance_post_sim_migration_source( - rqctx: RequestContext>, - 
path_params: Path, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let instance_id = path_params.into_inner().instance_id; - sa.instance_simulate_migration_source(instance_id, body.into_inner()) - .await?; - Ok(HttpResponseUpdatedNoContent()) -} + sa.unset_virtual_nic_host(&body_args) + .await + .map_err(|e| HttpError::for_internal_error(e.to_string()))?; -/// Path parameters for Disk requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct DiskPathParam { - disk_id: Uuid, -} + Ok(HttpResponseUpdatedNoContent()) + } -#[endpoint { - method = PUT, - path = "/disks/{disk_id}", -}] -async fn disk_put( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let disk_id = path_params.into_inner().disk_id; - let body_args = body.into_inner(); - Ok(HttpResponseOk( - sa.disk_ensure( - disk_id, - body_args.initial_runtime.clone(), - body_args.target.clone(), - ) - .await?, - )) -} + async fn list_v2p( + rqctx: RequestContext, + ) -> Result>, HttpError> + { + let sa = rqctx.context(); -#[endpoint { - method = POST, - path = "/disks/{disk_id}/poke", -}] -async fn disk_poke_post( - rqctx: RequestContext>, - path_params: Path, -) -> Result { - let sa = rqctx.context(); - let disk_id = path_params.into_inner().disk_id; - sa.disk_poke(disk_id).await; - Ok(HttpResponseUpdatedNoContent()) -} + let vnics = sa.list_virtual_nics().await.map_err(HttpError::from)?; -#[endpoint { - method = POST, - path = "/update" -}] -async fn update_artifact( - rqctx: RequestContext>, - artifact: TypedBody, -) -> Result { - let sa = rqctx.context(); - sa.updates() - .download_artifact( - artifact.into_inner(), - rqctx.context().nexus_client.as_ref(), - ) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; - Ok(HttpResponseUpdatedNoContent()) -} + Ok(HttpResponseOk(vnics)) + } -#[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestPathParam { - instance_id: Uuid, - 
disk_id: Uuid, -} + async fn uplink_ensure( + _rqctx: RequestContext, + _body: TypedBody, + ) -> Result { + Ok(HttpResponseUpdatedNoContent()) + } -#[derive(Deserialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestBody { - snapshot_id: Uuid, -} + async fn read_network_bootstore_config_cache( + rqctx: RequestContext, + ) -> Result, HttpError> { + let config = + rqctx.context().bootstore_network_config.lock().await.clone(); + Ok(HttpResponseOk(config)) + } -#[derive(Serialize, JsonSchema)] -pub struct InstanceIssueDiskSnapshotRequestResponse { - snapshot_id: Uuid, -} + async fn write_network_bootstore_config( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let mut config = rqctx.context().bootstore_network_config.lock().await; + *config = body.into_inner(); + Ok(HttpResponseUpdatedNoContent()) + } -/// Take a snapshot of a disk that is attached to an instance -#[endpoint { - method = POST, - path = "/instances/{instance_id}/disks/{disk_id}/snapshot", -}] -async fn instance_issue_disk_snapshot_request( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result, HttpError> -{ - let sa = rqctx.context(); - let path_params = path_params.into_inner(); - let body = body.into_inner(); - - sa.instance_issue_disk_snapshot_request( - InstanceUuid::from_untyped_uuid(path_params.instance_id), - path_params.disk_id, - body.snapshot_id, - ) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; - - Ok(HttpResponseOk(InstanceIssueDiskSnapshotRequestResponse { - snapshot_id: body.snapshot_id, - })) -} + /// Fetch basic information about this sled + async fn inventory( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk( + sa.inventory(rqctx.server.local_addr).await.map_err(|e| { + HttpError::for_internal_error(format!("{:#}", e)) + })?, + )) + } -/// Path parameters for VPC requests (sled agent API) -#[derive(Deserialize, JsonSchema)] -struct VpcPathParam { - vpc_id: 
Uuid, -} + async fn omicron_physical_disks_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.omicron_physical_disks_ensure(body_args).await?; + Ok(HttpResponseOk(result)) + } -#[endpoint { - method = PUT, - path = "/vpc/{vpc_id}/firewall/rules", -}] -async fn vpc_firewall_rules_put( - rqctx: RequestContext>, - path_params: Path, - body: TypedBody, -) -> Result { - let _sa = rqctx.context(); - let _vpc_id = path_params.into_inner().vpc_id; - let _body_args = body.into_inner(); + async fn omicron_physical_disks_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) + } - Ok(HttpResponseUpdatedNoContent()) -} + async fn omicron_zones_get( + rqctx: RequestContext, + ) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_zones_list().await)) + } -/// Create a mapping from a virtual NIC to a physical host -#[endpoint { - method = PUT, - path = "/v2p/", -}] -async fn set_v2p( - rqctx: RequestContext>, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); + async fn omicron_zones_put( + rqctx: RequestContext, + body: TypedBody, + ) -> Result { + let sa = rqctx.context(); + let body_args = body.into_inner(); + sa.omicron_zones_ensure(body_args).await; + Ok(HttpResponseUpdatedNoContent()) + } - sa.set_virtual_nic_host(&body_args) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; + async fn sled_add( + _rqctx: RequestContext, + _body: TypedBody, + ) -> Result { + Ok(HttpResponseUpdatedNoContent()) + } - Ok(HttpResponseUpdatedNoContent()) -} + async fn list_vpc_routes( + rqctx: RequestContext, + ) -> Result>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.list_vpc_routes().await)) + } -/// Delete a mapping from a virtual NIC to a physical host -#[endpoint { - method 
= DELETE, - path = "/v2p/", -}] -async fn del_v2p( - rqctx: RequestContext>, - body: TypedBody, -) -> Result { - let sa = rqctx.context(); - let body_args = body.into_inner(); + async fn set_vpc_routes( + rqctx: RequestContext, + body: TypedBody>, + ) -> Result { + let sa = rqctx.context(); + sa.set_vpc_routes(body.into_inner()).await; + Ok(HttpResponseUpdatedNoContent()) + } - sa.unset_virtual_nic_host(&body_args) - .await - .map_err(|e| HttpError::for_internal_error(e.to_string()))?; + // --- Unimplemented endpoints --- - Ok(HttpResponseUpdatedNoContent()) -} + async fn zone_bundle_list_all( + _rqctx: RequestContext, + _query: Query, + ) -> Result>, HttpError> { + method_unimplemented() + } -/// List v2p mappings present on sled -#[endpoint { - method = GET, - path = "/v2p/", -}] -async fn list_v2p( - rqctx: RequestContext>, -) -> Result>, HttpError> { - let sa = rqctx.context(); + async fn zone_bundle_list( + _rqctx: RequestContext, + _params: Path, + ) -> Result>, HttpError> { + method_unimplemented() + } - let vnics = sa.list_virtual_nics().await.map_err(HttpError::from)?; + async fn zone_bundle_create( + _rqctx: RequestContext, + _params: Path, + ) -> Result, HttpError> { + method_unimplemented() + } - Ok(HttpResponseOk(vnics)) -} + async fn zone_bundle_get( + _rqctx: RequestContext, + _params: Path, + ) -> Result>, HttpError> + { + method_unimplemented() + } -#[endpoint { - method = POST, - path = "/switch-ports", -}] -async fn uplink_ensure( - _rqctx: RequestContext>, - _body: TypedBody, -) -> Result { - Ok(HttpResponseUpdatedNoContent()) -} + async fn zone_bundle_delete( + _rqctx: RequestContext, + _params: Path, + ) -> Result { + method_unimplemented() + } -#[endpoint { - method = GET, - path = "/network-bootstore-config", -}] -async fn read_network_bootstore_config( - rqctx: RequestContext>, -) -> Result, HttpError> { - let config = rqctx.context().bootstore_network_config.lock().await.clone(); - Ok(HttpResponseOk(config)) -} + async fn 
zone_bundle_utilization( + _rqctx: RequestContext, + ) -> Result< + HttpResponseOk>, + HttpError, + > { + method_unimplemented() + } -#[endpoint { - method = PUT, - path = "/network-bootstore-config", -}] -async fn write_network_bootstore_config( - rqctx: RequestContext>, - body: TypedBody, -) -> Result { - let mut config = rqctx.context().bootstore_network_config.lock().await; - *config = body.into_inner(); - Ok(HttpResponseUpdatedNoContent()) -} + async fn zone_bundle_cleanup_context( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } -/// Fetch basic information about this sled -#[endpoint { - method = GET, - path = "/inventory", -}] -async fn inventory( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk( - sa.inventory(rqctx.server.local_addr) - .await - .map_err(|e| HttpError::for_internal_error(format!("{:#}", e)))?, - )) -} + async fn zone_bundle_cleanup_context_update( + _rqctx: RequestContext, + _body: TypedBody, + ) -> Result { + method_unimplemented() + } -#[endpoint { - method = PUT, - path = "/omicron-physical-disks", -}] -async fn omicron_physical_disks_put( - rqctx: RequestContext>, - body: TypedBody, -) -> Result, HttpError> { - let sa = rqctx.context(); - let body_args = body.into_inner(); - let result = sa.omicron_physical_disks_ensure(body_args).await?; - Ok(HttpResponseOk(result)) -} + async fn zone_bundle_cleanup( + _rqctx: RequestContext, + ) -> Result>, HttpError> + { + method_unimplemented() + } -#[endpoint { - method = GET, - path = "/omicron-physical-disks", -}] -async fn omicron_physical_disks_get( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) + async fn zones_list( + _rqctx: RequestContext, + ) -> Result>, HttpError> { + method_unimplemented() + } + + async fn zpools_get( + _rqctx: RequestContext, + ) -> Result>, HttpError> { + method_unimplemented() + } + + 
async fn sled_role_get( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn cockroachdb_init( + _rqctx: RequestContext, + ) -> Result { + method_unimplemented() + } + + async fn timesync_get( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn host_os_write_start( + _rqctx: RequestContext, + _path_params: Path, + _query_params: Query, + _body: StreamingBody, + ) -> Result { + method_unimplemented() + } + + async fn host_os_write_status_get( + _rqctx: RequestContext, + _path_params: Path, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn host_os_write_status_delete( + _rqctx: RequestContext, + _path_params: Path, + ) -> Result { + method_unimplemented() + } + + async fn sled_identifiers( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn bootstore_status( + _rqctx: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } } -#[endpoint { - method = GET, - path = "/omicron-zones", -}] -async fn omicron_zones_get( - rqctx: RequestContext>, -) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_zones_list().await)) +fn method_unimplemented() -> Result { + Err(HttpError { + // Use a client error here (405 Method Not Allowed vs 501 Not + // Implemented) even though it isn't strictly accurate here, so tests + // get to see the error message. 
+ status_code: http::StatusCode::METHOD_NOT_ALLOWED, + error_code: None, + external_message: "Method not implemented in sled-agent-sim" + .to_string(), + internal_message: "Method not implemented in sled-agent-sim" + .to_string(), + }) } +// --- Extra endpoints only available in the sim implementation --- + #[endpoint { - method = PUT, - path = "/omicron-zones", + method = POST, + path = "/instances/{instance_id}/poke", }] -async fn omicron_zones_put( +async fn instance_poke_post( rqctx: RequestContext>, - body: TypedBody, + path_params: Path, ) -> Result { let sa = rqctx.context(); - let body_args = body.into_inner(); - sa.omicron_zones_ensure(body_args).await; + let instance_id = path_params.into_inner().instance_id; + sa.instance_poke(instance_id, PokeMode::Drain).await; Ok(HttpResponseUpdatedNoContent()) } #[endpoint { - method = PUT, - path = "/sleds" + method = POST, + path = "/instances/{instance_id}/poke-single-step", }] -async fn sled_add( - _rqctx: RequestContext>, - _body: TypedBody, +async fn instance_poke_single_step_post( + rqctx: RequestContext>, + path_params: Path, ) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + sa.instance_poke(instance_id, PokeMode::SingleStep).await; Ok(HttpResponseUpdatedNoContent()) } #[endpoint { - method = GET, - path = "/vpc-routes", + method = POST, + path = "/instances/{instance_id}/sim-migration-source", }] -async fn list_vpc_routes( +async fn instance_post_sim_migration_source( rqctx: RequestContext>, -) -> Result>, HttpError> { + path_params: Path, + body: TypedBody, +) -> Result { let sa = rqctx.context(); - Ok(HttpResponseOk(sa.list_vpc_routes().await)) + let instance_id = path_params.into_inner().instance_id; + sa.instance_simulate_migration_source(instance_id, body.into_inner()) + .await?; + Ok(HttpResponseUpdatedNoContent()) } #[endpoint { - method = PUT, - path = "/vpc-routes", + method = POST, + path = "/disks/{disk_id}/poke", }] -async fn set_vpc_routes( 
+async fn disk_poke_post( rqctx: RequestContext>, - body: TypedBody>, + path_params: Path, ) -> Result { let sa = rqctx.context(); - sa.set_vpc_routes(body.into_inner()).await; + let disk_id = path_params.into_inner().disk_id; + sa.disk_poke(disk_id).await; Ok(HttpResponseUpdatedNoContent()) } From 6dd980251a26430466bcd5aff1edad5416cf94e5 Mon Sep 17 00:00:00 2001 From: Rain Date: Mon, 19 Aug 2024 12:13:05 -0700 Subject: [PATCH 42/51] [3/6] [nexus-auth] move some types into nexus-types (#6369) Put them here rather than in `nexus-auth` so that the upcoming nexus-external-api crate has fewer dependencies. --- Cargo.lock | 5 +++++ nexus/auth/src/authn/external/mod.rs | 1 - nexus/auth/src/authn/external/session_cookie.rs | 2 +- nexus/src/external_api/console_api.rs | 10 ++++------ nexus/types/Cargo.toml | 5 +++++ .../src/authn/external => types/src/authn}/cookies.rs | 0 nexus/types/src/authn/mod.rs | 7 +++++++ nexus/types/src/lib.rs | 1 + 8 files changed, 23 insertions(+), 8 deletions(-) rename nexus/{auth/src/authn/external => types/src/authn}/cookies.rs (100%) create mode 100644 nexus/types/src/authn/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 874b33134f..6bd71f6d38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5443,17 +5443,22 @@ version = "0.1.0" dependencies = [ "anyhow", "api_identity", + "async-trait", "base64 0.22.1", "chrono", "clap", + "cookie 0.18.1", "derive-where", "derive_more", "dns-service-client", + "dropshot", "futures", "gateway-client", + "http 0.2.12", "humantime", "ipnetwork", "newtype-uuid", + "newtype_derive", "nexus-sled-agent-shared", "omicron-common", "omicron-passwords", diff --git a/nexus/auth/src/authn/external/mod.rs b/nexus/auth/src/authn/external/mod.rs index ccb7218285..5c7fc7af05 100644 --- a/nexus/auth/src/authn/external/mod.rs +++ b/nexus/auth/src/authn/external/mod.rs @@ -13,7 +13,6 @@ use slog::trace; use std::borrow::Borrow; use uuid::Uuid; -pub mod cookies; pub mod session_cookie; pub mod spoof; pub mod token; diff --git 
a/nexus/auth/src/authn/external/session_cookie.rs b/nexus/auth/src/authn/external/session_cookie.rs index 7811bf2826..f6b23308a0 100644 --- a/nexus/auth/src/authn/external/session_cookie.rs +++ b/nexus/auth/src/authn/external/session_cookie.rs @@ -4,7 +4,6 @@ //! authn scheme for console that looks up cookie values in a session table -use super::cookies::parse_cookies; use super::{HttpAuthnScheme, Reason, SchemeResult}; use crate::authn; use crate::authn::{Actor, Details}; @@ -13,6 +12,7 @@ use async_trait::async_trait; use chrono::{DateTime, Duration, Utc}; use dropshot::HttpError; use http::HeaderValue; +use nexus_types::authn::cookies::parse_cookies; use slog::debug; use uuid::Uuid; diff --git a/nexus/src/external_api/console_api.rs b/nexus/src/external_api/console_api.rs index fb0a47bbea..2169b631a7 100644 --- a/nexus/src/external_api/console_api.rs +++ b/nexus/src/external_api/console_api.rs @@ -35,15 +35,13 @@ use nexus_db_model::AuthenticationMode; use nexus_db_queries::authn::silos::IdentityProviderType; use nexus_db_queries::context::OpContext; use nexus_db_queries::{ - authn::external::{ - cookies::Cookies, - session_cookie::{ - clear_session_cookie_header_value, session_cookie_header_value, - SessionStore, SESSION_COOKIE_COOKIE_NAME, - }, + authn::external::session_cookie::{ + clear_session_cookie_header_value, session_cookie_header_value, + SessionStore, SESSION_COOKIE_COOKIE_NAME, }, db::identity::Asset, }; +use nexus_types::authn::cookies::Cookies; use nexus_types::external_api::params; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index 8dd6292d5c..124f0d42c9 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -9,14 +9,19 @@ workspace = true [dependencies] anyhow.workspace = true +async-trait.workspace = true chrono.workspace = true clap.workspace = true +cookie.workspace = true base64.workspace = true 
derive-where.workspace = true derive_more.workspace = true +dropshot.workspace = true futures.workspace = true +http.workspace = true humantime.workspace = true ipnetwork.workspace = true +newtype_derive.workspace = true omicron-uuid-kinds.workspace = true openssl.workspace = true oxql-types.workspace = true diff --git a/nexus/auth/src/authn/external/cookies.rs b/nexus/types/src/authn/cookies.rs similarity index 100% rename from nexus/auth/src/authn/external/cookies.rs rename to nexus/types/src/authn/cookies.rs diff --git a/nexus/types/src/authn/mod.rs b/nexus/types/src/authn/mod.rs new file mode 100644 index 0000000000..f87935428e --- /dev/null +++ b/nexus/types/src/authn/mod.rs @@ -0,0 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Authentication types for the Nexus API. + +pub mod cookies; diff --git a/nexus/types/src/lib.rs b/nexus/types/src/lib.rs index 494573e834..8a0a3ec80e 100644 --- a/nexus/types/src/lib.rs +++ b/nexus/types/src/lib.rs @@ -29,6 +29,7 @@ //! rules, so our model layer knows about our views. That seems to be a //! relatively minor offense, so it's the way we leave things for now. 
+pub mod authn; pub mod deployment; pub mod external_api; pub mod identity; From a338e8a69ad8c9924856a20125d39b048c7cb8e1 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Mon, 19 Aug 2024 16:49:56 -0700 Subject: [PATCH 43/51] fix flaky test_demo_saga (#6388) --- nexus/src/app/saga.rs | 4 + nexus/src/app/sagas/demo.rs | 154 +++++++++++++++++++++++++++--------- 2 files changed, 119 insertions(+), 39 deletions(-) diff --git a/nexus/src/app/saga.rs b/nexus/src/app/saga.rs index 5bc69946ad..975df7fc3b 100644 --- a/nexus/src/app/saga.rs +++ b/nexus/src/app/saga.rs @@ -469,6 +469,10 @@ impl super::Nexus { // We don't need the handle that runnable_saga.start() returns because // we're not going to wait for the saga to finish here. let _ = runnable_saga.start().await?; + + let mut demo_sagas = self.demo_sagas()?; + demo_sagas.preregister(demo_saga_id); + Ok(DemoSaga { saga_id, demo_saga_id }) } diff --git a/nexus/src/app/sagas/demo.rs b/nexus/src/app/sagas/demo.rs index 4a8eda8b80..d76a48688d 100644 --- a/nexus/src/app/sagas/demo.rs +++ b/nexus/src/app/sagas/demo.rs @@ -21,56 +21,66 @@ use super::NexusActionContext; use super::{ActionRegistry, NexusSaga, SagaInitError}; use crate::app::sagas::declare_saga_actions; -use anyhow::ensure; +use anyhow::Context; use omicron_common::api::external::Error; use omicron_uuid_kinds::DemoSagaUuid; use serde::Deserialize; use serde::Serialize; use slog::info; use std::collections::BTreeMap; +use std::future::Future; +use std::sync::Arc; use steno::ActionError; -use tokio::sync::oneshot; +use tokio::sync::Semaphore; -/// Set of demo sagas that have been marked completed +/// Rendezvous point for demo sagas /// -/// Nexus maintains one of these at the top level. Individual demo sagas wait -/// until their id shows up here, then remove it and proceed. 
+/// This is where: +/// +/// - demo sagas wait for a completion message +/// - completion messages are recorded for demo sagas that haven't started +/// waiting yet +/// +/// Nexus maintains one of these structures at the top level. pub struct CompletingDemoSagas { - ids: BTreeMap>, + sagas: BTreeMap>, } impl CompletingDemoSagas { pub fn new() -> CompletingDemoSagas { - CompletingDemoSagas { ids: BTreeMap::new() } + CompletingDemoSagas { sagas: BTreeMap::new() } } - pub fn complete(&mut self, id: DemoSagaUuid) -> Result<(), Error> { - self.ids - .remove(&id) - .ok_or_else(|| { - Error::non_resourcetype_not_found(format!( - "demo saga with id {:?}", - id - )) - })? - .send(()) - .map_err(|_| { - Error::internal_error( - "saga stopped listening (Nexus shutting down?)", - ) - }) + pub fn preregister(&mut self, id: DemoSagaUuid) { + assert!(self.sagas.insert(id, Arc::new(Semaphore::new(0))).is_none()); } pub fn subscribe( &mut self, id: DemoSagaUuid, - ) -> Result, anyhow::Error> { - let (tx, rx) = oneshot::channel(); - ensure!( - self.ids.insert(id, tx).is_none(), - "multiple subscriptions for the same demo saga" - ); - Ok(rx) + ) -> impl Future> { + let sem = + self.sagas.entry(id).or_insert_with(|| Arc::new(Semaphore::new(0))); + let sem_clone = sem.clone(); + async move { + sem_clone + .acquire() + .await + // We don't need the Semaphore permit once we've acquired it. 
+ .map(|_| ()) + .context("acquiring demo saga semaphore") + } + } + + pub fn complete(&mut self, id: DemoSagaUuid) -> Result<(), Error> { + let sem = self.sagas.get_mut(&id).ok_or_else(|| { + Error::non_resourcetype_not_found(format!( + "demo saga with demo saga id {:?}", + id + )) + })?; + sem.add_permits(1); + Ok(()) } } @@ -115,21 +125,87 @@ async fn demo_wait(sagactx: NexusActionContext) -> Result<(), ActionError> { .nexus() .demo_sagas() .map_err(ActionError::action_failed)?; - demo_sagas.subscribe(demo_id).map_err(|e| { - ActionError::action_failed(Error::internal_error(&format!( - "demo saga subscribe failed: {:#}", - e - ))) - })? + demo_sagas.subscribe(demo_id) }; match rx.await { Ok(_) => { info!(log, "demo saga: completing"; "id" => %demo_id); + Ok(()) } - Err(_) => { - info!(log, "demo saga: waiting failed (Nexus shutting down?)"; - "id" => %demo_id); + Err(error) => { + warn!(log, "demo saga: waiting failed (Nexus shutting down?)"; + "id" => %demo_id, + "error" => #?error, + ); + Err(ActionError::action_failed(Error::internal_error(&format!( + "demo saga wait failed: {:#}", + error + )))) } } - Ok(()) +} + +#[cfg(test)] +mod test { + use super::*; + use assert_matches::assert_matches; + + #[tokio::test] + async fn test_demo_saga_rendezvous() { + let mut hub = CompletingDemoSagas::new(); + + // The most straightforward sequence is: + // - create (preregister) demo saga + // - demo saga starts and waits for completion (subscribe) + // - complete demo saga + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + hub.preregister(demo_saga_id); + println!("demo saga: {demo_saga_id} preregistered"); + let subscribe = hub.subscribe(demo_saga_id); + println!("demo saga: {demo_saga_id} subscribed"); + assert!(hub.complete(demo_saga_id).is_ok()); + println!("demo saga: {demo_saga_id} marked completed"); + subscribe.await.unwrap(); + println!("demo saga: {demo_saga_id} done"); + + // It's also possible that the completion request 
arrives before the + // saga started waiting. In that case, the sequence is: + // + // - create (preregister) demo saga + // - complete demo saga + // - demo saga starts and waits for completion (subscribe) + // + // This should work, too, with no errors. + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + hub.preregister(demo_saga_id); + println!("demo saga: {demo_saga_id} preregistered"); + assert!(hub.complete(demo_saga_id).is_ok()); + println!("demo saga: {demo_saga_id} marked completed"); + let subscribe = hub.subscribe(demo_saga_id); + println!("demo saga: {demo_saga_id} subscribed"); + subscribe.await.unwrap(); + println!("demo saga: {demo_saga_id} done"); + + // It's also possible to have no preregistration at all. This happens + // if the demo saga was recovered. That's fine, too, but then it will + // only work if the completion arrives after the saga starts waiting. + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + let subscribe = hub.subscribe(demo_saga_id); + println!("demo saga: {demo_saga_id} subscribed"); + assert!(hub.complete(demo_saga_id).is_ok()); + println!("demo saga: {demo_saga_id} marked completed"); + subscribe.await.unwrap(); + println!("demo saga: {demo_saga_id} done"); + + // If there's no preregistration and we get a completion request, then + // that request should fail. + let demo_saga_id = DemoSagaUuid::new_v4(); + println!("demo saga: {demo_saga_id}"); + let error = hub.complete(demo_saga_id).unwrap_err(); + assert_matches!(error, Error::NotFound { .. 
}); + println!("demo saga: {demo_saga_id} complete error: {:#}", error); + } } From 6bd999b4955a23ae883447a5b1ed0c5d19049425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karen=20C=C3=A1rcamo?= Date: Tue, 20 Aug 2024 18:04:40 +1200 Subject: [PATCH 44/51] [reconfigurator] `clickhouse_server` SMF service and oximeter replicated mode (#6343) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Overview This commit introduces a few changes: - a new `clickhouse_server` smf service which runs the old "replicated" mode from the `clickhouse` service - a new `replicated` field for the oximeter configuration file which is consumed by the `oximeter` binary that runs the replicated SQL against a database. It now connects to the listen address from `ServiceName::ClickhouseServer` or `ServiceName::Clickhouse` depending which zone has been deployed. - a new `--clickhouse-topology` build target flag which builds artifacts based on either a `single-node` or `replicated-cluster` setup. The difference between the two is whether the `oximeter` SMF service is executing the `oximeter` CLI with the `--replicated` flag or not. __CAVEAT:__ It's still necessary to manually change the RSS [node count constants](https://github.com/oxidecomputer/omicron/blob/ffc8807caf04ca3f81b543c520ddbe26b3284264/sled-agent/src/rack_setup/plan/service.rs#L57-L77) to the specified amount for each clickhouse topology mode. This requirement will be short lived as we are moving to use reconfigurator. ## Usage To run single node ClickHouse nothing changes, artifacts can be built the same way as before. 
To run replicated ClickHouse, set the [node count constants](https://github.com/oxidecomputer/omicron/blob/ffc8807caf04ca3f81b543c520ddbe26b3284264/sled-agent/src/rack_setup/plan/service.rs#L57-L77) to the specified amount, and set the build target in the following manner: ```console $ cargo run --locked --release --bin omicron-package -- -t centzon target create -i standard -m non-gimlet -s softnpu -r single-sled -c replicated-cluster Finished `release` profile [optimized] target(s) in 1.03s Running `target/release/omicron-package -t centzon target create -i standard -m non-gimlet -s softnpu -r single-sled -c replicated-cluster` Logging to: /home/coatlicue/src/omicron/out/LOG Created new build target 'centzon' and set it as active $ cargo run --locked --release --bin omicron-package -- -t centzon package <...> $ pfexec ./target/release/omicron-package -t centzon install ``` ## Purpose As laid out in [RFD 468](https://rfd.shared.oxide.computer/rfd/0468), to roll out replicated ClickHouse we will need the ability to roll out either replicated or single-node ClickHouse for an undetermined amount of time. This commit is a step in that direction. We need to have separate services for running replicated or single-node ClickHouse servers. ## Testing Deployed omicron on a Helios box in both modes.
Single node: ```console $ cargo run --locked --release --bin omicron-package -- -t centzon target create -i standard -m non-gimlet -s softnpu -r single-sled Finished `release` profile [optimized] target(s) in 0.94s Running `target/release/omicron-package -t centzon target create -i standard -m non-gimlet -s softnpu -r single-sled` Logging to: /home/coatlicue/src/omicron/out/LOG Created new build target 'centzon' and set it as active $ cargo run --locked --release --bin omicron-package -- -t centzon package <...> $ pfexec ./target/release/omicron-package -t centzon install Logging to: /home/coatlicue/src/omicron/out/LOG $ zoneadm list | grep clickhouse oxz_clickhouse_7ce86c8b-2c9e-4d02-a857-269cb0a99c2e root@oxz_clickhouse_7ce86c8b:~# /opt/oxide/clickhouse/clickhouse client --host fd00:1122:3344:101::e ClickHouse client version 23.8.7.1. Connecting to fd00:1122:3344:101::e:9000 as user default. Connected to ClickHouse server version 23.8.7 revision 54465. oxz_clickhouse_7ce86c8b-2c9e-4d02-a857-269cb0a99c2e.local :) SHOW TABLES FROM oximeter SHOW TABLES FROM oximeter Query id: 5e91fafb-4d70-4a27-a188-75fb83bb7e5e ┌─name───────────────────────┐ │ fields_bool │ │ fields_i16 │ │ fields_i32 │ │ fields_i64 │ │ fields_i8 │ │ fields_ipaddr │ │ fields_string │ │ fields_u16 │ │ fields_u32 │ │ fields_u64 │ │ fields_u8 │ │ fields_uuid │ │ measurements_bool │ │ measurements_bytes │ │ measurements_cumulativef32 │ │ measurements_cumulativef64 │ │ measurements_cumulativei64 │ │ measurements_cumulativeu64 │ │ measurements_f32 │ │ measurements_f64 │ │ measurements_histogramf32 │ │ measurements_histogramf64 │ │ measurements_histogrami16 │ │ measurements_histogrami32 │ │ measurements_histogrami64 │ │ measurements_histogrami8 │ │ measurements_histogramu16 │ │ measurements_histogramu32 │ │ measurements_histogramu64 │ │ measurements_histogramu8 │ │ measurements_i16 │ │ measurements_i32 │ │ measurements_i64 │ │ measurements_i8 │ │ measurements_string │ │ measurements_u16 │ │ 
measurements_u32 │ │ measurements_u64 │ │ measurements_u8 │ │ timeseries_schema │ │ version │ └────────────────────────────┘ 41 rows in set. Elapsed: 0.014 sec. oxz_clickhouse_7ce86c8b-2c9e-4d02-a857-269cb0a99c2e.local :) SELECT * FROM oximeter.fields_i64 SELECT * FROM oximeter.fields_i64 Query id: 4bbcec72-101f-4cf4-9966-680381f5b62c ┌─timeseries_name────────────────────────┬───────timeseries_key─┬─field_name──┬─field_value─┐ │ http_service:request_latency_histogram │ 8326032694586838023 │ status_code │ 200 │ <...> $ pfexec zlogin oxz_oximeter_b235200f-f0ad-4218-9184-d995df5acaf0 [Connected to zone 'oxz_oximeter_b235200f-f0ad-4218-9184-d995df5acaf0' pts/3] The illumos Project helios-2.0.22784 July 2024 root@oxz_oximeter_b235200f:~# cat /var/svc/manifest/site/oximeter/config.toml # Example configuration file for running an oximeter collector server [db] batch_size = 1000 batch_interval = 5 # In seconds replicated = false [log] level = "debug" mode = "file" path = "/dev/stdout" if_exists = "append" ``` Replicated cluster: ```console $ cargo run --locked --release --bin omicron-package -- -t centzon target create -i standard -m non-gimlet -s softnpu -r single-sled -c replicated-cluster Finished `release` profile [optimized] target(s) in 1.03s Running `target/release/omicron-package -t centzon target create -i standard -m non-gimlet -s softnpu -r single-sled -c replicated-cluster` Logging to: /home/coatlicue/src/omicron/out/LOG Created new build target 'centzon' and set it as active $ cargo run --locked --release --bin omicron-package -- -t centzon package <...> $ pfexec ./target/release/omicron-package -t centzon install Logging to: /home/coatlicue/src/omicron/out/LOG $ zoneadm list | grep clickhouse oxz_clickhouse_keeper_73e7fda2-20af-4a90-9a61-c89ceed47c1a oxz_clickhouse_server_74876663-5337-4d9b-85cb-99d1e88bdf8a oxz_clickhouse_keeper_8eaac4f9-d9e0-4d56-b269-eab7da0c73a3 oxz_clickhouse_keeper_01f3a6af-5249-4dff-b9a4-f1076e467c9a 
oxz_clickhouse_server_bc6010bf-507c-4b5a-ad4c-3a7af889a6c0 $ pfexec zlogin oxz_clickhouse_server_74876663-5337-4d9b-85cb-99d1e88bdf8a [Connected to zone 'oxz_clickhouse_server_74876663-5337-4d9b-85cb-99d1e88bdf8a' pts/3] The illumos Project helios-2.0.22784 July 2024 root@oxz_clickhouse_server_74876663:~# /opt/oxide/clickhouse_server/clickhouse client --host fd00:1122:3344:101::e ClickHouse client version 23.8.7.1. Connecting to fd00:1122:3344:101::e:9000 as user default. Connected to ClickHouse server version 23.8.7 revision 54465. oximeter_cluster node 1 :) SHOW TABLES FROM oximeter SHOW TABLES FROM oximeter Query id: a5603063-1cbc-41a5-bfbd-33c986764e92 ┌─name─────────────────────────────┐ │ fields_bool │ │ fields_bool_local │ │ fields_i16 │ │ fields_i16_local │ │ fields_i32 │ │ fields_i32_local │ │ fields_i64 │ │ fields_i64_local │ │ fields_i8 │ │ fields_i8_local │ │ fields_ipaddr │ │ fields_ipaddr_local │ │ fields_string │ │ fields_string_local │ │ fields_u16 │ │ fields_u16_local │ │ fields_u32 │ │ fields_u32_local │ │ fields_u64 │ │ fields_u64_local │ │ fields_u8 │ │ fields_u8_local │ │ fields_uuid │ │ fields_uuid_local │ │ measurements_bool │ │ measurements_bool_local │ │ measurements_bytes │ │ measurements_bytes_local │ │ measurements_cumulativef32 │ │ measurements_cumulativef32_local │ │ measurements_cumulativef64 │ │ measurements_cumulativef64_local │ │ measurements_cumulativei64 │ │ measurements_cumulativei64_local │ │ measurements_cumulativeu64 │ │ measurements_cumulativeu64_local │ │ measurements_f32 │ │ measurements_f32_local │ │ measurements_f64 │ │ measurements_f64_local │ │ measurements_histogramf32 │ │ measurements_histogramf32_local │ │ measurements_histogramf64 │ │ measurements_histogramf64_local │ │ measurements_histogrami16 │ │ measurements_histogrami16_local │ │ measurements_histogrami32 │ │ measurements_histogrami32_local │ │ measurements_histogrami64 │ │ measurements_histogrami64_local │ │ measurements_histogrami8 │ │ 
measurements_histogrami8_local │ │ measurements_histogramu16 │ │ measurements_histogramu16_local │ │ measurements_histogramu32 │ │ measurements_histogramu32_local │ │ measurements_histogramu64 │ │ measurements_histogramu64_local │ │ measurements_histogramu8 │ │ measurements_histogramu8_local │ │ measurements_i16 │ │ measurements_i16_local │ │ measurements_i32 │ │ measurements_i32_local │ │ measurements_i64 │ │ measurements_i64_local │ │ measurements_i8 │ │ measurements_i8_local │ │ measurements_string │ │ measurements_string_local │ │ measurements_u16 │ │ measurements_u16_local │ │ measurements_u32 │ │ measurements_u32_local │ │ measurements_u64 │ │ measurements_u64_local │ │ measurements_u8 │ │ measurements_u8_local │ │ timeseries_schema │ │ timeseries_schema_local │ │ version │ └──────────────────────────────────┘ 81 rows in set. Elapsed: 0.010 sec. oximeter_cluster node 1 :) SELECT * FROM oximeter.fields_i64 SELECT * FROM oximeter.fields_i64 Query id: 14f07468-0e33-4de1-8893-df3e11eb7660 ┌─timeseries_name────────────────────────┬───────timeseries_key─┬─field_name──┬─field_value─┐ │ http_service:request_latency_histogram │ 436117616059041516 │ status_code │ 200 │ <...> $ pfexec zlogin oxz_oximeter_bcba1c06-1ca5-49cf-b277-8c2387975274 [Connected to zone 'oxz_oximeter_bcba1c06-1ca5-49cf-b277-8c2387975274' pts/3] The illumos Project helios-2.0.22784 July 2024 root@oxz_oximeter_bcba1c06:~# cat /var/svc/manifest/site/oximeter/config.toml # Example configuration file for running an oximeter collector server [db] batch_size = 1000 batch_interval = 5 # In seconds replicated = true [log] level = "debug" mode = "file" path = "/dev/stdout" if_exists = "append" ``` Related: https://github.com/oxidecomputer/omicron/issues/5999 --- internal-dns-cli/src/bin/dnswait.rs | 6 +- internal-dns/src/config.rs | 4 + nexus/test-utils/src/lib.rs | 1 + oximeter/collector/src/agent.rs | 14 +- oximeter/collector/src/lib.rs | 13 +- package-manifest.toml | 38 ++++- 
package/src/bin/omicron-package.rs | 9 +- package/src/lib.rs | 15 ++ package/src/target.rs | 26 +++- sled-agent/src/rack_setup/plan/service.rs | 52 ++++++- sled-agent/src/services.rs | 78 +++++++++- smf/clickhouse/method_script.sh | 141 ++---------------- .../config_replica.xml | 0 smf/clickhouse_server/manifest.xml | 46 ++++++ smf/clickhouse_server/method_script.sh | 124 +++++++++++++++ .../{ => replicated-cluster}/config.toml | 1 + smf/oximeter/single-node/config.toml | 12 ++ 17 files changed, 428 insertions(+), 152 deletions(-) rename smf/{clickhouse => clickhouse_server}/config_replica.xml (100%) create mode 100644 smf/clickhouse_server/manifest.xml create mode 100755 smf/clickhouse_server/method_script.sh rename smf/oximeter/{ => replicated-cluster}/config.toml (91%) create mode 100644 smf/oximeter/single-node/config.toml diff --git a/internal-dns-cli/src/bin/dnswait.rs b/internal-dns-cli/src/bin/dnswait.rs index 8dbd675d64..f9875e71a0 100644 --- a/internal-dns-cli/src/bin/dnswait.rs +++ b/internal-dns-cli/src/bin/dnswait.rs @@ -36,15 +36,17 @@ struct Opt { #[value(rename_all = "kebab-case")] enum ServiceName { Cockroach, - Clickhouse, ClickhouseKeeper, + ClickhouseServer, } impl From for internal_dns::ServiceName { fn from(value: ServiceName) -> Self { match value { ServiceName::Cockroach => internal_dns::ServiceName::Cockroach, - ServiceName::Clickhouse => internal_dns::ServiceName::Clickhouse, + ServiceName::ClickhouseServer => { + internal_dns::ServiceName::ClickhouseServer + } ServiceName::ClickhouseKeeper => { internal_dns::ServiceName::ClickhouseKeeper } diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs index a9ff664030..e9d7ed873d 100644 --- a/internal-dns/src/config.rs +++ b/internal-dns/src/config.rs @@ -510,6 +510,10 @@ mod test { ServiceName::ClickhouseKeeper.dns_name(), "_clickhouse-keeper._tcp", ); + assert_eq!( + ServiceName::ClickhouseServer.dns_name(), + "_clickhouse-server._tcp", + ); 
assert_eq!(ServiceName::Cockroach.dns_name(), "_cockroach._tcp",); assert_eq!(ServiceName::InternalDns.dns_name(), "_nameservice._tcp",); assert_eq!(ServiceName::Nexus.dns_name(), "_nexus._tcp",); diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index ea46f2d017..acee46ce10 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -1428,6 +1428,7 @@ pub async fn start_oximeter( address: Some(SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db_port)), batch_size: 10, batch_interval: 1, + replicated: false, }; let config = oximeter_collector::Config { nexus_address: Some(nexus_address), diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index 8271b2e068..b13fbd3938 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -17,7 +17,6 @@ use futures::TryStreamExt; use internal_dns::resolver::Resolver; use internal_dns::ServiceName; use nexus_client::types::IdSortMode; -use omicron_common::address::CLICKHOUSE_PORT; use omicron_common::backoff; use omicron_common::backoff::BackoffError; use oximeter::types::ProducerResults; @@ -380,6 +379,7 @@ impl OximeterAgent { db_config: DbConfig, resolver: &Resolver, log: &Logger, + replicated: bool, ) -> Result { let (result_sender, result_receiver) = mpsc::channel(8); let log = log.new(o!( @@ -393,10 +393,15 @@ impl OximeterAgent { // database. let db_address = if let Some(address) = db_config.address { address + } else if replicated { + SocketAddr::V6( + resolver + .lookup_socket_v6(ServiceName::ClickhouseServer) + .await?, + ) } else { - SocketAddr::new( - resolver.lookup_ip(ServiceName::Clickhouse).await?, - CLICKHOUSE_PORT, + SocketAddr::V6( + resolver.lookup_socket_v6(ServiceName::Clickhouse).await?, ) }; @@ -422,7 +427,6 @@ impl OximeterAgent { .. 
}) => { debug!(log, "oximeter database does not exist, creating"); - let replicated = client.is_oximeter_cluster().await?; client .initialize_db_with_version( replicated, diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 7dd423d074..0576c7d532 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -78,12 +78,18 @@ pub struct DbConfig { #[serde(default, skip_serializing_if = "Option::is_none")] pub address: Option, - /// Batch size of samples at which to insert + /// Batch size of samples at which to insert. pub batch_size: usize, /// Interval on which to insert data into the database, regardless of the number of collected /// samples. Value is in seconds. pub batch_interval: u64, + + // TODO (https://github.com/oxidecomputer/omicron/issues/4148): This field + // should be removed if single node functionality is removed. + /// Whether ClickHouse is running as a replicated cluster or + /// single-node server. + pub replicated: bool, } impl DbConfig { @@ -95,12 +101,16 @@ impl DbConfig { /// ClickHouse. pub const DEFAULT_BATCH_INTERVAL: u64 = 5; + /// Default ClickHouse topology. 
+ pub const DEFAULT_REPLICATED: bool = false; + // Construct config with an address, using the defaults for other fields fn with_address(address: SocketAddr) -> Self { Self { address: Some(address), batch_size: Self::DEFAULT_BATCH_SIZE, batch_interval: Self::DEFAULT_BATCH_INTERVAL, + replicated: Self::DEFAULT_REPLICATED, } } } @@ -207,6 +217,7 @@ impl Oximeter { config.db, &resolver, &log, + config.db.replicated, ) .await?, )) diff --git a/package-manifest.toml b/package-manifest.toml index 9189ed09a0..0822225837 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -140,13 +140,15 @@ source.type = "local" source.rust.binary_names = ["oximeter", "clickhouse-schema-updater"] source.rust.release = true source.paths = [ - { from = "smf/oximeter", to = "/var/svc/manifest/site/oximeter" }, { from = "oximeter/db/schema", to = "/opt/oxide/oximeter/schema" }, + { from = "smf/oximeter/{{clickhouse-topology}}/config.toml", to = "/var/svc/manifest/site/oximeter/config.toml" }, + { from = "smf/oximeter/manifest.xml", to = "/var/svc/manifest/site/oximeter/manifest.xml" }, ] output.type = "zone" output.intermediate_only = true [package.clickhouse] +# This service runs a single-node ClickHouse server. service_name = "clickhouse" only_for_targets.image = "standard" source.type = "composite" @@ -169,13 +171,45 @@ source.paths = [ { from = "out/clickhouse", to = "/opt/oxide/clickhouse" }, { from = "smf/clickhouse/manifest.xml", to = "/var/svc/manifest/site/clickhouse/manifest.xml" }, { from = "smf/clickhouse/method_script.sh", to = "/opt/oxide/lib/svc/manifest/clickhouse.sh" }, - { from = "smf/clickhouse/config_replica.xml", to = "/opt/oxide/clickhouse/config.d/config_replica.xml" }, +] +output.type = "zone" +output.intermediate_only = true +setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" + +[package.clickhouse_server] +# This service runs a server for a replicated ClickHouse cluster. 
+# It is complimentary to the clickhouse_keeper service. +# One cannot be run without the other. +service_name = "clickhouse_server" +only_for_targets.image = "standard" +source.type = "composite" +source.packages = [ + "clickhouse_server_svc.tar.gz", + "internal-dns-cli.tar.gz", + "omicron-clickhouse-admin.tar.gz", + "zone-setup.tar.gz", + "zone-network-install.tar.gz" +] +output.type = "zone" + +[package.clickhouse_server_svc] +service_name = "clickhouse_server_svc" +only_for_targets.image = "standard" +source.type = "local" +source.paths = [ + { from = "out/clickhouse", to = "/opt/oxide/clickhouse_server" }, + { from = "smf/clickhouse_server/manifest.xml", to = "/var/svc/manifest/site/clickhouse_server/manifest.xml" }, + { from = "smf/clickhouse_server/method_script.sh", to = "/opt/oxide/lib/svc/manifest/clickhouse_server.sh" }, + { from = "smf/clickhouse_server/config_replica.xml", to = "/opt/oxide/clickhouse_server/config.d/config_replica.xml" }, ] output.type = "zone" output.intermediate_only = true setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" [package.clickhouse_keeper] +# This service runs a keeper for a replicated ClickHouse cluster. +# It is complimentary to the clickhouse_server service. +# One cannot be run without the other. 
service_name = "clickhouse_keeper" only_for_targets.image = "standard" source.type = "composite" diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index b2b8703015..cd88345d0a 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -265,12 +265,19 @@ async fn do_target( format!("failed to create directory {}", target_dir) })?; match subcommand { - TargetCommand::Create { image, machine, switch, rack_topology } => { + TargetCommand::Create { + image, + machine, + switch, + rack_topology, + clickhouse_topology, + } => { let target = KnownTarget::new( image.clone(), machine.clone(), switch.clone(), rack_topology.clone(), + clickhouse_topology.clone(), )?; let path = get_single_target(&target_dir, name).await?; diff --git a/package/src/lib.rs b/package/src/lib.rs index 2009de9dfe..b37c1774fd 100644 --- a/package/src/lib.rs +++ b/package/src/lib.rs @@ -68,6 +68,21 @@ pub enum TargetCommand { /// fail in a single-sled environment. `single-sled` relaxes this /// requirement. rack_topology: crate::target::RackTopology, + + #[clap( + short, + long, + default_value = Some("single-node"), + required = false + )] + // TODO (https://github.com/oxidecomputer/omicron/issues/4148): Remove + // once single-node functionality is removed. + /// Specify whether clickhouse will be deployed as a replicated cluster + /// or single-node configuration. + /// + /// Replicated cluster configuration is an experimental feature to be + /// used only for testing. + clickhouse_topology: crate::target::ClickhouseTopology, }, /// List all existing targets List, diff --git a/package/src/target.rs b/package/src/target.rs index 589dba7870..6a6cbd32d8 100644 --- a/package/src/target.rs +++ b/package/src/target.rs @@ -62,6 +62,18 @@ pub enum RackTopology { SingleSled, } +/// Topology of the ClickHouse installation within the rack. 
+#[derive(Clone, Debug, strum::EnumString, strum::Display, ValueEnum)] +#[strum(serialize_all = "kebab-case")] +#[clap(rename_all = "kebab-case")] +pub enum ClickhouseTopology { + /// Use configurations suitable for a replicated ClickHouse cluster deployment. + ReplicatedCluster, + + /// Use configurations suitable for a single-node ClickHouse deployment. + SingleNode, +} + /// A strongly-typed variant of [Target]. #[derive(Clone, Debug)] pub struct KnownTarget { @@ -69,6 +81,7 @@ pub struct KnownTarget { machine: Option, switch: Option, rack_topology: RackTopology, + clickhouse_topology: ClickhouseTopology, } impl KnownTarget { @@ -77,6 +90,7 @@ impl KnownTarget { machine: Option, switch: Option, rack_topology: RackTopology, + clickhouse_topology: ClickhouseTopology, ) -> Result { if matches!(image, Image::Trampoline) { if machine.is_some() { @@ -93,7 +107,7 @@ impl KnownTarget { bail!("'switch=asic' is only valid with 'machine=gimlet'"); } - Ok(Self { image, machine, switch, rack_topology }) + Ok(Self { image, machine, switch, rack_topology, clickhouse_topology }) } } @@ -104,6 +118,7 @@ impl Default for KnownTarget { machine: Some(Machine::NonGimlet), switch: Some(Switch::Stub), rack_topology: RackTopology::MultiSled, + clickhouse_topology: ClickhouseTopology::SingleNode, } } } @@ -119,6 +134,10 @@ impl From for Target { map.insert("switch".to_string(), switch.to_string()); } map.insert("rack-topology".to_string(), kt.rack_topology.to_string()); + map.insert( + "clickhouse-topology".to_string(), + kt.clickhouse_topology.to_string(), + ); Target(map) } } @@ -140,6 +159,7 @@ impl std::str::FromStr for KnownTarget { let mut machine = None; let mut switch = None; let mut rack_topology = None; + let mut clickhouse_topology = None; for (k, v) in target.0.into_iter() { match k.as_str() { @@ -155,6 +175,9 @@ impl std::str::FromStr for KnownTarget { "rack-topology" => { rack_topology = Some(v.parse()?); } + "clickhouse-topology" => { + clickhouse_topology = 
Some(v.parse()?); + } _ => { bail!( "Unknown target key {k}\nValid keys include: [{}]", @@ -173,6 +196,7 @@ impl std::str::FromStr for KnownTarget { machine, switch, rack_topology.unwrap_or(RackTopology::MultiSled), + clickhouse_topology.unwrap_or(ClickhouseTopology::SingleNode), ) } } diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index c9ed0c2248..8c26d0bf58 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -58,14 +58,23 @@ const OXIMETER_COUNT: usize = 1; // when Nexus provisions Clickhouse. // TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use // omicron_common::policy::CLICKHOUSE_SERVER_REDUNDANCY once we enable -// replicated ClickHouse +// replicated ClickHouse. +// Set to 0 when testing replicated ClickHouse. const CLICKHOUSE_COUNT: usize = 1; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // when Nexus provisions Clickhouse keeper. // TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use // omicron_common::policy::CLICKHOUSE_KEEPER_REDUNDANCY once we enable // replicated ClickHouse +// Set to 3 when testing replicated ClickHouse. const CLICKHOUSE_KEEPER_COUNT: usize = 0; +// TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove +// when Nexus provisions Clickhouse server. +// TODO(https://github.com/oxidecomputer/omicron/issues/4000): Use +// omicron_common::policy::CLICKHOUSE_SERVER_REDUNDANCY once we enable +// replicated ClickHouse. +// Set to 2 when testing replicated ClickHouse +const CLICKHOUSE_SERVER_COUNT: usize = 0; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove. // when Nexus provisions Crucible. const MINIMUM_U2_COUNT: usize = 3; @@ -628,6 +637,47 @@ impl Plan { }); } + // Provision Clickhouse server zones, continuing to stripe across sleds. 
+ // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove + // Temporary linter rule until replicated Clickhouse is enabled + #[allow(clippy::reversed_empty_ranges)] + for _ in 0..CLICKHOUSE_SERVER_COUNT { + let sled = { + let which_sled = + sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; + &mut sled_info[which_sled] + }; + let id = OmicronZoneUuid::new_v4(); + let ip = sled.addr_alloc.next().expect("Not enough addrs"); + // TODO: This may need to be a different port if/when to have single node + // and replicated running side by side as per stage 1 of RFD 468. + let port = omicron_common::address::CLICKHOUSE_PORT; + let address = SocketAddrV6::new(ip, port, 0, 0); + dns_builder + .host_zone_with_one_backend( + id, + ip, + ServiceName::ClickhouseServer, + port, + ) + .unwrap(); + let dataset_name = + sled.alloc_dataset_from_u2s(DatasetType::ClickhouseServer)?; + let filesystem_pool = Some(dataset_name.pool().clone()); + sled.request.zones.push(OmicronZoneConfig { + // TODO-cleanup use TypedUuid everywhere + id: id.into_untyped_uuid(), + underlay_address: ip, + zone_type: OmicronZoneType::ClickhouseServer { + address, + dataset: OmicronZoneDataset { + pool_name: dataset_name.pool().clone(), + }, + }, + filesystem_pool, + }); + } + // Provision Clickhouse Keeper zones, continuing to stripe across sleds. // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // Temporary linter rule until replicated Clickhouse is enabled diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 32cf844e6d..abc50aa06c 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1618,18 +1618,82 @@ impl ServiceManager { zone: OmicronZoneConfig { zone_type: OmicronZoneType::ClickhouseServer { .. }, - underlay_address: _, + underlay_address, .. }, .. 
}) => { - // We aren't yet deploying this service - error!( - &self.inner.log, - "Deploying ClickhouseServer zones is not yet supported" - ); + let Some(info) = self.inner.sled_info.get() else { + return Err(Error::SledAgentNotReady); + }; + + let listen_addr = *underlay_address; + let listen_port = CLICKHOUSE_PORT.to_string(); + + let nw_setup_service = Self::zone_network_setup_install( + Some(&info.underlay_address), + &installed_zone, + &[listen_addr], + )?; + + let dns_service = Self::dns_install(info, None, &None).await?; + + let config = PropertyGroupBuilder::new("config") + .add_property( + "listen_addr", + "astring", + listen_addr.to_string(), + ) + .add_property("listen_port", "astring", listen_port) + .add_property("store", "astring", "/data"); + let clickhouse_server_service = + ServiceBuilder::new("oxide/clickhouse_server") + .add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(config), + ); + + let ch_address = + SocketAddr::new(IpAddr::V6(listen_addr), CLICKHOUSE_PORT) + .to_string(); + + let admin_address = SocketAddr::new( + IpAddr::V6(listen_addr), + CLICKHOUSE_ADMIN_PORT, + ) + .to_string(); + + let clickhouse_admin_config = + PropertyGroupBuilder::new("config") + .add_property( + "clickhouse_address", + "astring", + ch_address, + ) + .add_property("http_address", "astring", admin_address); + let clickhouse_admin_service = + ServiceBuilder::new("oxide/clickhouse-admin").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(clickhouse_admin_config), + ); - todo!() + let profile = ProfileBuilder::new("omicron") + .add_service(nw_setup_service) + .add_service(disabled_ssh_service) + .add_service(clickhouse_server_service) + .add_service(dns_service) + .add_service(enabled_dns_client_service) + .add_service(clickhouse_admin_service); + profile + .add_to_zone(&self.inner.log, &installed_zone) + .await + .map_err(|err| { + Error::io( + "Failed to setup clickhouse server profile", + err, + ) + })?; + 
RunningZone::boot(installed_zone).await? } ZoneArgs::Omicron(OmicronZoneConfigLocal { diff --git a/smf/clickhouse/method_script.sh b/smf/clickhouse/method_script.sh index 224d759cf3..bb5dd960a1 100755 --- a/smf/clickhouse/method_script.sh +++ b/smf/clickhouse/method_script.sh @@ -10,136 +10,13 @@ LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")" LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")" DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")" -# TEMPORARY: Racks will be set up with single node ClickHouse until -# Nexus provisions services so there is no divergence between racks -# https://github.com/oxidecomputer/omicron/issues/732 -single_node=true +args=( +"--log-file" "/var/tmp/clickhouse-server.log" +"--errorlog-file" "/var/tmp/clickhouse-server.errlog" +"--" +"--path" "${DATASTORE}" +"--listen_host" "$LISTEN_ADDR" +"--http_port" "$LISTEN_PORT" +) -command=() -# TODO((https://github.com/oxidecomputer/omicron/issues/4000)): Remove single node mode once all racks are running in replicated mode -if $single_node -then - command+=( - "/opt/oxide/clickhouse/clickhouse" "server" - "--log-file" "/var/tmp/clickhouse-server.log" - "--errorlog-file" "/var/tmp/clickhouse-server.errlog" - "--" - "--path" "${DATASTORE}" - "--listen_host" "$LISTEN_ADDR" - "--http_port" "$LISTEN_PORT" - ) -else - # Retrieve hostnames (SRV records in internal DNS) of the clickhouse nodes. - CH_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse -H)" - - if [[ -z "$CH_ADDRS" ]]; then - printf 'ERROR: found no hostnames for other ClickHouse nodes\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - declare -a nodes=($CH_ADDRS) - - for i in "${nodes[@]}" - do - if ! 
grep -q "host.control-plane.oxide.internal" <<< "${i}"; then - printf 'ERROR: retrieved ClickHouse hostname does not match the expected format\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - done - - # Assign hostnames to replicas - REPLICA_HOST_01="${nodes[0]}" - REPLICA_HOST_02="${nodes[1]}" - - # Retrieve hostnames (SRV records in internal DNS) of the keeper nodes. - K_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-keeper -H)" - - if [[ -z "$K_ADDRS" ]]; then - printf 'ERROR: found no hostnames for other ClickHouse Keeper nodes\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - declare -a keepers=($K_ADDRS) - - for i in "${keepers[@]}" - do - if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then - printf 'ERROR: retrieved ClickHouse Keeper hostname does not match the expected format\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - done - - if [[ "${#keepers[@]}" != 3 ]] - then - printf "ERROR: expected 3 ClickHouse Keeper hosts, found "${#keepers[@]}" instead\n" >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - # Identify the node type this is as this will influence how the config is constructed - # TODO(https://github.com/oxidecomputer/omicron/issues/3824): There are probably much - # better ways to do this service discovery, but this works for now. - # The services contain the same IDs as the hostnames. 
- CLICKHOUSE_SVC="$(zonename | tr -dc [:digit:])" - REPLICA_IDENTIFIER_01="$( echo "${REPLICA_HOST_01}" | tr -dc [:digit:])" - REPLICA_IDENTIFIER_02="$( echo "${REPLICA_HOST_02}" | tr -dc [:digit:])" - if [[ $REPLICA_IDENTIFIER_01 == $CLICKHOUSE_SVC ]] - then - REPLICA_DISPLAY_NAME="oximeter_cluster node 1" - REPLICA_NUMBER="01" - elif [[ $REPLICA_IDENTIFIER_02 == $CLICKHOUSE_SVC ]] - then - REPLICA_DISPLAY_NAME="oximeter_cluster node 2" - REPLICA_NUMBER="02" - else - printf 'ERROR: service name does not match any of the identified ClickHouse hostnames\n' >&2 - exit "$SMF_EXIT_ERR_CONFIG" - fi - - # Setting environment variables this way is best practice, but has the downside of - # obscuring the field values to anyone ssh-ing into the zone. To mitigate this, - # we will be saving them to ${DATASTORE}/config_env_vars - export CH_LOG="${DATASTORE}/clickhouse-server.log" - export CH_ERROR_LOG="${DATASTORE}/clickhouse-server.errlog" - export CH_REPLICA_DISPLAY_NAME=${REPLICA_DISPLAY_NAME} - export CH_LISTEN_ADDR=${LISTEN_ADDR} - export CH_LISTEN_PORT=${LISTEN_PORT} - export CH_DATASTORE=${DATASTORE} - export CH_TMP_PATH="${DATASTORE}/tmp/" - export CH_USER_FILES_PATH="${DATASTORE}/user_files/" - export CH_USER_LOCAL_DIR="${DATASTORE}/access/" - export CH_FORMAT_SCHEMA_PATH="${DATASTORE}/format_schemas/" - export CH_REPLICA_NUMBER=${REPLICA_NUMBER} - export CH_REPLICA_HOST_01=${REPLICA_HOST_01} - export CH_REPLICA_HOST_02=${REPLICA_HOST_02} - export CH_KEEPER_HOST_01="${keepers[0]}" - export CH_KEEPER_HOST_02="${keepers[1]}" - export CH_KEEPER_HOST_03="${keepers[2]}" - - content="CH_LOG="${CH_LOG}"\n\ - CH_ERROR_LOG="${CH_ERROR_LOG}"\n\ - CH_REPLICA_DISPLAY_NAME="${CH_REPLICA_DISPLAY_NAME}"\n\ - CH_LISTEN_ADDR="${CH_LISTEN_ADDR}"\n\ - CH_LISTEN_PORT="${CH_LISTEN_PORT}"\n\ - CH_DATASTORE="${CH_DATASTORE}"\n\ - CH_TMP_PATH="${CH_TMP_PATH}"\n\ - CH_USER_FILES_PATH="${CH_USER_FILES_PATH}"\n\ - CH_USER_LOCAL_DIR="${CH_USER_LOCAL_DIR}"\n\ - 
CH_FORMAT_SCHEMA_PATH="${CH_FORMAT_SCHEMA_PATH}"\n\ - CH_REPLICA_NUMBER="${CH_REPLICA_NUMBER}"\n\ - CH_REPLICA_HOST_01="${CH_REPLICA_HOST_01}"\n\ - CH_REPLICA_HOST_02="${CH_REPLICA_HOST_02}"\n\ - CH_KEEPER_HOST_01="${CH_KEEPER_HOST_01}"\n\ - CH_KEEPER_HOST_02="${CH_KEEPER_HOST_02}"\n\ - CH_KEEPER_HOST_03="${CH_KEEPER_HOST_03}"" - - echo $content >> "${DATASTORE}/config_env_vars" - - - # The clickhouse binary must be run from within the directory that contains it. - # Otherwise, it does not automatically detect the configuration files, nor does - # it append them when necessary - cd /opt/oxide/clickhouse/ - command+=("./clickhouse" "server") -fi - -exec "${command[@]}" & \ No newline at end of file +exec /opt/oxide/clickhouse/clickhouse server "${args[@]}" & \ No newline at end of file diff --git a/smf/clickhouse/config_replica.xml b/smf/clickhouse_server/config_replica.xml similarity index 100% rename from smf/clickhouse/config_replica.xml rename to smf/clickhouse_server/config_replica.xml diff --git a/smf/clickhouse_server/manifest.xml b/smf/clickhouse_server/manifest.xml new file mode 100644 index 0000000000..8ab4f78bcb --- /dev/null +++ b/smf/clickhouse_server/manifest.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/smf/clickhouse_server/method_script.sh b/smf/clickhouse_server/method_script.sh new file mode 100755 index 0000000000..a0d61072ac --- /dev/null +++ b/smf/clickhouse_server/method_script.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +set -x +set -o errexit +set -o pipefail + +. /lib/svc/share/smf_include.sh + +LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")" +LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")" +DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")" + +# Retrieve hostnames (SRV records in internal DNS) of the clickhouse nodes. 
+CH_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-server -H)" + +if [[ -z "$CH_ADDRS" ]]; then + printf 'ERROR: found no hostnames for other ClickHouse server nodes\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +declare -a nodes=($CH_ADDRS) + +for i in "${nodes[@]}" +do + if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then + printf 'ERROR: retrieved ClickHouse hostname does not match the expected format\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" + fi +done + +# Assign hostnames to replicas +REPLICA_HOST_01="${nodes[0]}" +REPLICA_HOST_02="${nodes[1]}" + +# Retrieve hostnames (SRV records in internal DNS) of the keeper nodes. +K_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-keeper -H)" + +if [[ -z "$K_ADDRS" ]]; then + printf 'ERROR: found no hostnames for other ClickHouse Keeper nodes\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +declare -a keepers=($K_ADDRS) + +for i in "${keepers[@]}" +do + if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then + printf 'ERROR: retrieved ClickHouse Keeper hostname does not match the expected format\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" + fi +done + +if [[ "${#keepers[@]}" != 3 ]] +then + printf "ERROR: expected 3 ClickHouse Keeper hosts, found "${#keepers[@]}" instead\n" >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +# Identify the node type this is as this will influence how the config is constructed +# TODO(https://github.com/oxidecomputer/omicron/issues/3824): There are probably much +# better ways to do this service discovery, but this works for now. +# The services contain the same IDs as the hostnames. 
+CLICKHOUSE_SVC="$(zonename | tr -dc [:digit:])" +REPLICA_IDENTIFIER_01="$( echo "${REPLICA_HOST_01}" | tr -dc [:digit:])" +REPLICA_IDENTIFIER_02="$( echo "${REPLICA_HOST_02}" | tr -dc [:digit:])" +if [[ $REPLICA_IDENTIFIER_01 == $CLICKHOUSE_SVC ]] +then + REPLICA_DISPLAY_NAME="oximeter_cluster node 1" + REPLICA_NUMBER="01" +elif [[ $REPLICA_IDENTIFIER_02 == $CLICKHOUSE_SVC ]] +then + REPLICA_DISPLAY_NAME="oximeter_cluster node 2" + REPLICA_NUMBER="02" +else + printf 'ERROR: service name does not match any of the identified ClickHouse hostnames\n' >&2 + exit "$SMF_EXIT_ERR_CONFIG" +fi + +# Setting environment variables this way is best practice, but has the downside of +# obscuring the field values to anyone ssh-ing into the zone. To mitigate this, +# we will be saving them to ${DATASTORE}/config_env_vars +export CH_LOG="${DATASTORE}/clickhouse-server.log" +export CH_ERROR_LOG="${DATASTORE}/clickhouse-server.errlog" +export CH_REPLICA_DISPLAY_NAME=${REPLICA_DISPLAY_NAME} +export CH_LISTEN_ADDR=${LISTEN_ADDR} +export CH_LISTEN_PORT=${LISTEN_PORT} +export CH_DATASTORE=${DATASTORE} +export CH_TMP_PATH="${DATASTORE}/tmp/" +export CH_USER_FILES_PATH="${DATASTORE}/user_files/" +export CH_USER_LOCAL_DIR="${DATASTORE}/access/" +export CH_FORMAT_SCHEMA_PATH="${DATASTORE}/format_schemas/" +export CH_REPLICA_NUMBER=${REPLICA_NUMBER} +export CH_REPLICA_HOST_01=${REPLICA_HOST_01} +export CH_REPLICA_HOST_02=${REPLICA_HOST_02} +export CH_KEEPER_HOST_01="${keepers[0]}" +export CH_KEEPER_HOST_02="${keepers[1]}" +export CH_KEEPER_HOST_03="${keepers[2]}" + +content="CH_LOG="${CH_LOG}"\n\ +CH_ERROR_LOG="${CH_ERROR_LOG}"\n\ +CH_REPLICA_DISPLAY_NAME="${CH_REPLICA_DISPLAY_NAME}"\n\ +CH_LISTEN_ADDR="${CH_LISTEN_ADDR}"\n\ +CH_LISTEN_PORT="${CH_LISTEN_PORT}"\n\ +CH_DATASTORE="${CH_DATASTORE}"\n\ +CH_TMP_PATH="${CH_TMP_PATH}"\n\ +CH_USER_FILES_PATH="${CH_USER_FILES_PATH}"\n\ +CH_USER_LOCAL_DIR="${CH_USER_LOCAL_DIR}"\n\ +CH_FORMAT_SCHEMA_PATH="${CH_FORMAT_SCHEMA_PATH}"\n\ 
+CH_REPLICA_NUMBER="${CH_REPLICA_NUMBER}"\n\ +CH_REPLICA_HOST_01="${CH_REPLICA_HOST_01}"\n\ +CH_REPLICA_HOST_02="${CH_REPLICA_HOST_02}"\n\ +CH_KEEPER_HOST_01="${CH_KEEPER_HOST_01}"\n\ +CH_KEEPER_HOST_02="${CH_KEEPER_HOST_02}"\n\ +CH_KEEPER_HOST_03="${CH_KEEPER_HOST_03}"" + +echo $content >> "${DATASTORE}/config_env_vars" + + +# The clickhouse binary must be run from within the directory that contains it. +# Otherwise, it does not automatically detect the configuration files, nor does +# it append them when necessary +cd /opt/oxide/clickhouse_server/ + +exec ./clickhouse server & \ No newline at end of file diff --git a/smf/oximeter/config.toml b/smf/oximeter/replicated-cluster/config.toml similarity index 91% rename from smf/oximeter/config.toml rename to smf/oximeter/replicated-cluster/config.toml index ca14fe6ec8..f7958e5eb1 100644 --- a/smf/oximeter/config.toml +++ b/smf/oximeter/replicated-cluster/config.toml @@ -3,6 +3,7 @@ [db] batch_size = 1000 batch_interval = 5 # In seconds +replicated = true [log] level = "debug" diff --git a/smf/oximeter/single-node/config.toml b/smf/oximeter/single-node/config.toml new file mode 100644 index 0000000000..bc0418159c --- /dev/null +++ b/smf/oximeter/single-node/config.toml @@ -0,0 +1,12 @@ +# Example configuration file for running an oximeter collector server + +[db] +batch_size = 1000 +batch_interval = 5 # In seconds +replicated = false + +[log] +level = "debug" +mode = "file" +path = "/dev/stdout" +if_exists = "append" From 256c06663a6298f8fe7f33a003888c6dd923f3db Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 20 Aug 2024 09:52:33 -0700 Subject: [PATCH 45/51] [sp-sim] rudimentary simulation of sensors (#6313) In order to develop Oximeter metrics for SP sensor readings, emitted by MGS, we would like the `sp-sim` binary to be able to simulate the protocol for reading SP sensors. 
This branch adds a fairly rudimentary implementation of this: components configured in the `sp-sim` config file may now include an array of one or more `sensors`, like this: ```toml # ... [[simulated_sps.gimlet.components]] id = "dev-0" device = "tmp117" description = "FAKE Southwest temperature sensor" capabilities = 2 presence = "Present" sensors = [ { name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 }, ] ``` Once this is added, the simulated SP will implement the `num_component_details`, `component_details`, and `read_sensor` functions for any such components: ```console eliza@noctis ~/Code/oxide/omicron $ curl -s http://127.0.0.1:11111/sp/sled/0/component | jq { "components": [ { "component": "sp3-host-cpu", "device": "sp3-host-cpu", "serial_number": null, "description": "FAKE host cpu", "capabilities": 0, "presence": "present" }, { "component": "dev-0", "device": "tmp117", "serial_number": null, "description": "FAKE Southwest temperature sensor", "capabilities": 2, "presence": "present" } ] } eliza@noctis ~/Code/oxide/omicron $ curl -s http://127.0.0.1:11111/sp/sled/0/component/dev-0 | jq [ { "type": "measurement", "name": "Southwest", "kind": { "kind": "temperature" }, "value": 41.789062 } ] ``` In the future, I would like to extend this functionality substantially: it would be nice to add a notion of a simulated global timestamp, and a mechanism for changing the values of sensor readings dynamically. I think this would be useful for testing the timebase synchronization code we will no doubt need to write eventually for this. But, for now, being able to hard-code sensor values is a start. 
--- sp-sim/examples/config.toml | 21 ++++ sp-sim/src/config.rs | 14 +++ sp-sim/src/gimlet.rs | 33 ++++-- sp-sim/src/lib.rs | 1 + sp-sim/src/sensors.rs | 218 ++++++++++++++++++++++++++++++++++++ sp-sim/src/sidecar.rs | 35 ++++-- 6 files changed, 307 insertions(+), 15 deletions(-) create mode 100644 sp-sim/src/sensors.rs diff --git a/sp-sim/examples/config.toml b/sp-sim/examples/config.toml index cf338ecf2e..f53ea7cfd8 100644 --- a/sp-sim/examples/config.toml +++ b/sp-sim/examples/config.toml @@ -24,6 +24,16 @@ capabilities = 0 presence = "Present" serial_console = "[::1]:33312" +[[simulated_sps.gimlet.components]] +id = "dev-0" +device = "tmp117" +description = "FAKE Southwest temperature sensor" +capabilities = 2 +presence = "Present" +sensors = [ + { name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 }, +] + [[simulated_sps.gimlet]] multicast_addr = "ff15:0:1de::2" bind_addrs = ["[::]:33320", "[::]:33321"] @@ -39,6 +49,17 @@ capabilities = 0 presence = "Present" serial_console = "[::1]:33322" +[[simulated_sps.gimlet.components]] +id = "dev-0" +device = "tmp117" +description = "FAKE Southwest temperature sensor" +capabilities = 2 +presence = "Present" +sensors = [ + { name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 }, +] + + [log] # Show log messages of this level and more severe level = "debug" diff --git a/sp-sim/src/config.rs b/sp-sim/src/config.rs index b64953e5ed..d45e956dee 100644 --- a/sp-sim/src/config.rs +++ b/sp-sim/src/config.rs @@ -5,6 +5,7 @@ //! Interfaces for parsing configuration files and working with a simulated SP //! configuration +use crate::sensors; use dropshot::ConfigLogging; use gateway_messages::DeviceCapabilities; use gateway_messages::DevicePresence; @@ -59,6 +60,9 @@ pub struct SpComponentConfig { /// /// Only supported for components inside a [`GimletConfig`]. 
pub serial_console: Option, + + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub sensors: Vec, } /// Configuration of a simulated sidecar SP @@ -93,6 +97,16 @@ pub struct Config { pub log: ConfigLogging, } +/// Configuration for a component's sensor readings. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct SensorConfig { + #[serde(flatten)] + pub def: sensors::SensorDef, + + #[serde(flatten)] + pub state: sensors::SensorState, +} + impl Config { /// Load a `Config` from the given TOML file /// diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs index e980a4b67d..70c2e72fcb 100644 --- a/sp-sim/src/gimlet.rs +++ b/sp-sim/src/gimlet.rs @@ -6,6 +6,7 @@ use crate::config::GimletConfig; use crate::config::SpComponentConfig; use crate::helpers::rot_slot_id_from_u16; use crate::helpers::rot_slot_id_to_u16; +use crate::sensors::Sensors; use crate::serial_number_padded; use crate::server; use crate::server::SimSpHandler; @@ -630,6 +631,7 @@ struct Handler { startup_options: StartupOptions, update_state: SimSpUpdate, reset_pending: Option, + sensors: Sensors, last_request_handled: Option, @@ -665,9 +667,12 @@ impl Handler { .push(&*Box::leak(c.description.clone().into_boxed_str())); } + let sensors = Sensors::from_component_configs(&components); + Self { log, components, + sensors, leaked_component_device_strings, leaked_component_description_strings, serial_number, @@ -1206,13 +1211,16 @@ impl SpHandler for Handler { port: SpPort, component: SpComponent, ) -> Result { + let num_details = + self.sensors.num_component_details(&component).unwrap_or(0); debug!( - &self.log, "asked for component details (returning 0 details)"; + &self.log, "asked for number of component details"; "sender" => %sender, "port" => ?port, "component" => ?component, + "num_details" => num_details ); - Ok(0) + Ok(num_details) } fn component_details( @@ -1220,9 +1228,20 @@ impl SpHandler for Handler { component: SpComponent, index: BoundsChecked, ) -> 
ComponentDetails { - // We return 0 for all components, so we should never be called (`index` - // would have to have been bounds checked to live in 0..0). - unreachable!("asked for {component:?} details index {index:?}") + let Some(sensor_details) = + self.sensors.component_details(&component, index) + else { + unreachable!( + "this is a gimlet, so it should have no port status details" + ); + }; + debug!( + &self.log, "asked for component details for a sensor"; + "component" => ?component, + "index" => index.0, + "details" => ?sensor_details + ); + sensor_details } fn component_clear_status( @@ -1445,9 +1464,9 @@ impl SpHandler for Handler { fn read_sensor( &mut self, - _request: gateway_messages::SensorRequest, + request: gateway_messages::SensorRequest, ) -> std::result::Result { - Err(SpError::RequestUnsupportedForSp) + self.sensors.read_sensor(request).map_err(SpError::Sensor) } fn current_time(&mut self) -> std::result::Result { diff --git a/sp-sim/src/lib.rs b/sp-sim/src/lib.rs index 0f340ed642..15f2034aa8 100644 --- a/sp-sim/src/lib.rs +++ b/sp-sim/src/lib.rs @@ -5,6 +5,7 @@ pub mod config; mod gimlet; mod helpers; +mod sensors; mod server; mod sidecar; mod update; diff --git a/sp-sim/src/sensors.rs b/sp-sim/src/sensors.rs new file mode 100644 index 0000000000..fc684af01b --- /dev/null +++ b/sp-sim/src/sensors.rs @@ -0,0 +1,218 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use crate::config::SpComponentConfig; +use gateway_messages::measurement::MeasurementError; +use gateway_messages::measurement::MeasurementKind; +use gateway_messages::sp_impl::BoundsChecked; +use gateway_messages::ComponentDetails; +use gateway_messages::DeviceCapabilities; +use gateway_messages::Measurement; +use gateway_messages::SensorDataMissing; +use gateway_messages::SensorError; +use gateway_messages::SensorReading; +use gateway_messages::SensorRequest; +use gateway_messages::SensorRequestKind; +use gateway_messages::SensorResponse; +use gateway_messages::SpComponent; + +use std::collections::HashMap; + +pub(crate) struct Sensors { + by_component: HashMap>, + sensors: Vec, +} + +#[derive(Debug)] +struct Sensor { + def: SensorDef, + state: SensorState, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, PartialEq)] +pub struct SensorDef { + pub name: String, + pub kind: MeasurementKind, +} + +// TODO(eliza): note that currently, we just hardcode these in +// `SensorConfig`. Eventually, it would be neat to allow the sensor to be +// changed dynamically as part of a simulation.
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, PartialEq)] +pub struct SensorState { + #[serde(default)] + pub last_error: Option, + + #[serde(default)] + pub last_data: Option, +} + +#[derive( + Clone, Copy, Debug, serde::Serialize, serde::Deserialize, PartialEq, +)] +pub struct LastError { + pub timestamp: u64, + pub value: SensorDataMissing, +} + +#[derive( + Clone, Copy, Debug, serde::Serialize, serde::Deserialize, PartialEq, +)] +pub struct LastData { + pub timestamp: u64, + pub value: f32, +} + +impl SensorState { + fn last_reading(&self) -> SensorReading { + match self { + Self { last_data: Some(data), last_error: Some(error) } => { + if data.timestamp >= error.timestamp { + SensorReading { + value: Ok(data.value), + timestamp: data.timestamp, + } + } else { + SensorReading { + value: Err(error.value), + timestamp: error.timestamp, + } + } + } + Self { last_data: Some(data), last_error: None } => SensorReading { + value: Ok(data.value), + timestamp: data.timestamp, + }, + Self { last_data: None, last_error: Some(error) } => { + SensorReading { + value: Err(error.value), + timestamp: error.timestamp, + } + } + Self { last_data: None, last_error: None } => SensorReading { + value: Err(SensorDataMissing::DeviceNotPresent), + timestamp: 0, // TODO(eliza): what do? 
+ }, + } + } +} + +impl Sensors { + pub(crate) fn from_component_configs<'a>( + cfgs: impl IntoIterator, + ) -> Self { + let mut sensors = Vec::new(); + let mut by_component = HashMap::new(); + for cfg in cfgs { + if cfg.sensors.is_empty() { + continue; + } + if !cfg + .capabilities + .contains(DeviceCapabilities::HAS_MEASUREMENT_CHANNELS) + { + panic!( + "invalid component config: a device with sensors should \ + have the `HAS_MEASUREMENT_CHANNELS` capability:{cfg:#?}" + ); + } + + let mut ids = Vec::with_capacity(cfg.sensors.len()); + for sensor in &cfg.sensors { + let sensor_id = sensors.len() as u32; + sensors.push(Sensor { + def: sensor.def.clone(), + state: sensor.state.clone(), + }); + ids.push(sensor_id) + } + + let component = SpComponent::try_from(cfg.id.as_str()).unwrap(); + let prev = by_component.insert(component, ids); + assert!(prev.is_none(), "component ID {component} already exists!"); + } + Self { sensors, by_component } + } + + fn sensor_for_component<'sensors>( + &'sensors self, + component: &SpComponent, + index: BoundsChecked, + ) -> Option<&'sensors Sensor> { + let &id = self.by_component.get(component)?.get(index.0 as usize)?; + self.sensors.get(id as usize) + } + + pub(crate) fn num_component_details( + &self, + component: &SpComponent, + ) -> Option { + let len = self + .by_component + .get(component)? + .len() + .try_into() + .expect("why would you have more than `u32::MAX` sensors?"); + Some(len) + } + + /// This method returns an `Option` because the component's details might + /// be a port status rather than a measurement, if we eventually decide to + /// implement port statuses in the simulated sidecar... 
+ pub(crate) fn component_details( + &self, + component: &SpComponent, + index: BoundsChecked, + ) -> Option { + let sensor = self.sensor_for_component(component, index)?; + let value = + sensor.state.last_reading().value.map_err(|err| match err { + SensorDataMissing::DeviceError => MeasurementError::DeviceError, + SensorDataMissing::DeviceNotPresent => { + MeasurementError::NotPresent + } + SensorDataMissing::DeviceOff => MeasurementError::DeviceOff, + SensorDataMissing::DeviceTimeout => { + MeasurementError::DeviceTimeout + } + SensorDataMissing::DeviceUnavailable => { + MeasurementError::DeviceUnavailable + } + }); + Some(ComponentDetails::Measurement(Measurement { + name: sensor.def.name.clone(), + kind: sensor.def.kind, + value, + })) + } + + pub(crate) fn read_sensor( + &self, + SensorRequest { id, kind }: SensorRequest, + ) -> Result { + let sensor = + self.sensors.get(id as usize).ok_or(SensorError::InvalidSensor)?; + match kind { + SensorRequestKind::LastReading => { + Ok(SensorResponse::LastReading(sensor.state.last_reading())) + } + SensorRequestKind::ErrorCount => { + let count = + // TODO(eliza): simulate more than one error... 
+ if sensor.state.last_error.is_some() { 1 } else { 0 }; + Ok(SensorResponse::ErrorCount(count)) + } + SensorRequestKind::LastData => { + let LastData { timestamp, value } = + sensor.state.last_data.ok_or(SensorError::NoReading)?; + Ok(SensorResponse::LastData { value, timestamp }) + } + SensorRequestKind::LastError => { + let LastError { timestamp, value } = + sensor.state.last_error.ok_or(SensorError::NoReading)?; + Ok(SensorResponse::LastError { value, timestamp }) + } + } + } +} diff --git a/sp-sim/src/sidecar.rs b/sp-sim/src/sidecar.rs index c2fb2467d8..bef1d26c78 100644 --- a/sp-sim/src/sidecar.rs +++ b/sp-sim/src/sidecar.rs @@ -8,6 +8,7 @@ use crate::config::SimulatedSpsConfig; use crate::config::SpComponentConfig; use crate::helpers::rot_slot_id_from_u16; use crate::helpers::rot_slot_id_to_u16; +use crate::sensors::Sensors; use crate::serial_number_padded; use crate::server; use crate::server::SimSpHandler; @@ -377,6 +378,7 @@ struct Handler { // our life as a simulator. leaked_component_device_strings: Vec<&'static str>, leaked_component_description_strings: Vec<&'static str>, + sensors: Sensors, serial_number: String, ignition: FakeIgnition, @@ -417,9 +419,12 @@ impl Handler { .push(&*Box::leak(c.description.clone().into_boxed_str())); } + let sensors = Sensors::from_component_configs(&components); + Self { log, components, + sensors, leaked_component_device_strings, leaked_component_description_strings, serial_number, @@ -929,13 +934,18 @@ impl SpHandler for Handler { port: SpPort, component: SpComponent, ) -> Result { - warn!( - &self.log, "asked for component details (returning 0 details)"; + let num_sensor_details = + self.sensors.num_component_details(&component).unwrap_or(0); + // TODO: here is where we might also handle port statuses, if we decide + // to simulate that later... 
+ debug!( + &self.log, "asked for number of component details"; "sender" => %sender, "port" => ?port, "component" => ?component, + "num_details" => num_sensor_details ); - Ok(0) + Ok(num_sensor_details) } fn component_details( @@ -943,9 +953,18 @@ impl SpHandler for Handler { component: SpComponent, index: BoundsChecked, ) -> ComponentDetails { - // We return 0 for all components, so we should never be called (`index` - // would have to have been bounds checked to live in 0..0). - unreachable!("asked for {component:?} details index {index:?}") + let Some(sensor_details) = + self.sensors.component_details(&component, index) + else { + todo!("simulate port status details..."); + }; + debug!( + &self.log, "asked for component details for a sensor"; + "component" => ?component, + "index" => index.0, + "details" => ?sensor_details + ); + sensor_details } fn component_clear_status( @@ -1163,9 +1182,9 @@ impl SpHandler for Handler { fn read_sensor( &mut self, - _request: gateway_messages::SensorRequest, + request: gateway_messages::SensorRequest, ) -> std::result::Result { - Err(SpError::RequestUnsupportedForSp) + self.sensors.read_sensor(request).map_err(SpError::Sensor) } fn current_time(&mut self) -> std::result::Result { From 2e0025cc6b0c19e9024419a6d6096c5b49c8d720 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 20 Aug 2024 10:26:17 -0700 Subject: [PATCH 46/51] back out "Update Rust crate tokio to 1.39.2 (#6249)" (#6356) Tokio 1.39 updated its mio dependency to 1.0, which changed the waker impl on illumos from a self-pipe to eventfd. That has caused several issues already: * https://github.com/oxidecomputer/helios/issues/169 * https://github.com/oxidecomputer/helios/pull/171 Based on these and the potential for other lurking issues, we're making a policy decision to roll back to 1.38 (mio 0.8) for r10. We can't be off of the train forever so we're aiming to land the 1.39 update early in the r11 cycle. This backs out commit d7d4beaf0dbfa82c6ae0da10a6ce43b3c5a89142. 
--- Cargo.lock | 15 ++++++++------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 20 ++++++++++---------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6bd71f6d38..96a1db4983 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6494,7 +6494,7 @@ dependencies = [ "log", "managed", "memchr", - "mio 1.0.2", + "mio 0.8.11", "nix 0.28.0", "nom", "num-bigint-dig", @@ -10605,27 +10605,28 @@ dependencies = [ [[package]] name = "tokio" -version = "1.39.2" +version = "1.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1" +checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df" dependencies = [ "backtrace", "bytes", "libc", - "mio 1.0.2", + "mio 0.8.11", + "num_cpus", "parking_lot 0.12.2", "pin-project-lite", "signal-hook-registry", "socket2 0.5.7", "tokio-macros", - "windows-sys 0.52.0", + "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "2.4.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 83aea83ddf..ce2b7f8eb0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -577,7 +577,7 @@ textwrap = "0.16.1" test-strategy = "0.3.1" thiserror = "1.0" tofino = { git = "https://github.com/oxidecomputer/tofino", branch = "main" } -tokio = "1.39.2" +tokio = "1.38.1" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.15" tokio-tungstenite = "0.20" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 1c58626d2d..014444c542 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -107,7 +107,7 @@ string_cache = { version = "0.8.7" } subtle = { version = 
"2.5.0" } syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.74", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", "parsing"] } -tokio = { version = "1.39.2", features = ["full", "test-util"] } +tokio = { version = "1.38.1", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } @@ -217,7 +217,7 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extr syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.74", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.18", default-features = false, features = ["formatting", "parsing"] } -tokio = { version = "1.39.2", features = ["full", "test-util"] } +tokio = { version = "1.38.1", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.11", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.11", features = ["codec", "io-util"] } @@ -237,7 +237,7 @@ zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } [target.x86_64-unknown-linux-gnu.dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } linux-raw-sys = { version = "0.4.13", default-features = false, features = ["elf", "errno", "general", "ioctl", "no_std", "std", "system"] } -mio = { version = "1.0.2", features = ["net", "os-ext"] } +mio = { version = "0.8.11", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" 
} rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } @@ -246,35 +246,35 @@ signal-hook-mio = { version = "0.2.4", default-features = false, features = ["su [target.x86_64-unknown-linux-gnu.build-dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } linux-raw-sys = { version = "0.4.13", default-features = false, features = ["elf", "errno", "general", "ioctl", "no_std", "std", "system"] } -mio = { version = "1.0.2", features = ["net", "os-ext"] } +mio = { version = "0.8.11", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.x86_64-apple-darwin.dependencies] -mio = { version = "1.0.2", features = ["net", "os-ext"] } +mio = { version = "0.8.11", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.x86_64-apple-darwin.build-dependencies] -mio = { version = "1.0.2", features = ["net", "os-ext"] } +mio = { version = "0.8.11", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.aarch64-apple-darwin.dependencies] -mio = { version = "1.0.2", features = ["net", "os-ext"] } +mio = { 
version = "0.8.11", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } signal-hook-mio = { version = "0.2.4", default-features = false, features = ["support-v0_8", "support-v1_0"] } [target.aarch64-apple-darwin.build-dependencies] -mio = { version = "1.0.2", features = ["net", "os-ext"] } +mio = { version = "0.8.11", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } @@ -282,7 +282,7 @@ signal-hook-mio = { version = "0.2.4", default-features = false, features = ["su [target.x86_64-unknown-illumos.dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } -mio = { version = "1.0.2", features = ["net", "os-ext"] } +mio = { version = "0.8.11", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } @@ -291,7 +291,7 @@ toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", featu [target.x86_64-unknown-illumos.build-dependencies] dof = { version = "0.3.0", default-features = false, features = ["des"] } -mio = { version = "1.0.2", features = ["net", "os-ext"] } +mio = { version = "0.8.11", features = ["net", "os-ext"] } nix = { version = "0.28.0", features = ["feature", "fs", "ioctl", "poll", "signal", "term", "uio"] } once_cell = { version = "1.19.0" } rustix = { version = "0.38.34", features = ["fs", "stdio", "system", "termios"] } From ab29a029ba612722cc062f0d691a7b63deb0afe7 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 20 Aug 2024 21:58:12 
-0700 Subject: [PATCH 47/51] [reconfigurator] a couple of type improvements (#6395) * Use a typestate for the physical disk dependency. Just want to make sure the ordering is right once this moves to being expressed via the update engine. * Add strong typing around the output of `realize_blueprint`. --- .../execution/src/cockroachdb.rs | 22 ++-- nexus/reconfigurator/execution/src/dns.rs | 106 +++++++++--------- nexus/reconfigurator/execution/src/lib.rs | 28 +++-- .../execution/src/omicron_physical_disks.rs | 51 ++++++--- .../background/tasks/blueprint_execution.rs | 41 +++++-- 5 files changed, 156 insertions(+), 92 deletions(-) diff --git a/nexus/reconfigurator/execution/src/cockroachdb.rs b/nexus/reconfigurator/execution/src/cockroachdb.rs index 277f5f91c4..01baebfb57 100644 --- a/nexus/reconfigurator/execution/src/cockroachdb.rs +++ b/nexus/reconfigurator/execution/src/cockroachdb.rs @@ -34,6 +34,7 @@ pub(crate) async fn ensure_settings( mod test { use super::*; use crate::overridables::Overridables; + use crate::RealizeBlueprintOutput; use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_test_utils_macros::nexus_test; @@ -97,16 +98,17 @@ mod test { .await; // Execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint, - Uuid::new_v4(), - &overrides, - ) - .await - .expect("failed to execute initial blueprint"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute initial blueprint"); // The CockroachDB settings should not have changed. 
assert_eq!( settings, diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 4395944b25..9ca14f8e24 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -458,6 +458,7 @@ pub fn blueprint_nexus_external_ips(blueprint: &Blueprint) -> Vec { mod test { use super::*; use crate::overridables::Overridables; + use crate::RealizeBlueprintOutput; use crate::Sled; use dns_service_client::DnsDiff; use internal_dns::config::Host; @@ -1245,16 +1246,17 @@ mod test { // Now, execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint, - Uuid::new_v4(), - &overrides, - ) - .await - .expect("failed to execute initial blueprint"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute initial blueprint"); // DNS ought not to have changed. verify_dns_unchanged( @@ -1385,16 +1387,17 @@ mod test { .await .expect("failed to set blueprint as target"); - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint2, - Uuid::new_v4(), - &overrides, - ) - .await - .expect("failed to execute second blueprint"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint2, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute second blueprint"); // Now fetch DNS again. Both should have changed this time. let dns_latest_internal = datastore @@ -1459,16 +1462,17 @@ mod test { } // If we execute it again, we should see no more changes. 
- crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint2, - Uuid::new_v4(), - &overrides, - ) - .await - .expect("failed to execute second blueprint again"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint2, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute second blueprint again"); verify_dns_unchanged( &opctx, datastore, @@ -1495,16 +1499,17 @@ mod test { // One more time, make sure that executing the blueprint does not do // anything. - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint2, - Uuid::new_v4(), - &overrides, - ) - .await - .expect("failed to execute second blueprint again"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint2, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute second blueprint again"); verify_dns_unchanged( &opctx, datastore, @@ -1589,16 +1594,17 @@ mod test { ); // If we execute the blueprint, DNS should not be changed. - crate::realize_blueprint_with_overrides( - &opctx, - datastore, - resolver, - &blueprint, - Uuid::new_v4(), - &overrides, - ) - .await - .expect("failed to execute blueprint"); + let _: RealizeBlueprintOutput = + crate::realize_blueprint_with_overrides( + &opctx, + datastore, + resolver, + &blueprint, + Uuid::new_v4(), + &overrides, + ) + .await + .expect("failed to execute blueprint"); let dns_latest_internal = datastore .dns_config_read(&opctx, DnsGroup::Internal) .await diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 8606187762..2c70c7acbb 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -70,6 +70,15 @@ impl From for Sled { } } +/// The result of calling [`realize_blueprint`] or +/// [`realize_blueprint_with_overrides`]. 
+#[derive(Debug)] +#[must_use = "the output of realize_blueprint should probably be used"] +pub struct RealizeBlueprintOutput { + /// Whether any sagas need to be reassigned to a new Nexus. + pub needs_saga_recovery: bool, +} + /// Make one attempt to realize the given blueprint, meaning to take actions to /// alter the real system to match the blueprint /// @@ -81,7 +90,7 @@ pub async fn realize_blueprint( resolver: &Resolver, blueprint: &Blueprint, nexus_id: Uuid, -) -> Result> { +) -> Result> { realize_blueprint_with_overrides( opctx, datastore, @@ -100,7 +109,7 @@ pub async fn realize_blueprint_with_overrides( blueprint: &Blueprint, nexus_id: Uuid, overrides: &Overridables, -) -> Result> { +) -> Result> { let opctx = opctx.child(BTreeMap::from([( "comment".to_string(), blueprint.comment.clone(), @@ -132,7 +141,7 @@ pub async fn realize_blueprint_with_overrides( }) .collect(); - omicron_physical_disks::deploy_disks( + let deploy_disks_done = omicron_physical_disks::deploy_disks( &opctx, &sleds_by_id, &blueprint.blueprint_disks, @@ -205,11 +214,12 @@ pub async fn realize_blueprint_with_overrides( ) .await?; - // This depends on the "deploy_disks" call earlier -- disk expungement is a - // statement of policy, but we need to be assured that the Sled Agent has - // stopped using that disk before we can mark its state as decommissioned. - omicron_physical_disks::decommission_expunged_disks(&opctx, datastore) - .await?; + omicron_physical_disks::decommission_expunged_disks( + &opctx, + datastore, + deploy_disks_done, + ) + .await?; // From this point on, we'll assume that any errors that we encounter do // *not* require stopping execution. 
We'll just accumulate them and return @@ -244,7 +254,7 @@ pub async fn realize_blueprint_with_overrides( } if errors.is_empty() { - Ok(needs_saga_recovery) + Ok(RealizeBlueprintOutput { needs_saga_recovery }) } else { Err(errors) } diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs index 7adc41213e..af95eb8e77 100644 --- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs +++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs @@ -25,7 +25,7 @@ pub(crate) async fn deploy_disks( opctx: &OpContext, sleds_by_id: &BTreeMap, sled_configs: &BTreeMap, -) -> Result<(), Vec> { +) -> Result> { let errors: Vec<_> = stream::iter(sled_configs) .filter_map(|(sled_id, config)| async move { let log = opctx.log.new(o!( @@ -92,16 +92,26 @@ pub(crate) async fn deploy_disks( .await; if errors.is_empty() { - Ok(()) + Ok(DeployDisksDone {}) } else { Err(errors) } } -/// Decommissions all disks which are currently expunged +/// Typestate indicating that the deploy disks step was performed. +#[derive(Debug)] +#[must_use = "this should be passed into decommission_expunged_disks"] +pub(crate) struct DeployDisksDone {} + +/// Decommissions all disks which are currently expunged. pub(crate) async fn decommission_expunged_disks( opctx: &OpContext, datastore: &DataStore, + // This is taken as a parameter to ensure that this depends on a + // "deploy_disks" call made earlier. Disk expungement is a statement of + // policy, but we need to be assured that the Sled Agent has stopped using + // that disk before we can mark its state as decommissioned. 
+ _deploy_disks_done: DeployDisksDone, ) -> Result<(), Vec> { datastore .physical_disk_decommission_all_expunged(&opctx) @@ -113,6 +123,7 @@ pub(crate) async fn decommission_expunged_disks( #[cfg(test)] mod test { use super::deploy_disks; + use super::DeployDisksDone; use crate::DataStore; use crate::Sled; @@ -217,9 +228,13 @@ mod test { // Get a success result back when the blueprint has an empty set of // disks. let (_, blueprint) = create_blueprint(BTreeMap::new()); - deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) - .await - .expect("failed to deploy no disks"); + // Use an explicit type here because not doing so can cause errors to + // be ignored (this behavior is genuinely terrible). Instead, ensure + // that the type has the right result. + let _: DeployDisksDone = + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy no disks"); // Disks are updated in a particular order, but each request contains // the full set of disks that must be running. @@ -272,9 +287,10 @@ mod test { } // Execute it. 
- deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) - .await - .expect("failed to deploy initial disks"); + let _: DeployDisksDone = + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy initial disks"); s1.verify_and_clear(); s2.verify_and_clear(); @@ -293,9 +309,10 @@ mod test { )), ); } - deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) - .await - .expect("failed to deploy same disks"); + let _: DeployDisksDone = + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy same disks"); s1.verify_and_clear(); s2.verify_and_clear(); @@ -567,7 +584,15 @@ mod test { assert_eq!(d.disk_state, PhysicalDiskState::Active); assert_eq!(d.disk_policy, PhysicalDiskPolicy::InService); - super::decommission_expunged_disks(&opctx, &datastore).await.unwrap(); + super::decommission_expunged_disks( + &opctx, + &datastore, + // This is an internal test, and we're testing decommissioning in + // isolation, so it's okay to create the typestate here. + DeployDisksDone {}, + ) + .await + .unwrap(); // After decommissioning, we see the expunged disk become // decommissioned. The other disk remains in-service. diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index b430270ec9..d13e5428f8 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -10,6 +10,7 @@ use futures::FutureExt; use internal_dns::resolver::Resolver; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; +use nexus_reconfigurator_execution::RealizeBlueprintOutput; use nexus_types::deployment::{Blueprint, BlueprintTarget}; use serde_json::json; use std::sync::Arc; @@ -98,16 +99,23 @@ impl BlueprintExecutor { // Trigger anybody waiting for this to finish. 
self.tx.send_modify(|count| *count = *count + 1); - // If executing the blueprint requires activating the saga recovery - // background task, do that now. - info!(&opctx.log, "activating saga recovery task"); - if let Ok(true) = result { - self.saga_recovery.activate(); - } - // Return the result as a `serde_json::Value` match result { - Ok(_) => json!({}), + Ok(RealizeBlueprintOutput { needs_saga_recovery }) => { + // If executing the blueprint requires activating the saga + // recovery background task, do that now. + if let Ok(output) = &result { + if output.needs_saga_recovery { + info!(&opctx.log, "activating saga recovery task"); + self.saga_recovery.activate(); + } + } + + json!({ + "target_id": blueprint.id.to_string(), + "needs_saga_recovery": needs_saga_recovery, + }) + } Err(errors) => { let errors: Vec<_> = errors.into_iter().map(|e| format!("{:#}", e)).collect(); @@ -302,10 +310,17 @@ mod test { ) .await, ); + let blueprint_id = blueprint.1.id; blueprint_tx.send(Some(blueprint)).unwrap(); let value = task.activate(&opctx).await; println!("activating with no zones: {:?}", value); - assert_eq!(value, json!({})); + assert_eq!( + value, + json!({ + "target_id": blueprint_id, + "needs_saga_recovery": false, + }) + ); // Create a non-empty blueprint describing two servers and verify that // the task correctly winds up making requests to both of them and @@ -393,7 +408,13 @@ mod test { // Activate the task to trigger zone configuration on the sled-agents let value = task.activate(&opctx).await; println!("activating two sled agents: {:?}", value); - assert_eq!(value, json!({})); + assert_eq!( + value, + json!({ + "target_id": blueprint.1.id.to_string(), + "needs_saga_recovery": false, + }) + ); s1.verify_and_clear(); s2.verify_and_clear(); From 6dde3f86ca91a758d5a99dafb5a2ad40ec31e735 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 20 Aug 2024 22:31:46 -0700 Subject: [PATCH 48/51] [4/6] [openapi-manager] richer extra validation (#6370) With the Nexus external 
API, the validator generates a `nexus_tags.txt` file that must be kept track of. Instead of the validation function simply erroring out if the file is different, it is a better experience for users if it records what it expects the file to be, and then the OpenAPI manager simply treats it as an extra file similar to the document itself. With this pattern, the check and generate functions can both work on the extra file just like they work on the document. In order for there to be a richer protocol for validation, the interface needs to be split into its own crate. This way, the API crates can depend on this minimal interface, and the OpenAPI manager itself can depend on the API crates. This isn't used yet, but will be in #6373. --- Cargo.lock | 11 + Cargo.toml | 3 + dev-tools/openapi-manager/Cargo.toml | 1 + dev-tools/openapi-manager/src/check.rs | 125 +++++--- dev-tools/openapi-manager/src/dispatch.rs | 8 +- dev-tools/openapi-manager/src/generate.rs | 26 +- dev-tools/openapi-manager/src/output.rs | 54 +++- dev-tools/openapi-manager/src/spec.rs | 276 ++++++++++++++---- dev-tools/openapi-manager/types/Cargo.toml | 13 + dev-tools/openapi-manager/types/src/lib.rs | 12 + .../openapi-manager/types/src/validation.rs | 47 +++ workspace-hack/Cargo.toml | 2 + 12 files changed, 453 insertions(+), 125 deletions(-) create mode 100644 dev-tools/openapi-manager/types/Cargo.toml create mode 100644 dev-tools/openapi-manager/types/src/lib.rs create mode 100644 dev-tools/openapi-manager/types/src/validation.rs diff --git a/Cargo.lock b/Cargo.lock index 96a1db4983..830ec523a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6510,6 +6510,7 @@ dependencies = [ "postgres-types", "predicates", "proc-macro2", + "quote", "regex", "regex-automata 0.4.6", "regex-syntax 0.8.4", @@ -6637,6 +6638,7 @@ dependencies = [ "nexus-internal-api", "omicron-workspace-hack", "openapi-lint", + "openapi-manager-types", "openapiv3", "owo-colors", "oximeter-api", @@ -6647,6 +6649,15 @@ dependencies = [ 
"wicketd-api", ] +[[package]] +name = "openapi-manager-types" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "omicron-workspace-hack", +] + [[package]] name = "openapiv3" version = "2.0.0" diff --git a/Cargo.toml b/Cargo.toml index ce2b7f8eb0..55859ae9e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ members = [ "dev-tools/omicron-dev", "dev-tools/omicron-dev-lib", "dev-tools/openapi-manager", + "dev-tools/openapi-manager/types", "dev-tools/oxlog", "dev-tools/reconfigurator-cli", "dev-tools/releng", @@ -145,6 +146,7 @@ default-members = [ "dev-tools/omicron-dev", "dev-tools/omicron-dev-lib", "dev-tools/openapi-manager", + "dev-tools/openapi-manager/types", "dev-tools/oxlog", "dev-tools/reconfigurator-cli", "dev-tools/releng", @@ -453,6 +455,7 @@ oxlog = { path = "dev-tools/oxlog" } oxnet = { git = "https://github.com/oxidecomputer/oxnet" } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } +openapi-manager-types = { path = "dev-tools/openapi-manager/types" } openapiv3 = "2.0.0" # must match samael's crate! 
openssl = "0.10" diff --git a/dev-tools/openapi-manager/Cargo.toml b/dev-tools/openapi-manager/Cargo.toml index fe90737d9e..2ca1bc3e4d 100644 --- a/dev-tools/openapi-manager/Cargo.toml +++ b/dev-tools/openapi-manager/Cargo.toml @@ -25,6 +25,7 @@ nexus-internal-api.workspace = true omicron-workspace-hack.workspace = true openapiv3.workspace = true openapi-lint.workspace = true +openapi-manager-types.workspace = true owo-colors.workspace = true oximeter-api.workspace = true serde_json.workspace = true diff --git a/dev-tools/openapi-manager/src/check.rs b/dev-tools/openapi-manager/src/check.rs index 182ed9fb19..b43e43e7e5 100644 --- a/dev-tools/openapi-manager/src/check.rs +++ b/dev-tools/openapi-manager/src/check.rs @@ -5,17 +5,16 @@ use std::{io::Write, process::ExitCode}; use anyhow::Result; -use camino::Utf8Path; use indent_write::io::IndentWriter; use owo_colors::OwoColorize; use similar::TextDiff; use crate::{ output::{ - display_api_spec, display_error, display_summary, headers::*, plural, - write_diff, OutputOpts, Styles, + display_api_spec, display_api_spec_file, display_error, + display_summary, headers::*, plural, write_diff, OutputOpts, Styles, }, - spec::{all_apis, CheckStatus}, + spec::{all_apis, CheckStale, Environment}, FAILURE_EXIT_CODE, NEEDS_UPDATE_EXIT_CODE, }; @@ -37,7 +36,7 @@ impl CheckResult { } pub(crate) fn check_impl( - dir: &Utf8Path, + env: &Environment, output: &OutputOpts, ) -> Result { let mut styles = Styles::default(); @@ -48,6 +47,7 @@ pub(crate) fn check_impl( let all_apis = all_apis(); let total = all_apis.len(); let count_width = total.to_string().len(); + let count_section_indent = count_section_indent(count_width); let continued_indent = continued_indent(count_width); eprintln!("{:>HEADER_WIDTH$}", SEPARATOR); @@ -58,57 +58,89 @@ pub(crate) fn check_impl( total.style(styles.bold), plural::documents(total), ); - let mut num_up_to_date = 0; + let mut num_fresh = 0; let mut num_stale = 0; - let mut num_missing = 0; let mut 
num_failed = 0; for (ix, spec) in all_apis.iter().enumerate() { let count = ix + 1; - match spec.check(&dir) { - Ok(status) => match status { - CheckStatus::Ok(summary) => { - eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}", - UP_TO_DATE.style(styles.success_header), - display_api_spec(spec, &styles), - display_summary(&summary, &styles), - ); + match spec.check(env) { + Ok(status) => { + let total_errors = status.total_errors(); + let total_errors_width = total_errors.to_string().len(); + + if total_errors == 0 { + // Success case. + let extra = if status.extra_files_len() > 0 { + format!( + ", {} extra files", + status.extra_files_len().style(styles.bold) + ) + } else { + "".to_string() + }; - num_up_to_date += 1; - } - CheckStatus::Stale { full_path, actual, expected } => { eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", - STALE.style(styles.warning_header), + "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}{extra}", + FRESH.style(styles.success_header), display_api_spec(spec, &styles), + display_summary(&status.summary, &styles), ); - let diff = TextDiff::from_lines(&actual, &expected); - write_diff( - &diff, - &full_path, - &styles, - // Add an indent to align diff with the status message. - &mut IndentWriter::new( - &continued_indent, - std::io::stderr(), - ), - )?; - - num_stale += 1; + num_fresh += 1; + continue; } - CheckStatus::Missing => { - eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", - MISSING.style(styles.warning_header), - display_api_spec(spec, &styles), - ); - num_missing += 1; + // Out of date: print errors. 
+ eprintln!( + "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", + STALE.style(styles.warning_header), + display_api_spec(spec, &styles), + ); + num_stale += 1; + + for (error_ix, (spec_file, error)) in + status.iter_errors().enumerate() + { + let error_count = error_ix + 1; + + let display_heading = |heading: &str| { + eprintln!( + "{:>HEADER_WIDTH$}{count_section_indent}\ + ({error_count:>total_errors_width$}/{total_errors}) {}", + heading.style(styles.warning_header), + display_api_spec_file(spec, spec_file, &styles), + ); + }; + + match error { + CheckStale::Modified { + full_path, + actual, + expected, + } => { + display_heading(MODIFIED); + + let diff = + TextDiff::from_lines(&**actual, &**expected); + write_diff( + &diff, + &full_path, + &styles, + // Add an indent to align diff with the status message. + &mut IndentWriter::new( + &continued_indent, + std::io::stderr(), + ), + )?; + } + CheckStale::New => { + display_heading(NEW); + } + } } - }, + } Err(error) => { eprint!( "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", @@ -138,13 +170,12 @@ pub(crate) fn check_impl( }; eprintln!( - "{:>HEADER_WIDTH$} {} {} checked: {} up-to-date, {} stale, {} missing, {} failed", + "{:>HEADER_WIDTH$} {} {} checked: {} fresh, {} stale, {} failed", status_header, total.style(styles.bold), plural::documents(total), - num_up_to_date.style(styles.bold), + num_fresh.style(styles.bold), num_stale.style(styles.bold), - num_missing.style(styles.bold), num_failed.style(styles.bold), ); if num_failed > 0 { @@ -170,14 +201,14 @@ pub(crate) fn check_impl( mod tests { use std::process::ExitCode; - use crate::spec::find_openapi_dir; + use crate::spec::Environment; use super::*; #[test] fn check_apis_up_to_date() -> Result { let output = OutputOpts { color: clap::ColorChoice::Auto }; - let dir = find_openapi_dir()?; + let dir = Environment::new(None)?; let result = check_impl(&dir, &output)?; Ok(result.to_exit_code()) diff --git a/dev-tools/openapi-manager/src/dispatch.rs 
b/dev-tools/openapi-manager/src/dispatch.rs index 937a8b485f..ca2989396f 100644 --- a/dev-tools/openapi-manager/src/dispatch.rs +++ b/dev-tools/openapi-manager/src/dispatch.rs @@ -10,7 +10,7 @@ use clap::{Args, Parser, Subcommand}; use crate::{ check::check_impl, generate::generate_impl, list::list_impl, - output::OutputOpts, spec::openapi_dir, + output::OutputOpts, spec::Environment, }; /// Manage OpenAPI specifications. @@ -73,7 +73,7 @@ pub struct GenerateArgs { impl GenerateArgs { fn exec(self, output: &OutputOpts) -> anyhow::Result { - let dir = openapi_dir(self.dir)?; + let dir = Environment::new(self.dir)?; Ok(generate_impl(&dir, output)?.to_exit_code()) } } @@ -87,8 +87,8 @@ pub struct CheckArgs { impl CheckArgs { fn exec(self, output: &OutputOpts) -> anyhow::Result { - let dir = openapi_dir(self.dir)?; - Ok(check_impl(&dir, output)?.to_exit_code()) + let env = Environment::new(self.dir)?; + Ok(check_impl(&env, output)?.to_exit_code()) } } diff --git a/dev-tools/openapi-manager/src/generate.rs b/dev-tools/openapi-manager/src/generate.rs index f776ff2709..1cf9ebbb61 100644 --- a/dev-tools/openapi-manager/src/generate.rs +++ b/dev-tools/openapi-manager/src/generate.rs @@ -5,7 +5,6 @@ use std::{io::Write, process::ExitCode}; use anyhow::Result; -use camino::Utf8Path; use indent_write::io::IndentWriter; use owo_colors::OwoColorize; @@ -14,7 +13,7 @@ use crate::{ display_api_spec, display_error, display_summary, headers::*, plural, OutputOpts, Styles, }, - spec::{all_apis, OverwriteStatus}, + spec::{all_apis, Environment}, FAILURE_EXIT_CODE, }; @@ -34,7 +33,7 @@ impl GenerateResult { } pub(crate) fn generate_impl( - dir: &Utf8Path, + env: &Environment, output: &OutputOpts, ) -> Result { let mut styles = Styles::default(); @@ -62,27 +61,30 @@ pub(crate) fn generate_impl( for (ix, spec) in all_apis.iter().enumerate() { let count = ix + 1; - match spec.overwrite(&dir) { - Ok((status, summary)) => match status { - OverwriteStatus::Updated => { + match 
spec.overwrite(env) { + Ok(status) => { + let updated_count = status.updated_count(); + + if updated_count > 0 { eprintln!( - "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}", + "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {} ({} {} updated)", UPDATED.style(styles.success_header), display_api_spec(spec, &styles), - display_summary(&summary, &styles), + display_summary(&status.summary, &styles), + updated_count.style(styles.bold), + plural::files(updated_count), ); num_updated += 1; - } - OverwriteStatus::Unchanged => { + } else { eprintln!( "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}: {}", UNCHANGED.style(styles.unchanged_header), display_api_spec(spec, &styles), - display_summary(&summary, &styles), + display_summary(&status.summary, &styles), ); num_unchanged += 1; } - }, + } Err(err) => { eprintln!( "{:>HEADER_WIDTH$} [{count:>count_width$}/{total}] {}", diff --git a/dev-tools/openapi-manager/src/output.rs b/dev-tools/openapi-manager/src/output.rs index 6cd578e778..fee7f0f15c 100644 --- a/dev-tools/openapi-manager/src/output.rs +++ b/dev-tools/openapi-manager/src/output.rs @@ -10,7 +10,7 @@ use indent_write::fmt::IndentWriter; use owo_colors::{OwoColorize, Style}; use similar::{ChangeTag, DiffableStr, TextDiff}; -use crate::spec::{ApiSpec, DocumentSummary}; +use crate::spec::{ApiSpec, ApiSpecFile, DocumentSummary}; #[derive(Debug, Args)] #[clap(next_help_heading = "Global options")] @@ -123,6 +123,21 @@ pub(crate) fn display_api_spec(spec: &ApiSpec, styles: &Styles) -> String { ) } +pub(crate) fn display_api_spec_file( + spec: &ApiSpec, + spec_file: ApiSpecFile<'_>, + styles: &Styles, +) -> String { + match spec_file { + ApiSpecFile::Openapi => { + format!("OpenAPI document {}", spec.filename.style(styles.filename)) + } + ApiSpecFile::Extra(path) => { + format!("Extra file {}", path.style(styles.filename)) + } + } +} + pub(crate) fn display_summary( summary: &DocumentSummary, styles: &Styles, @@ -201,9 +216,14 @@ pub(crate) mod 
headers { pub(crate) static CHECKING: &str = "Checking"; pub(crate) static GENERATING: &str = "Generating"; - pub(crate) static UP_TO_DATE: &str = "Up-to-date"; + pub(crate) static FRESH: &str = "Fresh"; + + // Stale encompasses: + // - Stale: the file on disk is different from what we generated. + // - Missing: the file on disk does not exist. pub(crate) static STALE: &str = "Stale"; - pub(crate) static MISSING: &str = "Missing"; + pub(crate) static NEW: &str = "-> New"; + pub(crate) static MODIFIED: &str = "-> Modified"; pub(crate) static UPDATED: &str = "Updated"; pub(crate) static UNCHANGED: &str = "Unchanged"; @@ -211,22 +231,38 @@ pub(crate) mod headers { pub(crate) static SUCCESS: &str = "Success"; pub(crate) static FAILURE: &str = "Failure"; - pub(crate) fn continued_indent(count_width: usize) -> String { + fn count_section_width(count_width: usize) -> usize { // Status strings are of the form: // // Generated [ 1/12] api.json: 1 path, 1 schema + // ^^^^^^^^^ // - // So the continued indent is: - // - // HEADER_WIDTH for the status string - // + (count_width * 2) for current and total counts + // So the width of the count section is: + // (count_width * 2) for current and total counts // + 3 for '[/]' // + 2 for spaces on either side. 
- " ".repeat(HEADER_WIDTH + count_width * 2 + 3 + 2) + count_width * 2 + 3 + 2 + } + + pub(crate) fn count_section_indent(count_width: usize) -> String { + " ".repeat(count_section_width(count_width)) + } + + pub(crate) fn continued_indent(count_width: usize) -> String { + // HEADER_WIDTH for the status string + count_section_width + " ".repeat(HEADER_WIDTH + count_section_width(count_width)) } } pub(crate) mod plural { + pub(crate) fn files(count: usize) -> &'static str { + if count == 1 { + "file" + } else { + "files" + } + } + pub(crate) fn documents(count: usize) -> &'static str { if count == 1 { "document" diff --git a/dev-tools/openapi-manager/src/spec.rs b/dev-tools/openapi-manager/src/spec.rs index 29601a63d6..e74cf7ed7a 100644 --- a/dev-tools/openapi-manager/src/spec.rs +++ b/dev-tools/openapi-manager/src/spec.rs @@ -9,6 +9,7 @@ use atomicwrites::AtomicFile; use camino::{Utf8Path, Utf8PathBuf}; use dropshot::{ApiDescription, ApiDescriptionBuildErrors, StubContext}; use fs_err as fs; +use openapi_manager_types::{ValidationBackend, ValidationContext}; use openapiv3::OpenAPI; /// All APIs managed by openapi-manager. @@ -143,47 +144,64 @@ pub struct ApiSpec { pub filename: &'static str, /// Extra validation to perform on the OpenAPI spec, if any. 
- pub extra_validation: Option anyhow::Result<()>>, + pub extra_validation: Option)>, } impl ApiSpec { pub(crate) fn overwrite( &self, - dir: &Utf8Path, - ) -> Result<(OverwriteStatus, DocumentSummary)> { + env: &Environment, + ) -> Result { let contents = self.to_json_bytes()?; - let summary = self + let (summary, validation_result) = self .validate_json(&contents) .context("OpenAPI document validation failed")?; - let full_path = dir.join(&self.filename); - let status = overwrite_file(&full_path, &contents)?; - - Ok((status, summary)) + let full_path = env.openapi_dir.join(&self.filename); + let openapi_doc_status = overwrite_file(&full_path, &contents)?; + + let extra_files = validation_result + .extra_files + .into_iter() + .map(|(path, contents)| { + let full_path = env.workspace_root.join(&path); + let status = overwrite_file(&full_path, &contents)?; + Ok((path, status)) + }) + .collect::>()?; + + Ok(SpecOverwriteStatus { + summary, + openapi_doc: openapi_doc_status, + extra_files, + }) } - pub(crate) fn check(&self, dir: &Utf8Path) -> Result { + pub(crate) fn check(&self, env: &Environment) -> Result { let contents = self.to_json_bytes()?; - let summary = self + let (summary, validation_result) = self .validate_json(&contents) .context("OpenAPI document validation failed")?; - let full_path = dir.join(&self.filename); - let existing_contents = - read_opt(&full_path).context("failed to read contents on disk")?; - - match existing_contents { - Some(existing_contents) if existing_contents == contents => { - Ok(CheckStatus::Ok(summary)) - } - Some(existing_contents) => Ok(CheckStatus::Stale { - full_path, - actual: existing_contents, - expected: contents, - }), - None => Ok(CheckStatus::Missing), - } + let full_path = env.openapi_dir.join(&self.filename); + let openapi_doc_status = check_file(full_path, contents)?; + + let extra_files = validation_result + .extra_files + .into_iter() + .map(|(path, contents)| { + let full_path = env.workspace_root.join(&path); + 
let status = check_file(full_path, contents)?; + Ok((path, status)) + }) + .collect::>()?; + + Ok(SpecCheckStatus { + summary, + openapi_doc: openapi_doc_status, + extra_files, + }) } pub(crate) fn to_openapi_doc(&self) -> Result { @@ -216,7 +234,10 @@ impl ApiSpec { Ok(contents) } - fn validate_json(&self, contents: &[u8]) -> Result { + fn validate_json( + &self, + contents: &[u8], + ) -> Result<(DocumentSummary, ValidationResult)> { let openapi_doc = contents_to_openapi(contents) .context("JSON returned by ApiDescription is not valid OpenAPI")?; @@ -231,11 +252,51 @@ impl ApiSpec { return Err(anyhow::anyhow!("{}", errors.join("\n\n"))); } - if let Some(extra_validation) = self.extra_validation { - extra_validation(&openapi_doc)?; - } + let extra_files = if let Some(extra_validation) = self.extra_validation + { + let mut validation_context = + ValidationContextImpl { errors: Vec::new(), files: Vec::new() }; + extra_validation( + &openapi_doc, + ValidationContext::new(&mut validation_context), + ); + + if !validation_context.errors.is_empty() { + return Err(anyhow::anyhow!( + "OpenAPI document extended validation failed:\n{}", + validation_context + .errors + .iter() + .map(|e| e.to_string()) + .collect::>() + .join("\n") + )); + } + + validation_context.files + } else { + Vec::new() + }; + + Ok(( + DocumentSummary::new(&openapi_doc), + ValidationResult { extra_files }, + )) + } +} + +struct ValidationContextImpl { + errors: Vec, + files: Vec<(Utf8PathBuf, Vec)>, +} + +impl ValidationBackend for ValidationContextImpl { + fn report_error(&mut self, error: anyhow::Error) { + self.errors.push(error); + } - Ok(DocumentSummary::new(&openapi_doc)) + fn record_file_contents(&mut self, path: Utf8PathBuf, contents: Vec) { + self.files.push((path, contents)); } } @@ -260,6 +321,32 @@ impl fmt::Display for ApiBoundary { } } +#[derive(Debug)] +#[must_use] +pub(crate) struct SpecOverwriteStatus { + pub(crate) summary: DocumentSummary, + openapi_doc: OverwriteStatus, + 
extra_files: Vec<(Utf8PathBuf, OverwriteStatus)>, +} + +impl SpecOverwriteStatus { + pub(crate) fn updated_count(&self) -> usize { + self.iter() + .filter(|(_, status)| matches!(status, OverwriteStatus::Updated)) + .count() + } + + fn iter( + &self, + ) -> impl Iterator, &OverwriteStatus)> { + std::iter::once((ApiSpecFile::Openapi, &self.openapi_doc)).chain( + self.extra_files.iter().map(|(file_name, status)| { + (ApiSpecFile::Extra(file_name), status) + }), + ) + } +} + #[derive(Debug)] #[must_use] pub(crate) enum OverwriteStatus { @@ -267,12 +354,58 @@ pub(crate) enum OverwriteStatus { Unchanged, } +#[derive(Debug)] +#[must_use] +pub(crate) struct SpecCheckStatus { + pub(crate) summary: DocumentSummary, + pub(crate) openapi_doc: CheckStatus, + pub(crate) extra_files: Vec<(Utf8PathBuf, CheckStatus)>, +} + +impl SpecCheckStatus { + pub(crate) fn total_errors(&self) -> usize { + self.iter_errors().count() + } + + pub(crate) fn extra_files_len(&self) -> usize { + self.extra_files.len() + } + + pub(crate) fn iter_errors( + &self, + ) -> impl Iterator, &CheckStale)> { + std::iter::once((ApiSpecFile::Openapi, &self.openapi_doc)) + .chain(self.extra_files.iter().map(|(file_name, status)| { + (ApiSpecFile::Extra(file_name), status) + })) + .filter_map(|(spec_file, status)| { + if let CheckStatus::Stale(e) = status { + Some((spec_file, e)) + } else { + None + } + }) + } +} + +#[derive(Clone, Copy, Debug)] +pub(crate) enum ApiSpecFile<'a> { + Openapi, + Extra(&'a Utf8Path), +} + #[derive(Debug)] #[must_use] pub(crate) enum CheckStatus { - Ok(DocumentSummary), - Stale { full_path: Utf8PathBuf, actual: Vec, expected: Vec }, - Missing, + Fresh, + Stale(CheckStale), +} + +#[derive(Debug)] +#[must_use] +pub(crate) enum CheckStale { + Modified { full_path: Utf8PathBuf, actual: Vec, expected: Vec }, + New, } #[derive(Debug)] @@ -295,31 +428,45 @@ impl DocumentSummary { } } -pub(crate) fn openapi_dir(dir: Option) -> Result { - match dir { - Some(dir) => 
Ok(dir.canonicalize_utf8().with_context(|| { - format!("failed to canonicalize directory: {}", dir) - })?), - None => find_openapi_dir().context("failed to find openapi directory"), - } +#[derive(Debug)] +#[must_use] +struct ValidationResult { + // Extra files recorded by the validation context. + extra_files: Vec<(Utf8PathBuf, Vec)>, } -pub(crate) fn find_openapi_dir() -> Result { - let mut root = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")); - // This crate is two levels down from the root of omicron, so go up twice. - root.pop(); - root.pop(); +pub(crate) struct Environment { + pub(crate) workspace_root: Utf8PathBuf, + pub(crate) openapi_dir: Utf8PathBuf, +} - root.push("openapi"); - let root = root.canonicalize_utf8().with_context(|| { - format!("failed to canonicalize openapi directory: {}", root) - })?; +impl Environment { + pub(crate) fn new(openapi_dir: Option) -> Result { + let mut root = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")); + // This crate is two levels down from the root of omicron, so go up twice. + root.pop(); + root.pop(); - if !root.is_dir() { - anyhow::bail!("openapi root is not a directory: {}", root); - } + let workspace_root = root.canonicalize_utf8().with_context(|| { + format!("failed to canonicalize workspace root: {}", root) + })?; - Ok(root) + let openapi_dir = + openapi_dir.unwrap_or_else(|| workspace_root.join("openapi")); + let openapi_dir = + openapi_dir.canonicalize_utf8().with_context(|| { + format!( + "failed to canonicalize openapi directory: {}", + openapi_dir + ) + })?; + + if !openapi_dir.is_dir() { + anyhow::bail!("openapi root is not a directory: {}", root); + } + + Ok(Self { workspace_root, openapi_dir }) + } } /// Overwrite a file with new contents, if the contents are different. @@ -344,6 +491,29 @@ fn overwrite_file(path: &Utf8Path, contents: &[u8]) -> Result { Ok(OverwriteStatus::Updated) } +/// Check a file against expected contents. 
+fn check_file( + full_path: Utf8PathBuf, + contents: Vec, +) -> Result { + let existing_contents = + read_opt(&full_path).context("failed to read contents on disk")?; + + match existing_contents { + Some(existing_contents) if existing_contents == contents => { + Ok(CheckStatus::Fresh) + } + Some(existing_contents) => { + Ok(CheckStatus::Stale(CheckStale::Modified { + full_path, + actual: existing_contents, + expected: contents, + })) + } + None => Ok(CheckStatus::Stale(CheckStale::New)), + } +} + fn read_opt(path: &Utf8Path) -> std::io::Result>> { match fs::read(path) { Ok(contents) => Ok(Some(contents)), diff --git a/dev-tools/openapi-manager/types/Cargo.toml b/dev-tools/openapi-manager/types/Cargo.toml new file mode 100644 index 0000000000..262529f1a9 --- /dev/null +++ b/dev-tools/openapi-manager/types/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "openapi-manager-types" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +camino.workspace = true +omicron-workspace-hack.workspace = true diff --git a/dev-tools/openapi-manager/types/src/lib.rs b/dev-tools/openapi-manager/types/src/lib.rs new file mode 100644 index 0000000000..b48ea03e74 --- /dev/null +++ b/dev-tools/openapi-manager/types/src/lib.rs @@ -0,0 +1,12 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Shared types for the OpenAPI manager. +//! +//! API trait crates can depend on this crate to get access to interfaces +//! exposed by the OpenAPI manager. 
+ +mod validation; + +pub use validation::*; diff --git a/dev-tools/openapi-manager/types/src/validation.rs b/dev-tools/openapi-manager/types/src/validation.rs new file mode 100644 index 0000000000..6f22228f4d --- /dev/null +++ b/dev-tools/openapi-manager/types/src/validation.rs @@ -0,0 +1,47 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use camino::Utf8PathBuf; + +/// Context for validation of OpenAPI specifications. +pub struct ValidationContext<'a> { + backend: &'a mut dyn ValidationBackend, +} + +impl<'a> ValidationContext<'a> { + /// Not part of the public API -- only called by the OpenAPI manager. + #[doc(hidden)] + pub fn new(backend: &'a mut dyn ValidationBackend) -> Self { + Self { backend } + } + + /// Reports a validation error. + pub fn report_error(&mut self, error: anyhow::Error) { + self.backend.report_error(error); + } + + /// Records that the file has the given contents. + /// + /// In check mode, if the files differ, an error is logged. + /// + /// In generate mode, the file is overwritten with the given contents. + /// + /// The path is treated as relative to the root of the repository. + pub fn record_file_contents( + &mut self, + path: impl Into, + contents: Vec, + ) { + self.backend.record_file_contents(path.into(), contents); + } +} + +/// The backend for validation. +/// +/// Not part of the public API -- only implemented by the OpenAPI manager. 
+#[doc(hidden)] +pub trait ValidationBackend { + fn report_error(&mut self, error: anyhow::Error); + fn record_file_contents(&mut self, path: Utf8PathBuf, contents: Vec); +} diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 014444c542..a39daa5735 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -85,6 +85,7 @@ pkcs8 = { version = "0.10.2", default-features = false, features = ["encryption" postgres-types = { version = "0.2.7", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } predicates = { version = "3.1.2" } proc-macro2 = { version = "1.0.86" } +quote = { version = "1.0.36" } regex = { version = "1.10.6" } regex-automata = { version = "0.4.6", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "unicode"] } regex-syntax = { version = "0.8.4" } @@ -193,6 +194,7 @@ pkcs8 = { version = "0.10.2", default-features = false, features = ["encryption" postgres-types = { version = "0.2.7", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } predicates = { version = "3.1.2" } proc-macro2 = { version = "1.0.86" } +quote = { version = "1.0.36" } regex = { version = "1.10.6" } regex-automata = { version = "0.4.6", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "unicode"] } regex-syntax = { version = "0.8.4" } From 14b94f785820c68cd2972ae4058f1476c2de96b3 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Tue, 20 Aug 2024 23:27:07 -0700 Subject: [PATCH 49/51] plumb static route local preference (#6361) --- Cargo.lock | 4 +- Cargo.toml | 4 +- common/src/api/external/mod.rs | 3 + common/src/api/internal/shared.rs | 3 + nexus/db-model/src/schema.rs | 1 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-model/src/switch_port.rs | 5 +- .../src/db/datastore/switch_port.rs | 1 + .../tasks/sync_switch_configuration.rs | 90 +++++++++++-------- nexus/src/app/rack.rs | 1 + 
nexus/tests/integration_tests/switch_port.rs | 1 + nexus/types/src/external_api/params.rs | 4 + openapi/bootstrap-agent.json | 8 ++ openapi/nexus-internal.json | 8 ++ openapi/nexus.json | 14 +++ openapi/sled-agent.json | 8 ++ openapi/wicketd.json | 8 ++ package-manifest.toml | 12 +-- schema/crdb/dbinit.sql | 3 +- schema/crdb/route-local-pref/up.sql | 1 + schema/rss-sled-plan.json | 10 +++ sled-agent/src/bootstrap/early_networking.rs | 3 +- sled-agent/src/rack_setup/service.rs | 1 + .../tests/integration_tests/early_network.rs | 1 + .../madrid-rss-sled-plan.json | 6 +- sled-agent/types/src/early_networking.rs | 6 ++ tools/generate-nexus-api.sh | 1 - tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 4 +- tools/maghemite_mgd_checksums | 4 +- wicket-common/src/example.rs | 2 + wicket/src/cli/rack_setup/config_toml.rs | 5 +- wicket/src/ui/panes/rack_setup.rs | 10 ++- wicketd/src/rss_config.rs | 1 + 34 files changed, 175 insertions(+), 63 deletions(-) create mode 100644 schema/crdb/route-local-pref/up.sql diff --git a/Cargo.lock b/Cargo.lock index 830ec523a3..91617c2eb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1786,7 +1786,7 @@ dependencies = [ [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=220dd026e83142b83bd93123f465a64dd4600201#220dd026e83142b83bd93123f465a64dd4600201" +source = "git+https://github.com/oxidecomputer/maghemite?rev=73e63eaae3fe616bd7c48a20c69736d7e025836b#73e63eaae3fe616bd7c48a20c69736d7e025836b" dependencies = [ "oxnet", "percent-encoding", @@ -4697,7 +4697,7 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=220dd026e83142b83bd93123f465a64dd4600201#220dd026e83142b83bd93123f465a64dd4600201" +source = "git+https://github.com/oxidecomputer/maghemite?rev=73e63eaae3fe616bd7c48a20c69736d7e025836b#73e63eaae3fe616bd7c48a20c69736d7e025836b" dependencies = [ "anyhow", 
"chrono", diff --git a/Cargo.toml b/Cargo.toml index 55859ae9e6..413990c9a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -408,8 +408,8 @@ macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" mockall = "0.13" newtype_derive = "0.1.6" -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "220dd026e83142b83bd93123f465a64dd4600201" } -ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "220dd026e83142b83bd93123f465a64dd4600201" } +mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "73e63eaae3fe616bd7c48a20c69736d7e025836b" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "73e63eaae3fe616bd7c48a20c69736d7e025836b" } multimap = "0.10.0" nexus-auth = { path = "nexus/auth" } nexus-client = { path = "clients/nexus-client" } diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index c7421aa5ee..986f45bfd1 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -2492,6 +2492,9 @@ pub struct SwitchPortRouteConfig { /// The VLAN identifier for the route. Use this if the gateway is reachable /// over an 802.1Q tagged L2 segment. pub vlan_id: Option, + + /// Local preference indicating priority within and across protocols. + pub local_pref: Option, } /* diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 089ff9b324..395bc3d132 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -305,6 +305,9 @@ pub struct RouteConfig { /// The VLAN id associated with this route. #[serde(default)] pub vlan_id: Option, + /// The local preference associated with this route. + #[serde(default)] + pub local_pref: Option, } #[derive( diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 845da13a44..d1205dac65 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -195,6 +195,7 @@ table! 
{ dst -> Inet, gw -> Inet, vid -> Nullable, + local_pref -> Nullable, } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 1e0caabb02..ef9a11c330 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(87, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(88, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(88, "route-local-pref"), KnownVersion::new(87, "add-clickhouse-server-enum-variants"), KnownVersion::new(86, "snapshot-replacement"), KnownVersion::new(85, "add-migrations-by-time-created-index"), diff --git a/nexus/db-model/src/switch_port.rs b/nexus/db-model/src/switch_port.rs index f790d7d527..9b36cbda48 100644 --- a/nexus/db-model/src/switch_port.rs +++ b/nexus/db-model/src/switch_port.rs @@ -554,6 +554,7 @@ pub struct SwitchPortRouteConfig { pub dst: IpNetwork, pub gw: IpNetwork, pub vid: Option, + pub local_pref: Option, } impl SwitchPortRouteConfig { @@ -563,8 +564,9 @@ impl SwitchPortRouteConfig { dst: IpNetwork, gw: IpNetwork, vid: Option, + local_pref: Option, ) -> Self { - Self { port_settings_id, interface_name, dst, gw, vid } + Self { port_settings_id, interface_name, dst, gw, vid, local_pref } } } @@ -576,6 +578,7 @@ impl Into for SwitchPortRouteConfig { dst: self.dst.into(), gw: self.gw.into(), vlan_id: self.vid.map(Into::into), + local_pref: self.local_pref.map(Into::into), } } } diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs 
b/nexus/db-queries/src/db/datastore/switch_port.rs index 159933dce0..f9c61147f3 100644 --- a/nexus/db-queries/src/db/datastore/switch_port.rs +++ b/nexus/db-queries/src/db/datastore/switch_port.rs @@ -1120,6 +1120,7 @@ async fn do_switch_port_settings_create( route.dst.into(), route.gw.into(), route.vid.map(Into::into), + route.local_pref.map(Into::into), )); } } diff --git a/nexus/src/app/background/tasks/sync_switch_configuration.rs b/nexus/src/app/background/tasks/sync_switch_configuration.rs index 20a12d1127..6ecdaa2e55 100644 --- a/nexus/src/app/background/tasks/sync_switch_configuration.rs +++ b/nexus/src/app/background/tasks/sync_switch_configuration.rs @@ -977,6 +977,7 @@ impl BackgroundTask for SwitchPortSettingsManager { destination: r.dst.into(), nexthop: r.gw.ip(), vlan_id: r.vid.map(|x| x.0), + local_pref: r.local_pref.map(|x| x.0), }) .collect(), switch: *location, @@ -1455,7 +1456,8 @@ fn build_sled_agent_clients( sled_agent_clients } -type SwitchStaticRoutes = HashSet<(Ipv4Addr, Prefix4, Option)>; +type SwitchStaticRoutes = + HashSet<(Ipv4Addr, Prefix4, Option, Option)>; fn static_routes_to_del( current_static_routes: HashMap, @@ -1471,10 +1473,11 @@ fn static_routes_to_del( // if it's on the switch but not desired (in our db), it should be removed let stale_routes = routes_on_switch .difference(routes_wanted) - .map(|(nexthop, prefix, vlan_id)| StaticRoute4 { + .map(|(nexthop, prefix, vlan_id, local_pref)| StaticRoute4 { nexthop: *nexthop, prefix: *prefix, vlan_id: *vlan_id, + local_pref: *local_pref, }) .collect::>(); @@ -1488,10 +1491,11 @@ fn static_routes_to_del( // if no desired routes are present, all routes on this switch should be deleted let stale_routes = routes_on_switch .iter() - .map(|(nexthop, prefix, vlan_id)| StaticRoute4 { + .map(|(nexthop, prefix, vlan_id, local_pref)| StaticRoute4 { nexthop: *nexthop, prefix: *prefix, vlan_id: *vlan_id, + local_pref: *local_pref, }) .collect::>(); @@ -1538,10 +1542,11 @@ fn static_routes_to_add( 
}; let missing_routes = routes_wanted .difference(routes_on_switch) - .map(|(nexthop, prefix, vlan_id)| StaticRoute4 { + .map(|(nexthop, prefix, vlan_id, local_pref)| StaticRoute4 { nexthop: *nexthop, prefix: *prefix, vlan_id: *vlan_id, + local_pref: *local_pref, }) .collect::>(); @@ -1590,7 +1595,12 @@ fn static_routes_in_db( } IpAddr::V6(_) => continue, }; - routes.insert((nexthop, prefix, route.vid.map(|x| x.0))); + routes.insert(( + nexthop, + prefix, + route.vid.map(|x| x.0), + route.local_pref.map(|x| x.0), + )); } match routes_from_db.entry(*location) { @@ -1768,44 +1778,46 @@ async fn static_routes_on_switch<'a>( let mut routes_on_switch = HashMap::new(); for (location, client) in mgd_clients { - let static_routes: SwitchStaticRoutes = - match client.static_list_v4_routes().await { - Ok(routes) => { - let mut flattened = HashSet::new(); - for (destination, paths) in routes.iter() { - let Ok(dst) = destination.parse() else { - error!( - log, - "failed to parse static route destination: \ + let static_routes: SwitchStaticRoutes = match client + .static_list_v4_routes() + .await + { + Ok(routes) => { + let mut flattened = HashSet::new(); + for (destination, paths) in routes.iter() { + let Ok(dst) = destination.parse() else { + error!( + log, + "failed to parse static route destination: \ {destination}" - ); - continue; + ); + continue; + }; + for p in paths.iter() { + let nh = match p.nexthop { + IpAddr::V4(addr) => addr, + IpAddr::V6(addr) => { + error!( + log, + "ipv6 nexthops not supported: {addr}" + ); + continue; + } }; - for p in paths.iter() { - let nh = match p.nexthop { - IpAddr::V4(addr) => addr, - IpAddr::V6(addr) => { - error!( - log, - "ipv6 nexthops not supported: {addr}" - ); - continue; - } - }; - flattened.insert((nh, dst, p.vlan_id)); - } + flattened.insert((nh, dst, p.vlan_id, p.local_pref)); } - flattened - } - Err(_) => { - error!( - &log, - "unable to retrieve routes from switch"; - "switch_location" => ?location, - ); - continue; } - }; 
+ flattened + } + Err(_) => { + error!( + &log, + "unable to retrieve routes from switch"; + "switch_location" => ?location, + ); + continue; + } + }; routes_on_switch.insert(*location, static_routes); } routes_on_switch diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 13b30fd47a..4eb9883bcc 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -570,6 +570,7 @@ impl super::Nexus { dst: r.destination, gw: r.nexthop, vid: r.vlan_id, + local_pref: r.local_pref, }) .collect(); diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index 0b71ddb2cf..2485d82c45 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -140,6 +140,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { dst: "1.2.3.0/24".parse().unwrap(), gw: "1.2.3.4".parse().unwrap(), vid: None, + local_pref: None, }], }, ); diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index a7dd0a72cc..effd067ec8 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1581,6 +1581,10 @@ pub struct Route { /// VLAN id the gateway is reachable over. pub vid: Option, + + /// Local preference for route. Higher preference indicates precedence + /// within and across protocols. + pub local_pref: Option, } /// Select a BGP config by a name or id. 
diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 7b4f257670..b109eaf43e 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -1183,6 +1183,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index d054591f3a..6b9a63d7f2 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4665,6 +4665,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", diff --git a/openapi/nexus.json b/openapi/nexus.json index c29cb8a95c..e622239fa2 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -17180,6 +17180,13 @@ "type": "string", "format": "ip" }, + "local_pref": { + "nullable": true, + "description": "Local preference for route. 
Higher preference indicates precedence within and across protocols.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "vid": { "nullable": true, "description": "VLAN id the gateway is reachable over.", @@ -19238,6 +19245,13 @@ "description": "The interface name this route configuration is assigned to.", "type": "string" }, + "local_pref": { + "nullable": true, + "description": "Local preference indicating priority within and across protocols.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "port_settings_id": { "description": "The port settings object this route configuration belongs to.", "type": "string", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 1241248a5e..6459595b65 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -4379,6 +4379,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 757383897b..5041fb5e56 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -3062,6 +3062,14 @@ } ] }, + "local_pref": { + "nullable": true, + "description": "The local preference associated with this route.", + "default": null, + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", diff --git a/package-manifest.toml b/package-manifest.toml index 0822225837..0f42025fba 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -628,10 +628,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" +source.commit = "73e63eaae3fe616bd7c48a20c69736d7e025836b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "b0f08e754f7c834d7ca05093b13a574863f500cff56210591ef4cc7eaf20159b" +source.sha256 = "6b2b5b5fed0c8ea36d78138d8d9bb455e8768ae61e7443985ddea48535cfc2da" output.type = "tarball" [package.mg-ddm] @@ -644,10 +644,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" +source.commit = "73e63eaae3fe616bd7c48a20c69736d7e025836b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "499962b57404626aff1ecd62d5045ba2ee06070d45f7cb2a8fc284e53eed17d6" +source.sha256 = "725a5b1eeed5bc34ad5473cb54b8df4b3993f3ed3808cc50304696082e490a4a" output.type = "zone" output.intermediate_only = true @@ -659,10 +659,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" +source.commit = "73e63eaae3fe616bd7c48a20c69736d7e025836b" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt -source.sha256 = "e15db7d262b5b2f08a2e2799668c67d0cb883e84c72736a30d299688115bf055" +source.sha256 = "1f9833ce2d38bdb57099c3f7e7e9f2c414b17492fe0a3574e043b65756b78192" output.type = "zone" output.intermediate_only = true diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index ddc399d282..d0eba7847e 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2715,6 +2715,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.switch_port_settings_route_config ( dst INET, gw INET, vid INT4, + local_pref INT8, /* TODO https://github.com/oxidecomputer/omicron/issues/3013 */ PRIMARY KEY (port_settings_id, interface_name, dst, gw) @@ -4217,7 +4218,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '87.0.0', NULL) + (TRUE, NOW(), NOW(), '88.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/route-local-pref/up.sql b/schema/crdb/route-local-pref/up.sql new file mode 100644 index 0000000000..d1051ccd0c --- /dev/null +++ b/schema/crdb/route-local-pref/up.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.switch_port_settings_route_config ADD COLUMN IF NOT EXISTS local_pref INT8; diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index a3d3425870..f8dfb935ce 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -894,6 +894,16 @@ } ] }, + "local_pref": { + "description": "The local preference associated with this route.", + "default": null, + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0.0 + }, "nexthop": { "description": "The nexthop/gateway address.", "type": "string", diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 
95a1f873f6..abc88d67c1 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -631,7 +631,8 @@ impl<'a> EarlyNetworkSetup<'a> { IpAddr::V6(_) => continue, }; let vlan_id = r.vlan_id; - let sr = StaticRoute4 { nexthop, prefix, vlan_id }; + let local_pref = r.local_pref; + let sr = StaticRoute4 { nexthop, prefix, vlan_id, local_pref }; rq.routes.list.push(sr); } } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index bead95be80..2505985101 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -757,6 +757,7 @@ impl ServiceInner { destination: r.destination, nexthop: r.nexthop, vlan_id: r.vlan_id, + local_pref: r.local_pref, }) .collect(), addresses: config diff --git a/sled-agent/tests/integration_tests/early_network.rs b/sled-agent/tests/integration_tests/early_network.rs index 6fa91e0e4a..8da67729da 100644 --- a/sled-agent/tests/integration_tests/early_network.rs +++ b/sled-agent/tests/integration_tests/early_network.rs @@ -126,6 +126,7 @@ fn current_config_example() -> (&'static str, EarlyNetworkConfig) { destination: "10.1.9.32/16".parse().unwrap(), nexthop: "10.1.9.32".parse().unwrap(), vlan_id: None, + local_pref: None, }], addresses: vec!["2001:db8::/96".parse().unwrap()], switch: SwitchLocation::Switch0, diff --git a/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json b/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json index efd1a3c167..7df143d41d 100644 --- a/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json +++ b/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json @@ -128,7 +128,8 @@ { "destination": "0.0.0.0/0", "nexthop": "172.20.15.33", - "vlan_id": null + "vlan_id": null, + "local_pref": null } ], "addresses": [ @@ -149,7 +150,8 @@ { "destination": "0.0.0.0/0", "nexthop": "172.20.15.33", - "vlan_id": null + "vlan_id": null, + "local_pref": null } 
], "addresses": [ diff --git a/sled-agent/types/src/early_networking.rs b/sled-agent/types/src/early_networking.rs index dc93aa1300..c4afbd0adb 100644 --- a/sled-agent/types/src/early_networking.rs +++ b/sled-agent/types/src/early_networking.rs @@ -322,6 +322,8 @@ pub mod back_compat { pub uplink_cidr: Ipv4Net, /// VLAN id to use for uplink pub uplink_vid: Option, + /// Local preference + pub local_pref: Option, } impl From for PortConfigV2 { @@ -331,6 +333,7 @@ pub mod back_compat { destination: "0.0.0.0/0".parse().unwrap(), nexthop: value.gateway_ip.into(), vlan_id: value.uplink_vid, + local_pref: value.local_pref, }], addresses: vec![UplinkAddressConfig { address: value.uplink_cidr.into(), @@ -472,6 +475,7 @@ mod tests { uplink_port_fec: PortFec::None, uplink_cidr: "192.168.0.1/16".parse().unwrap(), uplink_vid: None, + local_pref: None, }], }), }; @@ -501,6 +505,7 @@ mod tests { destination: "0.0.0.0/0".parse().unwrap(), nexthop: uplink.gateway_ip.into(), vlan_id: None, + local_pref: None, }], addresses: vec![UplinkAddressConfig { address: uplink.uplink_cidr.into(), @@ -545,6 +550,7 @@ mod tests { destination: "0.0.0.0/0".parse().unwrap(), nexthop: "192.168.0.2".parse().unwrap(), vlan_id: None, + local_pref: None, }], addresses: vec!["192.168.0.1/16".parse().unwrap()], switch: SwitchLocation::Switch0, diff --git a/tools/generate-nexus-api.sh b/tools/generate-nexus-api.sh index a0c7d13165..9e3f8d63f6 100755 --- a/tools/generate-nexus-api.sh +++ b/tools/generate-nexus-api.sh @@ -1,4 +1,3 @@ #!/usr/bin/env bash ./target/debug/nexus nexus/examples/config.toml -O > openapi/nexus.json -./target/debug/nexus nexus/examples/config.toml -I > openapi/nexus-internal.json diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index c1e011e38d..3b07ab4e61 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" 
+COMMIT="73e63eaae3fe616bd7c48a20c69736d7e025836b" SHA2="007bfb717ccbc077c0250dee3121aeb0c5bb0d1c16795429a514fa4f8635a5ef" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 1184f6e4fd..691df704d7 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="0c4292fe5b3c8ac27d99b5a4502d595acdbf7441" -SHA2="e4b42ab9daad90f0c561a830b62a9d17e294b4d0da0a6d44b4030929b0c37b7e" +COMMIT="73e63eaae3fe616bd7c48a20c69736d7e025836b" +SHA2="34536d8f55fc054d0b8114b5654b38c968099aafc7770562e04d405168f5be95" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 7ca642fa70..f54745b92a 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="e15db7d262b5b2f08a2e2799668c67d0cb883e84c72736a30d299688115bf055" -MGD_LINUX_SHA256="915e7b5cac8ff1deb6549b86e4ba49fd5c6adbdcc56ae5dc3c7b3e69555a7c2c" \ No newline at end of file +CIDL_SHA256="1f9833ce2d38bdb57099c3f7e7e9f2c414b17492fe0a3574e043b65756b78192" +MGD_LINUX_SHA256="3c47a55af8daa4dc2cd7da5ecc3c5043cef5e6890b60d070a2e8672101cdbd30" \ No newline at end of file diff --git a/wicket-common/src/example.rs b/wicket-common/src/example.rs index bb70273b45..63d12aea6d 100644 --- a/wicket-common/src/example.rs +++ b/wicket-common/src/example.rs @@ -176,6 +176,7 @@ impl ExampleRackSetupData { destination: "0.0.0.0/0".parse().unwrap(), nexthop: "172.30.0.10".parse().unwrap(), vlan_id: Some(1), + local_pref: None, }], bgp_peers: switch0_port0_bgp_peers, uplink_port_speed: PortSpeed::Speed400G, @@ -192,6 +193,7 @@ impl ExampleRackSetupData { destination: "0.0.0.0/0".parse().unwrap(), nexthop: "172.33.0.10".parse().unwrap(), vlan_id: Some(1), + local_pref: None, }], bgp_peers: switch1_port0_bgp_peers, uplink_port_speed: PortSpeed::Speed400G, diff --git a/wicket/src/cli/rack_setup/config_toml.rs b/wicket/src/cli/rack_setup/config_toml.rs index 68485815a8..198c740754 100644 --- 
a/wicket/src/cli/rack_setup/config_toml.rs +++ b/wicket/src/cli/rack_setup/config_toml.rs @@ -327,13 +327,16 @@ fn populate_uplink_table(cfg: &UserSpecifiedPortConfig) -> Table { // routes = [] let mut routes_out = Array::new(); for r in routes { - let RouteConfig { destination, nexthop, vlan_id } = r; + let RouteConfig { destination, nexthop, vlan_id, local_pref } = r; let mut route = InlineTable::new(); route.insert("nexthop", string_value(nexthop)); route.insert("destination", string_value(destination)); if let Some(vlan_id) = vlan_id { route.insert("vlan_id", i64_value(i64::from(*vlan_id))); } + if let Some(local_pref) = local_pref { + route.insert("local_pref", i64_value(i64::from(*local_pref))); + } routes_out.push(Value::InlineTable(route)); } uplink.insert("routes", Item::Value(Value::Array(routes_out))); diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs index 7bb63b6b1b..76a240e981 100644 --- a/wicket/src/ui/panes/rack_setup.rs +++ b/wicket/src/ui/panes/rack_setup.rs @@ -771,7 +771,8 @@ fn rss_config_text<'a>( ]; let routes = routes.iter().map(|r| { - let RouteConfig { destination, nexthop, vlan_id } = r; + let RouteConfig { destination, nexthop, vlan_id, local_pref } = + r; let mut items = vec![ Span::styled(" • Route : ", label_style), @@ -787,6 +788,13 @@ fn rss_config_text<'a>( Span::styled(")", label_style), ]); } + if let Some(local_pref) = local_pref { + items.extend([ + Span::styled(" (local_pref=", label_style), + Span::styled(local_pref.to_string(), ok_style), + Span::styled(")", label_style), + ]); + } items }); diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index c6f2dd5892..cb40d56dd6 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -703,6 +703,7 @@ fn build_port_config( destination: r.destination, nexthop: r.nexthop, vlan_id: r.vlan_id, + local_pref: r.local_pref, }) .collect(), addresses: config From 5a63771941a629ef17b9234495bee4aebe78099c Mon Sep 17 00:00:00 2001 
From: Ryan Goodfellow Date: Wed, 21 Aug 2024 00:54:54 -0700 Subject: [PATCH 50/51] API for showing exported prefixes (#6397) --- Cargo.lock | 4 +- Cargo.toml | 4 +- common/src/api/external/mod.rs | 10 +++ nexus/src/app/bgp.rs | 73 +++++++++++++++++++++- nexus/src/external_api/http_entrypoints.rs | 31 ++++++++- nexus/tests/integration_tests/endpoints.rs | 11 ++++ nexus/tests/output/nexus_tags.txt | 1 + openapi/nexus.json | 46 ++++++++++++++ package-manifest.toml | 12 ++-- tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 4 +- tools/maghemite_mgd_checksums | 4 +- 12 files changed, 182 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 91617c2eb6..f8699e62d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1786,7 +1786,7 @@ dependencies = [ [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=73e63eaae3fe616bd7c48a20c69736d7e025836b#73e63eaae3fe616bd7c48a20c69736d7e025836b" +source = "git+https://github.com/oxidecomputer/maghemite?rev=9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42#9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" dependencies = [ "oxnet", "percent-encoding", @@ -4697,7 +4697,7 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=73e63eaae3fe616bd7c48a20c69736d7e025836b#73e63eaae3fe616bd7c48a20c69736d7e025836b" +source = "git+https://github.com/oxidecomputer/maghemite?rev=9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42#9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 413990c9a8..cfb097ef3c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -408,8 +408,8 @@ macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" mockall = "0.13" newtype_derive = "0.1.6" -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "73e63eaae3fe616bd7c48a20c69736d7e025836b" } -ddm-admin-client = { 
git = "https://github.com/oxidecomputer/maghemite", rev = "73e63eaae3fe616bd7c48a20c69736d7e025836b" } +mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" } multimap = "0.10.0" nexus-auth = { path = "nexus/auth" } nexus-client = { path = "clients/nexus-client" } diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 986f45bfd1..f3f5372749 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -23,6 +23,7 @@ pub use dropshot::PaginationOrder; pub use error::*; use futures::stream::BoxStream; use oxnet::IpNet; +use oxnet::Ipv4Net; use parse_display::Display; use parse_display::FromStr; use rand::thread_rng; @@ -2708,6 +2709,15 @@ pub struct BgpPeerStatus { pub switch: SwitchLocation, } +/// The routes currently exported to BGP peers. +#[derive( + Clone, Debug, Deserialize, JsonSchema, Serialize, PartialEq, Default, +)] +pub struct BgpExported { + /// Exported routes indexed by peer address. + pub exports: HashMap<String, Vec<Ipv4Net>>, +} + /// Opaque object representing BGP message history for a given BGP peer. The /// contents of this object are not yet stable. 
#[derive(Clone, Debug, Deserialize, Serialize)] diff --git a/nexus/src/app/bgp.rs b/nexus/src/app/bgp.rs index 118011500a..d192f1ccf9 100644 --- a/nexus/src/app/bgp.rs +++ b/nexus/src/app/bgp.rs @@ -9,8 +9,9 @@ use nexus_db_model::{BgpAnnounceSet, BgpAnnouncement, BgpConfig}; use nexus_db_queries::context::OpContext; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::{ - self, BgpImportedRouteIpv4, BgpMessageHistory, BgpPeerStatus, CreateResult, - DeleteResult, ListResultVec, LookupResult, NameOrId, SwitchBgpHistory, + self, BgpExported, BgpImportedRouteIpv4, BgpMessageHistory, BgpPeerStatus, + CreateResult, DeleteResult, ListResultVec, LookupResult, NameOrId, + SwitchBgpHistory, }; use std::net::IpAddr; @@ -145,6 +146,74 @@ impl super::Nexus { Ok(result) } + pub async fn bgp_exported( + &self, + opctx: &OpContext, + ) -> LookupResult<BgpExported> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + let mut result = BgpExported::default(); + for (switch, client) in &self.mg_clients().await.map_err(|e| { + external::Error::internal_error(&format!( + "failed to get mg clients: {e}" + )) + })? 
{ + let router_info = match client.read_routers().await { + Ok(result) => result.into_inner(), + Err(e) => { + error!( + self.log, + "failed to get routers from {switch}: {e}" + ); + continue; + } + }; + for r in &router_info { + let asn = r.asn; + + let exported = match client + .get_exported(&mg_admin_client::types::AsnSelector { asn }) + .await + { + Ok(result) => result.into_inner(), + Err(e) => { + error!( + self.log, + "failed to get exports for asn {asn} from {switch}: {e}" + ); + continue; + } + }; + for (addr, exports) in exported { + let mut xps = Vec::new(); + for ex in exports.iter() { + let net = match ex { + mg_admin_client::types::Prefix::V4(v4) => { + oxnet::Ipv4Net::new_unchecked( + v4.value, v4.length, + ) + } + mg_admin_client::types::Prefix::V6(v6) => { + let v6 = oxnet::IpNet::V6( + oxnet::Ipv6Net::new_unchecked( + v6.value, v6.length, + ), + ); + warn!( + self.log, + "{v6}: ipv6 exports not supported yet" + ); + continue; + } + }; + xps.push(net); + } + result.exports.insert(addr.to_string(), xps); + } + } + } + Ok(result) + } + pub async fn bgp_message_history( &self, opctx: &OpContext, diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 5b80c973e3..015fe11e3a 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -41,6 +41,7 @@ use nexus_db_queries::db::lookup::ImageLookup; use nexus_db_queries::db::lookup::ImageParentLookup; use nexus_db_queries::db::model::Name; use nexus_types::external_api::shared::{BfdStatus, ProbeInfo}; +use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::marker_for_name; use omicron_common::api::external::http_pagination::marker_for_name_or_id; use omicron_common::api::external::http_pagination::name_or_id_pagination; @@ -55,9 +56,11 @@ use omicron_common::api::external::http_pagination::ScanParams; use 
omicron_common::api::external::AddressLot; use omicron_common::api::external::AddressLotBlock; use omicron_common::api::external::AddressLotCreateResponse; +use omicron_common::api::external::AggregateBgpMessageHistory; use omicron_common::api::external::BgpAnnounceSet; use omicron_common::api::external::BgpAnnouncement; use omicron_common::api::external::BgpConfig; +use omicron_common::api::external::BgpExported; use omicron_common::api::external::BgpImportedRouteIpv4; use omicron_common::api::external::BgpPeerStatus; use omicron_common::api::external::DataPageParams; @@ -78,9 +81,6 @@ use omicron_common::api::external::TufRepoGetResponse; use omicron_common::api::external::TufRepoInsertResponse; use omicron_common::api::external::VpcFirewallRuleUpdateParams; use omicron_common::api::external::VpcFirewallRules; -use omicron_common::api::external::{ - http_pagination::data_page_params_for, AggregateBgpMessageHistory, -}; use omicron_common::bail_unless; use omicron_uuid_kinds::GenericUuid; use parse_display::Display; @@ -277,6 +277,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(networking_bgp_config_create)?; api.register(networking_bgp_config_list)?; api.register(networking_bgp_status)?; + api.register(networking_bgp_exported)?; api.register(networking_bgp_imported_routes_ipv4)?; api.register(networking_bgp_config_delete)?; api.register(networking_bgp_announce_set_update)?; @@ -3937,6 +3938,30 @@ async fn networking_bgp_status( .await } +//TODO pagination? 
the normal by-name/by-id stuff does not work here +/// Get BGP exported routes +#[endpoint { + method = GET, + path = "/v1/system/networking/bgp-exported", + tags = ["system/networking"], +}] +async fn networking_bgp_exported( + rqctx: RequestContext, +) -> Result<HttpResponseOk<BgpExported>, HttpError> { + let apictx = rqctx.context(); + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let handler = async { + let nexus = &apictx.context.nexus; + let result = nexus.bgp_exported(&opctx).await?; + Ok(HttpResponseOk(result)) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await +} + /// Get BGP router message history #[endpoint { method = GET, diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 9097082a20..381d59e073 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -587,6 +587,8 @@ pub static DEMO_BGP_ANNOUNCE: Lazy = }); pub const DEMO_BGP_STATUS_URL: &'static str = "/v1/system/networking/bgp-status"; +pub const DEMO_BGP_EXPORTED_URL: &'static str = + "/v1/system/networking/bgp-exported"; pub const DEMO_BGP_ROUTES_IPV4_URL: &'static str = "/v1/system/networking/bgp-routes-ipv4?asn=47"; pub const DEMO_BGP_MESSAGE_HISTORY_URL: &'static str = @@ -2307,6 +2309,15 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { ], }, + VerifyEndpoint { + url: &DEMO_BGP_EXPORTED_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::GetNonexistent, + ], + }, + VerifyEndpoint { url: &DEMO_BGP_ROUTES_IPV4_URL, visibility: Visibility::Public, diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 340d72569b..053f56cf5c 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -184,6 +184,7 @@ networking_bgp_announce_set_update PUT /v1/system/networking/bgp-anno 
networking_bgp_config_create 
POST /v1/system/networking/bgp networking_bgp_config_delete DELETE /v1/system/networking/bgp networking_bgp_config_list GET /v1/system/networking/bgp +networking_bgp_exported GET /v1/system/networking/bgp-exported networking_bgp_imported_routes_ipv4 GET /v1/system/networking/bgp-routes-ipv4 networking_bgp_message_history GET /v1/system/networking/bgp-message-history networking_bgp_status GET /v1/system/networking/bgp-status diff --git a/openapi/nexus.json b/openapi/nexus.json index e622239fa2..f6ba231c02 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -6640,6 +6640,33 @@ } } }, + "/v1/system/networking/bgp-exported": { + "get": { + "tags": [ + "system/networking" + ], + "summary": "Get BGP exported routes", + "operationId": "networking_bgp_exported", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BgpExported" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/system/networking/bgp-message-history": { "get": { "tags": [ @@ -10351,6 +10378,25 @@ "items" ] }, + "BgpExported": { + "description": "The routes currently exported to BGP peers.", + "type": "object", + "properties": { + "exports": { + "description": "Exported routes indexed by peer address.", + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Ipv4Net" + } + } + } + }, + "required": [ + "exports" + ] + }, "BgpImportedRouteIpv4": { "description": "A route imported from a BGP peer.", "type": "object", diff --git a/package-manifest.toml b/package-manifest.toml index 0f42025fba..e846e9da31 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -628,10 +628,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. 
Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "73e63eaae3fe616bd7c48a20c69736d7e025836b" +source.commit = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "6b2b5b5fed0c8ea36d78138d8d9bb455e8768ae61e7443985ddea48535cfc2da" +source.sha256 = "c53a87b6c08323ea58c1604e3db24df061b9ee457e7d2b1dc6168abda4a686bc" output.type = "tarball" [package.mg-ddm] @@ -644,10 +644,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "73e63eaae3fe616bd7c48a20c69736d7e025836b" +source.commit = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "725a5b1eeed5bc34ad5473cb54b8df4b3993f3ed3808cc50304696082e490a4a" +source.sha256 = "00b2433504cb4c984163c5cdfd455eee595858b125a29deadaa791628668e384" output.type = "zone" output.intermediate_only = true @@ -659,10 +659,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "73e63eaae3fe616bd7c48a20c69736d7e025836b" +source.commit = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt -source.sha256 = "1f9833ce2d38bdb57099c3f7e7e9f2c414b17492fe0a3574e043b65756b78192" +source.sha256 = "67856e05347304523b03e7ddfbd7ec18e833b7bf291f39105d5d8c4c5c383392" output.type = "zone" output.intermediate_only = true diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 3b07ab4e61..8c549b3eb5 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="73e63eaae3fe616bd7c48a20c69736d7e025836b" +COMMIT="9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" SHA2="007bfb717ccbc077c0250dee3121aeb0c5bb0d1c16795429a514fa4f8635a5ef" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 691df704d7..7befaae8db 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="73e63eaae3fe616bd7c48a20c69736d7e025836b" -SHA2="34536d8f55fc054d0b8114b5654b38c968099aafc7770562e04d405168f5be95" +COMMIT="9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" +SHA2="5b327f213f8f341cf9072d428980f53757b2c6383f684ac80bbccfb1984ffe5f" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index f54745b92a..e361263531 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="1f9833ce2d38bdb57099c3f7e7e9f2c414b17492fe0a3574e043b65756b78192" -MGD_LINUX_SHA256="3c47a55af8daa4dc2cd7da5ecc3c5043cef5e6890b60d070a2e8672101cdbd30" \ No newline at end of file +CIDL_SHA256="67856e05347304523b03e7ddfbd7ec18e833b7bf291f39105d5d8c4c5c383392" +MGD_LINUX_SHA256="6e37daa25ddb8310a4dd215db590bbd18999d55decf0f8a9baf7b919cf101c52" \ No newline at end of file From 18ee28341d9c4eed80fe9966e7084e793022221b Mon Sep 17 
00:00:00 2001 From: "oxide-reflector-bot[bot]" <130185838+oxide-reflector-bot[bot]@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:11:44 +0000 Subject: [PATCH 51/51] Update maghemite to c92d6ff (#6282) Updated maghemite to commit c92d6ff. --------- Co-authored-by: reflector[bot] <130185838+reflector[bot]@users.noreply.github.com> --- package-manifest.toml | 12 ++++++------ tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 2 +- tools/maghemite_mgd_checksums | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/package-manifest.toml b/package-manifest.toml index e846e9da31..95017ca653 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -628,10 +628,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" +source.commit = "c92d6ff85db8992066f49da176cf686acfd8fe0f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "c53a87b6c08323ea58c1604e3db24df061b9ee457e7d2b1dc6168abda4a686bc" +source.sha256 = "c33915998894dd36a2d1078f7e13717aa20760924c30640d7647d4791dd5f2ee" output.type = "tarball" [package.mg-ddm] @@ -644,10 +644,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" +source.commit = "c92d6ff85db8992066f49da176cf686acfd8fe0f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "00b2433504cb4c984163c5cdfd455eee595858b125a29deadaa791628668e384" +source.sha256 = "be9d657ec22a69468b18f2b4d48e55621538eade8b8d3e367a1d8d5cc686cfbe" output.type = "zone" output.intermediate_only = true @@ -659,10 +659,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" +source.commit = "c92d6ff85db8992066f49da176cf686acfd8fe0f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt -source.sha256 = "67856e05347304523b03e7ddfbd7ec18e833b7bf291f39105d5d8c4c5c383392" +source.sha256 = "e000485f7e04ac1cf9b3532b60bcf23598ab980331ba4f1c6788a7e95c1e9ef8" output.type = "zone" output.intermediate_only = true diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 8c549b3eb5..0c223c85a8 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" +COMMIT="c92d6ff85db8992066f49da176cf686acfd8fe0f" SHA2="007bfb717ccbc077c0250dee3121aeb0c5bb0d1c16795429a514fa4f8635a5ef" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 7befaae8db..0db6a3b63d 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="9e0fe45ca3862176dc31ad8cc83f605f8a7e1a42" +COMMIT="c92d6ff85db8992066f49da176cf686acfd8fe0f" SHA2="5b327f213f8f341cf9072d428980f53757b2c6383f684ac80bbccfb1984ffe5f" 
diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index e361263531..2e180a83db 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="67856e05347304523b03e7ddfbd7ec18e833b7bf291f39105d5d8c4c5c383392" -MGD_LINUX_SHA256="6e37daa25ddb8310a4dd215db590bbd18999d55decf0f8a9baf7b919cf101c52" \ No newline at end of file +CIDL_SHA256="e000485f7e04ac1cf9b3532b60bcf23598ab980331ba4f1c6788a7e95c1e9ef8" +MGD_LINUX_SHA256="1c3d93bbfbe4ce97af7cb81c13e42a2eea464e18de6827794a55d5bfd971b66c" \ No newline at end of file