From 5d696215199ebc042924abbbc9235d6d78f0aa64 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Mon, 14 Oct 2024 17:30:11 -0500 Subject: [PATCH] project-scoped timeseries endpoint + tests --- nexus/external-api/output/nexus_tags.txt | 1 + nexus/external-api/src/lib.rs | 14 ++ nexus/src/app/metrics.rs | 37 +++++ nexus/src/external_api/http_entrypoints.rs | 27 ++++ nexus/tests/integration_tests/endpoints.rs | 23 ++- nexus/tests/integration_tests/metrics.rs | 160 ++++++++++++++++++++- openapi/nexus.json | 49 +++++++ 7 files changed, 304 insertions(+), 7 deletions(-) diff --git a/nexus/external-api/output/nexus_tags.txt b/nexus/external-api/output/nexus_tags.txt index 3cc496b76e4..a8cff1db858 100644 --- a/nexus/external-api/output/nexus_tags.txt +++ b/nexus/external-api/output/nexus_tags.txt @@ -73,6 +73,7 @@ login_saml POST /login/{silo_name}/saml/{provi API operations found with tag "metrics" OPERATION ID METHOD URL PATH silo_metric GET /v1/metrics/{metric_name} +timeseries_query POST /v1/timeseries/query API operations found with tag "policy" OPERATION ID METHOD URL PATH diff --git a/nexus/external-api/src/lib.rs b/nexus/external-api/src/lib.rs index abb718427f7..f3cac8b211d 100644 --- a/nexus/external-api/src/lib.rs +++ b/nexus/external-api/src/lib.rs @@ -2567,6 +2567,20 @@ pub trait NexusExternalApi { body: TypedBody, ) -> Result, HttpError>; + /// Run project-scoped timeseries query + /// + /// Queries are written in OxQL. Queries can only refer to timeseries data from the specified project. + #[endpoint { + method = POST, + path = "/v1/timeseries/query", + tags = ["metrics"], + }] + async fn timeseries_query( + rqctx: RequestContext, + query_params: Query, + body: TypedBody, + ) -> Result, HttpError>; + // Updates /// Upload TUF repository diff --git a/nexus/src/app/metrics.rs b/nexus/src/app/metrics.rs index 40f7882281c..9e5db02cc3f 100644 --- a/nexus/src/app/metrics.rs +++ b/nexus/src/app/metrics.rs @@ -164,4 +164,41 @@ impl super::Nexus { _ => Error::InternalError { internal_message: e.to_string() }, }) } + + /// Run an OxQL query against the timeseries database, scoped to a specific project. + pub(crate) async fn timeseries_query_project( + &self, + _opctx: &OpContext, + project_lookup: &lookup::Project<'_>, + query: impl AsRef, + ) -> Result, Error> { + // Ensure the user has read access to the project + let (authz_silo, authz_project) = + project_lookup.lookup_for(authz::Action::Read).await?; + + // Ensure the query only refers to the project + let filtered_query = format!( + "{} | filter silo_id == \"{}\" && project_id == \"{}\"", + query.as_ref(), + authz_silo.id(), + authz_project.id() + ); + + self.timeseries_client + .oxql_query(filtered_query) + .await + .map(|result| result.tables) + .map_err(|e| match e { + oximeter_db::Error::DatabaseUnavailable(_) => { + Error::ServiceUnavailable { + internal_message: e.to_string(), + } + } + oximeter_db::Error::Oxql(_) + | oximeter_db::Error::TimeseriesNotFound(_) => { + Error::invalid_request(e.to_string()) + } + _ => Error::InternalError { internal_message: e.to_string() }, + }) + } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a2855424426..740895b7e44 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -5544,6 +5544,33 @@ impl NexusExternalApi for NexusExternalApiImpl { .await } + async fn timeseries_query( + rqctx: RequestContext, + query_params: Query, + body: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let project_selector = query_params.into_inner(); + let query = body.into_inner().query; + let project_lookup = + nexus.project_lookup(&opctx, project_selector)?; + nexus + .timeseries_query_project(&opctx, &project_lookup, &query) + .await + .map(|tables| HttpResponseOk(views::OxqlQueryResult { tables })) + .map_err(HttpError::from) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + // Updates async fn system_update_put_repository( diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 8a7dff01823..5344c88480f 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -946,10 +946,14 @@ pub static DEMO_SILO_METRICS_URL: Lazy = Lazy::new(|| { ) }); -pub static TIMESERIES_LIST_URL: Lazy = +pub static TIMESERIES_QUERY_URL: Lazy = Lazy::new(|| { + format!("/v1/timeseries/query?project={}", *DEMO_PROJECT_NAME) +}); + +pub static SYSTEM_TIMESERIES_LIST_URL: Lazy = Lazy::new(|| String::from("/v1/system/timeseries/schema")); -pub static TIMESERIES_QUERY_URL: Lazy = +pub static SYSTEM_TIMESERIES_QUERY_URL: Lazy = Lazy::new(|| String::from("/v1/system/timeseries/query")); pub static DEMO_TIMESERIES_QUERY: Lazy = @@ -2206,7 +2210,18 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { }, VerifyEndpoint { - url: &TIMESERIES_LIST_URL, + url: &TIMESERIES_QUERY_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&*DEMO_TIMESERIES_QUERY).unwrap() + ), + ], + }, + + VerifyEndpoint { + url: &SYSTEM_TIMESERIES_LIST_URL, visibility: Visibility::Public, unprivileged_access: UnprivilegedAccess::None, allowed_methods: vec![ @@ -2215,7 +2230,7 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { }, VerifyEndpoint { - url: &TIMESERIES_QUERY_URL, + url: &SYSTEM_TIMESERIES_QUERY_URL, visibility: Visibility::Public, unprivileged_access: UnprivilegedAccess::None, allowed_methods: vec![ diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index 89d81ee5dc2..8ce30a642c6 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -9,15 +9,19 @@ use crate::integration_tests::instances::{ }; use chrono::Utc; use dropshot::test_util::ClientTestContext; -use dropshot::ResultsPage; +use dropshot::{HttpErrorResponseBody, ResultsPage}; use http::{Method, StatusCode}; +use nexus_auth::authn::USER_TEST_UNPRIVILEGED; +use nexus_db_queries::db::identity::Asset; +use nexus_test_utils::background::activate_background_task; use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; use nexus_test_utils::resource_helpers::{ create_default_ip_pool, create_disk, create_instance, create_project, - objects_list_page_authz, DiskTest, + grant_iam, object_create_error, objects_list_page_authz, DiskTest, }; use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::shared::ProjectRole; use nexus_types::external_api::views::OxqlQueryResult; use nexus_types::silo::DEFAULT_SILO_ID; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; @@ -287,6 +291,28 @@ async fn test_timeseries_schema_list( pub async fn timeseries_query( cptestctx: &ControlPlaneTestContext, query: impl ToString, +) -> Vec { + execute_timeseries_query(cptestctx, "/v1/system/timeseries/query", query) + .await +} + +pub async fn project_timeseries_query( + cptestctx: &ControlPlaneTestContext, + project: &str, + query: impl ToString, +) -> Vec { + execute_timeseries_query( + cptestctx, + &format!("/v1/timeseries/query?project={}", project), + query, + ) + .await +} + +async fn execute_timeseries_query( + cptestctx: &ControlPlaneTestContext, + endpoint: &str, + query: impl ToString, ) -> Vec { // first, make sure the latest timeseries have been collected. cptestctx.oximeter.force_collect().await; @@ -300,7 +326,7 @@ pub async fn timeseries_query( nexus_test_utils::http_testing::RequestBuilder::new( &cptestctx.external_client, http::Method::POST, - "/v1/system/timeseries/query", + endpoint, ) .body(Some(&body)), ) @@ -527,6 +553,134 @@ async fn test_instance_watcher_metrics( assert_gte!(ts2_running, 2); } +#[nexus_test] +async fn test_project_timeseries_query( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; // needed for instance create to work + + // Create two projects + let p1 = create_project(&client, "project1").await; + let _p2 = create_project(&client, "project2").await; + + // Create resources in each project + let i1 = create_instance(&client, "project1", "instance1").await; + let _i2 = create_instance(&client, "project2", "instance2").await; + + let internal_client = &cptestctx.internal_client; + + // get the instance metrics to show up + let _ = + activate_background_task(&internal_client, "instance_watcher").await; + + // Query with no project specified + let q1 = "get virtual_machine:check"; + + let result = project_timeseries_query(&cptestctx, "project1", q1).await; + assert_eq!(result.len(), 1); + assert!(result[0].timeseries().len() > 0); + + // also works with project ID + let result = + project_timeseries_query(&cptestctx, &p1.identity.id.to_string(), q1) + .await; + assert_eq!(result.len(), 1); + assert!(result[0].timeseries().len() > 0); + + let result = project_timeseries_query(&cptestctx, "project2", q1).await; + assert_eq!(result.len(), 1); + assert!(result[0].timeseries().len() > 0); + + // with project specified + let q2 = &format!("{} | filter project_id == \"{}\"", q1, p1.identity.id); + + let result = project_timeseries_query(&cptestctx, "project1", q2).await; + assert_eq!(result.len(), 1); + assert!(result[0].timeseries().len() > 0); + + let result = project_timeseries_query(&cptestctx, "project2", q2).await; + assert_eq!(result.len(), 1); + assert_eq!(result[0].timeseries().len(), 0); + + // with instance specified + let q3 = &format!("{} | filter instance_id == \"{}\"", q1, i1.identity.id); + + // project containing instance gives me something + let result = project_timeseries_query(&cptestctx, "project1", q3).await; + assert_eq!(result.len(), 1); + assert_eq!(result[0].timeseries().len(), 1); + + // should be empty or error + let result = project_timeseries_query(&cptestctx, "project2", q3).await; + assert_eq!(result.len(), 1); + assert_eq!(result[0].timeseries().len(), 0); + + // expect error when querying a metric that has no project_id on it + let q4 = "get integration_target:integration_metric"; + let url = "/v1/timeseries/query?project=project1"; + let body = nexus_types::external_api::params::TimeseriesQuery { + query: q4.to_string(), + }; + let result = + object_create_error(client, url, &body, StatusCode::BAD_REQUEST).await; + assert_eq!(result.error_code.unwrap(), "InvalidRequest"); + // Notable that the error confirms that the metric exists and says what the + // fields are. This is helpful generally, but here it would be better if + // we could say something more like "you can't query this timeseries from + // this endpoint" + assert_eq!(result.message, "The filter expression contains identifiers that are not valid for its input timeseries. Invalid identifiers: [\"project_id\", \"silo_id\"], timeseries fields: {\"datum\", \"metric_name\", \"target_name\", \"timestamp\"}"); + + // nonexistent project + let url = "/v1/timeseries/query?project=nonexistent"; + let body = nexus_types::external_api::params::TimeseriesQuery { + query: q4.to_string(), + }; + let result = + object_create_error(client, url, &body, StatusCode::NOT_FOUND).await; + assert_eq!(result.message, "not found: project with name \"nonexistent\""); + + // unprivileged user gets 404 on project that exists, but which they can't read + let url = "/v1/timeseries/query?project=project1"; + let body = nexus_types::external_api::params::TimeseriesQuery { + query: q1.to_string(), + }; + + let request = RequestBuilder::new(client, Method::POST, url) + .body(Some(&body)) + .expect_status(Some(StatusCode::NOT_FOUND)); + let result = NexusRequest::new(request) + .authn_as(AuthnMode::UnprivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::() + .unwrap(); + assert_eq!(result.message, "not found: project with name \"project1\""); + + // now grant the user access to that project only + grant_iam( + client, + "/v1/projects/project1", + ProjectRole::Viewer, + USER_TEST_UNPRIVILEGED.id(), + AuthnMode::PrivilegedUser, + ) + .await; + + // now they can access the timeseries. how cool is that + let request = RequestBuilder::new(client, Method::POST, url) + .body(Some(&body)) + .expect_status(Some(StatusCode::OK)); + let result = NexusRequest::new(request) + .authn_as(AuthnMode::UnprivilegedUser) + .execute_and_parse_unwrap::() + .await; + assert_eq!(result.tables.len(), 1); + assert_eq!(result.tables[0].timeseries().len(), 1); +} + #[nexus_test] async fn test_mgs_metrics( cptestctx: &ControlPlaneTestContext, diff --git a/openapi/nexus.json b/openapi/nexus.json index 60a623631dc..5105663ac7a 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -8890,6 +8890,55 @@ } } }, + "/v1/timeseries/query": { + "post": { + "tags": [ + "metrics" + ], + "summary": "Run project-scoped timeseries query", + "description": "Queries are written in OxQL. Queries can only refer to timeseries data from the specified project.", + "operationId": "timeseries_query", + "parameters": [ + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TimeseriesQuery" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OxqlQueryResult" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/users": { "get": { "tags": [