diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index a468fa23d5..33cf7e2073 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -297,10 +297,44 @@ async fn test_timeseries_schema_list( .expect("Failed to find HTTP request latency histogram schema"); } -pub async fn timeseries_query( +/// Run an OxQL query until it succeeds or panics. +pub async fn timeseries_query_until_success( cptestctx: &ControlPlaneTestContext, query: impl ToString, ) -> Vec { + const POLL_INTERVAL: Duration = Duration::from_secs(1); + const POLL_MAX: Duration = Duration::from_secs(30); + let query_ = query.to_string(); + wait_for_condition( + || async { + match timeseries_query(cptestctx, &query_).await { + Some(r) => Ok(r), + None => Err(CondCheckError::<()>::NotYet), + } + }, + &POLL_INTERVAL, + &POLL_MAX, + ) + .await + .unwrap_or_else(|_| { + panic!( + "Timeseries named in query are not available \ + after {:?}, query: '{}'", + POLL_MAX, + query.to_string(), + ) + }) +} + +/// Run an OxQL query. +/// +/// This returns `None` if the query resulted in client error and the body +/// indicates that a timeseries named in the query could not be found. In all +/// other cases, it either succeeds or panics. +pub async fn timeseries_query( + cptestctx: &ControlPlaneTestContext, + query: impl ToString, +) -> Option> { // first, make sure the latest timeseries have been collected. cptestctx .oximeter @@ -327,14 +361,29 @@ pub async fn timeseries_query( .unwrap_or_else(|e| { panic!("timeseries query failed: {e:?}\nquery: {query}") }); - rsp.parsed_body::() - .unwrap_or_else(|e| { - panic!( - "could not parse timeseries query response: {e:?}\n\ - query: {query}\nresponse: {rsp:#?}" - ); - }) - .tables + + // Check for a timeseries-not-found error specifically. + if rsp.status.is_client_error() { + let text = std::str::from_utf8(&rsp.body) + .expect("Timeseries query response body should be UTF-8"); + if text.contains("Schema for timeseries") && text.contains("not found") + { + return None; + } + } + + // Try to parse the query as usual, which will fail on other kinds of + // errors. + Some( + rsp.parsed_body::() + .unwrap_or_else(|e| { + panic!( + "could not parse timeseries query response: {e:?}\n\ + query: {query}\nresponse: {rsp:#?}" + ); + }) + .tables, + ) } #[nexus_test] @@ -441,7 +490,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await; + let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -457,7 +506,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await; + let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -474,7 +523,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await; + let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -499,7 +548,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await; + let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -528,7 +577,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await; + let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -714,7 +763,7 @@ async fn test_mgs_metrics( .try_force_collect() .await .expect("Could not force oximeter collection"); - let table = timeseries_query(&cptestctx, &query) + let table = timeseries_query_until_success(&cptestctx, &query) .await .into_iter() .find(|t| t.name() == name)