Skip to content

Commit

Permalink
Retry OxQL queries in integration tests (#7156)
Browse files Browse the repository at this point in the history
Adds a small `wait_for_condition()` loop to OxQL queries in integration
tests. This tries to catch the error where the query fails specifically
because the timeseries it names is not yet available, and return `None`
in these cases. We expect it to appear soon, so this isn't immediately
fatal.

Fixes #7154 and #7084.
  • Loading branch information
bnaecker authored Dec 3, 2024
1 parent 1e68e63 commit 9285a7c
Showing 1 changed file with 64 additions and 15 deletions.
79 changes: 64 additions & 15 deletions nexus/tests/integration_tests/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,10 +297,44 @@ async fn test_timeseries_schema_list(
.expect("Failed to find HTTP request latency histogram schema");
}

pub async fn timeseries_query(
/// Run an OxQL query until it succeeds or panics.
pub async fn timeseries_query_until_success(
cptestctx: &ControlPlaneTestContext<omicron_nexus::Server>,
query: impl ToString,
) -> Vec<oxql_types::Table> {
const POLL_INTERVAL: Duration = Duration::from_secs(1);
const POLL_MAX: Duration = Duration::from_secs(30);
let query_ = query.to_string();
wait_for_condition(
|| async {
match timeseries_query(cptestctx, &query_).await {
Some(r) => Ok(r),
None => Err(CondCheckError::<()>::NotYet),
}
},
&POLL_INTERVAL,
&POLL_MAX,
)
.await
.unwrap_or_else(|_| {
panic!(
"Timeseries named in query are not available \
after {:?}, query: '{}'",
POLL_MAX,
query.to_string(),
)
})
}

/// Run an OxQL query.
///
/// This returns `None` if the query resulted in client error and the body
/// indicates that a timeseries named in the query could not be found. In all
/// other cases, it either succeeds or panics.
pub async fn timeseries_query(
cptestctx: &ControlPlaneTestContext<omicron_nexus::Server>,
query: impl ToString,
) -> Option<Vec<oxql_types::Table>> {
// first, make sure the latest timeseries have been collected.
cptestctx
.oximeter
Expand All @@ -327,14 +361,29 @@ pub async fn timeseries_query(
.unwrap_or_else(|e| {
panic!("timeseries query failed: {e:?}\nquery: {query}")
});
rsp.parsed_body::<OxqlQueryResult>()
.unwrap_or_else(|e| {
panic!(
"could not parse timeseries query response: {e:?}\n\
query: {query}\nresponse: {rsp:#?}"
);
})
.tables

// Check for a timeseries-not-found error specifically.
if rsp.status.is_client_error() {
let text = std::str::from_utf8(&rsp.body)
.expect("Timeseries query response body should be UTF-8");
if text.contains("Schema for timeseries") && text.contains("not found")
{
return None;
}
}

// Try to parse the query as usual, which will fail on other kinds of
// errors.
Some(
rsp.parsed_body::<OxqlQueryResult>()
.unwrap_or_else(|e| {
panic!(
"could not parse timeseries query response: {e:?}\n\
query: {query}\nresponse: {rsp:#?}"
);
})
.tables,
)
}

#[nexus_test]
Expand Down Expand Up @@ -441,7 +490,7 @@ async fn test_instance_watcher_metrics(
// activate the instance watcher background task.
activate_instance_watcher().await;

let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await;
let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await;
let checks = metrics
.iter()
.find(|t| t.name() == "virtual_machine:check")
Expand All @@ -457,7 +506,7 @@ async fn test_instance_watcher_metrics(
// activate the instance watcher background task.
activate_instance_watcher().await;

let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await;
let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await;
let checks = metrics
.iter()
.find(|t| t.name() == "virtual_machine:check")
Expand All @@ -474,7 +523,7 @@ async fn test_instance_watcher_metrics(
// activate the instance watcher background task.
activate_instance_watcher().await;

let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await;
let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await;
let checks = metrics
.iter()
.find(|t| t.name() == "virtual_machine:check")
Expand All @@ -499,7 +548,7 @@ async fn test_instance_watcher_metrics(
// activate the instance watcher background task.
activate_instance_watcher().await;

let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await;
let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await;
let checks = metrics
.iter()
.find(|t| t.name() == "virtual_machine:check")
Expand Down Expand Up @@ -528,7 +577,7 @@ async fn test_instance_watcher_metrics(
// activate the instance watcher background task.
activate_instance_watcher().await;

let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await;
let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await;
let checks = metrics
.iter()
.find(|t| t.name() == "virtual_machine:check")
Expand Down Expand Up @@ -714,7 +763,7 @@ async fn test_mgs_metrics(
.try_force_collect()
.await
.expect("Could not force oximeter collection");
let table = timeseries_query(&cptestctx, &query)
let table = timeseries_query_until_success(&cptestctx, &query)
.await
.into_iter()
.find(|t| t.name() == name)
Expand Down

0 comments on commit 9285a7c

Please sign in to comment.