Skip to content

Commit

Permalink
Oximeter self-stat tests don't need to be based on time
Browse files Browse the repository at this point in the history
- Fixes #4657
- Oximeter self stat tests run a few collections, and verify that the
  relevant counters keep track of those. Previously, that was based on
  _time_, advancing time forward until the expected number of
  collections actually occur. There's nothing really guaranteeing that
  those collections do occur, since the tasks may not run exactly when
  we expect. This commit updates the tests so that the producer
  explicitly updates a shared counter every time they receive a request.
  The tests then reduce to asserting this counter equals the self-stat
  counter.
  • Loading branch information
bnaecker committed Dec 11, 2023
1 parent ed3671a commit efa2f49
Showing 1 changed file with 36 additions and 18 deletions.
54 changes: 36 additions & 18 deletions oximeter/collector/src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,8 @@ mod tests {
use std::net::Ipv6Addr;
use std::net::SocketAddr;
use std::net::SocketAddrV6;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::time::Duration;
use tokio::sync::oneshot;
use tokio::time::Instant;
Expand All @@ -667,7 +669,8 @@ mod tests {
// timers complete as expected.
const TICK_INTERVAL: Duration = Duration::from_millis(10);

// Total number of collection attempts.
// Total number of collection attempts, and the expected number of
// collections which fail in the "unreachability" test below.
const N_COLLECTIONS: u64 = 5;

// Period these tests wait using `tokio::time::advance()` before checking
Expand All @@ -677,6 +680,12 @@ mod tests {
+ COLLECTION_INTERVAL.as_millis() as u64 / 2,
);

// The number of actual successful test collections.
static N_SUCCESSFUL_COLLECTIONS: AtomicU64 = AtomicU64::new(0);

// The number of actual failed test collections.
static N_FAILED_COLLECTIONS: AtomicU64 = AtomicU64::new(0);

// Test that we count successful collections from a target correctly.
#[tokio::test]
async fn test_self_stat_collection_count() {
Expand All @@ -697,13 +706,11 @@ mod tests {
// will be no actual data here, but the sample counter will increment.
let addr =
SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0));
async fn handler(
_: Request<Body>,
) -> Result<Response<Body>, Infallible> {
Ok(Response::new(Body::from("[]")))
}
let make_svc = make_service_fn(|_conn| async {
Ok::<_, Infallible>(service_fn(handler))
Ok::<_, Infallible>(service_fn(|_: Request<Body>| async {
N_SUCCESSFUL_COLLECTIONS.fetch_add(1, Ordering::SeqCst);
Ok::<_, Infallible>(Response::new(Body::from("[]")))
}))
});
let server = Server::bind(&addr).serve(make_svc);
let address = server.local_addr();
Expand All @@ -722,7 +729,11 @@ mod tests {
.await
.expect("failed to register dummy producer");

// Step time until there has been exactly `N_COLLECTIONS` collections.
// Step time for a few collections.
//
// Due to scheduling variations, we don't verify the number of
// collections we expect based on time, but we instead check that every
// collection that _has_ occurred bumps the counter.
tokio::time::pause();
let now = Instant::now();
while now.elapsed() < TEST_WAIT_PERIOD {
Expand All @@ -744,7 +755,10 @@ mod tests {
.await
.expect("failed to request statistics from task");
let stats = rx.await.expect("failed to receive statistics from task");
assert_eq!(stats.collections.datum.value(), N_COLLECTIONS);
assert_eq!(
stats.collections.datum.value(),
N_SUCCESSFUL_COLLECTIONS.load(Ordering::SeqCst)
);
assert!(stats.failed_collections.is_empty());
logctx.cleanup_successful();
}
Expand Down Expand Up @@ -837,15 +851,13 @@ mod tests {
// And a dummy server that will always fail with a 500.
let addr =
SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0));
async fn handler(
_: Request<Body>,
) -> Result<Response<Body>, Infallible> {
let mut res = Response::new(Body::from("im ded"));
*res.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
Ok(res)
}
let make_svc = make_service_fn(|_conn| async {
Ok::<_, Infallible>(service_fn(handler))
Ok::<_, Infallible>(service_fn(|_: Request<Body>| async {
N_FAILED_COLLECTIONS.fetch_add(1, Ordering::SeqCst);
let mut res = Response::new(Body::from("im ded"));
*res.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
Ok::<_, Infallible>(res)
}))
});
let server = Server::bind(&addr).serve(make_svc);
let address = server.local_addr();
Expand All @@ -865,6 +877,12 @@ mod tests {
.expect("failed to register flaky producer");

// Step time until there has been exactly `N_COLLECTIONS` collections.
//
// NOTE: This is technically still a bit racy, in that the server task
// may have made a different number of attempts than we expect. In
// practice, we've not seen this one fail, so basing the number of
// counts on time seems reasonable, especially since we don't have other
// low-cost options for verifying the behavior.
tokio::time::pause();
let now = Instant::now();
while now.elapsed() < TEST_WAIT_PERIOD {
Expand Down Expand Up @@ -894,7 +912,7 @@ mod tests {
.unwrap()
.datum
.value(),
N_COLLECTIONS,
N_FAILED_COLLECTIONS.load(Ordering::SeqCst),
);
assert_eq!(stats.failed_collections.len(), 1);
logctx.cleanup_successful();
Expand Down

0 comments on commit efa2f49

Please sign in to comment.