Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[nexus-test-utils] set 60s timeouts for each init step #4789

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion common/src/api/external/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1919,7 +1919,7 @@ impl MacAddr {
/// Iterate the MAC addresses in the system address range
/// (used as an allocator in contexts where collisions are not expected and
/// determinism is useful, like in the test suite)
pub fn iter_system() -> impl Iterator<Item = MacAddr> {
pub fn iter_system() -> impl Iterator<Item = MacAddr> + Send {
((Self::MAX_SYSTEM_RESV + 1)..=Self::MAX_SYSTEM_ADDR)
.map(Self::from_i64)
}
Expand Down
4 changes: 2 additions & 2 deletions nexus/test-interface/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ use std::net::{SocketAddr, SocketAddrV6};
use uuid::Uuid;

#[async_trait]
pub trait NexusServer {
type InternalServer;
pub trait NexusServer: Send + Sync + 'static {
type InternalServer: Send + Sync + 'static;

async fn start_internal(
config: &Config,
Expand Down
2 changes: 2 additions & 0 deletions nexus/test-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ crucible-agent-client.workspace = true
dns-server.workspace = true
dns-service-client.workspace = true
dropshot.workspace = true
futures.workspace = true
gateway-messages.workspace = true
gateway-test-utils.workspace = true
headers.workspace = true
Expand All @@ -34,6 +35,7 @@ serde.workspace = true
serde_json.workspace = true
serde_urlencoded.workspace = true
slog.workspace = true
tokio.workspace = true
trust-dns-resolver.workspace = true
uuid.workspace = true
omicron-workspace-hack.workspace = true
Expand Down
157 changes: 132 additions & 25 deletions nexus/test-utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ use dropshot::ConfigDropshot;
use dropshot::ConfigLogging;
use dropshot::ConfigLoggingLevel;
use dropshot::HandlerTaskMode;
use futures::future::BoxFuture;
use futures::FutureExt;
use gateway_test_utils::setup::GatewayTestContext;
use nexus_test_interface::NexusServer;
use nexus_types::external_api::params::UserId;
Expand All @@ -39,7 +41,7 @@ use omicron_test_utils::dev;
use oximeter_collector::Oximeter;
use oximeter_producer::LogConfig;
use oximeter_producer::Server as ProducerServer;
use slog::{debug, o, Logger};
use slog::{debug, error, o, Logger};
use std::collections::HashMap;
use std::fmt::Debug;
use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6};
Expand Down Expand Up @@ -158,7 +160,7 @@ struct RackInitRequestBuilder {
services: Vec<nexus_types::internal_api::params::ServicePutRequest>,
datasets: Vec<nexus_types::internal_api::params::DatasetCreateRequest>,
internal_dns_config: internal_dns::DnsConfigBuilder,
mac_addrs: Box<dyn Iterator<Item = MacAddr>>,
mac_addrs: Box<dyn Iterator<Item = MacAddr> + Send>,
}

impl RackInitRequestBuilder {
Expand Down Expand Up @@ -254,6 +256,7 @@ pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> {
pub external_dns_zone_name: Option<String>,
pub external_dns: Option<dns_server::TransientServer>,
pub internal_dns: Option<dns_server::TransientServer>,
dns_config: Option<DnsConfigParams>,

pub silo_name: Option<Name>,
pub user_name: Option<UserId>,
Expand Down Expand Up @@ -290,11 +293,40 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
external_dns_zone_name: None,
external_dns: None,
internal_dns: None,
dns_config: None,
silo_name: None,
user_name: None,
}
}

pub async fn init_with_steps(
&mut self,
steps: Vec<(
&str,
Box<dyn for<'b> FnOnce(&'b mut Self) -> BoxFuture<'b, ()>>,
sunshowers marked this conversation as resolved.
Show resolved Hide resolved
)>,
timeout: Duration,
) {
let log = self.logctx.log.new(o!("component" => "init_with_steps"));
for (step_name, step) in steps {
debug!(log, "Running step {step_name}");
let step_fut = step(self);
match tokio::time::timeout(timeout, step_fut).await {
Ok(()) => {}
Err(_) => {
error!(
log,
"Timed out after {timeout:?} \
while running step {step_name}, failing test"
);
panic!(
"Timed out after {timeout:?} while running step {step_name}",
);
}
}
}
}

pub async fn start_crdb(&mut self, populate: bool) {
let populate = if populate {
PopulateCrdb::FromEnvironmentSeed
Expand Down Expand Up @@ -581,7 +613,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
self.nexus_internal_addr = Some(nexus_internal_addr);
}

pub async fn populate_internal_dns(&mut self) -> DnsConfigParams {
pub async fn populate_internal_dns(&mut self) {
let log = &self.logctx.log;
debug!(log, "Populating Internal DNS");

Expand All @@ -604,18 +636,21 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
dns_config_client.dns_config_put(&dns_config).await.expect(
"Failed to send initial DNS records to internal DNS server",
);
dns_config
self.dns_config = Some(dns_config);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just to avoid a move between setup steps, yeah?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this pattern, we no longer have a way to return a value that the next step can use. This is the easiest way to work around that (and follows the pattern the rest of the builder uses anyway).

I ran into something similar with the update-engine and came up with the design to use step handles that can be passed around, but only resolved once steps start executing. For example,

let buf_list = download_handle.into_value(cx.token()).await;
let temp_dirs =
temp_dirs_handle.into_value(cx.token()).await;
. But that's trying to do a lot more than we need here.

}

// Perform RSS handoff
pub async fn start_nexus_external(
&mut self,
dns_config: DnsConfigParams,
tls_certificates: Vec<Certificate>,
) {
let log = &self.logctx.log;
debug!(log, "Starting Nexus (external API)");

let dns_config = self.dns_config.clone().expect(
"populate_internal_dns must be called before start_nexus_external",
);

// Create a recovery silo
let external_dns_zone_name =
internal_dns::names::DNS_ZONE_EXTERNAL_TESTING.to_string();
Expand Down Expand Up @@ -956,30 +991,102 @@ async fn setup_with_config_impl<N: NexusServer>(
sim_mode: sim::SimMode,
initial_cert: Option<Certificate>,
) -> ControlPlaneTestContext<N> {
builder.start_crdb_impl(populate).await;
builder.start_clickhouse().await;
builder.start_gateway().await;
builder.start_dendrite(SwitchLocation::Switch0).await;
builder.start_dendrite(SwitchLocation::Switch1).await;
builder.start_mgd(SwitchLocation::Switch0).await;
builder.start_mgd(SwitchLocation::Switch1).await;
builder.start_internal_dns().await;
builder.start_external_dns().await;
builder.start_nexus_internal().await;
builder.start_sled(sim_mode).await;
builder.start_crucible_pantry().await;
builder.scrimlet_dns_setup().await;

// Give Nexus necessary information to find the Crucible Pantry
let dns_config = builder.populate_internal_dns().await;
const STEP_TIMEOUT: Duration = Duration::from_secs(60);

builder
.start_nexus_external(dns_config, initial_cert.into_iter().collect())
.init_with_steps(
vec![
(
"start_crdb",
Box::new(|builder| {
builder.start_crdb_impl(populate).boxed()
}),
),
(
"start_clickhouse",
Box::new(|builder| builder.start_clickhouse().boxed()),
),
(
"start_gateway",
Box::new(|builder| builder.start_gateway().boxed()),
),
(
"start_dendrite_switch0",
Box::new(|builder| {
builder.start_dendrite(SwitchLocation::Switch0).boxed()
}),
),
(
"start_dendrite_switch1",
Box::new(|builder| {
builder.start_dendrite(SwitchLocation::Switch1).boxed()
}),
),
(
"start_mgd_switch0",
Box::new(|builder| {
builder.start_mgd(SwitchLocation::Switch0).boxed()
}),
),
(
"start_mgd_switch1",
Box::new(|builder| {
builder.start_mgd(SwitchLocation::Switch1).boxed()
}),
),
(
"start_internal_dns",
Box::new(|builder| builder.start_internal_dns().boxed()),
),
(
"start_external_dns",
Box::new(|builder| builder.start_external_dns().boxed()),
),
(
"start_nexus_internal",
Box::new(|builder| builder.start_nexus_internal().boxed()),
),
(
"start_sled",
Box::new(move |builder| {
builder.start_sled(sim_mode).boxed()
}),
),
(
"start_crucible_pantry",
Box::new(|builder| builder.start_crucible_pantry().boxed()),
),
(
"scrimlet_dns_setup",
Box::new(|builder| builder.scrimlet_dns_setup().boxed()),
),
(
"populate_internal_dns",
Box::new(|builder| builder.populate_internal_dns().boxed()),
),
(
"start_nexus_external",
Box::new(|builder| {
builder
.start_nexus_external(
initial_cert.into_iter().collect(),
)
.boxed()
}),
),
(
"start_oximeter",
Box::new(|builder| builder.start_oximeter().boxed()),
),
(
"start_producer_server",
Box::new(|builder| builder.start_producer_server().boxed()),
),
],
STEP_TIMEOUT,
)
.await;

builder.start_oximeter().await;
builder.start_producer_server().await;

builder.build()
}

Expand Down
Loading