turns out we can just totally disable it in tests
hawkw committed Jul 31, 2024
1 parent 4fa6bd8 commit ce3c35d
Showing 4 changed files with 74 additions and 45 deletions.
11 changes: 11 additions & 0 deletions nexus-config/src/nexus_config.rs
@@ -568,6 +568,15 @@ pub struct InstanceUpdaterConfig {
/// period (in seconds) for periodic activations of this background task
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,

/// disable background checks for instances in need of updates.
///
/// This config is intended for use in testing, and should generally not be
/// enabled in real life.
///
/// Default: Off
#[serde(default)]
pub disable: bool,
}

#[serde_as]
@@ -859,6 +868,7 @@ mod test {
region_replacement_driver.period_secs = 30
instance_watcher.period_secs = 30
instance_updater.period_secs = 30
instance_updater.disable = false
service_firewall_propagation.period_secs = 300
v2p_mapping_propagation.period_secs = 30
abandoned_vmm_reaper.period_secs = 60
@@ -1008,6 +1018,7 @@ mod test {
},
instance_updater: InstanceUpdaterConfig {
period_secs: Duration::from_secs(30),
disable: false,
},
service_firewall_propagation:
ServiceFirewallPropagationConfig {
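Because the new field is annotated `#[serde(default)]`, existing configs that never mention `disable` keep parsing and get `false`. A standalone sketch of that behavior, using a simplified stand-in struct (a plain `u64` period instead of the real `DurationSeconds` wrapper) and assuming the `serde` and `toml` crates:

```rust
// Simplified stand-in for InstanceUpdaterConfig, not the real struct:
// it shows only the serde behavior that makes `disable` optional.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct InstanceUpdaterConfig {
    period_secs: u64,
    #[serde(default)]
    disable: bool,
}

fn main() {
    // An existing config with no `disable` key still parses; the flag
    // defaults to false.
    let old: InstanceUpdaterConfig = toml::from_str("period_secs = 30").unwrap();
    assert!(!old.disable);

    // The test config opts out explicitly.
    let test: InstanceUpdaterConfig =
        toml::from_str("period_secs = 60\ndisable = true").unwrap();
    assert!(test.disable);

    println!("{old:?}\n{test:?}");
}
```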
10 changes: 9 additions & 1 deletion nexus/src/app/background/init.rs
@@ -638,11 +638,19 @@ impl BackgroundTasksInitializer {
let updater = instance_updater::InstanceUpdater::new(
datastore.clone(),
sagas.clone(),
config.instance_updater.disable,
);
let period = if config.instance_updater.disable {
// If we're explicitly disabled by the config, don't waste
// energy activating the background task just to have it do nothing.
std::time::Duration::MAX
} else {
config.instance_updater.period_secs
};
driver.register(TaskDefinition {
name: "instance_updater",
description: "detects if instances require update sagas and schedules them",
- period: config.instance_updater.period_secs,
period,
task_impl: Box::new(updater),
opctx: opctx.child(BTreeMap::new()),
watchers: vec![],
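Using `Duration::MAX` keeps the task registered while making its periodic activation effectively never happen. The exact scheduling done by the Nexus driver isn't shown in this diff, but the general idea is easy to illustrate with plain `std`: a period of `Duration::MAX` cannot be added to any starting `Instant`, so a checked scheduler never gets a finite next-activation time for the task.

```rust
use std::time::{Duration, Instant};

// Toy illustration only, not the actual Nexus driver: computing the next
// activation time from the last one overflows for Duration::MAX, so a
// scheduler using checked arithmetic simply never arms a timer for the task.
fn next_activation(last_activation: Instant, period: Duration) -> Option<Instant> {
    last_activation.checked_add(period)
}

fn main() {
    let now = Instant::now();
    assert!(next_activation(now, Duration::from_secs(30)).is_some());
    assert!(next_activation(now, Duration::MAX).is_none());
    println!("a Duration::MAX period never yields a next activation time");
}
```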
91 changes: 51 additions & 40 deletions nexus/src/app/background/tasks/instance_updater.rs
@@ -26,14 +26,19 @@ use tokio::task::JoinSet;
pub struct InstanceUpdater {
datastore: Arc<DataStore>,
sagas: Arc<dyn StartSaga>,
disable: bool,
}

impl InstanceUpdater {
- pub fn new(datastore: Arc<DataStore>, sagas: Arc<dyn StartSaga>) -> Self {
- InstanceUpdater { datastore, sagas }
pub fn new(
datastore: Arc<DataStore>,
sagas: Arc<dyn StartSaga>,
disable: bool,
) -> Self {
InstanceUpdater { datastore, sagas, disable }
}

- async fn activate2(
async fn actually_activate(
&mut self,
opctx: &OpContext,
stats: &mut ActivationStats,
@@ -205,43 +210,49 @@ impl BackgroundTask for InstanceUpdater {
) -> BoxFuture<'a, serde_json::Value> {
async {
let mut stats = ActivationStats::default();
- let error = match self.activate2(opctx, &mut stats).await {
- Ok(()) => {
- slog::info!(
- &opctx.log,
- "instance updater activation completed";
- "destroyed_active_vmms" => stats.destroyed_active_vmms,
- "terminated_active_migrations" => stats.terminated_active_migrations,
- "update_sagas_started" => stats.sagas_started,
- "update_sagas_completed" => stats.sagas_completed,
- );
- debug_assert_eq!(
- stats.sagas_failed,
- 0,
- "if the task completed successfully, then no sagas \
- should have failed",
- );
- debug_assert_eq!(
- stats.saga_start_failures,
- 0,
- "if the task completed successfully, all sagas \
- should have started successfully"
- );
- None
- }
- Err(error) => {
- slog::warn!(
- &opctx.log,
- "instance updater activation failed!";
- "error" => %error,
- "destroyed_active_vmms" => stats.destroyed_active_vmms,
- "terminated_active_migrations" => stats.terminated_active_migrations,
- "update_sagas_started" => stats.sagas_started,
- "update_sagas_completed" => stats.sagas_completed,
- "update_sagas_failed" => stats.sagas_failed,
- "update_saga_start_failures" => stats.saga_start_failures,
- );
- Some(error.to_string())

let error = if self.disable {
slog::info!(&opctx.log, "background instance updater explicitly disabled");
None
} else {
match self.actually_activate(opctx, &mut stats).await {
Ok(()) => {
slog::info!(
&opctx.log,
"instance updater activation completed";
"destroyed_active_vmms" => stats.destroyed_active_vmms,
"terminated_active_migrations" => stats.terminated_active_migrations,
"update_sagas_started" => stats.sagas_started,
"update_sagas_completed" => stats.sagas_completed,
);
debug_assert_eq!(
stats.sagas_failed,
0,
"if the task completed successfully, then no sagas \
should have failed",
);
debug_assert_eq!(
stats.saga_start_failures,
0,
"if the task completed successfully, all sagas \
should have started successfully"
);
None
}
Err(error) => {
slog::warn!(
&opctx.log,
"instance updater activation failed!";
"error" => %error,
"destroyed_active_vmms" => stats.destroyed_active_vmms,
"terminated_active_migrations" => stats.terminated_active_migrations,
"update_sagas_started" => stats.sagas_started,
"update_sagas_completed" => stats.sagas_completed,
"update_sagas_failed" => stats.sagas_failed,
"update_saga_start_failures" => stats.saga_start_failures,
);
Some(error.to_string())
}
}
};
json!({
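The shape of the change: the task stays registered, but when `disable` is set its activation short-circuits, logging that it is disabled and reporting status without looking for instances or starting update sagas. A self-contained sketch of that pattern, using a hypothetical simplified trait rather than the real omicron `BackgroundTask` API (assumes the `futures` and `serde_json` crates):

```rust
// Sketch of the disable-flag pattern; the trait and types here are
// simplified stand-ins, not omicron's actual background-task machinery.
use futures::future::BoxFuture;
use futures::FutureExt;
use serde_json::json;

trait BackgroundTask {
    fn activate(&mut self) -> BoxFuture<'_, serde_json::Value>;
}

struct InstanceUpdater {
    disable: bool,
}

impl InstanceUpdater {
    // Stand-in for the real work of finding instances that need update sagas.
    async fn actually_activate(&mut self) -> Result<usize, String> {
        Ok(0)
    }
}

impl BackgroundTask for InstanceUpdater {
    fn activate(&mut self) -> BoxFuture<'_, serde_json::Value> {
        async move {
            let error = if self.disable {
                // Explicitly disabled: skip the work entirely.
                None
            } else {
                self.actually_activate().await.err()
            };
            json!({ "disabled": self.disable, "error": error })
        }
        .boxed()
    }
}

fn main() {
    let mut task = InstanceUpdater { disable: true };
    // With `disable` set, activation is a cheap no-op that still reports status.
    let status = futures::executor::block_on(task.activate());
    println!("{status}");
}
```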
7 changes: 3 additions & 4 deletions nexus/tests/config.test.toml
@@ -134,10 +134,9 @@ lookup_region_port.period_secs = 60
# to be executed in a timely manner, so for integration tests, we don't want to
# *rely* on the instance-updater background task for running these sagas.
#
- # Therefore, set a period long enough that this task won't activate during a
- # reasonable integration test execution. Tests for the instance-updater task
- # will explictly activate it.
- instance_updater.period_secs = 600
# Therefore, disable the background task during tests.
instance_updater.disable = true
instance_updater.period_secs = 60

[default_region_allocation_strategy]
# we only have one sled in the test environment, so we need to use the
Expand Down
