Skip to content

Commit

Permalink
[#5333 6/6] Region snapshot replacement finish (#6442)
Browse files Browse the repository at this point in the history
For the final commit in this series, add a background task that will
detect when all the related steps of a region snapshot replacement are
done, and then mark that region snapshot replacement request as
Complete.

Closes #5333.
  • Loading branch information
jmpesp authored Aug 30, 2024
1 parent 9cec48e commit fbedfd1
Show file tree
Hide file tree
Showing 14 changed files with 438 additions and 12 deletions.
25 changes: 25 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use nexus_saga_recovery::LastPass;
use nexus_types::deployment::Blueprint;
use nexus_types::internal_api::background::LookupRegionPortStatus;
use nexus_types::internal_api::background::RegionReplacementDriverStatus;
use nexus_types::internal_api::background::RegionSnapshotReplacementFinishStatus;
use nexus_types::internal_api::background::RegionSnapshotReplacementGarbageCollectStatus;
use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus;
use nexus_types::internal_api::background::RegionSnapshotReplacementStepStatus;
Expand Down Expand Up @@ -1612,6 +1613,30 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
}
}
}
} else if name == "region_snapshot_replacement_finish" {
match serde_json::from_value::<RegionSnapshotReplacementFinishStatus>(
details.clone(),
) {
Err(error) => eprintln!(
"warning: failed to interpret task details: {:?}: {:?}",
error, details
),

Ok(status) => {
println!(
" total records transitioned to done: {}",
status.records_set_to_done.len(),
);
for line in &status.records_set_to_done {
println!(" > {line}");
}

println!(" errors: {}", status.errors.len());
for line in &status.errors {
println!(" > {line}");
}
}
}
} else {
println!(
"warning: unknown background task: {:?} \
Expand Down
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ task: "region_replacement_driver"
drive region replacements forward to completion


task: "region_snapshot_replacement_finish"
complete a region snapshot replacement if all the steps are done


task: "region_snapshot_replacement_garbage_collection"
clean up all region snapshot replacement step volumes

Expand Down Expand Up @@ -289,6 +293,10 @@ task: "region_replacement_driver"
drive region replacements forward to completion


task: "region_snapshot_replacement_finish"
complete a region snapshot replacement if all the steps are done


task: "region_snapshot_replacement_garbage_collection"
clean up all region snapshot replacement step volumes

Expand Down Expand Up @@ -438,6 +446,10 @@ task: "region_replacement_driver"
drive region replacements forward to completion


task: "region_snapshot_replacement_finish"
complete a region snapshot replacement if all the steps are done


task: "region_snapshot_replacement_garbage_collection"
clean up all region snapshot replacement step volumes

Expand Down
20 changes: 20 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,10 @@ task: "region_replacement_driver"
drive region replacements forward to completion


task: "region_snapshot_replacement_finish"
complete a region snapshot replacement if all the steps are done


task: "region_snapshot_replacement_garbage_collection"
clean up all region snapshot replacement step volumes

Expand Down Expand Up @@ -594,6 +598,14 @@ task: "region_replacement_driver"
number of region replacement finish sagas started ok: 0
number of errors: 0

task: "region_snapshot_replacement_finish"
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
total records transitioned to done: 0
errors: 0

task: "region_snapshot_replacement_garbage_collection"
configured period: every <REDACTED_DURATION>s
currently executing: no
Expand Down Expand Up @@ -1012,6 +1024,14 @@ task: "region_replacement_driver"
number of region replacement finish sagas started ok: 0
number of errors: 0

task: "region_snapshot_replacement_finish"
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
total records transitioned to done: 0
errors: 0

task: "region_snapshot_replacement_garbage_collection"
configured period: every <REDACTED_DURATION>s
currently executing: no
Expand Down
17 changes: 17 additions & 0 deletions nexus-config/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,9 @@ pub struct BackgroundTaskConfig {
RegionSnapshotReplacementGarbageCollectionConfig,
/// configuration for region snapshot replacement step task
pub region_snapshot_replacement_step: RegionSnapshotReplacementStepConfig,
/// configuration for region snapshot replacement finisher task
pub region_snapshot_replacement_finish:
RegionSnapshotReplacementFinishConfig,
}

#[serde_as]
Expand Down Expand Up @@ -658,6 +661,14 @@ pub struct RegionSnapshotReplacementStepConfig {
pub period_secs: Duration,
}

/// Configuration for the region snapshot replacement finish background task.
#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct RegionSnapshotReplacementFinishConfig {
/// period (in seconds) for periodic activations of this background task
// Stored as a `Duration`, but (de)serialized via `DurationSeconds<u64>`,
// e.g. `region_snapshot_replacement_finish.period_secs = 30` in the TOML.
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,
}

/// Configuration for a nexus server
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
pub struct PackageConfig {
Expand Down Expand Up @@ -908,6 +919,7 @@ mod test {
region_snapshot_replacement_start.period_secs = 30
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
region_snapshot_replacement_finish.period_secs = 30
[default_region_allocation_strategy]
type = "random"
seed = 0
Expand Down Expand Up @@ -1082,6 +1094,10 @@ mod test {
RegionSnapshotReplacementStepConfig {
period_secs: Duration::from_secs(30),
},
region_snapshot_replacement_finish:
RegionSnapshotReplacementFinishConfig {
period_secs: Duration::from_secs(30),
},
},
default_region_allocation_strategy:
crate::nexus_config::RegionAllocationStrategy::Random {
Expand Down Expand Up @@ -1161,6 +1177,7 @@ mod test {
region_snapshot_replacement_start.period_secs = 30
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
region_snapshot_replacement_finish.period_secs = 30
[default_region_allocation_strategy]
type = "random"
"##,
Expand Down
1 change: 1 addition & 0 deletions nexus/examples/config-second.toml
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ lookup_region_port.period_secs = 60
region_snapshot_replacement_start.period_secs = 30
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
region_snapshot_replacement_finish.period_secs = 30

[default_region_allocation_strategy]
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.
Expand Down
1 change: 1 addition & 0 deletions nexus/examples/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ lookup_region_port.period_secs = 60
region_snapshot_replacement_start.period_secs = 30
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
region_snapshot_replacement_finish.period_secs = 30

[default_region_allocation_strategy]
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.
Expand Down
20 changes: 19 additions & 1 deletion nexus/src/app/background/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ use super::tasks::phantom_disks;
use super::tasks::physical_disk_adoption;
use super::tasks::region_replacement;
use super::tasks::region_replacement_driver;
use super::tasks::region_snapshot_replacement_finish::*;
use super::tasks::region_snapshot_replacement_garbage_collect::*;
use super::tasks::region_snapshot_replacement_start::*;
use super::tasks::region_snapshot_replacement_step::*;
Expand Down Expand Up @@ -167,6 +168,7 @@ pub struct BackgroundTasks {
pub task_region_snapshot_replacement_start: Activator,
pub task_region_snapshot_replacement_garbage_collection: Activator,
pub task_region_snapshot_replacement_step: Activator,
pub task_region_snapshot_replacement_finish: Activator,

// Handles to activate background tasks that do not get used by Nexus
// at-large. These background tasks are implementation details as far as
Expand Down Expand Up @@ -252,6 +254,7 @@ impl BackgroundTasksInitializer {
task_region_snapshot_replacement_garbage_collection: Activator::new(
),
task_region_snapshot_replacement_step: Activator::new(),
task_region_snapshot_replacement_finish: Activator::new(),

task_internal_dns_propagation: Activator::new(),
task_external_dns_propagation: Activator::new(),
Expand Down Expand Up @@ -316,6 +319,7 @@ impl BackgroundTasksInitializer {
task_region_snapshot_replacement_start,
task_region_snapshot_replacement_garbage_collection,
task_region_snapshot_replacement_step,
task_region_snapshot_replacement_finish,
// Add new background tasks here. Be sure to use this binding in a
// call to `Driver::register()` below. That's what actually wires
// up the Activator to the corresponding background task.
Expand Down Expand Up @@ -780,14 +784,28 @@ impl BackgroundTasksInitializer {
replacement, and run the step saga for them",
period: config.region_snapshot_replacement_step.period_secs,
task_impl: Box::new(RegionSnapshotReplacementFindAffected::new(
datastore,
datastore.clone(),
sagas.clone(),
)),
opctx: opctx.child(BTreeMap::new()),
watchers: vec![],
activator: task_region_snapshot_replacement_step,
});

driver.register(TaskDefinition {
name: "region_snapshot_replacement_finish",
description:
"complete a region snapshot replacement if all the steps are \
done",
period: config.region_snapshot_replacement_finish.period_secs,
task_impl: Box::new(RegionSnapshotReplacementFinishDetector::new(
datastore,
)),
opctx: opctx.child(BTreeMap::new()),
watchers: vec![],
activator: task_region_snapshot_replacement_finish,
});

driver
}
}
Expand Down
1 change: 1 addition & 0 deletions nexus/src/app/background/tasks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pub mod phantom_disks;
pub mod physical_disk_adoption;
pub mod region_replacement;
pub mod region_replacement_driver;
pub mod region_snapshot_replacement_finish;
pub mod region_snapshot_replacement_garbage_collect;
pub mod region_snapshot_replacement_start;
pub mod region_snapshot_replacement_step;
Expand Down
Loading

0 comments on commit fbedfd1

Please sign in to comment.