[#5333 5/6] Region snapshot replacement step #6350

Merged · 8 commits · Aug 26, 2024
Changes from 2 commits
41 changes: 41 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
@@ -36,6 +36,7 @@ use nexus_types::internal_api::background::LookupRegionPortStatus;
use nexus_types::internal_api::background::RegionReplacementDriverStatus;
use nexus_types::internal_api::background::RegionSnapshotReplacementGarbageCollectStatus;
use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus;
use nexus_types::internal_api::background::RegionSnapshotReplacementStepStatus;
use nexus_types::inventory::BaseboardId;
use omicron_uuid_kinds::CollectionUuid;
use omicron_uuid_kinds::DemoSagaUuid;
@@ -1504,6 +1505,46 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
println!(" > {line}");
}

println!(" errors: {}", status.errors.len());
for line in &status.errors {
println!(" > {line}");
}
}
}
} else if name == "region_snapshot_replacement_step" {
match serde_json::from_value::<RegionSnapshotReplacementStepStatus>(
details.clone(),
) {
Err(error) => eprintln!(
"warning: failed to interpret task details: {:?}: {:?}",
error, details
),

Ok(status) => {
println!(
" total step records created ok: {}",
status.step_records_created_ok.len(),
);
for line in &status.step_records_created_ok {
println!(" > {line}");
}

println!(
" total step garbage collect saga invoked ok: {}",
status.step_garbage_collect_invoked_ok.len(),
);
for line in &status.step_garbage_collect_invoked_ok {
println!(" > {line}");
}

println!(
" total step saga invoked ok: {}",
status.step_invoked_ok.len(),
);
for line in &status.step_invoked_ok {
println!(" > {line}");
}

println!(" errors: {}", status.errors.len());
for line in &status.errors {
println!(" > {line}");
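
For reference, a minimal sketch of what the `RegionSnapshotReplacementStepStatus` type consumed above might look like; the field names and `Vec<String>` types are inferred from the printing code and are not copied from the real definition in nexus-types:

```rust
// Sketch only: inferred from the omdb printing code above, not the real
// definition in nexus_types::internal_api::background.
use serde::{Deserialize, Serialize};

#[derive(Debug, Default, Serialize, Deserialize)]
pub struct RegionSnapshotReplacementStepStatus {
    /// step records created during this activation
    pub step_records_created_ok: Vec<String>,
    /// step records for which the garbage collect saga was started
    pub step_garbage_collect_invoked_ok: Vec<String>,
    /// step records for which the step saga was started
    pub step_invoked_ok: Vec<String>,
    /// errors encountered while doing the above
    pub errors: Vec<String>,
}
```
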
15 changes: 15 additions & 0 deletions dev-tools/omdb/tests/env.out
@@ -135,6 +135,11 @@ task: "region_snapshot_replacement_start"
detect if region snapshots need replacement and begin the process


task: "region_snapshot_replacement_step"
detect what volumes were affected by a region snapshot replacement, and run
the step saga for them


task: "saga_recovery"
recovers sagas assigned to this Nexus

@@ -292,6 +297,11 @@ task: "region_snapshot_replacement_start"
detect if region snapshots need replacement and begin the process


task: "region_snapshot_replacement_step"
detect what volumes were affected by a region snapshot replacement, and run
the step saga for them


task: "saga_recovery"
recovers sagas assigned to this Nexus

@@ -436,6 +446,11 @@ task: "region_snapshot_replacement_start"
detect if region snapshots need replacement and begin the process


task: "region_snapshot_replacement_step"
detect what volumes were affected by a region snapshot replacement, and run
the step saga for them


task: "saga_recovery"
recovers sagas assigned to this Nexus

25 changes: 25 additions & 0 deletions dev-tools/omdb/tests/successes.out
@@ -336,6 +336,11 @@ task: "region_snapshot_replacement_start"
detect if region snapshots need replacement and begin the process


task: "region_snapshot_replacement_step"
detect what volumes were affected by a region snapshot replacement, and run
the step saga for them


task: "saga_recovery"
recovers sagas assigned to this Nexus

@@ -589,6 +594,16 @@ task: "region_snapshot_replacement_start"
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
warning: unknown background task: "region_snapshot_replacement_start" (don't know how to interpret details: Object {"errors": Array [], "requests_created_ok": Array [], "start_invoked_ok": Array []})

task: "region_snapshot_replacement_step"
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
total step records created ok: 0
total step garbage collect saga invoked ok: 0
total step saga invoked ok: 0
errors: 0

task: "saga_recovery"
configured period: every 10m
currently executing: no
@@ -995,6 +1010,16 @@ task: "region_snapshot_replacement_start"
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
warning: unknown background task: "region_snapshot_replacement_start" (don't know how to interpret details: Object {"errors": Array [], "requests_created_ok": Array [], "start_invoked_ok": Array []})

task: "region_snapshot_replacement_step"
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
total step records created ok: 0
total step garbage collect saga invoked ok: 0
total step saga invoked ok: 0
errors: 0

task: "saga_recovery"
configured period: every 10m
currently executing: no
16 changes: 16 additions & 0 deletions nexus-config/src/nexus_config.rs
@@ -396,6 +396,8 @@ pub struct BackgroundTaskConfig {
/// configuration for region snapshot replacement garbage collection
pub region_snapshot_replacement_garbage_collection:
RegionSnapshotReplacementGarbageCollectionConfig,
/// configuration for region snapshot replacement step task
pub region_snapshot_replacement_step: RegionSnapshotReplacementStepConfig,
}

#[serde_as]
@@ -648,6 +650,14 @@ pub struct RegionSnapshotReplacementGarbageCollectionConfig {
pub period_secs: Duration,
}

#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct RegionSnapshotReplacementStepConfig {
/// period (in seconds) for periodic activations of this background task
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,
}

/// Configuration for a nexus server
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
pub struct PackageConfig {
@@ -897,6 +907,7 @@ mod test {
lookup_region_port.period_secs = 60
region_snapshot_replacement_start.period_secs = 30
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
[default_region_allocation_strategy]
type = "random"
seed = 0
@@ -1067,6 +1078,10 @@ mod test {
RegionSnapshotReplacementGarbageCollectionConfig {
period_secs: Duration::from_secs(30),
},
region_snapshot_replacement_step:
RegionSnapshotReplacementStepConfig {
period_secs: Duration::from_secs(30),
},
},
default_region_allocation_strategy:
crate::nexus_config::RegionAllocationStrategy::Random {
@@ -1145,6 +1160,7 @@ mod test {
lookup_region_port.period_secs = 60
region_snapshot_replacement_start.period_secs = 30
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
[default_region_allocation_strategy]
type = "random"
"##,
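
As an aside on the `DurationSeconds<u64>` attribute used above, here is a small standalone sketch (assuming the `serde_with` and `toml` crates) of how a plain `period_secs = 30` in the TOML deserializes into a 30-second `Duration`:

```rust
// Standalone sketch, not code from this PR: shows how serde_with maps a
// plain integer in TOML onto std::time::Duration, as the config struct
// above does for region_snapshot_replacement_step.period_secs.
use serde::Deserialize;
use serde_with::{serde_as, DurationSeconds};
use std::time::Duration;

#[serde_as]
#[derive(Debug, Deserialize, PartialEq)]
struct StepConfig {
    #[serde_as(as = "DurationSeconds<u64>")]
    period_secs: Duration,
}

fn main() {
    let config: StepConfig =
        toml::from_str("period_secs = 30").expect("valid TOML");
    assert_eq!(config.period_secs, Duration::from_secs(30));
}
```
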
1 change: 1 addition & 0 deletions nexus/examples/config-second.toml
@@ -141,6 +141,7 @@ saga_recovery.period_secs = 600
lookup_region_port.period_secs = 60
region_snapshot_replacement_start.period_secs = 30
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30

[default_region_allocation_strategy]
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.
1 change: 1 addition & 0 deletions nexus/examples/config.toml
@@ -127,6 +127,7 @@ saga_recovery.period_secs = 600
lookup_region_port.period_secs = 60
region_snapshot_replacement_start.period_secs = 30
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
Contributor:

Could region_snapshot_replacement_start, region_snapshot_replacement_garbage_collection, and region_snapshot_replacement_step all start at the same time? Like, do we need to consider thundering-herd type problems here and stagger these from each other a little? Or will the trigger to start each one come in at different times, so that keeping the periods the same ensures they don't collide?

Contributor Author:

As far as I know, they usually do start at the same time! :) I think we're ok - each task only operates on requests that are in a certain state, so in that sense the work is bounded and shouldn't turn into a thundering herd.
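
As a hypothetical illustration of that bounding (the state names and types below are invented for this sketch and are not the actual request schema), each task's query selects only requests in the one state it handles:

```rust
// Invented example: each background task filters to the single state it
// owns, so simultaneous activations don't all grab the same work.
#[derive(Clone, Copy, Debug, PartialEq)]
enum ReplacementState {
    Requested,       // picked up by the "start" task
    ReplacementDone, // picked up by the "step" task
    Complete,
}

struct ReplacementRequest {
    id: u32,
    state: ReplacementState,
}

fn requests_in_state(
    requests: &[ReplacementRequest],
    state: ReplacementState,
) -> Vec<&ReplacementRequest> {
    requests.iter().filter(|r| r.state == state).collect()
}

fn main() {
    let requests = vec![
        ReplacementRequest { id: 1, state: ReplacementState::Requested },
        ReplacementRequest { id: 2, state: ReplacementState::ReplacementDone },
    ];
    // The step task would only see request 2 here.
    let step_work = requests_in_state(&requests, ReplacementState::ReplacementDone);
    assert_eq!(step_work.len(), 1);
    assert_eq!(step_work[0].id, 2);
}
```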

Contributor:

I'm thinking about a sled being expunged and all 10 crucible zones and all the things
that could be on those 10 zones, etc. Making sure we can handle an outage like that.

Contributor Author:

Right now we only limit the number of operations that can happen at one time to a Volume, and don't currently limit anything otherwise. I'm not too concerned

Contributor Author:

(hit comment too early!) I'm not too concerned, but it will be quite a bit of traffic to have all those reconciliations, live repairs, and clones going on... on second thought maybe I am concerned haha.

I'm not sure what to do other than test it out and see, though. We might be able to come up with an upper bound on the number of regions and snapshots, maybe? Even if we do, we can't know what the non-expunge load on the rack will be at any given time.

What do you think? Do you think some sort of limit is a good idea?

Contributor:

It's hard to say if we should set a limit without knowing what load a repair might generate.

But, that being said, it is probably better to put in a limit that makes things slower but still able to finish, and then loosen that limit as we discover how much the rack can handle. The alternative would be to just let repair take as many resources as it wants and then let things break if it's too much? That second choice does not sound like something I would want to explain to a customer :)

We don't have anything yet that we can use as an overall gauge of rack "busyness". In addition, we don't even know the load an "average repair" generates. Maybe our first step would be to build the tools to determine what the current repair load is so we can measure its impact?
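
One hypothetical shape such a limit could take - purely a sketch, not part of this PR, with illustrative names - is a semaphore that caps how many step sagas a single activation has in flight at once:

```rust
// Illustrative sketch only (not from this PR): bound how many step sagas a
// single activation starts concurrently using a tokio semaphore.
use std::sync::Arc;
use tokio::sync::Semaphore;

const MAX_CONCURRENT_STEP_SAGAS: usize = 4;

async fn start_step_saga(volume_id: &str) {
    // stand-in for the real saga invocation
    println!("starting step saga for volume {volume_id}");
}

async fn run_step_sagas(volume_ids: Vec<String>) {
    let limit = Arc::new(Semaphore::new(MAX_CONCURRENT_STEP_SAGAS));
    let mut handles = Vec::new();

    for volume_id in volume_ids {
        // Wait for a free slot before spawning the next saga.
        let permit = limit.clone().acquire_owned().await.unwrap();
        handles.push(tokio::spawn(async move {
            start_step_saga(&volume_id).await;
            drop(permit); // release the slot once this saga has been started
        }));
    }

    for handle in handles {
        let _ = handle.await;
    }
}

#[tokio::main]
async fn main() {
    let volumes = (0..10).map(|i| format!("volume-{i}")).collect();
    run_step_sagas(volumes).await;
}
```

Whether such a cap should be per activation, per volume, or rack-wide is exactly the open question in this thread.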


[default_region_allocation_strategy]
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.
21 changes: 20 additions & 1 deletion nexus/src/app/background/init.rs
@@ -110,6 +110,7 @@ use super::tasks::region_replacement;
use super::tasks::region_replacement_driver;
use super::tasks::region_snapshot_replacement_garbage_collect::*;
use super::tasks::region_snapshot_replacement_start::*;
use super::tasks::region_snapshot_replacement_step::*;
use super::tasks::saga_recovery;
use super::tasks::service_firewall_rules;
use super::tasks::sync_service_zone_nat::ServiceZoneNatTracker;
@@ -165,6 +166,7 @@ pub struct BackgroundTasks {
pub task_lookup_region_port: Activator,
pub task_region_snapshot_replacement_start: Activator,
pub task_region_snapshot_replacement_garbage_collection: Activator,
pub task_region_snapshot_replacement_step: Activator,

// Handles to activate background tasks that do not get used by Nexus
// at-large. These background tasks are implementation details as far as
@@ -249,6 +251,7 @@ impl BackgroundTasksInitializer {
task_region_snapshot_replacement_start: Activator::new(),
task_region_snapshot_replacement_garbage_collection: Activator::new(
),
task_region_snapshot_replacement_step: Activator::new(),

task_internal_dns_propagation: Activator::new(),
task_external_dns_propagation: Activator::new(),
@@ -312,6 +315,7 @@ impl BackgroundTasksInitializer {
task_lookup_region_port,
task_region_snapshot_replacement_start,
task_region_snapshot_replacement_garbage_collection,
task_region_snapshot_replacement_step,
// Add new background tasks here. Be sure to use this binding in a
// call to `Driver::register()` below. That's what actually wires
// up the Activator to the corresponding background task.
@@ -760,14 +764,29 @@ impl BackgroundTasksInitializer {
.region_snapshot_replacement_garbage_collection
.period_secs,
task_impl: Box::new(RegionSnapshotReplacementGarbageCollect::new(
datastore,
datastore.clone(),
sagas.clone(),
)),
opctx: opctx.child(BTreeMap::new()),
watchers: vec![],
activator: task_region_snapshot_replacement_garbage_collection,
});

driver.register(TaskDefinition {
name: "region_snapshot_replacement_step",
description:
"detect what volumes were affected by a region snapshot \
replacement, and run the step saga for them",
period: config.region_snapshot_replacement_step.period_secs,
task_impl: Box::new(RegionSnapshotReplacementFindAffected::new(
datastore,
sagas.clone(),
)),
opctx: opctx.child(BTreeMap::new()),
watchers: vec![],
activator: task_region_snapshot_replacement_step,
});

driver
}
}
1 change: 1 addition & 0 deletions nexus/src/app/background/tasks/mod.rs
@@ -27,6 +27,7 @@ pub mod region_replacement;
pub mod region_replacement_driver;
pub mod region_snapshot_replacement_garbage_collect;
pub mod region_snapshot_replacement_start;
pub mod region_snapshot_replacement_step;
pub mod saga_recovery;
pub mod service_firewall_rules;
pub mod sync_service_zone_nat;