Fix for deleted volumes during region replacement #6659

Merged
8 changes: 8 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
@@ -1129,6 +1129,14 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
println!(" > {line}");
}

println!(
" region replacement requests set to completed ok: {}",
status.requests_completed_ok.len()
);
for line in &status.requests_completed_ok {
println!(" > {line}");
}

println!(" errors: {}", status.errors.len());
for line in &status.errors {
println!(" > {line}");
116 changes: 59 additions & 57 deletions dev-tools/omdb/tests/successes.out

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions nexus/db-model/src/region_replacement.rs
@@ -116,6 +116,13 @@ impl std::str::FromStr for RegionReplacementState {
/// "finish" notification is seen by the region replacement drive background
/// task. This check is done before invoking the region replacement drive saga.
///
/// If the volume whose region is being replaced is soft-deleted or
/// hard-deleted, then the replacement request will be transitioned along the
/// states to Complete while avoiding operations that are meant to operate on
/// that volume. If the volume is soft-deleted or hard-deleted while the
/// replacement request is in the "Requested" state, the replacement request
/// will transition straight to Complete, and no operations will be performed.
///
/// See also: RegionReplacementStep records
#[derive(
Queryable,
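The doc comment above describes two paths to Complete for a request whose volume has been deleted. Below is a minimal sketch of that decision, not part of the diff; it assumes RegionReplacementState is exported from nexus_db_model like the other types in this crate, and the returned strings are labels for illustration only.

use nexus_db_model::RegionReplacementState;

// Illustrative only: summarizes the behavior the doc comment describes for a
// request whose volume has been soft-deleted or hard-deleted.
fn action_for_deleted_volume(state: RegionReplacementState) -> &'static str {
    match state {
        // Nothing has started yet, so the request can move straight to
        // Complete with no other work.
        RegionReplacementState::Requested => "transition directly to Complete",
        // A replacement is already in flight: keep walking the states toward
        // Complete, but skip any operation that would touch the deleted volume.
        _ => "advance toward Complete, skipping volume operations",
    }
}

In the diff below, the Requested shortcut is implemented by set_region_replacement_complete_from_requested, and the in-flight case is handled by the drive saga and the region replacement background task.
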
71 changes: 70 additions & 1 deletion nexus/db-queries/src/db/datastore/region_replacement.rs
@@ -657,7 +657,7 @@ impl DataStore {

/// Transition a RegionReplacement record from Completing to Complete,
/// clearing the operating saga id. Also removes the `volume_repair` record
/// that is taking a "lock" on the Volume.
/// that is taking a lock on the Volume.
pub async fn set_region_replacement_complete(
&self,
opctx: &OpContext,
@@ -723,6 +723,75 @@
})
}

/// Transition a RegionReplacement record from Requested to Complete, which
/// occurs when the associated volume is soft or hard deleted. Also removes
/// the `volume_repair` record that is taking a lock on the Volume.
pub async fn set_region_replacement_complete_from_requested(
&self,
opctx: &OpContext,
request: RegionReplacement,
) -> Result<(), Error> {
type TxnError = TransactionError<Error>;

assert_eq!(
request.replacement_state,
RegionReplacementState::Requested,
);

self.pool_connection_authorized(opctx)
.await?
.transaction_async(|conn| async move {
Self::volume_repair_delete_query(
request.volume_id,
request.id,
)
.execute_async(&conn)
.await?;

use db::schema::region_replacement::dsl;

let result = diesel::update(dsl::region_replacement)
.filter(dsl::id.eq(request.id))
.filter(
dsl::replacement_state.eq(RegionReplacementState::Requested),
)
.filter(dsl::operating_saga_id.is_null())
.set((
dsl::replacement_state.eq(RegionReplacementState::Complete),
))
.check_if_exists::<RegionReplacement>(request.id)
.execute_and_check(&conn)
.await?;

match result.status {
UpdateStatus::Updated => Ok(()),

UpdateStatus::NotUpdatedButExists => {
let record = result.found;

if record.replacement_state == RegionReplacementState::Complete {
Ok(())
} else {
Err(TxnError::CustomError(Error::conflict(format!(
"region replacement {} set to {:?} (operating saga id {:?})",
request.id,
record.replacement_state,
record.operating_saga_id,
))))
}
}
}
})
.await
.map_err(|e| match e {
TxnError::CustomError(error) => error,

TxnError::Database(error) => {
public_error_from_diesel(error, ErrorHandler::Server)
}
})
}

/// Nexus has been notified by an Upstairs (or has otherwise determined)
/// that a region replacement is done, so update the record. Filter on the
/// following:
32 changes: 27 additions & 5 deletions nexus/db-queries/src/db/datastore/volume.rs
@@ -1572,6 +1572,14 @@ impl DataStore {
.optional()
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
}

/// Return true if a volume was soft-deleted or hard-deleted
pub async fn volume_deleted(&self, volume_id: Uuid) -> Result<bool, Error> {
match self.volume_get(volume_id).await? {
Some(v) => Ok(v.time_deleted.is_some()),
None => Ok(true),
}
}
}

#[derive(Default, Clone, Debug, Serialize, Deserialize)]
@@ -2049,13 +2057,20 @@
let old_volume = if let Some(old_volume) = maybe_old_volume {
old_volume
} else {
// Existing volume was deleted, so return here. We can't
// perform the region replacement now, and this will
// short-circuit the rest of the process.
// Existing volume was hard-deleted, so return here. We
// can't perform the region replacement now, and this
// will short-circuit the rest of the process.

return Ok(VolumeReplaceResult::ExistingVolumeDeleted);
};

if old_volume.time_deleted.is_some() {
// Existing volume was soft-deleted, so return here for
// the same reason: the region replacement process
// should be short-circuited now.
return Ok(VolumeReplaceResult::ExistingVolumeDeleted);
}

let old_vcr: VolumeConstructionRequest =
match serde_json::from_str(&old_volume.data()) {
Ok(vcr) => vcr,
@@ -2260,13 +2275,20 @@
let old_volume = if let Some(old_volume) = maybe_old_volume {
old_volume
} else {
// Existing volume was deleted, so return here. We can't
// perform the region snapshot replacement now, and this
// Existing volume was hard-deleted, so return here. We
// can't perform the region replacement now, and this
// will short-circuit the rest of the process.

return Ok(VolumeReplaceResult::ExistingVolumeDeleted);
};

if old_volume.time_deleted.is_some() {
// Existing volume was soft-deleted, so return here for
// the same reason: the region replacement process
// should be short-circuited now.
return Ok(VolumeReplaceResult::ExistingVolumeDeleted);
}

let old_vcr: VolumeConstructionRequest =
match serde_json::from_str(&old_volume.data()) {
Ok(vcr) => vcr,
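The checks in this file pair with the new volume_deleted helper: a caller first asks whether the volume is gone, and volume_replace_region / volume_replace_snapshot re-verify deletion inside their own transactions because that first check can race with a concurrent delete (see the review discussion further down). Below is a minimal caller-side sketch, not part of the diff; the function name skip_if_volume_deleted and the import paths are assumptions for illustration.

use nexus_db_model::RegionReplacement;
use nexus_db_queries::context::OpContext;
use nexus_db_queries::db::DataStore;
use omicron_common::api::external::Error;

// Illustrative sketch: if the request's volume is already soft- or
// hard-deleted, finish the request instead of starting a replacement saga.
// Assumes the request is still in the Requested state, matching the
// precondition of set_region_replacement_complete_from_requested. The check
// is advisory only; the volume-replace transactions re-check before mutating.
async fn skip_if_volume_deleted(
    opctx: &OpContext,
    datastore: &DataStore,
    request: RegionReplacement,
) -> Result<bool, Error> {
    if datastore.volume_deleted(request.volume_id).await? {
        datastore
            .set_region_replacement_complete_from_requested(opctx, request)
            .await?;

        return Ok(true); // caller should skip the start saga
    }

    Ok(false)
}
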
2 changes: 1 addition & 1 deletion nexus/examples/config-second.toml
@@ -129,7 +129,7 @@ blueprints.period_secs_collect_crdb_node_ids = 180
sync_service_zone_nat.period_secs = 30
switch_port_settings_manager.period_secs = 30
region_replacement.period_secs = 30
region_replacement_driver.period_secs = 10
region_replacement_driver.period_secs = 30
# How frequently to query the status of active instances.
instance_watcher.period_secs = 30
# How frequently to schedule new instance update sagas.
2 changes: 1 addition & 1 deletion nexus/examples/config.toml
@@ -115,7 +115,7 @@ blueprints.period_secs_collect_crdb_node_ids = 180
sync_service_zone_nat.period_secs = 30
switch_port_settings_manager.period_secs = 30
region_replacement.period_secs = 30
region_replacement_driver.period_secs = 10
region_replacement_driver.period_secs = 30
# How frequently to query the status of active instances.
instance_watcher.period_secs = 30
# How frequently to schedule new instance update sagas.
111 changes: 110 additions & 1 deletion nexus/src/app/background/tasks/region_replacement.rs
@@ -170,8 +170,79 @@ impl BackgroundTask for RegionReplacementDetector {
};

for request in requests {
// If the replacement request is in the `requested` state and
// the request's volume was soft-deleted or hard-deleted, avoid
// sending the start request and instead transition the request
// to completed

let volume_deleted = match self
Contributor
Because we have mut self here, does that mean that this volume_deleted state
will be guaranteed not to change while this method is running?

Contributor Author
Unfortunately no - this queries the database, and the result could change immediately after the query.

If it does change to deleted in the middle of the saga, then that could be a problem, but both volume_replace_region and volume_replace_snapshot check whether the volume was hard-deleted during the transaction, and a38b751 updates this to also check if it is soft-deleted too!

Contributor
I just want to be sure that we can handle a delete if it can happen, really anywhere during this saga. If we can handle that, either by failing the replacement or handling it at the end, then I'm good :)

Contributor Author
Yeah, the saga will unwind. There's more work to do in a related follow-up commit, though: I found a case where the start saga will do some extra unnecessary work allocating regions if there's a hard delete of the volume in the middle of its execution.

.datastore
.volume_deleted(request.volume_id)
.await
{
Ok(volume_deleted) => volume_deleted,

Err(e) => {
let s = format!(
"error checking if volume id {} was deleted: {e}",
request.volume_id,
);
error!(&log, "{s}");

status.errors.push(s);
continue;
}
};

let request_id = request.id;

if volume_deleted {
// Volume was soft or hard deleted, so proceed with clean up.
// A request in state Requested has no additional associated
// state, so transition the record to Complete.

info!(
&log,
"request {} volume {} was soft or hard deleted!",
request_id,
request.volume_id,
);

let result = self
.datastore
.set_region_replacement_complete_from_requested(
opctx, request,
)
.await;

match result {
Ok(()) => {
let s = format!(
"request {} transitioned from requested to \
complete",
request_id,
);

info!(&log, "{s}");
status.requests_completed_ok.push(s);
}

Err(e) => {
let s = format!(
"error transitioning {} from requested to \
complete: {e}",
request_id,
);

error!(&log, "{s}");
status.errors.push(s);
}
}

continue;
}

let result = self
.send_start_request(
authn::saga::Serialized::for_opctx(opctx),
@@ -213,7 +284,10 @@ mod test {
use super::*;
use crate::app::background::init::test::NoopStartSaga;
use nexus_db_model::RegionReplacement;
use nexus_db_model::Volume;
use nexus_test_utils_macros::nexus_test;
use sled_agent_client::types::CrucibleOpts;
use sled_agent_client::types::VolumeConstructionRequest;
use uuid::Uuid;

type ControlPlaneTestContext =
@@ -239,14 +313,49 @@
assert_eq!(result, json!(RegionReplacementStatus::default()));

// Add a region replacement request for a fake region
let request = RegionReplacement::new(Uuid::new_v4(), Uuid::new_v4());
let volume_id = Uuid::new_v4();
let request = RegionReplacement::new(Uuid::new_v4(), volume_id);
let request_id = request.id;

datastore
.insert_region_replacement_request(&opctx, request)
.await
.unwrap();

let volume_construction_request = VolumeConstructionRequest::Volume {
id: volume_id,
block_size: 0,
sub_volumes: vec![VolumeConstructionRequest::Region {
block_size: 0,
blocks_per_extent: 0,
extent_count: 0,
gen: 0,
opts: CrucibleOpts {
id: volume_id,
target: vec![
// if you put something here, you'll need a synthetic
// dataset record
],
lossy: false,
flush_timeout: None,
key: None,
cert_pem: None,
key_pem: None,
root_cert_pem: None,
control: None,
read_only: false,
},
}],
read_only_parent: None,
};

let volume_data =
serde_json::to_string(&volume_construction_request).unwrap();

let volume = Volume::new(volume_id, volume_data);

datastore.volume_create(volume).await.unwrap();

// Activate the task - it should pick that up and try to run the region
// replacement start saga
let result: RegionReplacementStatus =
26 changes: 26 additions & 0 deletions nexus/src/app/sagas/region_replacement_drive.rs
@@ -309,6 +309,32 @@ async fn srrd_drive_region_replacement_check(
&params.serialized_authn,
);

// It doesn't make sense to perform any of this saga if the volume was soft
// or hard deleted: for example, this happens if the higher level resource
// like the disk was deleted. Volume deletion potentially results in the
// clean-up of Crucible resources, so it wouldn't even be valid to attempt
// to drive forward any type of live repair or reconciliation.
//
// Setting Done here will cause this saga to transition the replacement
// request to ReplacementDone.

let volume_deleted = osagactx
.datastore()
.volume_deleted(params.request.volume_id)
.await
.map_err(ActionError::action_failed)?;

if volume_deleted {
info!(
log,
"volume was soft or hard deleted!";
"region replacement id" => %params.request.id,
"volume id" => %params.request.volume_id,
);

return Ok(DriveCheck::Done);
}

let last_request_step = osagactx
.datastore()
.current_region_replacement_request_step(&opctx, params.request.id)
4 changes: 2 additions & 2 deletions nexus/src/app/sagas/region_replacement_finish.rs
@@ -275,8 +275,8 @@ pub(crate) mod test {
opts: CrucibleOpts {
id: old_region_volume_id,
target: vec![
// XXX if you put something here, you'll need a
// synthetic dataset record
// if you put something here, you'll need a synthetic
// dataset record
],
lossy: false,
flush_timeout: None,
@@ -250,8 +250,8 @@ pub(crate) mod test {
opts: CrucibleOpts {
id: old_snapshot_volume_id,
target: vec![
// XXX if you put something here, you'll need a
// synthetic dataset record
// if you put something here, you'll need a synthetic
// dataset record
],
lossy: false,
flush_timeout: None,