Skip to content

Commit

Permalink
Mark as ReplacementDone only when unlocked (#6196)
Browse files Browse the repository at this point in the history
The transition of a region replacement request to ReplacementDone
(attempted when a finish notification is seen) should only occur if the
record is unlocked. If it is locked, then a saga is executing for that
record, and an arbitrary state transition should (surprisingly) not be
allowed.
  • Loading branch information
jmpesp authored Aug 6, 2024
1 parent 9e1344e commit 8f5232a
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 12 deletions.
4 changes: 4 additions & 0 deletions nexus/db-model/src/region_replacement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ impl std::str::FromStr for RegionReplacementState {
/// modification was committed to the database and will not change or be
/// unwound.
///
/// It's also possible to transition from Running to ReplacementDone if a
/// "finish" notification is seen by the region replacement drive background
/// task. This check is done before invoking the region replacement drive saga.
///
/// See also: RegionReplacementStep records
#[derive(
Queryable,
Expand Down
125 changes: 113 additions & 12 deletions nexus/db-queries/src/db/datastore/region_replacement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -732,9 +732,18 @@ impl DataStore {
}

/// Nexus has been notified by an Upstairs (or has otherwised determined)
/// that a region replacement is done, so update the record. This may arrive
/// in the middle of a drive saga invocation, so do not filter on state or
/// operating saga id!
/// that a region replacement is done, so update the record. Filter on the
/// following:
///
/// - operating saga id being None, as this happens outside of a saga and
/// should only transition the record if there isn't currently a lock.
///
/// - the record being in the state "Running": this function is called when
/// a "finish" notification is seen, and that only happens after a region
/// replacement drive saga has invoked either a reconcilation or live
/// repair, and that has finished. The region replacement drive background
/// task will scan for these notifications and call this function if one
/// is seen.
pub async fn mark_region_replacement_as_done(
&self,
opctx: &OpContext,
Expand All @@ -743,11 +752,12 @@ impl DataStore {
use db::schema::region_replacement::dsl;
let updated = diesel::update(dsl::region_replacement)
.filter(dsl::id.eq(region_replacement_id))
.set((
.filter(dsl::operating_saga_id.is_null())
.filter(dsl::replacement_state.eq(RegionReplacementState::Running))
.set(
dsl::replacement_state
.eq(RegionReplacementState::ReplacementDone),
dsl::operating_saga_id.eq(Option::<Uuid>::None),
))
)
.check_if_exists::<RegionReplacement>(region_replacement_id)
.execute_and_check(&*self.pool_connection_authorized(opctx).await?)
.await;
Expand Down Expand Up @@ -858,7 +868,7 @@ mod test {
async fn test_replacement_done_in_middle_of_drive_saga() {
// If Nexus receives a notification that a repair has finished in the
// middle of a drive saga, then make sure the replacement request state
// ends up as `ReplacementDone`.
// eventually ends up as `ReplacementDone`.

let logctx = dev::test_setup_log(
"test_replacement_done_in_middle_of_drive_saga",
Expand All @@ -880,7 +890,7 @@ mod test {
.await
.unwrap();

// Transition to Driving
// The drive saga will transition the record to Driving, locking it.

let saga_id = Uuid::new_v4();

Expand All @@ -890,7 +900,39 @@ mod test {
.unwrap();

// Now, Nexus receives a notification that the repair has finished
// successfully
// successfully. A background task trying to mark as replacement done
// should fail as the record was locked by the saga.

datastore
.mark_region_replacement_as_done(&opctx, request.id)
.await
.unwrap_err();

// Ensure that the state is still Driving, and the operating saga id is
// set.

let actual_request = datastore
.get_region_replacement_request_by_id(&opctx, request.id)
.await
.unwrap();

assert_eq!(
actual_request.replacement_state,
RegionReplacementState::Driving
);
assert_eq!(actual_request.operating_saga_id, Some(saga_id));

// The Drive saga will finish, but doesn't transition to replacement
// done because it didn't detect that one of the repair operations had
// finished ok.

datastore
.undo_set_region_replacement_driving(&opctx, request.id, saga_id)
.await
.unwrap();

// Now the region replacement drive background task wakes up again, and
// this time marks the record as replacement done successfully.

datastore
.mark_region_replacement_as_done(&opctx, request.id)
Expand All @@ -911,14 +953,73 @@ mod test {
);
assert_eq!(actual_request.operating_saga_id, None);

// The Drive saga will unwind when it tries to set the state back to
// Running.
db.cleanup().await.unwrap();
logctx.cleanup_successful();
}

#[tokio::test]
async fn test_replacement_done_in_middle_of_finish_saga() {
// If multiple Nexus are racing, don't let one mark a record as
// "ReplacementDone" if it's in the middle of the finish saga.

let logctx = dev::test_setup_log(
"test_replacement_done_in_middle_of_finish_saga",
);
let mut db = test_setup_database(&logctx.log).await;
let (opctx, datastore) = datastore_test(&logctx, &db).await;

let region_id = Uuid::new_v4();
let volume_id = Uuid::new_v4();

let request = {
let mut request = RegionReplacement::new(region_id, volume_id);
request.replacement_state = RegionReplacementState::ReplacementDone;
request
};

datastore
.undo_set_region_replacement_driving(&opctx, request.id, saga_id)
.insert_region_replacement_request(&opctx, request.clone())
.await
.unwrap();

// The finish saga will transition to Completing, setting operating saga
// id accordingly.

let saga_id = Uuid::new_v4();

datastore
.set_region_replacement_completing(&opctx, request.id, saga_id)
.await
.unwrap();

// Double check that another saga can't do this, because the first saga
// took the lock.

datastore
.set_region_replacement_completing(
&opctx,
request.id,
Uuid::new_v4(),
)
.await
.unwrap_err();

// mark_region_replacement_as_done is called due to a finish
// notification scan by the region replacement drive background task.
// This should fail as the saga took the lock on this record.

datastore
.mark_region_replacement_as_done(&opctx, request.id)
.await
.unwrap_err();

// The first saga has finished and sets the record to Complete.

datastore
.set_region_replacement_complete(&opctx, request.id, saga_id)
.await
.unwrap();

db.cleanup().await.unwrap();
logctx.cleanup_successful();
}
Expand Down

0 comments on commit 8f5232a

Please sign in to comment.