Skip to content

Commit

Permalink
[#3886 3/4] Start saga for region replacement (#5839)
Browse files Browse the repository at this point in the history
Start filling out the region replacement background task skeleton by
adding a check for regions that are stored on disks that were expunged,
and inserting region replacement requests for them. In the same
background task, check if there are any requests in the "Requested"
state and run the new "region replacement start" saga for them. This
background task will also pick up manually requested region
replacements.

Also in this commit is the region replacement start saga, which will
take a replacement request that is in state "Requested", and perform the
steps to:

- allocate a new region

- swap the region being replaced with that newly allocated region in the
affected volume

- create a fake volume that can later be deleted, referencing the region
that was swapped out

- update the region replacement request's state to "Running" and
clear the operating saga id

This represents the first step to be taken after a region goes away:
allocate the replacement, and swap it in to the affected volume. Once
this is done, any new checkout and construction of the affected volume
will no longer reference the expunged region but will cause
reconciliation to take place. It is still degraded in the sense that the
newly allocated region is blank and the other two are not, and an
Upstairs needs to perform that reconciliation or repair. Existing
constructed Volumes running in a propolis or pantry context will remain
unmodified: the next commit will be a saga that takes care of
initiating live repair or reconciliation for those existing running
Volumes in order to drive either live repair or reconciliation forward.
  • Loading branch information
jmpesp authored Jun 6, 2024
1 parent 4fd9dd2 commit 6c324c3
Show file tree
Hide file tree
Showing 7 changed files with 1,707 additions and 48 deletions.
133 changes: 99 additions & 34 deletions nexus/db-queries/src/db/datastore/volume.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1672,6 +1672,50 @@ impl DataStore {
}
}

/// Report whether `region` appears as a target of any read/write region
/// inside the given Volume Construction Request.
///
/// The request tree is walked breadth-first: every `Volume` contributes its
/// `sub_volumes` to the work queue, `Region` entries have their targets parsed
/// and compared against `region`, and `Url` / `File` entries are ignored. The
/// read-only parent of a `Volume` is intentionally never inspected.
///
/// # Errors
///
/// Returns an error if a target string that gets examined cannot be parsed as
/// a `SocketAddrV6`. Within one `Region`, targets after a match are not
/// parsed, but queued parts that come later are still visited.
fn region_in_vcr(
    vcr: &VolumeConstructionRequest,
    region: &SocketAddrV6,
) -> anyhow::Result<bool> {
    let mut pending: VecDeque<&VolumeConstructionRequest> =
        VecDeque::from([vcr]);
    let mut found = false;

    while let Some(current) = pending.pop_front() {
        match current {
            VolumeConstructionRequest::Region { opts, .. } => {
                // Parsing is lazy, so targets after the first match in this
                // region are never parsed.
                let addrs =
                    opts.target.iter().map(|t| t.parse::<SocketAddrV6>());

                for addr in addrs {
                    if addr? == *region {
                        found = true;
                        break;
                    }
                }
            }

            VolumeConstructionRequest::Volume { sub_volumes, .. } => {
                // Only the sub volumes are queued: the read-only parent is
                // skipped because this function only looks for R/W regions.
                pending.extend(sub_volumes.iter());
            }

            // Neither of these can reference a region.
            VolumeConstructionRequest::Url { .. }
            | VolumeConstructionRequest::File { .. } => {}
        }
    }

    Ok(found)
}

pub struct VolumeReplacementParams {
pub volume_id: Uuid,
pub region_id: Uuid,
Expand Down Expand Up @@ -1796,6 +1840,61 @@ impl DataStore {
.transaction(&conn, |conn| {
let err = err.clone();
async move {
// Grab the old volume first
let maybe_old_volume = {
volume_dsl::volume
.filter(volume_dsl::id.eq(existing.volume_id))
.select(Volume::as_select())
.first_async::<Volume>(&conn)
.await
.optional()
.map_err(|e| {
err.bail_retryable_or_else(e, |e| {
VolumeReplaceRegionError::Public(
public_error_from_diesel(
e,
ErrorHandler::Server,
)
)
})
})?
};

let old_volume = if let Some(old_volume) = maybe_old_volume {
old_volume
} else {
// Existing volume was deleted, so return an error. We
// can't perform the region replacement now!
return Err(err.bail(VolumeReplaceRegionError::TargetVolumeDeleted));
};

let old_vcr: VolumeConstructionRequest =
match serde_json::from_str(&old_volume.data()) {
Ok(vcr) => vcr,
Err(e) => {
return Err(err.bail(VolumeReplaceRegionError::SerdeError(e)));
},
};

// Does it look like this replacement already happened?
let old_region_in_vcr = match region_in_vcr(&old_vcr, &existing.region_addr) {
Ok(v) => v,
Err(e) => {
return Err(err.bail(VolumeReplaceRegionError::RegionReplacementError(e)));
},
};
let new_region_in_vcr = match region_in_vcr(&old_vcr, &replacement.region_addr) {
Ok(v) => v,
Err(e) => {
return Err(err.bail(VolumeReplaceRegionError::RegionReplacementError(e)));
},
};

if !old_region_in_vcr && new_region_in_vcr {
// It does seem like the replacement happened
return Ok(());
}

use db::schema::region::dsl as region_dsl;
use db::schema::volume::dsl as volume_dsl;

Expand Down Expand Up @@ -1838,40 +1937,6 @@ impl DataStore {
// Update the existing volume's construction request to
// replace the existing region's SocketAddrV6 with the
// replacement region's
let maybe_old_volume = {
volume_dsl::volume
.filter(volume_dsl::id.eq(existing.volume_id))
.select(Volume::as_select())
.first_async::<Volume>(&conn)
.await
.optional()
.map_err(|e| {
err.bail_retryable_or_else(e, |e| {
VolumeReplaceRegionError::Public(
public_error_from_diesel(
e,
ErrorHandler::Server,
)
)
})
})?
};

let old_volume = if let Some(old_volume) = maybe_old_volume {
old_volume
} else {
// existing volume was deleted, so return an error, we
// can't perform the region replacement now!
return Err(err.bail(VolumeReplaceRegionError::TargetVolumeDeleted));
};

let old_vcr: VolumeConstructionRequest =
match serde_json::from_str(&old_volume.data()) {
Ok(vcr) => vcr,
Err(e) => {
return Err(err.bail(VolumeReplaceRegionError::SerdeError(e)));
},
};

// Copy the old volume's VCR, changing out the old region
// for the new.
Expand Down
Loading

0 comments on commit 6c324c3

Please sign in to comment.