Skip to content

Commit

Permalink
[nexus] Remove zones on expunged disks (#5599)
Browse files Browse the repository at this point in the history
Expunges zones for which the corresponding physical disk, holding
durable data, has been removed.

A follow-up PR will be necessary to expunge zones whose transient zone
filesystems have been removed; that case is not handled here because
Nexus is not yet aware of where zone placement decisions are made.

Partial fix of #5372
  • Loading branch information
smklein authored May 30, 2024
1 parent b07382f commit d2af4e4
Show file tree
Hide file tree
Showing 3 changed files with 218 additions and 67 deletions.
133 changes: 83 additions & 50 deletions nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//! Low-level facility for generating Blueprints
use crate::ip_allocator::IpAllocator;
use crate::planner::zone_needs_expungement;
use crate::planner::ZoneExpungeReason;
use anyhow::anyhow;
use internal_dns::config::Host;
Expand All @@ -25,6 +26,7 @@ use nexus_types::deployment::DiskFilter;
use nexus_types::deployment::OmicronZoneDataset;
use nexus_types::deployment::OmicronZoneExternalFloatingIp;
use nexus_types::deployment::PlanningInput;
use nexus_types::deployment::SledDetails;
use nexus_types::deployment::SledFilter;
use nexus_types::deployment::SledResources;
use nexus_types::deployment::ZpoolName;
Expand Down Expand Up @@ -351,33 +353,72 @@ impl<'a> BlueprintBuilder<'a> {
self.comments.push(String::from(comment));
}

/// Expunges all zones from a sled.
/// Expunges all zones requiring expungement from a sled.
///
/// Returns a list of zone IDs expunged (excluding zones that were already
/// expunged). If the list is empty, then the operation was a no-op.
pub(crate) fn expunge_all_zones_for_sled(
pub(crate) fn expunge_zones_for_sled(
&mut self,
sled_id: SledUuid,
reason: ZoneExpungeReason,
) -> Result<BTreeSet<OmicronZoneUuid>, Error> {
sled_details: &SledDetails,
) -> Result<BTreeMap<OmicronZoneUuid, ZoneExpungeReason>, Error> {
let log = self.log.new(o!(
"sled_id" => sled_id.to_string(),
));

// Do any zones need to be marked expunged?
let mut zones_to_expunge = BTreeSet::new();
let mut zones_to_expunge = BTreeMap::new();

let sled_zones = self.zones.current_sled_zones(sled_id);
for (z, state) in sled_zones {
for (zone_config, state) in sled_zones {
let zone_id = zone_config.id;
let log = log.new(o!(
"zone_id" => zone_id.to_string()
));

let Some(reason) =
zone_needs_expungement(sled_details, zone_config)
else {
continue;
};

let is_expunged =
is_already_expunged(z, state).map_err(|error| {
is_already_expunged(zone_config, state).map_err(|error| {
Error::Planner(anyhow!(error).context(format!(
"for sled {sled_id}, error computing zones to expunge"
)))
})?;

if !is_expunged {
zones_to_expunge.insert(z.id);
match reason {
ZoneExpungeReason::DiskExpunged => {
info!(
&log,
"expunged disk with non-expunged zone was found"
);
}
ZoneExpungeReason::SledDecommissioned => {
// A sled marked as decommissioned should have no resources
// allocated to it. If it does, it's an illegal state, possibly
// introduced by a bug elsewhere in the system -- we need to
// produce a loud warning (i.e. an ERROR-level log message) on
// this, while still removing the zones.
error!(
&log,
"sled has state Decommissioned, yet has zone \
allocated to it; will expunge it"
);
}
ZoneExpungeReason::SledExpunged => {
// This is the expected situation.
info!(
&log,
"expunged sled with non-expunged zone found"
);
}
}

zones_to_expunge.insert(zone_id, reason);
}
}

Expand All @@ -389,51 +430,43 @@ impl<'a> BlueprintBuilder<'a> {
return Ok(zones_to_expunge);
}

match reason {
ZoneExpungeReason::SledDecommissioned { policy } => {
// A sled marked as decommissioned should have no resources
// allocated to it. If it does, it's an illegal state, possibly
// introduced by a bug elsewhere in the system -- we need to
// produce a loud warning (i.e. an ERROR-level log message) on
// this, while still removing the zones.
error!(
&log,
"sled has state Decommissioned, yet has zones \
allocated to it; will expunge them \
(sled policy is \"{policy:?}\")"
);
}
ZoneExpungeReason::SledExpunged => {
// This is the expected situation.
info!(
&log,
"expunged sled with {} non-expunged zones found \
(will expunge all zones)",
zones_to_expunge.len()
);
}
}

// Now expunge all the zones that need it.
let change = self.zones.change_sled_zones(sled_id);
change.expunge_zones(zones_to_expunge.clone()).map_err(|error| {
anyhow!(error)
.context(format!("for sled {sled_id}, error expunging zones"))
})?;

// Finally, add a comment describing what happened.
let reason = match reason {
ZoneExpungeReason::SledDecommissioned { .. } => {
"sled state is decommissioned"
change
.expunge_zones(zones_to_expunge.keys().cloned().collect())
.map_err(|error| {
anyhow!(error).context(format!(
"for sled {sled_id}, error expunging zones"
))
})?;

// Finally, add comments describing what happened.
//
// Group the zones by their reason for expungement.
let mut count_disk_expunged = 0;
let mut count_sled_decommissioned = 0;
let mut count_sled_expunged = 0;
for reason in zones_to_expunge.values() {
match reason {
ZoneExpungeReason::DiskExpunged => count_disk_expunged += 1,
ZoneExpungeReason::SledDecommissioned => {
count_sled_decommissioned += 1;
}
ZoneExpungeReason::SledExpunged => count_sled_expunged += 1,
};
}
let count_and_reason = [
(count_disk_expunged, "zone was using expunged disk"),
(count_sled_decommissioned, "sled state is decommissioned"),
(count_sled_expunged, "sled policy is expunged"),
];
for (count, reason) in count_and_reason {
if count > 0 {
self.comment(format!(
"sled {sled_id} ({reason}): {count} zones expunged",
));
}
ZoneExpungeReason::SledExpunged => "sled policy is expunged",
};

self.comment(format!(
"sled {} ({reason}): {} zones expunged",
sled_id,
zones_to_expunge.len(),
));
}

Ok(zones_to_expunge)
}
Expand Down
122 changes: 105 additions & 17 deletions nexus/reconfigurator/planning/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ use crate::blueprint_builder::EnsureMultiple;
use crate::blueprint_builder::Error;
use crate::planner::omicron_zone_placement::PlacementError;
use nexus_types::deployment::Blueprint;
use nexus_types::deployment::BlueprintZoneConfig;
use nexus_types::deployment::BlueprintZoneDisposition;
use nexus_types::deployment::CockroachDbClusterVersion;
use nexus_types::deployment::CockroachDbPreserveDowngrade;
use nexus_types::deployment::CockroachDbSettings;
use nexus_types::deployment::PlanningInput;
use nexus_types::deployment::SledDetails;
use nexus_types::deployment::SledFilter;
use nexus_types::deployment::ZpoolFilter;
use nexus_types::external_api::views::SledPolicy;
Expand Down Expand Up @@ -170,15 +172,8 @@ impl<'a> Planner<'a> {
{
commissioned_sled_ids.insert(sled_id);

// Does this sled need zone expungement based on the details?
let Some(reason) =
needs_zone_expungement(sled_details.state, sled_details.policy)
else {
continue;
};

// Perform the expungement.
self.blueprint.expunge_all_zones_for_sled(sled_id, reason)?;
// Perform the expungement, for any zones that might need it.
self.blueprint.expunge_zones_for_sled(sled_id, sled_details)?;
}

// Check for any decommissioned sleds (i.e., sleds for which our
Expand Down Expand Up @@ -558,7 +553,7 @@ impl<'a> Planner<'a> {

/// Returns `Some(reason)` if the sled needs its zones to be expunged,
/// based on the policy and state.
fn needs_zone_expungement(
fn sled_needs_all_zones_expunged(
state: SledState,
policy: SledPolicy,
) -> Option<ZoneExpungeReason> {
Expand All @@ -569,7 +564,7 @@ fn needs_zone_expungement(
// an illegal state, but representable. If we see a sled in this
// state, we should still expunge all zones in it, but parent code
// should warn on it.
return Some(ZoneExpungeReason::SledDecommissioned { policy });
return Some(ZoneExpungeReason::SledDecommissioned);
}
}

Expand All @@ -579,13 +574,36 @@ fn needs_zone_expungement(
}
}

/// Returns `Some(reason)` if the given zone on the given sled should be
/// expunged.
///
/// Two conditions are checked, in order: a sled-wide reason (the sled
/// itself is expunged or decommissioned) takes precedence; otherwise the
/// zone is expunged if the zpool backing its durable dataset is no longer
/// provisionable (i.e. the underlying physical disk was expunged).
pub(crate) fn zone_needs_expungement(
    sled_details: &SledDetails,
    zone_config: &BlueprintZoneConfig,
) -> Option<ZoneExpungeReason> {
    sled_needs_all_zones_expunged(sled_details.state, sled_details.policy)
        .or_else(|| {
            // Zones with no durable dataset have no zpool to check.
            let durable_storage_zpool = zone_config.zone_type.zpool()?;
            if sled_details
                .resources
                .zpool_is_provisionable(&durable_storage_zpool.id())
            {
                None
            } else {
                Some(ZoneExpungeReason::DiskExpunged)
            }
        })
}

/// The reason a sled's zones need to be expunged.
///
/// This is used only for introspection and logging -- it's not part of the
/// logical flow.
#[derive(Copy, Clone, Debug)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub(crate) enum ZoneExpungeReason {
SledDecommissioned { policy: SledPolicy },
DiskExpunged,
SledDecommissioned,
SledExpunged,
}

Expand All @@ -611,6 +629,9 @@ mod test {
use nexus_types::deployment::CockroachDbPreserveDowngrade;
use nexus_types::deployment::CockroachDbSettings;
use nexus_types::deployment::OmicronZoneNetworkResources;
use nexus_types::deployment::SledDisk;
use nexus_types::external_api::views::PhysicalDiskPolicy;
use nexus_types::external_api::views::PhysicalDiskState;
use nexus_types::external_api::views::SledPolicy;
use nexus_types::external_api::views::SledProvisionPolicy;
use nexus_types::external_api::views::SledState;
Expand Down Expand Up @@ -1032,15 +1053,15 @@ mod test {
// Make generated disk ids deterministic
let mut disk_rng =
TypedUuidRng::from_seed(TEST_NAME, "NewPhysicalDisks");
let mut new_sled_disk = |policy| nexus_types::deployment::SledDisk {
let mut new_sled_disk = |policy| SledDisk {
disk_identity: DiskIdentity {
vendor: "test-vendor".to_string(),
serial: "test-serial".to_string(),
model: "test-model".to_string(),
},
disk_id: PhysicalDiskUuid::from(disk_rng.next()),
policy,
state: nexus_types::external_api::views::PhysicalDiskState::Active,
state: PhysicalDiskState::Active,
};

let (_, sled_details) = builder.sleds_mut().iter_mut().next().unwrap();
Expand All @@ -1057,13 +1078,13 @@ mod test {
for _ in 0..NEW_IN_SERVICE_DISKS {
sled_details.resources.zpools.insert(
ZpoolUuid::from(zpool_rng.next()),
new_sled_disk(nexus_types::external_api::views::PhysicalDiskPolicy::InService),
new_sled_disk(PhysicalDiskPolicy::InService),
);
}
for _ in 0..NEW_EXPUNGED_DISKS {
sled_details.resources.zpools.insert(
ZpoolUuid::from(zpool_rng.next()),
new_sled_disk(nexus_types::external_api::views::PhysicalDiskPolicy::Expunged),
new_sled_disk(PhysicalDiskPolicy::Expunged),
);
}

Expand Down Expand Up @@ -1096,6 +1117,73 @@ mod test {
logctx.cleanup_successful();
}

// Verifies that expunging a single physical disk causes the planner to
// expunge the zone whose durable dataset lives on that disk's zpool,
// while leaving the rest of the blueprint untouched.
#[test]
fn test_disk_expungement_removes_zones() {
static TEST_NAME: &str = "planner_disk_expungement_removes_zones";
let logctx = test_setup_log(TEST_NAME);

// Create an example system with a single sled
let (collection, input, blueprint1) =
example(&logctx.log, TEST_NAME, 1);

let mut builder = input.into_builder();

// Aside: Avoid churning on the quantity of Nexus zones - we're okay
// staying at one.
builder.policy_mut().target_nexus_zone_count = 1;

// The example system should be assigning crucible zones to each
// in-service disk. When we expunge one of these disks, the planner
// should remove the associated zone.
//
// Pick an arbitrary (first) zpool on the first sled and mark its disk
// expunged in the planning input.
let (_, sled_details) = builder.sleds_mut().iter_mut().next().unwrap();
let (_, disk) =
sled_details.resources.zpools.iter_mut().next().unwrap();
disk.policy = PhysicalDiskPolicy::Expunged;

let input = builder.build();

// Re-plan from blueprint1 with the updated input; the fixed RNG seed
// keeps generated IDs deterministic across runs.
let blueprint2 = Planner::new_based_on(
logctx.log.clone(),
&blueprint1,
&input,
"test: expunge a disk",
&collection,
)
.expect("failed to create planner")
.with_rng_seed((TEST_NAME, "bp2"))
.plan()
.expect("failed to plan");

// Only the one sled whose disk we expunged should differ between the
// two blueprints.
let diff = blueprint2.diff_since_blueprint(&blueprint1);
println!("1 -> 2 (expunge a disk):\n{}", diff.display());
assert_eq!(diff.sleds_added.len(), 0);
assert_eq!(diff.sleds_removed.len(), 0);
assert_eq!(diff.sleds_modified.len(), 1);

// We should be removing a single zone, associated with the Crucible
// using that device.
//
// Note: expungement shows up as a *modified* zone (its disposition
// changes), not a removed one.
assert_eq!(diff.zones.added.len(), 0);
assert_eq!(diff.zones.removed.len(), 0);
assert_eq!(diff.zones.modified.len(), 1);

let (_zone_id, modified_zones) =
diff.zones.modified.iter().next().unwrap();
assert_eq!(modified_zones.zones.len(), 1);
let modified_zone = &modified_zones.zones.first().unwrap().zone;
assert!(
matches!(modified_zone.kind(), ZoneKind::Crucible),
"Expected the modified zone to be a Crucible zone, but it was: {:?}",
modified_zone.kind()
);
assert_eq!(
modified_zone.disposition(),
BlueprintZoneDisposition::Expunged,
"Should have expunged this zone"
);

logctx.cleanup_successful();
}

/// Check that the planner will skip non-provisionable sleds when allocating
/// extra Nexus zones
#[test]
Expand Down
Loading

0 comments on commit d2af4e4

Please sign in to comment.