[nexus] Remove zones on expunged disks #5599

Merged: 19 commits, May 30, 2024
133 changes: 83 additions & 50 deletions nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
@@ -5,6 +5,7 @@
//! Low-level facility for generating Blueprints

use crate::ip_allocator::IpAllocator;
use crate::planner::zone_needs_expungement;
use crate::planner::ZoneExpungeReason;
use anyhow::anyhow;
use internal_dns::config::Host;
@@ -25,6 +26,7 @@ use nexus_types::deployment::DiskFilter;
use nexus_types::deployment::OmicronZoneDataset;
use nexus_types::deployment::OmicronZoneExternalFloatingIp;
use nexus_types::deployment::PlanningInput;
use nexus_types::deployment::SledDetails;
use nexus_types::deployment::SledFilter;
use nexus_types::deployment::SledResources;
use nexus_types::deployment::ZpoolName;
@@ -351,33 +353,72 @@ impl<'a> BlueprintBuilder<'a> {
self.comments.push(String::from(comment));
}

/// Expunges all zones from a sled.
/// Expunges all zones requiring expungement from a sled.
///
/// Returns the zone IDs that were expunged, mapped to the reason for
/// expungement (excluding zones that were already expunged). If the map is
/// empty, then the operation was a no-op.
pub(crate) fn expunge_all_zones_for_sled(
pub(crate) fn expunge_zones_for_sled(
&mut self,
sled_id: SledUuid,
reason: ZoneExpungeReason,
) -> Result<BTreeSet<OmicronZoneUuid>, Error> {
sled_details: &SledDetails,
) -> Result<BTreeMap<OmicronZoneUuid, ZoneExpungeReason>, Error> {
let log = self.log.new(o!(
"sled_id" => sled_id.to_string(),
));

// Do any zones need to be marked expunged?
let mut zones_to_expunge = BTreeSet::new();
let mut zones_to_expunge = BTreeMap::new();

let sled_zones = self.zones.current_sled_zones(sled_id);
for (z, state) in sled_zones {
for (zone_config, state) in sled_zones {
let zone_id = zone_config.id;
let log = log.new(o!(
"zone_id" => zone_id.to_string()
));

let Some(reason) =
zone_needs_expungement(sled_details, zone_config)
else {
continue;
};

let is_expunged =
is_already_expunged(z, state).map_err(|error| {
is_already_expunged(zone_config, state).map_err(|error| {
Error::Planner(anyhow!(error).context(format!(
"for sled {sled_id}, error computing zones to expunge"
)))
})?;

if !is_expunged {
zones_to_expunge.insert(z.id);
match reason {
ZoneExpungeReason::DiskExpunged => {
info!(
&log,
"expunged disk with non-expunged zone was found"
);
}
ZoneExpungeReason::SledDecommissioned => {
// A sled marked as decommissioned should have no resources
// allocated to it. If it does, it's an illegal state, possibly
// introduced by a bug elsewhere in the system -- we need to
// produce a loud warning (i.e. an ERROR-level log message) on
// this, while still removing the zones.
error!(
&log,
"sled has state Decommissioned, yet has zone \
allocated to it; will expunge it"
);
}
ZoneExpungeReason::SledExpunged => {
// This is the expected situation.
info!(
&log,
"expunged sled with non-expunged zone found"
);
}
}

zones_to_expunge.insert(zone_id, reason);
}
}

@@ -389,51 +430,43 @@ impl<'a> BlueprintBuilder<'a> {
return Ok(zones_to_expunge);
}

match reason {
ZoneExpungeReason::SledDecommissioned { policy } => {
// A sled marked as decommissioned should have no resources
// allocated to it. If it does, it's an illegal state, possibly
// introduced by a bug elsewhere in the system -- we need to
// produce a loud warning (i.e. an ERROR-level log message) on
// this, while still removing the zones.
error!(
&log,
"sled has state Decommissioned, yet has zones \
allocated to it; will expunge them \
(sled policy is \"{policy:?}\")"
);
}
ZoneExpungeReason::SledExpunged => {
// This is the expected situation.
info!(
&log,
"expunged sled with {} non-expunged zones found \
(will expunge all zones)",
zones_to_expunge.len()
);
}
}

// Now expunge all the zones that need it.
let change = self.zones.change_sled_zones(sled_id);
change.expunge_zones(zones_to_expunge.clone()).map_err(|error| {
anyhow!(error)
.context(format!("for sled {sled_id}, error expunging zones"))
})?;

// Finally, add a comment describing what happened.
let reason = match reason {
ZoneExpungeReason::SledDecommissioned { .. } => {
"sled state is decommissioned"
change
.expunge_zones(zones_to_expunge.keys().cloned().collect())
.map_err(|error| {
anyhow!(error).context(format!(
"for sled {sled_id}, error expunging zones"
))
})?;

// Finally, add comments describing what happened.
//
// Group the zones by their reason for expungement.
let mut count_disk_expunged = 0;
let mut count_sled_decommissioned = 0;
let mut count_sled_expunged = 0;
for reason in zones_to_expunge.values() {
match reason {
ZoneExpungeReason::DiskExpunged => count_disk_expunged += 1,
ZoneExpungeReason::SledDecommissioned => {
count_sled_decommissioned += 1;
}
ZoneExpungeReason::SledExpunged => count_sled_expunged += 1,
};
}
let count_and_reason = [
(count_disk_expunged, "zone was using expunged disk"),
(count_sled_decommissioned, "sled state is decommissioned"),
(count_sled_expunged, "sled policy is expunged"),
];
for (count, reason) in count_and_reason {
if count > 0 {
self.comment(format!(
"sled {sled_id} ({reason}): {count} zones expunged",
));
Comment on lines +446 to +467

Contributor:

Yeah, this is exactly the comment handling I was talking about in #5493 -- thanks for doing this!

I think I might consider doing this at a higher level. Instead of storing a list of comments in the BlueprintBuilder, store a log of operations. Then, while converting a BlueprintBuilder into a Blueprint, turn the log of operations into comments (after possibly doing some merging of operations). That would separate the data layer from the presentation layer -- and you can test against it.

Collaborator (Author):

I'm interested in this proposal, but it seems like it might extend a fair bit beyond the scope of this PR. Right now, the BlueprintBuilder modifies structures in place, rather than having a clear delineation of the "before" state plus the "applied operations".

If I were to implement what you're suggesting, presumably we'd alter change_sled_zones and change_sled_disks: rather than mutating in place, we'd need some way to represent operations as additions, updates, or removals that are stored separately and applied when we build the blueprint?

(That's my interpretation of your comment, at least -- am I reading it right?)

Contributor:

Ah, we decided not to do that in #5493. Instead, what we can do is both apply the changes and store a structured log of the changes.

Collaborator (Author):

I've attempted to resolve this in #5834, or at least started it, with an operation log. It is currently stringified when the blueprint is built, but it could remain a stronger type eventually, if that would be useful.

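To make the proposal above concrete, here is a minimal sketch of the kind of operation log being discussed. It assumes a hand-rolled Operation enum and an into_comments conversion step at build time; these names and shapes are illustrative only and are not taken from the omicron codebase or from the follow-up in #5834.

// Hypothetical sketch of an operation log for a blueprint builder.
// All type and field names here are illustrative assumptions.
use std::collections::BTreeMap;

/// A structured record of one change applied while building a blueprint.
#[derive(Debug, Clone)]
enum Operation {
    ZoneExpunged { sled_id: String, reason: String, count: usize },
    ZoneAdded { sled_id: String, kind: String },
}

#[derive(Debug, Default)]
struct OperationLog {
    operations: Vec<Operation>,
}

impl OperationLog {
    /// Record a change as it is applied.
    fn record(&mut self, op: Operation) {
        self.operations.push(op);
    }

    /// Collapse the structured log into human-readable comments, merging
    /// operations that share a sled and reason.
    fn into_comments(self) -> Vec<String> {
        let mut merged: BTreeMap<(String, String), usize> = BTreeMap::new();
        for op in self.operations {
            match op {
                Operation::ZoneExpunged { sled_id, reason, count } => {
                    *merged.entry((sled_id, reason)).or_default() += count;
                }
                Operation::ZoneAdded { sled_id, kind } => {
                    let reason = format!("added {kind} zone");
                    *merged.entry((sled_id, reason)).or_default() += 1;
                }
            }
        }
        merged
            .into_iter()
            .map(|((sled_id, reason), count)| {
                format!("sled {sled_id} ({reason}): {count} zone(s)")
            })
            .collect()
    }
}

A builder written this way would call record() wherever it currently calls self.comment(), and convert the log into comments only when the blueprint is built; tests could then assert against the structured operations directly, which is the testability benefit mentioned above.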
}
ZoneExpungeReason::SledExpunged => "sled policy is expunged",
};

self.comment(format!(
"sled {} ({reason}): {} zones expunged",
sled_id,
zones_to_expunge.len(),
));
}

Ok(zones_to_expunge)
}
122 changes: 105 additions & 17 deletions nexus/reconfigurator/planning/src/planner.rs
@@ -12,11 +12,13 @@ use crate::blueprint_builder::EnsureMultiple;
use crate::blueprint_builder::Error;
use crate::planner::omicron_zone_placement::PlacementError;
use nexus_types::deployment::Blueprint;
use nexus_types::deployment::BlueprintZoneConfig;
use nexus_types::deployment::BlueprintZoneDisposition;
use nexus_types::deployment::CockroachDbClusterVersion;
use nexus_types::deployment::CockroachDbPreserveDowngrade;
use nexus_types::deployment::CockroachDbSettings;
use nexus_types::deployment::PlanningInput;
use nexus_types::deployment::SledDetails;
use nexus_types::deployment::SledFilter;
use nexus_types::deployment::ZpoolFilter;
use nexus_types::external_api::views::SledPolicy;
@@ -170,15 +172,8 @@ impl<'a> Planner<'a> {
{
commissioned_sled_ids.insert(sled_id);

// Does this sled need zone expungement based on the details?
let Some(reason) =
needs_zone_expungement(sled_details.state, sled_details.policy)
else {
continue;
};

// Perform the expungement.
self.blueprint.expunge_all_zones_for_sled(sled_id, reason)?;
// Perform the expungement, for any zones that might need it.
self.blueprint.expunge_zones_for_sled(sled_id, sled_details)?;
}

// Check for any decommissioned sleds (i.e., sleds for which our
@@ -558,7 +553,7 @@ impl<'a> Planner<'a> {

/// Returns `Some(reason)` if the sled needs its zones to be expunged,
/// based on the policy and state.
fn needs_zone_expungement(
fn sled_needs_all_zones_expunged(
state: SledState,
policy: SledPolicy,
) -> Option<ZoneExpungeReason> {
@@ -569,7 +564,7 @@ fn needs_zone_expungement(
// an illegal state, but representable. If we see a sled in this
// state, we should still expunge all zones in it, but parent code
// should warn on it.
return Some(ZoneExpungeReason::SledDecommissioned { policy });
return Some(ZoneExpungeReason::SledDecommissioned);
}
}

@@ -579,13 +574,36 @@
}
}

pub(crate) fn zone_needs_expungement(
sled_details: &SledDetails,
zone_config: &BlueprintZoneConfig,
) -> Option<ZoneExpungeReason> {
// Should we expunge the zone because the sled is gone?
if let Some(reason) =
sled_needs_all_zones_expunged(sled_details.state, sled_details.policy)
{
return Some(reason);
}

// Should we expunge the zone because durable storage is gone?
if let Some(durable_storage_zpool) = zone_config.zone_type.zpool() {
let zpool_id = durable_storage_zpool.id();
if !sled_details.resources.zpool_is_provisionable(&zpool_id) {
return Some(ZoneExpungeReason::DiskExpunged);
}
};

None
}

/// The reason a sled's zones need to be expunged.
///
/// This is used only for introspection and logging -- it's not part of the
/// logical flow.
#[derive(Copy, Clone, Debug)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub(crate) enum ZoneExpungeReason {
SledDecommissioned { policy: SledPolicy },
DiskExpunged,
SledDecommissioned,
SledExpunged,
}

@@ -611,6 +629,9 @@ mod test {
use nexus_types::deployment::CockroachDbPreserveDowngrade;
use nexus_types::deployment::CockroachDbSettings;
use nexus_types::deployment::OmicronZoneNetworkResources;
use nexus_types::deployment::SledDisk;
use nexus_types::external_api::views::PhysicalDiskPolicy;
use nexus_types::external_api::views::PhysicalDiskState;
use nexus_types::external_api::views::SledPolicy;
use nexus_types::external_api::views::SledProvisionPolicy;
use nexus_types::external_api::views::SledState;
@@ -1032,15 +1053,15 @@ mod test {
// Make generated disk ids deterministic
let mut disk_rng =
TypedUuidRng::from_seed(TEST_NAME, "NewPhysicalDisks");
let mut new_sled_disk = |policy| nexus_types::deployment::SledDisk {
let mut new_sled_disk = |policy| SledDisk {
disk_identity: DiskIdentity {
vendor: "test-vendor".to_string(),
serial: "test-serial".to_string(),
model: "test-model".to_string(),
},
disk_id: PhysicalDiskUuid::from(disk_rng.next()),
policy,
state: nexus_types::external_api::views::PhysicalDiskState::Active,
state: PhysicalDiskState::Active,
};

let (_, sled_details) = builder.sleds_mut().iter_mut().next().unwrap();
@@ -1057,13 +1078,13 @@
for _ in 0..NEW_IN_SERVICE_DISKS {
sled_details.resources.zpools.insert(
ZpoolUuid::from(zpool_rng.next()),
new_sled_disk(nexus_types::external_api::views::PhysicalDiskPolicy::InService),
new_sled_disk(PhysicalDiskPolicy::InService),
);
}
for _ in 0..NEW_EXPUNGED_DISKS {
sled_details.resources.zpools.insert(
ZpoolUuid::from(zpool_rng.next()),
new_sled_disk(nexus_types::external_api::views::PhysicalDiskPolicy::Expunged),
new_sled_disk(PhysicalDiskPolicy::Expunged),
);
}

@@ -1096,6 +1117,73 @@ mod test {
logctx.cleanup_successful();
}

#[test]
fn test_disk_expungement_removes_zones() {
static TEST_NAME: &str = "planner_disk_expungement_removes_zones";
let logctx = test_setup_log(TEST_NAME);

// Create an example system with a single sled
let (collection, input, blueprint1) =
example(&logctx.log, TEST_NAME, 1);

let mut builder = input.into_builder();

// Aside: Avoid churning on the quantity of Nexus zones - we're okay
// staying at one.
builder.policy_mut().target_nexus_zone_count = 1;

// The example system should be assigning crucible zones to each
// in-service disk. When we expunge one of these disks, the planner
// should remove the associated zone.
let (_, sled_details) = builder.sleds_mut().iter_mut().next().unwrap();
let (_, disk) =
sled_details.resources.zpools.iter_mut().next().unwrap();
disk.policy = PhysicalDiskPolicy::Expunged;

let input = builder.build();

let blueprint2 = Planner::new_based_on(
logctx.log.clone(),
&blueprint1,
&input,
"test: expunge a disk",
&collection,
)
.expect("failed to create planner")
.with_rng_seed((TEST_NAME, "bp2"))
.plan()
.expect("failed to plan");

let diff = blueprint2.diff_since_blueprint(&blueprint1);
println!("1 -> 2 (expunge a disk):\n{}", diff.display());
assert_eq!(diff.sleds_added.len(), 0);
assert_eq!(diff.sleds_removed.len(), 0);
assert_eq!(diff.sleds_modified.len(), 1);

// We should be removing a single zone, associated with the Crucible
// using that device.
assert_eq!(diff.zones.added.len(), 0);
assert_eq!(diff.zones.removed.len(), 0);
assert_eq!(diff.zones.modified.len(), 1);

let (_zone_id, modified_zones) =
diff.zones.modified.iter().next().unwrap();
assert_eq!(modified_zones.zones.len(), 1);
let modified_zone = &modified_zones.zones.first().unwrap().zone;
assert!(
matches!(modified_zone.kind(), ZoneKind::Crucible),
"Expected the modified zone to be a Crucible zone, but it was: {:?}",
modified_zone.kind()
);
assert_eq!(
modified_zone.disposition(),
BlueprintZoneDisposition::Expunged,
"Should have expunged this zone"
);

logctx.cleanup_successful();
}

/// Check that the planner will skip non-provisionable sleds when allocating
/// extra Nexus zones
#[test]