Skip to content

Commit

Permalink
[nexus] Expunge zones on expunged disks, only provision crucible to i…
Browse files Browse the repository at this point in the history
…n-service disks
  • Loading branch information
smklein committed Apr 23, 2024
1 parent 138a116 commit 5229eff
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 8 deletions.
25 changes: 25 additions & 0 deletions nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,19 @@ impl<'a> BlueprintBuilder<'a> {
&mut self,
sled_id: SledUuid,
reason: ZoneExpungeReason,
) -> Result<BTreeSet<OmicronZoneUuid>, Error> {
self.expunge_all_zones_for_sled_where(sled_id, reason, |_| true)
}

/// Expunges all zones from a sled where `filter_fn` returns `true`.
///
/// Returns a list of zone IDs expunged (excluding zones that were already
/// expunged). If the list is empty, then the operation was a no-op.
pub(crate) fn expunge_all_zones_for_sled_where(
&mut self,
sled_id: SledUuid,
reason: ZoneExpungeReason,
filter_fn: impl Fn(&BlueprintZoneConfig) -> bool,
) -> Result<BTreeSet<OmicronZoneUuid>, Error> {
let log = self.log.new(o!(
"sled_id" => sled_id.to_string(),
Expand All @@ -414,6 +427,11 @@ impl<'a> BlueprintBuilder<'a> {

let sled_zones = self.zones.current_sled_zones(sled_id);
for (z, state) in sled_zones {
// Only consider zones for which the filter returns true.
if !filter_fn(z) {
continue;
}

let is_expunged =
is_already_expunged(z, state).map_err(|error| {
Error::Planner(anyhow!(error).context(format!(
Expand All @@ -435,6 +453,12 @@ impl<'a> BlueprintBuilder<'a> {
}

match reason {
ZoneExpungeReason::DiskExpunged => {
info!(
&log,
"expunged disk with non-expunged zone(s) was found"
);
}
ZoneExpungeReason::SledDecommissioned { policy } => {
// A sled marked as decommissioned should have no resources
// allocated to it. If it does, it's an illegal state, possibly
Expand Down Expand Up @@ -468,6 +492,7 @@ impl<'a> BlueprintBuilder<'a> {

// Finally, add a comment describing what happened.
let reason = match reason {
ZoneExpungeReason::DiskExpunged => "zone was using expunged disk",
ZoneExpungeReason::SledDecommissioned { .. } => {
"sled state is decommissioned"
}
Expand Down
38 changes: 30 additions & 8 deletions nexus/reconfigurator/planning/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,39 @@ impl<'a> Planner<'a> {

fn do_plan_expunge(&mut self) -> Result<(), Error> {
// Remove services from sleds marked expunged. We use `SledFilter::All`
// and have a custom `needs_zone_expungement` function that allows us
// and have a custom `needs_all_zones_expungement` function that allows us
// to produce better errors.
for (sled_id, sled_details) in self.input.all_sleds(SledFilter::All) {
// Does this sled need zone expungement based on the details?
let Some(reason) =
needs_zone_expungement(sled_details.state, sled_details.policy)
else {
if let Some(reason) = needs_all_zones_expungement(
sled_details.state,
sled_details.policy,
) {
// Perform the expungement.
self.blueprint.expunge_all_zones_for_sled(sled_id, reason)?;

// In this case, all zones have been expunged. We can jump to
// the next sled.
continue;
};
}

// Perform the expungement.
self.blueprint.expunge_all_zones_for_sled(sled_id, reason)?;
// The sled isn't being decommissioned, but there still might be
// some zones that need expungement.
//
// Remove any zones which rely on expunged disks.
self.blueprint.expunge_all_zones_for_sled_where(
sled_id,
ZoneExpungeReason::DiskExpunged,
|zone_config| {
let durable_storage_zpool =
match zone_config.zone_type.zpool() {
Some(pool) => pool,
None => return false,
};
let zpool_id = durable_storage_zpool.id();
!sled_details.resources.zpool_is_provisionable(&zpool_id)
},
)?;
}

Ok(())
Expand Down Expand Up @@ -360,7 +381,7 @@ impl<'a> Planner<'a> {

/// Returns `Some(reason)` if the sled needs its zones to be expunged,
/// based on the policy and state.
fn needs_zone_expungement(
fn needs_all_zones_expungement(
state: SledState,
policy: SledPolicy,
) -> Option<ZoneExpungeReason> {
Expand All @@ -387,6 +408,7 @@ fn needs_zone_expungement(
/// logical flow.
#[derive(Copy, Clone, Debug)]
pub(crate) enum ZoneExpungeReason {
DiskExpunged,
SledDecommissioned { policy: SledPolicy },
SledExpunged,
}
Expand Down
30 changes: 30 additions & 0 deletions nexus/types/src/deployment/zone_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,36 @@ pub enum BlueprintZoneType {
}

impl BlueprintZoneType {
/// Returns the zpool being used by this zone, if any.
pub fn zpool(&self) -> Option<&omicron_common::zpool_name::ZpoolName> {
match self {
BlueprintZoneType::ExternalDns(
blueprint_zone_type::ExternalDns { dataset, .. },
)
| BlueprintZoneType::Clickhouse(
blueprint_zone_type::Clickhouse { dataset, .. },
)
| BlueprintZoneType::ClickhouseKeeper(
blueprint_zone_type::ClickhouseKeeper { dataset, .. },
)
| BlueprintZoneType::CockroachDb(
blueprint_zone_type::CockroachDb { dataset, .. },
)
| BlueprintZoneType::Crucible(blueprint_zone_type::Crucible {
dataset,
..
})
| BlueprintZoneType::InternalDns(
blueprint_zone_type::InternalDns { dataset, .. },
) => Some(&dataset.pool_name),
BlueprintZoneType::BoundaryNtp(_)
| BlueprintZoneType::InternalNtp(_)
| BlueprintZoneType::Nexus(_)
| BlueprintZoneType::Oximeter(_)
| BlueprintZoneType::CruciblePantry(_) => None,
}
}

pub fn external_ip(&self) -> Option<IpAddr> {
match self {
BlueprintZoneType::Nexus(nexus) => Some(nexus.external_ip),
Expand Down

0 comments on commit 5229eff

Please sign in to comment.