Detect and un-delete phantom disks #4547

Merged
merged 16 commits on Dec 4, 2023
24 changes: 20 additions & 4 deletions common/src/nexus_config.rs
@@ -339,6 +339,8 @@ pub struct BackgroundTaskConfig {
pub nat_cleanup: NatCleanupConfig,
/// configuration for inventory tasks
pub inventory: InventoryConfig,
/// configuration for phantom disks task
pub phantom_disks: PhantomDiskConfig,
}

#[serde_as]
@@ -386,7 +388,7 @@ pub struct NatCleanupConfig {
pub struct InventoryConfig {
/// period (in seconds) for periodic activations of this background task
///
/// Each activation fetches information about all harware and software in
/// Each activation fetches information about all hardware and software in
/// the system and inserts it into the database. This generates a moderate
/// amount of data.
#[serde_as(as = "DurationSeconds<u64>")]
@@ -405,6 +407,14 @@ pub struct InventoryConfig {
pub disable: bool,
}

#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct PhantomDiskConfig {
/// period (in seconds) for periodic activations of this background task
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,
}
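
For context, here is a minimal hypothetical sketch of what the `DurationSeconds<u64>` annotation does: an integer `period_secs` value in TOML deserializes into a `std::time::Duration`. The struct below is a trimmed stand-in rather than the real `PhantomDiskConfig`, and it assumes the `serde_with` and `toml` crates are available.

```rust
use serde::Deserialize;
use serde_with::{serde_as, DurationSeconds};
use std::time::Duration;

// Trimmed-down stand-in for the config struct above.
#[serde_as]
#[derive(Debug, Deserialize)]
struct PhantomDiskConfig {
    // An integer number of seconds in TOML becomes a Duration here.
    #[serde_as(as = "DurationSeconds<u64>")]
    period_secs: Duration,
}

fn main() {
    let cfg: PhantomDiskConfig =
        toml::from_str("period_secs = 30").expect("valid config");
    assert_eq!(cfg.period_secs, Duration::from_secs(30));
}
```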

/// Configuration for a nexus server
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
pub struct PackageConfig {
@@ -508,8 +518,9 @@ mod test {
BackgroundTaskConfig, Config, ConfigDropshotWithTls, ConsoleConfig,
Database, DeploymentConfig, DnsTasksConfig, DpdConfig,
ExternalEndpointsConfig, InternalDns, InventoryConfig, LoadError,
LoadErrorKind, MgdConfig, NatCleanupConfig, PackageConfig, SchemeName,
TimeseriesDbConfig, Tunables, UpdatesConfig,
LoadErrorKind, MgdConfig, NatCleanupConfig, PackageConfig,
PhantomDiskConfig, SchemeName, TimeseriesDbConfig, Tunables,
UpdatesConfig,
};
use crate::address::{Ipv6Subnet, RACK_PREFIX};
use crate::api::internal::shared::SwitchLocation;
@@ -663,6 +674,7 @@ mod test {
inventory.period_secs = 10
inventory.nkeep = 11
inventory.disable = false
phantom_disks.period_secs = 30
[default_region_allocation_strategy]
type = "random"
seed = 0
@@ -764,7 +776,10 @@ mod test {
period_secs: Duration::from_secs(10),
nkeep: 11,
disable: false,
}
},
phantom_disks: PhantomDiskConfig {
period_secs: Duration::from_secs(30),
},
},
default_region_allocation_strategy:
crate::nexus_config::RegionAllocationStrategy::Random {
@@ -822,6 +837,7 @@ mod test {
inventory.period_secs = 10
inventory.nkeep = 3
inventory.disable = false
phantom_disks.period_secs = 30
[default_region_allocation_strategy]
type = "random"
"##,
16 changes: 16 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
@@ -515,6 +515,22 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
);
}
};
} else if name == "phantom_disks" {
#[derive(Deserialize)]
struct TaskSuccess {
/// how many phantom disks were found
ok: usize,
}

match serde_json::from_value::<TaskSuccess>(details.clone()) {
Err(error) => eprintln!(
"warning: failed to interpret task details: {:?}: {:?}",
error, details
),
Ok(success) => {
println!(" phantom disks found: {}", success.ok);
}
};
} else {
println!(
"warning: unknown background task: {:?} \
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/env.out
@@ -66,6 +66,10 @@ task: "nat_v4_garbage_collector"
predetermined retention policy


task: "phantom_disks"
detects and un-deletes phantom disks


---------------------------------------------
stderr:
note: using Nexus URL http://127.0.0.1:REDACTED_PORT
@@ -131,6 +135,10 @@ task: "nat_v4_garbage_collector"
predetermined retention policy


task: "phantom_disks"
detects and un-deletes phantom disks


---------------------------------------------
stderr:
note: Nexus URL not specified. Will pick one from DNS.
@@ -183,6 +191,10 @@ task: "nat_v4_garbage_collector"
predetermined retention policy


task: "phantom_disks"
detects and un-deletes phantom disks


---------------------------------------------
stderr:
note: Nexus URL not specified. Will pick one from DNS.
11 changes: 11 additions & 0 deletions dev-tools/omdb/tests/successes.out
@@ -260,6 +260,10 @@ task: "nat_v4_garbage_collector"
predetermined retention policy


task: "phantom_disks"
detects and un-deletes phantom disks


---------------------------------------------
stderr:
note: using Nexus URL http://127.0.0.1:REDACTED_PORT/
@@ -357,6 +361,13 @@ task: "inventory_collection"
last collection started: <REDACTED_TIMESTAMP>
last collection done: <REDACTED_TIMESTAMP>

task: "phantom_disks"
configured period: every 30s
currently executing: no
last completed activation: iter 2, triggered by an explicit signal
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
phantom disks found: 0

---------------------------------------------
stderr:
note: using Nexus URL http://127.0.0.1:REDACTED_PORT/
6 changes: 5 additions & 1 deletion nexus/db-model/src/schema.rs
@@ -1299,7 +1299,7 @@ table! {
///
/// This should be updated whenever the schema is changed. For more details,
/// refer to: schema/crdb/README.adoc
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(14, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(15, 0, 0);

allow_tables_to_appear_in_same_query!(
system_update,
@@ -1368,3 +1368,7 @@ allow_tables_to_appear_in_same_query!(
switch_port,
switch_port_settings_bgp_peer_config
);

allow_tables_to_appear_in_same_query!(disk, virtual_provisioning_resource);

allow_tables_to_appear_in_same_query!(volume, virtual_provisioning_resource);
142 changes: 142 additions & 0 deletions nexus/db-queries/src/db/datastore/disk.rs
@@ -25,11 +25,14 @@ use crate::db::model::DiskUpdate;
use crate::db::model::Instance;
use crate::db::model::Name;
use crate::db::model::Project;
use crate::db::model::VirtualProvisioningResource;
use crate::db::model::Volume;
use crate::db::pagination::paginated;
use crate::db::queries::disk::DiskSetClauseForAttach;
use crate::db::update_and_check::UpdateAndCheck;
use crate::db::update_and_check::UpdateStatus;
use async_bb8_diesel::AsyncRunQueryDsl;
use chrono::DateTime;
use chrono::Utc;
use diesel::prelude::*;
use omicron_common::api;
@@ -652,4 +655,143 @@ impl DataStore {
}
}
}

/// Set a disk to faulted and un-delete it
///
/// If the disk delete saga unwinds, then the disk should _not_ remain
/// deleted: the disk delete saga should be triggered again in order to
/// fully complete, and the only way to do that is to un-delete the disk.
/// Set it to faulted to ensure that it won't be used.
pub async fn project_undelete_disk_set_faulted_no_auth(
&self,
disk_id: &Uuid,
) -> Result<(), Error> {
use db::schema::disk::dsl;
let conn = self.pool_connection_unauthorized().await?;

let faulted = api::external::DiskState::Faulted.label();

let result = diesel::update(dsl::disk)
.filter(dsl::time_deleted.is_not_null())
.filter(dsl::id.eq(*disk_id))
.set((
dsl::time_deleted.eq(None::<DateTime<Utc>>),
dsl::disk_state.eq(faulted),
))
.check_if_exists::<Disk>(*disk_id)
.execute_and_check(&conn)
.await
.map_err(|e| {
public_error_from_diesel(
e,
ErrorHandler::NotFoundByLookup(
ResourceType::Disk,
LookupType::ById(*disk_id),
),
)
})?;

match result.status {
UpdateStatus::Updated => Ok(()),
UpdateStatus::NotUpdatedButExists => {
let disk = result.found;
let disk_state = disk.state();

if disk.time_deleted().is_none()
&& disk_state.state() == &api::external::DiskState::Faulted
{
// To maintain idempotency, if the disk has already been
// faulted, don't throw an error.
return Ok(());
} else {
// NOTE: This is a "catch-all" error case, more specific
// errors should be preferred as they're more actionable.
return Err(Error::InternalError {
internal_message: String::from(
"disk exists, but cannot be faulted",
),
});
}
}
}
}
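
To illustrate the idempotency guarantee described above, here is a hedged sketch of an undo step that could safely retry this call. The function name and saga wiring are hypothetical, not the actual saga code in this PR; only the datastore method and the import paths come from this diff.

```rust
use nexus_db_queries::db::DataStore;
use omicron_common::api::external::Error;
use uuid::Uuid;

// Hypothetical undo action; the name and signature are illustrative only.
async fn sdd_delete_disk_record_undo(
    datastore: &DataStore,
    disk_id: Uuid,
) -> Result<(), Error> {
    // Un-delete the disk and mark it faulted. If a previous attempt already
    // did this, the datastore method returns Ok(()) instead of an error, so
    // this undo step can be retried safely.
    datastore
        .project_undelete_disk_set_faulted_no_auth(&disk_id)
        .await
}
```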

/// Find disks that have been deleted but still have a
/// `virtual_provisioning_resource` record: this indicates that a disk
/// delete saga partially succeeded, then unwound, which (before the fixes
/// in customer-support#58) would mean the disk was deleted but the project
/// it was in could not be deleted (due to an erroneous number of bytes
/// "still provisioned").
pub async fn find_phantom_disks(&self) -> ListResultVec<Disk> {
use db::schema::disk::dsl;
use db::schema::virtual_provisioning_resource::dsl as resource_dsl;
use db::schema::volume::dsl as volume_dsl;

let conn = self.pool_connection_unauthorized().await?;

let potential_phantom_disks: Vec<(
Disk,
Option<VirtualProvisioningResource>,
Option<Volume>,
)> = dsl::disk
.filter(dsl::time_deleted.is_not_null())
.left_join(
resource_dsl::virtual_provisioning_resource
.on(resource_dsl::id.eq(dsl::id)),
)
.left_join(volume_dsl::volume.on(dsl::volume_id.eq(volume_dsl::id)))
.select((
Disk::as_select(),
Option::<VirtualProvisioningResource>::as_select(),
Option::<Volume>::as_select(),
))
.load_async(&*conn)
.await
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;

// The first forward steps of the disk delete saga (plus the volume
// delete sub saga) are as follows:
//
// 1. soft-delete the disk
// 2. call virtual_provisioning_collection_delete_disk
// 3. soft-delete the disk's volume
//
// Before the fixes as part of customer-support#58, steps 1 and 3 did
// not have undo steps, whereas step 2 did. In order to detect when the
// disk delete saga unwound, find entries where
//
// 1. the disk and volume are soft-deleted
// 2. the `virtual_provisioning_resource` exists
//
// It's important not to conflict with any currently running disk delete
// saga.

Ok(potential_phantom_disks
.into_iter()
.filter(|(disk, resource, volume)| {
if let Some(volume) = volume {
// In this branch, the volume record was soft-deleted. The
// saga unwound before the volume record could be hard
// deleted. This won't conflict with a running disk delete
// saga, because the resource record should be None if the
// disk and volume were already soft deleted (if there is
// one, the saga will be at or past step 3).
disk.time_deleted().is_some()
&& volume.time_deleted.is_some()
&& resource.is_some()
} else {
// In this branch, the volume record was hard-deleted. The
// saga could still have unwound after hard deleting the
// volume record, so proceed with filtering. This won't
// conflict with a running disk delete saga because the
// resource record should be None if the disk was soft
// deleted and the volume was hard deleted (if there is one,
// the saga should be almost finished as the volume hard
// delete is the last thing it does).
disk.time_deleted().is_some() && resource.is_some()
}
})
.map(|(disk, _, _)| disk)
.collect())
}
}
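
Putting the two datastore methods together, here is a hedged sketch of what one activation of the phantom disk task might look like. The real implementation lives in `nexus/src/app/background/phantom_disks.rs` (added by this PR but not shown in this excerpt); the function name is hypothetical, it assumes `Disk` exposes its UUID via `id()`, and the counter mirrors the `ok` field that omdb prints as "phantom disks found".

```rust
use nexus_db_queries::db::DataStore;
use omicron_common::api::external::Error;

// Hedged sketch of one activation; error handling is simplified and the
// real task's structure differs.
async fn activate_phantom_disk_task(
    datastore: &DataStore,
) -> Result<usize, Error> {
    // Find disks that were soft-deleted but still have a
    // virtual_provisioning_resource record, i.e. the delete saga unwound.
    let phantom_disks = datastore.find_phantom_disks().await?;

    // Un-delete each one and mark it faulted so the disk delete saga can be
    // run again to completion.
    let mut ok = 0;
    for disk in phantom_disks {
        datastore
            .project_undelete_disk_set_faulted_no_auth(&disk.id())
            .await?;
        ok += 1;
    }

    // This count is what omdb renders as "phantom disks found".
    Ok(ok)
}
```
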
1 change: 1 addition & 0 deletions nexus/examples/config.toml
@@ -100,6 +100,7 @@ inventory.period_secs = 600
inventory.nkeep = 5
# Disable inventory collection altogether (for emergencies)
inventory.disable = false
phantom_disks.period_secs = 30

[default_region_allocation_strategy]
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.
23 changes: 22 additions & 1 deletion nexus/src/app/background/init.rs
@@ -11,6 +11,7 @@ use super::dns_servers;
use super::external_endpoints;
use super::inventory_collection;
use super::nat_cleanup;
use super::phantom_disks;
use nexus_db_model::DnsGroup;
use nexus_db_queries::context::OpContext;
use nexus_db_queries::db::DataStore;
@@ -52,6 +53,9 @@ pub struct BackgroundTasks {

/// task handle for the task that collects inventory
pub task_inventory_collection: common::TaskHandle,

/// task handle for the task that detects phantom disks
pub task_phantom_disks: common::TaskHandle,
}

impl BackgroundTasks {
@@ -122,7 +126,7 @@ impl BackgroundTasks {
// Background task: inventory collector
let task_inventory_collection = {
let collector = inventory_collection::InventoryCollector::new(
datastore,
datastore.clone(),
resolver,
&nexus_id.to_string(),
config.inventory.nkeep,
@@ -143,6 +147,22 @@ impl BackgroundTasks {
task
};

// Background task: phantom disk detection
let task_phantom_disks = {
let detector = phantom_disks::PhantomDiskDetector::new(datastore);

let task = driver.register(
String::from("phantom_disks"),
String::from("detects and un-deletes phantom disks"),
config.phantom_disks.period_secs,
Box::new(detector),
opctx.child(BTreeMap::new()),
vec![],
);

task
};

BackgroundTasks {
driver,
task_internal_dns_config,
@@ -153,6 +173,7 @@ impl BackgroundTasks {
external_endpoints,
nat_cleanup,
task_inventory_collection,
task_phantom_disks,
}
}
