Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect and un-delete phantom disks #4547

Merged
merged 16 commits into from
Dec 4, 2023
Merged
24 changes: 20 additions & 4 deletions common/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,8 @@ pub struct BackgroundTaskConfig {
pub nat_cleanup: NatCleanupConfig,
/// configuration for inventory tasks
pub inventory: InventoryConfig,
/// configuration for phantom disks task
pub phantom_disks: PhantomDiskConfig,
}

#[serde_as]
Expand Down Expand Up @@ -386,7 +388,7 @@ pub struct NatCleanupConfig {
pub struct InventoryConfig {
/// period (in seconds) for periodic activations of this background task
///
/// Each activation fetches information about all harware and software in
/// Each activation fetches information about all hardware and software in
/// the system and inserts it into the database. This generates a moderate
/// amount of data.
#[serde_as(as = "DurationSeconds<u64>")]
Expand All @@ -405,6 +407,14 @@ pub struct InventoryConfig {
pub disable: bool,
}

/// Configuration for the phantom disks background task.
#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct PhantomDiskConfig {
/// period (in seconds) for periodic activations of this background task
// Held in memory as a `Duration`; the `serde_as` annotation below makes the
// serialized form an integer number of seconds (`u64`), e.g.
// `phantom_disks.period_secs = 30` in the config file.
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,
}

/// Configuration for a nexus server
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
pub struct PackageConfig {
Expand Down Expand Up @@ -508,8 +518,9 @@ mod test {
BackgroundTaskConfig, Config, ConfigDropshotWithTls, ConsoleConfig,
Database, DeploymentConfig, DnsTasksConfig, DpdConfig,
ExternalEndpointsConfig, InternalDns, InventoryConfig, LoadError,
LoadErrorKind, MgdConfig, NatCleanupConfig, PackageConfig, SchemeName,
TimeseriesDbConfig, Tunables, UpdatesConfig,
LoadErrorKind, MgdConfig, NatCleanupConfig, PackageConfig,
PhantomDiskConfig, SchemeName, TimeseriesDbConfig, Tunables,
UpdatesConfig,
};
use crate::address::{Ipv6Subnet, RACK_PREFIX};
use crate::api::internal::shared::SwitchLocation;
Expand Down Expand Up @@ -663,6 +674,7 @@ mod test {
inventory.period_secs = 10
inventory.nkeep = 11
inventory.disable = false
phantom_disks.period_secs = 30
[default_region_allocation_strategy]
type = "random"
seed = 0
Expand Down Expand Up @@ -764,7 +776,10 @@ mod test {
period_secs: Duration::from_secs(10),
nkeep: 11,
disable: false,
}
},
phantom_disks: PhantomDiskConfig {
period_secs: Duration::from_secs(30),
},
},
default_region_allocation_strategy:
crate::nexus_config::RegionAllocationStrategy::Random {
Expand Down Expand Up @@ -822,6 +837,7 @@ mod test {
inventory.period_secs = 10
inventory.nkeep = 3
inventory.disable = false
phantom_disks.period_secs = 30
[default_region_allocation_strategy]
type = "random"
"##,
Expand Down
26 changes: 26 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,32 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
);
}
};
} else if name == "phantom_disks" {
// Local view of the JSON `details` value for this task: only the two
// counters printed below are deserialized.
#[derive(Deserialize)]
struct TaskSuccess {
/// how many phantom disks were deleted ok
phantom_disk_deleted_ok: usize,

/// how many phantom disks could not be deleted
phantom_disk_deleted_err: usize,
}

// `details` is only borrowed here, while `serde_json::from_value` consumes
// its argument, hence the clone.
match serde_json::from_value::<TaskSuccess>(details.clone()) {
// The details did not deserialize as `TaskSuccess`: warn on stderr and
// include the raw value so the mismatch can be inspected.
Err(error) => eprintln!(
"warning: failed to interpret task details: {:?}: {:?}",
error, details
),
// Report both counters on stdout.
Ok(success) => {
println!(
" number of phantom disks deleted: {}",
success.phantom_disk_deleted_ok
);
println!(
" number of phantom disk delete errors: {}",
success.phantom_disk_deleted_err
);
}
};
} else {
println!(
"warning: unknown background task: {:?} \
Expand Down
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ task: "nat_v4_garbage_collector"
predetermined retention policy


task: "phantom_disks"
detects and un-deletes phantom disks


---------------------------------------------
stderr:
note: using Nexus URL http://127.0.0.1:REDACTED_PORT
Expand Down Expand Up @@ -131,6 +135,10 @@ task: "nat_v4_garbage_collector"
predetermined retention policy


task: "phantom_disks"
detects and un-deletes phantom disks


---------------------------------------------
stderr:
note: Nexus URL not specified. Will pick one from DNS.
Expand Down Expand Up @@ -183,6 +191,10 @@ task: "nat_v4_garbage_collector"
predetermined retention policy


task: "phantom_disks"
detects and un-deletes phantom disks


---------------------------------------------
stderr:
note: Nexus URL not specified. Will pick one from DNS.
Expand Down
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,10 @@ task: "nat_v4_garbage_collector"
predetermined retention policy


task: "phantom_disks"
detects and un-deletes phantom disks


---------------------------------------------
stderr:
note: using Nexus URL http://127.0.0.1:REDACTED_PORT/
Expand Down Expand Up @@ -357,6 +361,14 @@ task: "inventory_collection"
last collection started: <REDACTED_TIMESTAMP>
last collection done: <REDACTED_TIMESTAMP>

task: "phantom_disks"
configured period: every 30s
currently executing: no
last completed activation: iter 2, triggered by an explicit signal
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
number of phantom disks deleted: 0
number of phantom disk delete errors: 0

---------------------------------------------
stderr:
note: using Nexus URL http://127.0.0.1:REDACTED_PORT/
Expand Down
6 changes: 5 additions & 1 deletion nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1301,7 +1301,7 @@ table! {
///
/// This should be updated whenever the schema is changed. For more details,
/// refer to: schema/crdb/README.adoc
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(17, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(18, 0, 0);

allow_tables_to_appear_in_same_query!(
system_update,
Expand Down Expand Up @@ -1370,3 +1370,7 @@ allow_tables_to_appear_in_same_query!(
switch_port,
switch_port_settings_bgp_peer_config
);

// Permit `disk` and `virtual_provisioning_resource` to be referenced
// together in a single query.
allow_tables_to_appear_in_same_query!(disk, virtual_provisioning_resource);

// Likewise for `volume` and `virtual_provisioning_resource`.
allow_tables_to_appear_in_same_query!(volume, virtual_provisioning_resource);
Loading
Loading