diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index 44de433603..9be58d3222 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -363,6 +363,12 @@ pub struct InventoryConfig { /// This is a very coarse mechanism to keep the system from overwhelming /// itself with inventory data. pub nkeep: u32, + + /// disable inventory collection altogether + /// + /// This is an emergency lever for support / operations. It should never be + /// necessary. + pub disable: bool, } /// Configuration for a nexus server @@ -615,6 +621,7 @@ mod test { external_endpoints.period_secs = 9 inventory.period_secs = 10 inventory.nkeep = 11 + inventory.disable = false [default_region_allocation_strategy] type = "random" seed = 0 @@ -703,6 +710,7 @@ mod test { inventory: InventoryConfig { period_secs: Duration::from_secs(10), nkeep: 11, + disable: false, } }, default_region_allocation_strategy: @@ -758,6 +766,7 @@ mod test { external_endpoints.period_secs = 9 inventory.period_secs = 10 inventory.nkeep = 3 + inventory.disable = false [default_region_allocation_strategy] type = "random" "##, diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index c7345156a7..efc9aa9c27 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -97,6 +97,8 @@ external_endpoints.period_secs = 60 inventory.period_secs = 600 # Maximum number of past collections to keep in the database inventory.nkeep = 5 +# Disable inventory collection altogether (for emergencies) +inventory.disable = false [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. 
diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 1c178175fe..bdcfedd065 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -103,6 +103,7 @@ impl BackgroundTasks { resolver, &nexus_id.to_string(), config.inventory.nkeep, + config.inventory.disable, ); let task = driver.register( String::from("inventory_collection"), diff --git a/nexus/src/app/background/inventory_collection.rs b/nexus/src/app/background/inventory_collection.rs index d9ab9fe4be..96a0941524 100644 --- a/nexus/src/app/background/inventory_collection.rs +++ b/nexus/src/app/background/inventory_collection.rs @@ -5,6 +5,7 @@ //! Background task for reading inventory for the rack use super::common::BackgroundTask; +use anyhow::ensure; use anyhow::Context; use futures::future::BoxFuture; use futures::FutureExt; @@ -21,6 +22,7 @@ pub struct InventoryCollector { resolver: internal_dns::resolver::Resolver, creator: String, nkeep: u32, + disable: bool, } impl InventoryCollector { @@ -29,12 +31,14 @@ impl InventoryCollector { resolver: internal_dns::resolver::Resolver, creator: &str, nkeep: u32, + disable: bool, ) -> InventoryCollector { InventoryCollector { datastore, resolver, creator: creator.to_owned(), nkeep, + disable, } } } @@ -55,6 +59,7 @@ impl BackgroundTask for InventoryCollector { &self.resolver, &self.creator, self.nkeep, + self.disable, ) .await .context("failed to collect inventory") @@ -88,7 +93,12 @@ async fn inventory_activate( resolver: &internal_dns::resolver::Resolver, creator: &str, nkeep: u32, + disabled: bool, ) -> Result { + // If we're disabled, don't do anything. (This switch is only intended for + // unforeseen production emergencies.) + ensure!(!disabled, "disabled by explicit configuration"); + // Prune old collections. We do this first, here, to ensure that we never // develop an unbounded backlog of collections. 
(If this process were done // by a separate task, it would be possible for the backlog to grow @@ -185,8 +195,13 @@ mod test { // a bunch and make sure that it always creates a new collection and // does not allow a backlog to accumulate. let nkeep = 3; - let mut task = - InventoryCollector::new(datastore.clone(), resolver, "me", nkeep); + let mut task = InventoryCollector::new( + datastore.clone(), + resolver.clone(), + "me", + nkeep, + false, + ); let nkeep = usize::try_from(nkeep).unwrap(); for i in 0..10 { let _ = task.activate(&opctx).await; @@ -208,5 +223,18 @@ mod test { assert_eq!(collections.len(), std::cmp::min(i + 2, nkeep + 1)); last_collections = collections; } + + // Create a disabled task and make sure that it does nothing. + let mut task = InventoryCollector::new( + datastore.clone(), + resolver, + "disabled", + 3, + true, + ); + let previous = datastore.inventory_collections().await.unwrap(); + let _ = task.activate(&opctx).await; + let latest = datastore.inventory_collections().await.unwrap(); + assert_eq!(previous, latest); } } diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 3e50a1ef18..3629ae9cb2 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -94,6 +94,8 @@ external_endpoints.period_secs = 60 inventory.period_secs = 600 # Maximum number of past collections to keep in the database inventory.nkeep = 3 +# Disable inventory collection altogether (for emergencies) +inventory.disable = false [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index c9b2f3fdc2..cae1f650c9 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -43,6 +43,8 @@ external_endpoints.period_secs = 60 inventory.period_secs = 600 # Maximum number of past collections to keep in the database inventory.nkeep = 3 +# Disable 
inventory collection altogether (for emergencies) +inventory.disable = false [default_region_allocation_strategy] # by default, allocate across 3 distinct sleds diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 65bd020e0b..be8683be54 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -43,6 +43,8 @@ external_endpoints.period_secs = 60 inventory.period_secs = 600 # Maximum number of past collections to keep in the database inventory.nkeep = 3 +# Disable inventory collection altogether (for emergencies) +inventory.disable = false [default_region_allocation_strategy] # by default, allocate without requirement for distinct sleds.