From 469522d401f47fd213cf86bcb1a0dd41bad4fa19 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 30 Sep 2024 17:58:28 -0400 Subject: [PATCH] Temporarily disable region snapshot replacement (#6728) Until read-only region reference counting is implemented, region snapshot replacement should be disabled - it is currently the only thing that creates read-only regions. --- nexus/src/app/background/init.rs | 32 ++++++++++++------- .../region_snapshot_replacement_finish.rs | 12 ++++++- ...on_snapshot_replacement_garbage_collect.rs | 23 ++++++++++++- .../region_snapshot_replacement_start.rs | 15 ++++++++- .../tasks/region_snapshot_replacement_step.rs | 23 ++++++++++++- 5 files changed, 89 insertions(+), 16 deletions(-) diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 8ae3be84af..69221779ee 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -778,7 +778,8 @@ impl BackgroundTasksInitializer { "detect if region snapshots need replacement and begin the \ process", period: config.region_snapshot_replacement_start.period_secs, - task_impl: Box::new(RegionSnapshotReplacementDetector::new( + // XXX temporarily disabled, see oxidecomputer/omicron#6353 + task_impl: Box::new(RegionSnapshotReplacementDetector::disabled( datastore.clone(), sagas.clone(), )), @@ -794,10 +795,13 @@ impl BackgroundTasksInitializer { period: config .region_snapshot_replacement_garbage_collection .period_secs, - task_impl: Box::new(RegionSnapshotReplacementGarbageCollect::new( - datastore.clone(), - sagas.clone(), - )), + // XXX temporarily disabled, see oxidecomputer/omicron#6353 + task_impl: Box::new( + RegionSnapshotReplacementGarbageCollect::disabled( + datastore.clone(), + sagas.clone(), + ), + ), opctx: opctx.child(BTreeMap::new()), watchers: vec![], activator: task_region_snapshot_replacement_garbage_collection, @@ -809,10 +813,13 @@ impl BackgroundTasksInitializer { "detect what volumes were affected by a region snapshot \ replacement, and run the step saga for them", period: config.region_snapshot_replacement_step.period_secs, - task_impl: Box::new(RegionSnapshotReplacementFindAffected::new( - datastore.clone(), - sagas.clone(), - )), + // XXX temporarily disabled, see oxidecomputer/omicron#6353 + task_impl: Box::new( + RegionSnapshotReplacementFindAffected::disabled( + datastore.clone(), + sagas.clone(), + ), + ), opctx: opctx.child(BTreeMap::new()), watchers: vec![], activator: task_region_snapshot_replacement_step, @@ -824,9 +831,10 @@ impl BackgroundTasksInitializer { "complete a region snapshot replacement if all the steps are \ done", period: config.region_snapshot_replacement_finish.period_secs, - task_impl: Box::new(RegionSnapshotReplacementFinishDetector::new( - datastore, - )), + // XXX temporarily disabled, see oxidecomputer/omicron#6353 + task_impl: Box::new( + RegionSnapshotReplacementFinishDetector::disabled(datastore), + ), opctx: opctx.child(BTreeMap::new()), watchers: vec![], activator: task_region_snapshot_replacement_finish, diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs index caa2fa7bed..83078cb978 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_finish.rs @@ -19,11 +19,17 @@ use std::sync::Arc; pub struct RegionSnapshotReplacementFinishDetector { datastore: Arc, + disabled: bool, } impl RegionSnapshotReplacementFinishDetector { + #[allow(dead_code)] pub fn new(datastore: Arc) -> Self { - RegionSnapshotReplacementFinishDetector { datastore } + RegionSnapshotReplacementFinishDetector { datastore, disabled: false } + } + + pub fn disabled(datastore: Arc) -> Self { + RegionSnapshotReplacementFinishDetector { datastore, disabled: true } } async fn transition_requests_to_done( @@ -153,6 +159,10 @@ impl BackgroundTask for RegionSnapshotReplacementFinishDetector { async move { let mut status = RegionSnapshotReplacementFinishStatus::default(); + if self.disabled { + return json!(status); + } + self.transition_requests_to_done(opctx, &mut status).await; json!(status) diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs index f3b1b68198..eb171fda12 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_garbage_collect.rs @@ -22,11 +22,28 @@ use std::sync::Arc; pub struct RegionSnapshotReplacementGarbageCollect { datastore: Arc, sagas: Arc, + disabled: bool, } impl RegionSnapshotReplacementGarbageCollect { + #[allow(dead_code)] pub fn new(datastore: Arc, sagas: Arc) -> Self { - RegionSnapshotReplacementGarbageCollect { datastore, sagas } + RegionSnapshotReplacementGarbageCollect { + datastore, + sagas, + disabled: false, + } + } + + pub fn disabled( + datastore: Arc, + sagas: Arc, + ) -> Self { + RegionSnapshotReplacementGarbageCollect { + datastore, + sagas, + disabled: true, + } } async fn send_garbage_collect_request( @@ -135,6 +152,10 @@ impl BackgroundTask for RegionSnapshotReplacementGarbageCollect { let mut status = RegionSnapshotReplacementGarbageCollectStatus::default(); + if self.disabled { + return json!(status); + } + self.clean_up_region_snapshot_replacement_volumes( opctx, &mut status, diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs index bc739ecf27..8fd1e55975 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_start.rs @@ -29,11 +29,20 @@ use std::sync::Arc; pub struct RegionSnapshotReplacementDetector { datastore: Arc, sagas: Arc, + disabled: bool, } impl RegionSnapshotReplacementDetector { + #[allow(dead_code)] pub fn new(datastore: Arc, sagas: Arc) -> Self { - RegionSnapshotReplacementDetector { datastore, sagas } + RegionSnapshotReplacementDetector { datastore, sagas, disabled: false } + } + + pub fn disabled( + datastore: Arc, + sagas: Arc, + ) -> Self { + RegionSnapshotReplacementDetector { datastore, sagas, disabled: true } } async fn send_start_request( @@ -237,6 +246,10 @@ impl BackgroundTask for RegionSnapshotReplacementDetector { async { let mut status = RegionSnapshotReplacementStartStatus::default(); + if self.disabled { + return json!(status); + } + self.create_requests_for_region_snapshots_on_expunged_disks( opctx, &mut status, diff --git a/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs b/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs index a294ea5847..da05500a58 100644 --- a/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs +++ b/nexus/src/app/background/tasks/region_snapshot_replacement_step.rs @@ -42,11 +42,28 @@ use std::sync::Arc; pub struct RegionSnapshotReplacementFindAffected { datastore: Arc, sagas: Arc, + disabled: bool, } impl RegionSnapshotReplacementFindAffected { + #[allow(dead_code)] pub fn new(datastore: Arc, sagas: Arc) -> Self { - RegionSnapshotReplacementFindAffected { datastore, sagas } + RegionSnapshotReplacementFindAffected { + datastore, + sagas, + disabled: false, + } + } + + pub fn disabled( + datastore: Arc, + sagas: Arc, + ) -> Self { + RegionSnapshotReplacementFindAffected { + datastore, + sagas, + disabled: true, + } } async fn send_start_request( @@ -435,6 +452,10 @@ impl BackgroundTask for RegionSnapshotReplacementFindAffected { async move { let mut status = RegionSnapshotReplacementStepStatus::default(); + if self.disabled { + return json!(status); + } + // Importantly, clean old steps up before finding affected volumes! // Otherwise, will continue to find the snapshot in volumes to // delete, and will continue to see conflicts in next function.