From 3cb42cbdb61e2ff2c1ae28915856745424d1606e Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 12 Jun 2024 14:54:44 -0700 Subject: [PATCH] Remove updater lock from instance runtime state In #5831, I added the `updater_gen` and `updater_id` fields for the instance-updater lock to the `InstanceRuntimeState` struct, based purely on vibes: "They change at runtime, right? Therefore, they ought to be 'runtime state'...". It turns out that this is actually Super Ultra Wrong, because any query which writes an `InstanceRuntimeState` without paying attention to the lock fields can inadvertantly clobber them, either releasing the lock or setting it back to a previous lock holder. These fields should be on the `Instance` struct instead, to avoid this kind of thing. This commit moves them to the correct place. I figured it would be nice to land this separately from #5749, purely for the sake of keeping that diff small. --- nexus/db-model/src/instance.rs | 43 +++++++++---------- nexus/db-queries/src/db/datastore/instance.rs | 10 ++--- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index 99c0126174..5c35259fc7 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -58,6 +58,24 @@ pub struct Instance { #[diesel(embed)] pub runtime_state: InstanceRuntimeState, + + /// The generation number for the updater lock. This is updated whenever the + /// lock is acquired or released, and is used in attempts to set the + /// `updater_id` field to ensure that the snapshot which indicated that the + /// lock was not held is still valid when setting the lock ID. + #[diesel(column_name = updater_gen)] + pub updater_gen: Generation, + + /// A UUID identifying the saga currently holding the update lock on this + /// instance. If this is [`None`] the instance is not locked. Otherwise, if + /// this is [`Some`], the instance is locked by the saga owning this UUID. + /// Note that this is not (presently) the UUID *of* the locking saga, but + /// rather, a UUID *generated by* that saga. Therefore, it may not be + /// useable to look up which saga holds the lock. + /// + /// This field is guarded by the instance's `updater_gen` + #[diesel(column_name = updater_id)] + pub updater_id: Option, } impl Instance { @@ -85,6 +103,9 @@ impl Instance { hostname: params.hostname.to_string(), boot_on_fault: false, runtime_state, + + updater_gen: Generation::new(), + updater_id: None, } } @@ -171,24 +192,6 @@ pub struct InstanceRuntimeState { #[diesel(column_name = migration_id)] pub migration_id: Option, - /// A UUID identifying the saga currently holding the update lock on this - /// instance. If this is [`None`] the instance is not locked. Otherwise, if - /// this is [`Some`], the instance is locked by the saga owning this UUID. - /// Note that this is not (presently) the UUID *of* the locking saga, but - /// rather, a UUID *generated by* that saga. Therefore, it may not be - /// useable to look up which saga holds the lock. - /// - /// This field is guarded by the instance's `updater_gen` - #[diesel(column_name = updater_id)] - pub updater_id: Option, - - /// The generation number for the updater lock. This is updated whenever the - /// lock is acquired or released, and is used in attempts to set the - /// `updater_id` field to ensure that the snapshot which indicated that the - /// lock was not held is still valid when setting the lock ID. - #[diesel(column_name = updater_gen)] - pub updater_gen: Generation, - /// The "internal" state of this instance. The instance's externally-visible /// state may be delegated to the instance's active VMM, if it has one. /// @@ -206,8 +209,6 @@ impl InstanceRuntimeState { dst_propolis_id: None, migration_id: None, gen: Generation::new(), - updater_gen: Generation::new(), - updater_id: None, } } } @@ -231,8 +232,6 @@ impl From propolis_id: state.propolis_id, dst_propolis_id: state.dst_propolis_id, migration_id: state.migration_id, - updater_gen: Generation::new(), - updater_id: None, } } } diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index e090210cbe..803a9661b7 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -646,7 +646,7 @@ impl DataStore { // important than handling that extremely unlikely edge case. let mut did_lock = false; loop { - match instance.runtime_state.updater_id { + match instance.updater_id { // If the `updater_id` field is not null and the ID equals this // saga's ID, we already have the lock. We're done here! Some(lock_id) if lock_id == saga_lock_id => { @@ -659,7 +659,7 @@ impl DataStore { ); return Ok(UpdaterLock { saga_lock_id, - locked_gen: instance.runtime_state.updater_gen, + locked_gen: instance.updater_gen, }); } // The `updater_id` field is set, but it's not our ID. The instance @@ -680,7 +680,7 @@ impl DataStore { } // Okay, now attempt to acquire the lock - let current_gen = instance.runtime_state.updater_gen; + let current_gen = instance.updater_gen; slog::debug!( &opctx.log, "attempting to acquire instance updater lock"; @@ -795,12 +795,12 @@ impl DataStore { UpdateAndQueryResult { status: UpdateStatus::NotUpdatedButExists, ref found, - } if found.runtime_state.updater_gen > locked_gen => Ok(false), + } if found.updater_gen > locked_gen => Ok(false), // The instance exists, but the lock ID doesn't match our lock ID. // This means we were trying to release a lock we never held, whcih // is almost certainly a programmer error. UpdateAndQueryResult { ref found, .. } => { - match found.runtime_state.updater_id { + match found.updater_id { Some(lock_holder) => { debug_assert_ne!(lock_holder, saga_lock_id); Err(Error::internal_error(