From bf0a314dc3ecd767f1ce48855a3dc8792e329052 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Wed, 14 Feb 2024 20:28:41 +0000 Subject: [PATCH] Order `NextItem` subquery results predictably - Fixes #5055 - Add a new column, `index` to the `NextItem` subquery, which indexes the shifts from 0..n. - Add an `ORDER BY index` clause to guarantee order. - Add test Ensure MAC address shift is correct Simplify MAC shift logic Clippy, and remove local env files Review feedback --- nexus/db-queries/src/db/queries/disk.rs | 11 +- .../src/db/queries/network_interface.rs | 104 +++++-- nexus/db-queries/src/db/queries/next_item.rs | 262 ++++++++++++++++-- nexus/db-queries/src/db/queries/vpc.rs | 8 +- 4 files changed, 331 insertions(+), 54 deletions(-) diff --git a/nexus/db-queries/src/db/queries/disk.rs b/nexus/db-queries/src/db/queries/disk.rs index 9fd56c3ce8..dc1a31dd01 100644 --- a/nexus/db-queries/src/db/queries/disk.rs +++ b/nexus/db-queries/src/db/queries/disk.rs @@ -46,11 +46,12 @@ struct NextDiskSlot { impl NextDiskSlot { fn new(instance_id: Uuid) -> Self { - let generator = DefaultShiftGenerator { - base: 0, - max_shift: i64::try_from(MAX_DISKS_PER_INSTANCE).unwrap(), - min_shift: 0, - }; + let generator = DefaultShiftGenerator::new( + 0, + i64::try_from(MAX_DISKS_PER_INSTANCE).unwrap(), + 0, + ) + .expect("invalid min/max shift"); Self { inner: NextItem::new_scoped(generator, instance_id) } } } diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index 0643089316..a22c80b232 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -516,8 +516,8 @@ impl NextIpv4Address { let subnet = IpNetwork::from(subnet); let net = IpNetwork::from(first_available_address(&subnet)); let max_shift = i64::from(last_address_offset(&subnet)); - let generator = - DefaultShiftGenerator { base: net, max_shift, min_shift: 0 }; + let generator = DefaultShiftGenerator::new(net, max_shift, 0) + .expect("invalid min/max shift"); Self { inner: NextItem::new_scoped(generator, subnet_id) } } } @@ -575,12 +575,13 @@ pub struct NextNicSlot { impl NextNicSlot { pub fn new(parent_id: Uuid) -> Self { - let generator = DefaultShiftGenerator { - base: 0, - max_shift: i64::try_from(MAX_NICS_PER_INSTANCE) + let generator = DefaultShiftGenerator::new( + 0, + i64::try_from(MAX_NICS_PER_INSTANCE) .expect("Too many network interfaces"), - min_shift: 0, - }; + 0, + ) + .expect("invalid min/max shift"); Self { inner: NextItem::new_scoped(generator, parent_id) } } } @@ -607,25 +608,62 @@ pub struct NextMacAddress { >, } +// Helper to ensure we correctly compute the min/max shifts for a next MAC +// query. +#[derive(Copy, Clone, Debug)] +struct NextMacShifts { + base: MacAddr, + min_shift: i64, + max_shift: i64, +} + +impl NextMacShifts { + fn for_guest() -> Self { + let base = MacAddr::random_guest(); + Self::shifts_for(base, MacAddr::MIN_GUEST_ADDR, MacAddr::MAX_GUEST_ADDR) + } + + fn for_system() -> NextMacShifts { + let base = MacAddr::random_system(); + Self::shifts_for( + base, + MacAddr::MIN_SYSTEM_ADDR, + MacAddr::MAX_SYSTEM_ADDR, + ) + } + + fn shifts_for(base: MacAddr, min: i64, max: i64) -> NextMacShifts { + let x = base.to_i64(); + + // The max shift is the distance to the last value. This min shift is + // always expressed as a negative number, giving the largest leftward + // shift, i.e., the distance to the first value. + let max_shift = max - x; + let min_shift = min - x; + Self { base, min_shift, max_shift } + } +} + impl NextMacAddress { pub fn new(vpc_id: Uuid, kind: NetworkInterfaceKind) -> Self { let (base, max_shift, min_shift) = match kind { NetworkInterfaceKind::Instance => { - let base = MacAddr::random_guest(); - let x = base.to_i64(); - let max_shift = MacAddr::MAX_GUEST_ADDR - x; - let min_shift = x - MacAddr::MIN_GUEST_ADDR; + let NextMacShifts { base, min_shift, max_shift } = + NextMacShifts::for_guest(); (base.into(), max_shift, min_shift) } NetworkInterfaceKind::Service => { - let base = MacAddr::random_system(); - let x = base.to_i64(); - let max_shift = MacAddr::MAX_SYSTEM_ADDR - x; - let min_shift = x - MacAddr::MAX_SYSTEM_ADDR; + let NextMacShifts { base, min_shift, max_shift } = + NextMacShifts::for_system(); (base.into(), max_shift, min_shift) } }; - let generator = DefaultShiftGenerator { base, max_shift, min_shift }; + let generator = DefaultShiftGenerator::new(base, max_shift, min_shift) + .unwrap_or_else(|| { + panic!( + "invalid min shift ({min_shift}) or max_shift ({max_shift})" + ) + }); Self { inner: NextItem::new_scoped(generator, vpc_id) } } } @@ -1713,6 +1751,7 @@ mod tests { use crate::db::model::NetworkInterface; use crate::db::model::Project; use crate::db::model::VpcSubnet; + use crate::db::queries::network_interface::NextMacShifts; use async_bb8_diesel::AsyncRunQueryDsl; use dropshot::test_util::LogContext; use ipnetwork::Ipv4Network; @@ -2801,4 +2840,37 @@ mod tests { "fd00::5".parse::().unwrap(), ); } + + #[test] + fn test_next_mac_shifts_for_system() { + let NextMacShifts { base, min_shift, max_shift } = + NextMacShifts::for_system(); + assert!(base.is_system()); + assert!( + min_shift <= 0, + "expected min shift to be negative, found {min_shift}" + ); + assert!(max_shift >= 0, "found {max_shift}"); + let x = base.to_i64(); + assert_eq!(x + min_shift, MacAddr::MIN_SYSTEM_ADDR); + assert_eq!(x + max_shift, MacAddr::MAX_SYSTEM_ADDR); + } + + #[test] + fn test_next_mac_shifts_for_guest() { + let NextMacShifts { base, min_shift, max_shift } = + NextMacShifts::for_guest(); + assert!(base.is_guest()); + assert!( + min_shift <= 0, + "expected min shift to be negative, found {min_shift}" + ); + assert!( + max_shift >= 0, + "expected max shift to be positive, found {max_shift}" + ); + let x = base.to_i64(); + assert_eq!(x + min_shift, MacAddr::MIN_GUEST_ADDR); + assert_eq!(x + max_shift, MacAddr::MAX_GUEST_ADDR); + } } diff --git a/nexus/db-queries/src/db/queries/next_item.rs b/nexus/db-queries/src/db/queries/next_item.rs index 007aec943d..769c891349 100644 --- a/nexus/db-queries/src/db/queries/next_item.rs +++ b/nexus/db-queries/src/db/queries/next_item.rs @@ -35,7 +35,7 @@ use uuid::Uuid; /// SELECT /// + shift AS ip /// FROM -/// generate_series(0, ) AS shift +/// generate_series(0, ) AS shift, /// LEFT OUTER JOIN /// network_interface /// ON @@ -43,21 +43,22 @@ use uuid::Uuid; /// (, + shift, TRUE) /// WHERE /// ip IS NULL +/// ORDER BY ip /// LIMIT 1 /// ``` /// -/// This query selects the lowest address in the IP subnet that's not already -/// allocated to a guest interface. Note that the query is linear in the number -/// of _allocated_ guest addresses. and are chosen -/// based on the subnet and its size, and take into account reserved IP -/// addresses (such as the broadcast address). +/// This query selects the next address after in the IP subnet +/// that's not already allocated to a guest interface. Note that the query is +/// linear in the number of _allocated_ guest addresses. and +/// are chosen based on the subnet and its size, and take into +/// account reserved IP addresses (such as the broadcast address). /// /// General query structure /// ----------------------- /// /// Much of the value of this type comes from the ability to specify the /// starting point for a scan for the next item. In the case above, of an IP -/// address for a guest NIC, we always try to allocate the lowest available +/// address for a guest NIC, we always try to allocate the next available /// address. This implies the search is linear in the number of allocated IP /// addresses, but that runtime cost is acceptable for a few reasons. First, the /// predictability of the addresses is nice. Second, the subnets can generally @@ -81,16 +82,16 @@ use uuid::Uuid; /// FROM /// ( /// SELECT -/// shift +/// "index", shift /// FROM -/// generate_series(0, ) -/// AS shift +/// generate_series(0, ) AS "index" +/// generate_series(0, ) AS shift, /// UNION ALL /// SELECT /// shift /// FROM -/// generate_series(, -1) -/// AS shift +/// generate_series(, ) AS "index" +/// generate_series(, -1) AS shift, /// LEFT OUTER JOIN /// /// ON @@ -98,6 +99,7 @@ use uuid::Uuid; /// (, + shift, TRUE) /// WHERE /// IS NULL +/// ORDER BY "index" /// LIMIT 1 /// ``` /// @@ -120,6 +122,18 @@ use uuid::Uuid; /// +------------------------------------------------+ /// ``` /// +/// Ordering +/// -------- +/// +/// The subquery is designed to select the next address _after_ the provided +/// base. To preserve this behavior even in situations where we wrap around the +/// end of the range, we include the `index` column when generating the shifts, +/// and order the result by that column. +/// +/// The CockroachDB docs on [ordering] specify that queries without an explicit +/// `ORDER BY` clause are returned "as the coordinating nodes receives them." +/// Without this clause, the order is non-deterministic. +/// /// Shift generators /// ---------------- /// @@ -146,6 +160,8 @@ use uuid::Uuid; /// there is no scope, which means the items must be globally unique in the /// entire table (among non-deleted items). The query is structured slightly /// differently in these two cases. +/// +/// [ordering]: https://www.cockroachlabs.com/docs/stable/select-clause#sorting-and-limiting-query-results #[derive(Debug, Clone, Copy)] pub(super) struct NextItem< Table, @@ -220,6 +236,9 @@ where } } +const SHIFT_COLUMN_IDENT: &str = "shift"; +const INDEX_COLUMN_IDENT: &str = "index"; + impl QueryFragment for NextItem @@ -262,7 +281,7 @@ where self.shift_generator.base(), )?; out.push_sql(" + "); - out.push_identifier("shift")?; + out.push_identifier(SHIFT_COLUMN_IDENT)?; out.push_sql(", TRUE) "); push_next_item_where_clause::(out.reborrow()) @@ -303,7 +322,7 @@ where self.shift_generator.base(), )?; out.push_sql(" + "); - out.push_identifier("shift")?; + out.push_identifier(SHIFT_COLUMN_IDENT)?; out.push_sql(", TRUE) "); push_next_item_where_clause::(out.reborrow()) @@ -338,7 +357,7 @@ where shift_generator.base(), )?; out.push_sql(" + "); - out.push_identifier("shift")?; + out.push_identifier(SHIFT_COLUMN_IDENT)?; out.push_sql(" AS "); out.push_identifier(ItemColumn::NAME)?; out.push_sql(" FROM ("); @@ -350,7 +369,7 @@ where // Push the final where clause shared by scoped and unscoped next item queries. // // ```sql -// WHERE IS NULL LIMIT 1 +// WHERE IS NULL ORDER BY "index" LIMIT 1 // ``` fn push_next_item_where_clause( mut out: AstPass, @@ -363,7 +382,9 @@ where { out.push_sql(" WHERE "); out.push_identifier(ItemColumn::NAME)?; - out.push_sql(" IS NULL LIMIT 1"); + out.push_sql(" IS NULL ORDER BY "); + out.push_identifier(INDEX_COLUMN_IDENT)?; + out.push_sql(" LIMIT 1"); Ok(()) } @@ -414,17 +435,24 @@ pub trait ShiftGenerator { /// Return the minimum shift from the base item for the scan. fn min_shift(&self) -> &i64; + /// Return the indices of the generated shifts for the scan. + fn shift_indices(&self) -> &ShiftIndices; + /// Insert the part of the query represented by the shift of items into the /// provided AstPass. /// /// The default implementation pushes: /// /// ```sql - /// SELECT generate_series(0, ) AS shift + /// SELECT + /// generate_series(0, ) as "index" + /// generate_series(0, ) AS shift, /// UNION ALL - /// SELECT generate_series(, -1) AS shift + /// SELECT + /// generate_series(, ) as "index" + /// generate_series(, -1) AS shift, /// ``` - fn walk_ast<'a, Table, ItemColumn>( + fn walk_ast<'a, 'b, Table, ItemColumn>( &'a self, mut out: AstPass<'_, 'a, Pg>, ) -> diesel::QueryResult<()> @@ -434,15 +462,83 @@ pub trait ShiftGenerator { Item: ToSql<::SqlType, Pg> + Copy, ItemColumn: Column
+ Copy, Pg: HasSqlType<::SqlType>, + 'a: 'b, { out.push_sql("SELECT generate_series(0, "); + out.push_bind_param::( + self.shift_indices().first_end(), + )?; + out.push_sql(") AS "); + out.push_identifier(INDEX_COLUMN_IDENT)?; + out.push_sql(", generate_series(0, "); out.push_bind_param::(self.max_shift())?; out.push_sql(") AS "); - out.push_identifier("shift")?; + out.push_identifier(SHIFT_COLUMN_IDENT)?; out.push_sql(" UNION ALL SELECT generate_series("); + out.push_bind_param::( + self.shift_indices().second_start(), + )?; + out.push_sql(", "); + out.push_bind_param::( + self.shift_indices().second_end(), + )?; + out.push_sql(") AS "); + out.push_identifier(INDEX_COLUMN_IDENT)?; + out.push_sql(", generate_series("); out.push_bind_param::(self.min_shift())?; out.push_sql(", -1) AS "); - out.push_identifier("shift") + out.push_identifier(SHIFT_COLUMN_IDENT) + } +} + +/// Helper to compute the range of the _index_ column, used to predictably sort +/// the generated items. +/// +/// This type cannot be created directly, it's generated internally by creating +/// a `DefaultShiftGenerator`. +// NOTE: This type mostly exists to satisfy annoying lifetime constraints +// imposed by Diesel's `AstPass`. One can only push bind parameters that outlive +// the AST pass itself, so you cannot push owned values or even references to +// values generated anywhere within the `walk_ast()` implementation; +#[derive(Copy, Clone, Debug)] +pub struct ShiftIndices { + // The end of the first range. + first_end: i64, + // The start of the second range. + second_start: i64, + // The end of the second range. + // + // This is equal to the number of items generated for the range. + second_end: i64, +} + +impl ShiftIndices { + fn new(max_shift: i64, min_shift: i64) -> Self { + assert!(max_shift >= 0); + assert!(min_shift <= 0); + + // We're just generating the list of indices (0, n_items), but we need + // to split it in the middle. Specifically, we'll split it at + // `max_shift`, and then generate the remainder. + let first_end = max_shift; + let second_start = first_end + 1; + let second_end = max_shift - min_shift; + Self { first_end, second_start, second_end } + } + + /// Return the end of the first set of indices. + pub fn first_end(&self) -> &i64 { + &self.first_end + } + + /// Return the start of the second set of indices. + pub fn second_start(&self) -> &i64 { + &self.second_start + } + + /// Return the end of the second set of indices. + pub fn second_end(&self) -> &i64 { + &self.second_end } } @@ -450,9 +546,24 @@ pub trait ShiftGenerator { /// implementation. #[derive(Debug, Clone, Copy)] pub struct DefaultShiftGenerator { - pub base: Item, - pub max_shift: i64, - pub min_shift: i64, + base: Item, + max_shift: i64, + min_shift: i64, + shift_indices: ShiftIndices, +} + +impl DefaultShiftGenerator { + /// Create a default generator, checking the provided ranges. + /// + /// Returns `None` if either the max_shift is less than 0, or the min_shift + /// is greater than 0. + pub fn new(base: Item, max_shift: i64, min_shift: i64) -> Option { + if max_shift < 0 || min_shift > 0 { + return None; + } + let shift_indices = ShiftIndices::new(max_shift, min_shift); + Some(Self { base, max_shift, min_shift, shift_indices }) + } } impl ShiftGenerator for DefaultShiftGenerator { @@ -467,6 +578,10 @@ impl ShiftGenerator for DefaultShiftGenerator { fn min_shift(&self) -> &i64 { &self.min_shift } + + fn shift_indices(&self) -> &ShiftIndices { + &self.shift_indices + } } #[cfg(test)] @@ -474,6 +589,7 @@ mod tests { use super::DefaultShiftGenerator; use super::NextItem; + use super::ShiftIndices; use crate::db; use async_bb8_diesel::AsyncRunQueryDsl; use async_bb8_diesel::AsyncSimpleConnection; @@ -602,8 +718,7 @@ mod tests { // // This generator should start at 0, and then select over the range [0, // 10], wrapping back to 0. - let generator = - DefaultShiftGenerator { base: 0, max_shift: 10, min_shift: 0 }; + let generator = DefaultShiftGenerator::new(0, 10, 0).unwrap(); let query = NextItemQuery::new(generator); let it = diesel::insert_into(item::dsl::item) .values(query) @@ -623,8 +738,7 @@ mod tests { assert_eq!(it.value, 1); // Insert 10, and guarantee that we get it back. - let generator = - DefaultShiftGenerator { base: 10, max_shift: 0, min_shift: -10 }; + let generator = DefaultShiftGenerator::new(10, 0, -10).unwrap(); let query = NextItemQuery::new(generator); let it = diesel::insert_into(item::dsl::item) .values(query) @@ -647,4 +761,94 @@ mod tests { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + #[tokio::test] + async fn test_next_item_query_is_ordered_by_indices() { + // Setup the test database + let logctx = + dev::test_setup_log("test_next_item_query_is_ordered_by_indices"); + let log = logctx.log.new(o!()); + let mut db = test_setup_database(&log).await; + let cfg = crate::db::Config { url: db.pg_config().clone() }; + let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg)); + let conn = pool.pool().get().await.unwrap(); + + // We're going to operate on a separate table, for simplicity. + setup_test_schema(&pool).await; + + // To test ordering behavior, we'll generate a range where the natural + // order of the _items_ differs from their indices. I.e., we have some + // non-zero base. We'll make sure we order everything by those indices, + // not the items themselves. + const MIN_SHIFT: i64 = -5; + const MAX_SHIFT: i64 = 5; + const BASE: i32 = 5; + let generator = + DefaultShiftGenerator::new(BASE, MAX_SHIFT, MIN_SHIFT).unwrap(); + let query = NextItemQuery::new(generator); + + // Insert all items until there are none left. + let first_range = i64::from(BASE)..=i64::from(BASE) + MAX_SHIFT; + let second_range = i64::from(BASE) + MIN_SHIFT..i64::from(BASE); + let mut expected = first_range.chain(second_range); + while let Some(expected_value) = expected.next() { + let it = diesel::insert_into(item::dsl::item) + .values(query) + .returning(Item::as_returning()) + .get_result_async(&*conn) + .await + .unwrap(); + assert_eq!(i64::from(it.value), expected_value); + } + assert!( + expected.next().is_none(), + "Should have exhausted the expected values" + ); + diesel::insert_into(item::dsl::item) + .values(query) + .returning(Item::as_returning()) + .get_result_async(&*conn) + .await + .expect_err( + "The next item query should not have further items to generate", + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[test] + fn test_shift_indices() { + // In this case, we're generating a list of 11 items, all sequential. So + // we want the first set of arguments to `generate_series()` to be 0, + // 10, and the second set 11, 10. That means the first indices will be + // 0..=10 and the second 11..=10, i.e., empty. + let min_shift = 0; + let max_shift = 10; + let indices = ShiftIndices::new(max_shift, min_shift); + assert_eq!(indices.first_end, 10); + assert_eq!(indices.second_start, 11); + assert_eq!(indices.second_end, 10); + + // Here, the list is split in half. We want to still result in a + // sequence 0..=10, split at 5. So the arguments to generate_series() + // should be (0, 5), and (6, 10). + let min_shift = -5; + let max_shift = 5; + let indices = ShiftIndices::new(max_shift, min_shift); + assert_eq!(indices.first_end, 5); + assert_eq!(indices.second_start, 6); + assert_eq!(indices.second_end, 10); + + // This case tests where most the range is _before_ the base, i.e., the + // max shift is zero. Note that this technically still means we have one + // item in the list of available, which is the base itself (at a shift + // of 0). + let min_shift = -10; + let max_shift = 0; + let indices = ShiftIndices::new(max_shift, min_shift); + assert_eq!(indices.first_end, 0); + assert_eq!(indices.second_start, 1); + assert_eq!(indices.second_end, 10); + } } diff --git a/nexus/db-queries/src/db/queries/vpc.rs b/nexus/db-queries/src/db/queries/vpc.rs index c29a51adb0..2875ae6a05 100644 --- a/nexus/db-queries/src/db/queries/vpc.rs +++ b/nexus/db-queries/src/db/queries/vpc.rs @@ -246,8 +246,8 @@ struct NextVni { impl NextVni { fn new(vni: Vni) -> Self { let VniShifts { min_shift, max_shift } = VniShifts::new(vni); - let generator = - DefaultShiftGenerator { base: vni, max_shift, min_shift }; + let generator = DefaultShiftGenerator::new(vni, max_shift, min_shift) + .expect("invalid min/max shift"); let inner = NextItem::new_unscoped(generator); Self { inner } } @@ -262,8 +262,8 @@ impl NextVni { -i32::try_from(base_u32) .expect("Expected a valid VNI at this point"), ); - let generator = - DefaultShiftGenerator { base: vni, max_shift, min_shift }; + let generator = DefaultShiftGenerator::new(vni, max_shift, min_shift) + .expect("invalid min/max shift"); let inner = NextItem::new_unscoped(generator); Self { inner } }