From 73f92c508c6a363a5a292893469e95354b09e3c4 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 20 Oct 2023 18:19:33 +0000 Subject: [PATCH] Search entire VNI range in chunks - Remove `IncompleteVpc::with_random_vni()` - Add iterator to search ranges of VNIs sequentially, yielding all range starts in the valid guest VNI range from a random starting VNI. - Instead of a limited retry loop when creating VPCs, search until the iterator is exhausted and we've searched the whole range. --- nexus/db-model/src/vpc.rs | 8 -- nexus/db-queries/src/db/datastore/vpc.rs | 41 +++----- nexus/db-queries/src/db/queries/vpc.rs | 120 +++++++++++++++++++++++ 3 files changed, 134 insertions(+), 35 deletions(-) diff --git a/nexus/db-model/src/vpc.rs b/nexus/db-model/src/vpc.rs index f8dc68c5192..8a4dc0e3493 100644 --- a/nexus/db-model/src/vpc.rs +++ b/nexus/db-model/src/vpc.rs @@ -110,14 +110,6 @@ impl IncompleteVpc { subnet_gen: Generation::new(), }) } - - /// Create a copy of self, but with a new random VNI. - /// - /// This is used to retry insertion of a VPC in the case where we can't find - /// an available VNI. - pub fn with_random_vni(self) -> Self { - Self { vni: Vni(external::Vni::random()), ..self } - } } impl DatastoreCollectionConfig for Vpc { diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index f376eaa9c13..dcdd82cf2d2 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -34,6 +34,7 @@ use crate::db::model::VpcUpdate; use crate::db::model::{Ipv4Net, Ipv6Net}; use crate::db::pagination::paginated; use crate::db::queries::vpc::InsertVpcQuery; +use crate::db::queries::vpc::VniSearchIter; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; use crate::db::queries::vpc_subnet::SubnetError; use async_bb8_diesel::AsyncConnection; @@ -296,12 +297,11 @@ impl DataStore { authz_project: &authz::Project, mut vpc: IncompleteVpc, ) -> Result<(authz::Vpc, Vpc), Error> { - // Attempt to insert a VPC, retrying the query if there are no available - // VNIs. - const N_ATTEMPTS: usize = 3; - let mut attempt = 0; - while attempt < N_ATTEMPTS { - let original_vni = vpc.vni; + // Generate an iterator that allows us to search the entire space of + // VNIs for this VPC, in manageable chunks to limit memory usage. + let vnis = VniSearchIter::new(vpc.vni.0); + for (i, vni) in vnis.enumerate() { + vpc.vni = Vni(vni); match self .project_create_vpc_raw( opctx, @@ -313,26 +313,22 @@ impl DataStore { Ok(Some(vpcs)) => return Ok(vpcs), Err(e) => return Err(e), Ok(None) => { - vpc = vpc.with_random_vni(); debug!( opctx.log, - "VNI exhaustion detected while inserting VPC, retrying"; - "attempt" => attempt, + "No VNIs available within current search range, retrying"; + "attempt" => i, "vpc_name" => %vpc.identity.name, - "original_vni" => ?original_vni, - "new_vni" => ?vpc.vni, + "start_vni" => ?vni, ); - attempt += 1; } } } - // We've failed to find a VNI within our retry limit. We'll return a 503 - // in this case. + // We've failed to find a VNI after searching the entire range, so we'll + // return a 503 at this point. error!( opctx.log, - "failed to find a VNI within retry limit"; - "limit" => N_ATTEMPTS, + "failed to find a VNI after searching entire range"; ); Err(Error::unavail("Failed to find a free VNI for this VPC")) } @@ -1374,19 +1370,10 @@ mod tests { { Ok((_, vpc)) => { assert_eq!(vpc.id(), incomplete_vpc.identity.id); - let min_expected_vni = starting_vni + MAX_VNI_SEARCH_RANGE_SIZE; - assert!(u32::from(vpc.vni.0) > min_expected_vni); + let expected_vni = starting_vni + MAX_VNI_SEARCH_RANGE_SIZE + 1; + assert_eq!(u32::from(vpc.vni.0), expected_vni); info!(log, "successfully created VPC after retries"; "vpc" => ?vpc); } - Err(Error::ServiceUnavailable { internal_message }) => { - // This is pretty surprising, but technically possible. Just - // check that we've actually emitted the right message. - assert_eq!( - internal_message, - "Failed to find a free VNI for this VPC", - ); - info!(log, "failed to create VPC within retry limit"); - } Err(e) => panic!("Unexpected error when inserting VPC: {e}"), }; db.cleanup().await.unwrap(); diff --git a/nexus/db-queries/src/db/queries/vpc.rs b/nexus/db-queries/src/db/queries/vpc.rs index 4b36aabff99..111683b957c 100644 --- a/nexus/db-queries/src/db/queries/vpc.rs +++ b/nexus/db-queries/src/db/queries/vpc.rs @@ -321,10 +321,68 @@ impl VniShifts { } } +/// An iterator yielding sequential starting VNIs. +/// +/// The VPC insertion query requires a search for the next available VNI, using +/// the `NextItem` query. We limit the search for each query to avoid memory +/// issues on any one query. If we fail to find a VNI, we need to search the +/// next range. This iterator yields the starting positions for the `NextItem` +/// query, so that the entire range can be search in chunks until a free VNI is +/// found. +// +// NOTE: It's technically possible for this to lead to searching the very +// initial portion of the range twice. If we end up wrapping around so that the +// last position yielded by this iterator is `start - x`, then we'll end up +// searching from `start - x` to `start + (MAX_VNI_SEARCH_RANGE_SIZE - x)`, and +// so search those first few after `start` again. This is both innocuous and +// really unlikely. +#[derive(Clone, Copy, Debug)] +pub struct VniSearchIter { + start: u32, + current: u32, + has_wrapped: bool, +} + +impl VniSearchIter { + /// Create a search range, starting from the provided VNI. + pub fn new(start: external::Vni) -> Self { + let start = u32::from(start); + Self { start, current: start, has_wrapped: false } + } +} + +impl std::iter::Iterator for VniSearchIter { + type Item = external::Vni; + + fn next(&mut self) -> Option { + // If we've wrapped around and the computed position is beyond where we + // started, then the ite + if self.has_wrapped && self.current > self.start { + return None; + } + + // Compute the next position. + // + // Make sure we wrap around to the mininum guest VNI. Note that we + // consider the end of the range inclusively, so we subtract one in the + // offset below to end up _at_ the min guest VNI. + let mut next = self.current + MAX_VNI_SEARCH_RANGE_SIZE; + if next > external::Vni::MAX_VNI { + next -= external::Vni::MAX_VNI; + next += external::Vni::MIN_GUEST_VNI - 1; + self.has_wrapped = true; + } + let current = self.current; + self.current = next; + Some(external::Vni::try_from(current).unwrap()) + } +} + #[cfg(test)] mod tests { use super::external; use super::Vni; + use super::VniSearchIter; use super::VniShifts; use super::MAX_VNI_SEARCH_RANGE_SIZE; @@ -352,4 +410,66 @@ mod tests { assert_eq!(max_shift, i64::from(offset)); assert_eq!(max_shift - min_shift, i64::from(MAX_VNI_SEARCH_RANGE_SIZE)); } + + #[test] + fn test_vni_search_iter_steps() { + let start = external::Vni::try_from(2048).unwrap(); + let mut it = VniSearchIter::new(start); + let next = it.next().unwrap(); + assert_eq!(next, start); + let next = it.next().unwrap(); + assert_eq!( + next, + external::Vni::try_from( + u32::from(start) + MAX_VNI_SEARCH_RANGE_SIZE + ) + .unwrap() + ); + } + + #[test] + fn test_vni_search_iter_full_count() { + let start = + external::Vni::try_from(external::Vni::MIN_GUEST_VNI).unwrap(); + + let last = VniSearchIter::new(start).last().unwrap(); + println!("{:?}", last); + + pub const fn div_ceil(x: u32, y: u32) -> u32 { + let d = x / y; + let r = x % y; + if r > 0 && y > 0 { + d + 1 + } else { + d + } + } + const N_EXPECTED: u32 = div_ceil( + external::Vni::MAX_VNI - external::Vni::MIN_GUEST_VNI, + MAX_VNI_SEARCH_RANGE_SIZE, + ); + let count = u32::try_from(VniSearchIter::new(start).count()).unwrap(); + assert_eq!(count, N_EXPECTED); + } + + #[test] + fn test_vni_search_iter_wrapping() { + // Start from just before the end of the range. + let start = + external::Vni::try_from(external::Vni::MAX_VNI - 1).unwrap(); + let mut it = VniSearchIter::new(start); + + // We should yield that start position first. + let next = it.next().unwrap(); + assert_eq!(next, start); + + // The next value should be wrapped around to the beginning. + // + // Subtract 2 because we _include_ the max VNI in the search range. + let next = it.next().unwrap(); + assert_eq!( + u32::from(next), + external::Vni::MIN_GUEST_VNI + MAX_VNI_SEARCH_RANGE_SIZE - 2 + ); + } }