From 10ab943e20f0ae15dfebf5dc48f8d8802465dcc1 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 13 Feb 2024 20:27:57 -0800 Subject: [PATCH 01/19] kinda working? --- nexus/db-macros/src/lib.rs | 4 +- nexus/db-model/src/dataset.rs | 1 + nexus/db-model/src/region.rs | 1 + nexus/db-queries/Cargo.toml | 1 + nexus/db-queries/src/db/datastore/region.rs | 23 ++- .../src/db/queries/region_allocation.rs | 194 ++++++++++++++++++ 6 files changed, 216 insertions(+), 8 deletions(-) diff --git a/nexus/db-macros/src/lib.rs b/nexus/db-macros/src/lib.rs index fd15b59128..30208412a1 100644 --- a/nexus/db-macros/src/lib.rs +++ b/nexus/db-macros/src/lib.rs @@ -280,7 +280,7 @@ fn build_resource_identity( let identity_name = format_ident!("{}Identity", struct_name); quote! { #[doc = #identity_doc] - #[derive(Clone, Debug, PartialEq, Eq, Selectable, Queryable, Insertable, serde::Serialize, serde::Deserialize)] + #[derive(Clone, Debug, PartialEq, Eq, Selectable, Queryable, QueryableByName, Insertable, serde::Serialize, serde::Deserialize)] #[diesel(table_name = #table_name) ] pub struct #identity_name { pub id: ::uuid::Uuid, @@ -322,7 +322,7 @@ fn build_asset_identity( let identity_name = format_ident!("{}Identity", struct_name); quote! { #[doc = #identity_doc] - #[derive(Clone, Debug, PartialEq, Selectable, Queryable, Insertable, serde::Serialize, serde::Deserialize)] + #[derive(Clone, Debug, PartialEq, Selectable, Queryable, QueryableByName, Insertable, serde::Serialize, serde::Deserialize)] #[diesel(table_name = #table_name) ] pub struct #identity_name { pub id: ::uuid::Uuid, diff --git a/nexus/db-model/src/dataset.rs b/nexus/db-model/src/dataset.rs index 65c0070509..0ea465bebc 100644 --- a/nexus/db-model/src/dataset.rs +++ b/nexus/db-model/src/dataset.rs @@ -18,6 +18,7 @@ use uuid::Uuid; /// available to a service on the Sled. 
#[derive( Queryable, + QueryableByName, Insertable, Debug, Clone, diff --git a/nexus/db-model/src/region.rs b/nexus/db-model/src/region.rs index fefc4f4fce..0ea6f82e65 100644 --- a/nexus/db-model/src/region.rs +++ b/nexus/db-model/src/region.rs @@ -15,6 +15,7 @@ use uuid::Uuid; /// allocated within a volume. #[derive( Queryable, + QueryableByName, Insertable, Debug, Clone, diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 9cdcc88e6a..d979b01702 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -15,6 +15,7 @@ base64.workspace = true bb8.workspace = true camino.workspace = true chrono.workspace = true +const_format.workspace = true cookie.workspace = true diesel.workspace = true diesel-dtrace.workspace = true diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index b055a3e85c..24298c4ede 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -128,19 +128,30 @@ impl DataStore { let (blocks_per_extent, extent_count) = Self::get_crucible_allocation(&block_size, size); - let dataset_and_regions: Vec<(Dataset, Region)> = - crate::db::queries::region_allocation::RegionAllocate::new( + #[derive(diesel::QueryableByName, Queryable)] + struct DatasetAndRegion { + #[diesel(embed)] + dataset: Dataset, + #[diesel(embed)] + region: Region, + } + + let dataset_and_regions: Vec<_> = + crate::db::queries::region_allocation::allocation_query( volume_id, block_size.to_bytes() as u64, blocks_per_extent, extent_count, allocation_strategy, ) - .get_results_async(&*self.pool_connection_authorized(&opctx).await?) + .get_results_async::( + &*self.pool_connection_authorized(&opctx).await?, + ) .await - .map_err(|e| { - crate::db::queries::region_allocation::from_diesel(e) - })?; + .map_err(|e| crate::db::queries::region_allocation::from_diesel(e))? 
+ .into_iter() + .map(|DatasetAndRegion { dataset, region }| (dataset, region)) + .collect::>(); Ok(dataset_and_regions) } diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 3c37bf6b2e..f00b8c4384 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -676,3 +676,197 @@ impl Query for RegionAllocate { } impl RunQueryDsl for RegionAllocate {} + +struct BindParamCounter(i32); +impl BindParamCounter { + fn new() -> Self { + Self(0) + } + fn next(&mut self) -> i32 { + self.0 += 1; + self.0 + } +} + +trait SqlQueryBinds { + fn add_bind(self, bind_counter: &mut BindParamCounter) -> Self; +} + +impl<'a, Query> SqlQueryBinds + for diesel::query_builder::BoxedSqlQuery<'a, Pg, Query> +{ + fn add_bind(self, bind_counter: &mut BindParamCounter) -> Self { + self.sql("$").sql(bind_counter.next().to_string()) + } +} + +pub fn allocation_query( + volume_id: uuid::Uuid, + block_size: u64, + blocks_per_extent: u64, + extent_count: u64, + allocation_strategy: &RegionAllocationStrategy, +) -> diesel::query_builder::BoxedSqlQuery< + 'static, + Pg, + diesel::query_builder::SqlQuery, +> { + let (seed, distinct_sleds) = { + let (input_seed, distinct_sleds) = match allocation_strategy { + RegionAllocationStrategy::Random { seed } => (seed, false), + RegionAllocationStrategy::RandomWithDistinctSleds { seed } => { + (seed, true) + } + }; + ( + input_seed.map_or_else( + || { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() + }, + |seed| seed as u128, + ), + distinct_sleds, + ) + }; + + let seed = seed.to_le_bytes().to_vec(); + + let size_delta = block_size * blocks_per_extent * extent_count; + let redunancy: i64 = i64::try_from(REGION_REDUNDANCY_THRESHOLD).unwrap(); + + let mut binds = BindParamCounter::new(); + let query = diesel::sql_query( +"WITH + old_regions AS + (SELECT region.id, 
region.time_created, region.time_modified, region.dataset_id, region.volume_id, region.block_size, region.blocks_per_extent, region.extent_count FROM region WHERE (region.volume_id = ").into_boxed().add_bind(&mut binds).sql(")),") + .bind::(volume_id) + .sql( +" + old_zpool_usage AS + (SELECT dataset.pool_id, sum(dataset.size_used) AS size_used FROM dataset WHERE ((dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL)) GROUP BY dataset.pool_id), +"); + + // We pick one of these branches, depending on whether or not the sleds + // should be distinct. + if distinct_sleds { + query.sql(" + candidate_zpools AS + (SELECT old_zpool_usage.pool_id FROM (old_zpool_usage INNER JOIN (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id)) WHERE (((old_zpool_usage.size_used + ").add_bind(&mut binds).sql(" ) <= total_size) AND (sled.provision_state = 'provisionable'))), +") + .bind::(size_delta as i64) + } else { + query.sql(" + candidate_zpools AS + (SELECT DISTINCT ON (zpool.sled_id) old_zpool_usage.pool_id FROM (old_zpool_usage INNER JOIN (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id)) WHERE (((old_zpool_usage.size_used + ").add_bind(&mut binds).sql(" ) <= total_size) AND (sled.provision_state = 'provisionable')) ORDER BY zpool.sled_id, md5((CAST(zpool.id as BYTEA) || ").add_bind(&mut binds).sql("))), +") + .bind::(size_delta as i64) + .bind::(seed.clone()) + } + .sql( +" + candidate_datasets AS + (SELECT DISTINCT ON (dataset.pool_id) dataset.id, dataset.pool_id FROM (dataset INNER JOIN candidate_zpools ON (dataset.pool_id = candidate_zpools.pool_id)) WHERE (((dataset.time_deleted IS NULL) AND (dataset.size_used IS NOT NULL)) AND (dataset.kind = 'crucible')) ORDER BY dataset.pool_id, md5((CAST(dataset.id as BYTEA) || ").add_bind(&mut binds).sql("))), +" + ).bind::(seed.clone()) + .sql( +" + shuffled_candidate_datasets AS + (SELECT candidate_datasets.id, candidate_datasets.pool_id 
FROM candidate_datasets ORDER BY md5((CAST(candidate_datasets.id as BYTEA) || ").add_bind(&mut binds).sql(")) LIMIT ").add_bind(&mut binds).sql("), +" + ).bind::(seed) + .bind::(redunancy) + .sql( +" + candidate_regions AS + (SELECT gen_random_uuid() AS id, now() AS time_created, now() AS time_modified, shuffled_candidate_datasets.id AS dataset_id, ").add_bind(&mut binds).sql(" AS volume_id, ").add_bind(&mut binds).sql(" AS block_size, ").add_bind(&mut binds).sql(" AS blocks_per_extent, ").add_bind(&mut binds).sql(" AS extent_count FROM shuffled_candidate_datasets), +" + ).bind::(volume_id) + .bind::(block_size as i64) + .bind::(blocks_per_extent as i64) + .bind::(extent_count as i64) + .sql( +" + proposed_dataset_changes AS + (SELECT candidate_regions.dataset_id AS id, dataset.pool_id AS pool_id, ((candidate_regions.block_size * candidate_regions.blocks_per_extent) * candidate_regions.extent_count) AS size_used_delta FROM (candidate_regions INNER JOIN dataset ON (dataset.id = candidate_regions.dataset_id))), + do_insert AS + (SELECT (((((SELECT COUNT(*) FROM old_regions LIMIT 1) < ").add_bind(&mut binds).sql(") AND CAST(IF(((SELECT COUNT(*) FROM candidate_zpools LIMIT 1) >= ").add_bind(&mut binds).sql("), 'TRUE', 'Not enough space') AS BOOL)) AND CAST(IF(((SELECT COUNT(*) FROM candidate_regions LIMIT 1) >= ").add_bind(&mut binds).sql("), 'TRUE', 'Not enough datasets') AS BOOL)) AND CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= ").add_bind(&mut binds).sql("), 'TRUE', 'Not enough unique zpools selected') AS BOOL)) AS insert), +" + ).bind::(redunancy) + .bind::(redunancy) + .bind::(redunancy) + .bind::(redunancy) + .sql( +" + inserted_regions AS + (INSERT INTO region (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count) SELECT candidate_regions.id, candidate_regions.time_created, candidate_regions.time_modified, 
candidate_regions.dataset_id, candidate_regions.volume_id, candidate_regions.block_size, candidate_regions.blocks_per_extent, candidate_regions.extent_count FROM candidate_regions WHERE (SELECT do_insert.insert FROM do_insert LIMIT 1) RETURNING region.id, region.time_created, region.time_modified, region.dataset_id, region.volume_id, region.block_size, region.blocks_per_extent, region.extent_count), + updated_datasets AS + (UPDATE dataset SET size_used = (dataset.size_used + (SELECT proposed_dataset_changes.size_used_delta FROM proposed_dataset_changes WHERE (proposed_dataset_changes.id = dataset.id) LIMIT 1)) WHERE ((dataset.id = ANY(SELECT proposed_dataset_changes.id FROM proposed_dataset_changes)) AND (SELECT do_insert.insert FROM do_insert LIMIT 1)) RETURNING dataset.id, dataset.time_created, dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used) +(SELECT dataset.id, dataset.time_created, dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used, old_regions.id, old_regions.time_created, old_regions.time_modified, old_regions.dataset_id, old_regions.volume_id, old_regions.block_size, old_regions.blocks_per_extent, old_regions.extent_count FROM (old_regions INNER JOIN dataset ON (old_regions.dataset_id = dataset.id))) UNION (SELECT updated_datasets.id, updated_datasets.time_created, updated_datasets.time_modified, updated_datasets.time_deleted, updated_datasets.rcgen, updated_datasets.pool_id, updated_datasets.ip, updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, inserted_regions.dataset_id, inserted_regions.volume_id, inserted_regions.block_size, inserted_regions.blocks_per_extent, inserted_regions.extent_count FROM (inserted_regions INNER JOIN updated_datasets ON (inserted_regions.dataset_id = 
updated_datasets.id)))" + ) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::db::explain::ExplainableAsync; + use omicron_test_utils::dev; + use nexus_test_utils::db::test_setup_database; + use uuid::Uuid; + + #[test] + fn raw_sql() { + let volume_id = Uuid::new_v4(); + let block_size = 0; + let blocks_per_extent = 1; + let extent_count = 2; + let allocation_strategy = + RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }; + + let region_allocate = RegionAllocate::new( + volume_id, + block_size, + blocks_per_extent, + extent_count, + &allocation_strategy, + ); + + let query = diesel::debug_query::(®ion_allocate); + + assert_eq!(query.to_string(), "foobar"); + } + + #[tokio::test] + async fn explainable() { + let logctx = dev::test_setup_log("explainable"); + let log = logctx.log.new(o!()); + let mut db = test_setup_database(&log).await; + let cfg = crate::db::Config { url: db.pg_config().clone() }; + let pool = crate::db::Pool::new(&logctx.log, &cfg); + let conn = pool.pool().get().await.unwrap(); + + let volume_id = Uuid::new_v4(); + let block_size = 0; + let blocks_per_extent = 1; + let extent_count = 2; + let allocation_strategy = + RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }; + let region_allocate = allocation_query( + volume_id, + block_size, + blocks_per_extent, + extent_count, + &allocation_strategy, + ); + + let _ = region_allocate + .explain_async(&conn) + .await + .expect("Failed to explain query - is it valid SQL?"); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} From 47bf04a3495f5ee615c5ae7a8547aa6b1aff733e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 13 Feb 2024 23:33:05 -0800 Subject: [PATCH 02/19] Removed old query --- Cargo.lock | 21 + Cargo.toml | 1 + nexus/db-model/src/queries/mod.rs | 1 - .../db-model/src/queries/region_allocation.rs | 195 ---- nexus/db-queries/src/db/cast_uuid_as_bytea.rs | 62 -- nexus/db-queries/src/db/mod.rs | 1 - 
.../src/db/queries/region_allocation.rs | 880 ++++-------------- 7 files changed, 225 insertions(+), 936 deletions(-) delete mode 100644 nexus/db-model/src/queries/region_allocation.rs delete mode 100644 nexus/db-queries/src/db/cast_uuid_as_bytea.rs diff --git a/Cargo.lock b/Cargo.lock index caddb6d8ea..388799ff59 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1090,6 +1090,26 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28c122c3980598d243d63d9a704629a2d748d101f278052ff068be5a4423ab6f" +[[package]] +name = "const_format" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a214c7af3d04997541b18d432afaff4c455e79e2029079647e72fc2bd27673" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f6ff08fd20f4f299298a28e2dfa8a8ba1036e6cd2460ac1de7b425d76f2500" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "constant_time_eq" version = "0.2.6" @@ -4360,6 +4380,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", + "const_format", "cookie", "db-macros", "diesel", diff --git a/Cargo.toml b/Cargo.toml index d33758fc06..048ac8bc60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -174,6 +174,7 @@ ciborium = "0.2.2" cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } clap = { version = "4.4", features = ["cargo", "derive", "env", "wrap_help"] } +const_format = "0.2.32" cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" diff --git a/nexus/db-model/src/queries/mod.rs b/nexus/db-model/src/queries/mod.rs index 7724d48bab..e138508f84 100644 --- a/nexus/db-model/src/queries/mod.rs +++ b/nexus/db-model/src/queries/mod.rs @@ -4,5 +4,4 @@ //! Subqueries used in CTEs. 
-pub mod region_allocation; pub mod virtual_provisioning_collection_update; diff --git a/nexus/db-model/src/queries/region_allocation.rs b/nexus/db-model/src/queries/region_allocation.rs deleted file mode 100644 index a1b9e0373a..0000000000 --- a/nexus/db-model/src/queries/region_allocation.rs +++ /dev/null @@ -1,195 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Describes subqueries which may be issues as a part of CTEs. -//! -//! When possible, it's preferable to define subqueries close to their -//! usage. However, certain Diesel traits (such as those enabling joins) -//! require the table structures to be defined in the same crate. - -// TODO: We're currently piggy-backing on the table macro for convenience. -// We actually do not want to generate an entire table for each subquery - we'd -// like to have a query source (which we can use to generate SELECT statements, -// JOIN, etc), but we don't want this to be an INSERT/UPDATE/DELETE target. -// -// Similarly, we don't want to force callers to supply a "primary key". -// -// I've looked into Diesel's `alias!` macro for this purpose, but unfortunately -// that implementation is too opinionated about the output QueryFragment. -// It expects to use the form: -// -// " as ", which is actually the opposite of what we want in -// a CTE (where we want the alias name to come first). - -use crate::schema::dataset; -use crate::schema::sled; -use crate::schema::zpool; - -table! { - old_regions { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - - dataset_id -> Uuid, - volume_id -> Uuid, - - block_size -> Int8, - blocks_per_extent -> Int8, - extent_count -> Int8, - } -} - -table! { - candidate_datasets { - id -> Uuid, - pool_id -> Uuid, - } -} - -table! 
{ - shuffled_candidate_datasets { - id -> Uuid, - pool_id -> Uuid, - } -} - -table! { - candidate_regions { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - - dataset_id -> Uuid, - volume_id -> Uuid, - - block_size -> Int8, - blocks_per_extent -> Int8, - extent_count -> Int8, - } -} - -table! { - proposed_dataset_changes { - id -> Uuid, - pool_id -> Uuid, - size_used_delta -> Int8, - } -} - -table! { - old_zpool_usage (pool_id) { - pool_id -> Uuid, - size_used -> Numeric, - } -} - -table! { - candidate_zpools (pool_id) { - pool_id -> Uuid - } -} - -table! { - do_insert (insert) { - insert -> Bool, - } -} - -table! { - one_zpool_per_sled (pool_id) { - pool_id -> Uuid - } -} - -table! { - one_dataset_per_zpool { - id -> Uuid, - pool_id -> Uuid - } -} - -table! { - inserted_regions { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - - dataset_id -> Uuid, - volume_id -> Uuid, - - block_size -> Int8, - blocks_per_extent -> Int8, - extent_count -> Int8, - } -} - -table! 
{ - updated_datasets (id) { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - time_deleted -> Nullable, - rcgen -> Int8, - - pool_id -> Uuid, - - ip -> Inet, - port -> Int4, - - kind -> crate::DatasetKindEnum, - size_used -> Nullable, - } -} - -diesel::allow_tables_to_appear_in_same_query!( - proposed_dataset_changes, - dataset, -); - -diesel::allow_tables_to_appear_in_same_query!( - do_insert, - candidate_regions, - dataset, - zpool, -); - -diesel::allow_tables_to_appear_in_same_query!( - old_zpool_usage, - zpool, - sled, - proposed_dataset_changes, -); - -diesel::allow_tables_to_appear_in_same_query!(old_regions, dataset,); -diesel::allow_tables_to_appear_in_same_query!(old_regions, zpool,); - -diesel::allow_tables_to_appear_in_same_query!( - inserted_regions, - updated_datasets, -); - -diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, dataset,); -diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, zpool,); -diesel::allow_tables_to_appear_in_same_query!(candidate_datasets, dataset); - -// == Needed for random region allocation == - -pub mod cockroach_md5 { - pub mod functions { - use diesel::sql_types::*; - diesel::sql_function!(fn md5(x: Bytea) -> Bytea); - } - - pub mod helper_types { - pub type Md5 = super::functions::md5::HelperType; - } - - pub mod dsl { - pub use super::functions::*; - pub use super::helper_types::*; - } -} - -// == End random region allocation dependencies == diff --git a/nexus/db-queries/src/db/cast_uuid_as_bytea.rs b/nexus/db-queries/src/db/cast_uuid_as_bytea.rs deleted file mode 100644 index c50c88971f..0000000000 --- a/nexus/db-queries/src/db/cast_uuid_as_bytea.rs +++ /dev/null @@ -1,62 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! 
Cast UUID to BYTES - -use diesel::expression::ValidGrouping; -use diesel::pg::Pg; -use diesel::query_builder::AstPass; -use diesel::query_builder::QueryFragment; -use diesel::query_builder::QueryId; -use diesel::Expression; -use diesel::SelectableExpression; - -/// Cast an expression which evaluates to a Uuid and cast it to a Bytea. It's -/// that simple! -#[derive(ValidGrouping, QueryId)] -pub struct CastUuidToBytea { - expression: E, -} - -impl CastUuidToBytea -where - E: Expression, -{ - pub const fn new(expression: E) -> Self { - Self { expression } - } -} - -impl Expression for CastUuidToBytea -where - E: Expression, -{ - type SqlType = diesel::sql_types::Bytea; -} - -impl diesel::AppearsOnTable for CastUuidToBytea where - E: diesel::AppearsOnTable -{ -} - -impl SelectableExpression for CastUuidToBytea where - E: SelectableExpression -{ -} - -impl QueryFragment for CastUuidToBytea -where - E: QueryFragment, -{ - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - out.push_sql("CAST("); - self.expression.walk_ast(out.reborrow())?; - out.push_sql(" as BYTEA)"); - - Ok(()) - } -} diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs index e21ba2e3a8..6cd3911efe 100644 --- a/nexus/db-queries/src/db/mod.rs +++ b/nexus/db-queries/src/db/mod.rs @@ -5,7 +5,6 @@ //! Facilities for working with the Omicron database pub(crate) mod alias; -pub(crate) mod cast_uuid_as_bytea; // This is not intended to be public, but this is necessary to use it from // doctests pub mod collection_attach; diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index f00b8c4384..24ace6467e 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -4,31 +4,15 @@ //! Implementation of queries for provisioning regions. 
-use crate::db::alias::ExpressionAlias; -use crate::db::cast_uuid_as_bytea::CastUuidToBytea; use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD; -use crate::db::model::{Dataset, DatasetKind, Region}; -use crate::db::pool::DbConnection; -use crate::db::subquery::{AsQuerySource, Cte, CteBuilder, CteQuery}; -use crate::db::true_or_cast_error::{matches_sentinel, TrueOrCastError}; -use db_macros::Subquery; +use crate::db::true_or_cast_error::matches_sentinel; +use const_format::concatcp; use diesel::pg::Pg; -use diesel::query_builder::{AstPass, Query, QueryFragment, QueryId}; use diesel::result::Error as DieselError; -use diesel::PgBinaryExpressionMethods; -use diesel::{ - sql_types, BoolExpressionMethods, Column, CombineDsl, ExpressionMethods, - Insertable, IntoSql, JoinOnDsl, NullableExpressionMethods, QueryDsl, - RunQueryDsl, -}; -use nexus_db_model::queries::region_allocation::{ - candidate_datasets, candidate_regions, candidate_zpools, cockroach_md5, - do_insert, inserted_regions, old_regions, old_zpool_usage, - proposed_dataset_changes, shuffled_candidate_datasets, updated_datasets, -}; -use nexus_db_model::schema; +use diesel::sql_types; use omicron_common::api::external; use omicron_common::nexus_config::RegionAllocationStrategy; +use std::cell::Cell; const NOT_ENOUGH_DATASETS_SENTINEL: &'static str = "Not enough datasets"; const NOT_ENOUGH_ZPOOL_SPACE_SENTINEL: &'static str = "Not enough space"; @@ -74,632 +58,77 @@ pub fn from_diesel(e: DieselError) -> external::Error { error::public_error_from_diesel(e, error::ErrorHandler::Server) } -/// A subquery to find all old regions associated with a particular volume. 
-#[derive(Subquery, QueryId)] -#[subquery(name = old_regions)] -struct OldRegions { - query: Box>, -} - -impl OldRegions { - fn new(volume_id: uuid::Uuid) -> Self { - use crate::db::schema::region::dsl; - Self { - query: Box::new(dsl::region.filter(dsl::volume_id.eq(volume_id))), - } - } -} - -/// A subquery to find datasets which could be used for provisioning regions. -/// -/// We only consider datasets which are already allocated as "Crucible". -/// This implicitly distinguishes between "M.2s" and "U.2s" -- Nexus needs to -/// determine during dataset provisioning which devices should be considered for -/// usage as Crucible storage. -/// -/// We select only one dataset from each zpool. -#[derive(Subquery, QueryId)] -#[subquery(name = candidate_datasets)] -struct CandidateDatasets { - query: Box>, -} - -impl CandidateDatasets { - fn new(candidate_zpools: &CandidateZpools, seed: u128) -> Self { - use crate::db::schema::dataset::dsl as dataset_dsl; - use candidate_zpools::dsl as candidate_zpool_dsl; - - let seed_bytes = seed.to_le_bytes(); - - let query: Box> = - Box::new( - dataset_dsl::dataset - .inner_join(candidate_zpools.query_source().on( - dataset_dsl::pool_id.eq(candidate_zpool_dsl::pool_id), - )) - .filter(dataset_dsl::time_deleted.is_null()) - .filter(dataset_dsl::size_used.is_not_null()) - .filter(dataset_dsl::kind.eq(DatasetKind::Crucible)) - .distinct_on(dataset_dsl::pool_id) - .order_by(( - dataset_dsl::pool_id, - cockroach_md5::dsl::md5( - CastUuidToBytea::new(dataset_dsl::id) - .concat(seed_bytes.to_vec()), - ), - )) - .select((dataset_dsl::id, dataset_dsl::pool_id)), - ); - Self { query } +struct BindParamCounter(Cell); +impl BindParamCounter { + fn new() -> Self { + Self(0.into()) } -} - -/// Shuffle the candidate datasets, and select REGION_REDUNDANCY_THRESHOLD -/// regions from it. 
-#[derive(Subquery, QueryId)] -#[subquery(name = shuffled_candidate_datasets)] -struct ShuffledCandidateDatasets { - query: Box>, -} - -impl ShuffledCandidateDatasets { - fn new(candidate_datasets: &CandidateDatasets, seed: u128) -> Self { - use candidate_datasets::dsl as candidate_datasets_dsl; - - let seed_bytes = seed.to_le_bytes(); - - let query: Box> = - Box::new( - candidate_datasets - .query_source() - // We order by md5 to shuffle the ordering of the datasets. - // md5 has a uniform output distribution so it does the job. - .order(cockroach_md5::dsl::md5( - CastUuidToBytea::new(candidate_datasets_dsl::id) - .concat(seed_bytes.to_vec()), - )) - .select(( - candidate_datasets_dsl::id, - candidate_datasets_dsl::pool_id, - )) - .limit(REGION_REDUNDANCY_THRESHOLD.try_into().unwrap()), - ); - Self { query } + fn next(&self) -> i32 { + self.0.set(self.0.get() + 1); + self.0.get() } } -/// A subquery to create the regions-to-be-inserted for the volume. -#[derive(Subquery, QueryId)] -#[subquery(name = candidate_regions)] -struct CandidateRegions { - query: Box>, -} - -diesel::sql_function!(fn gen_random_uuid() -> Uuid); -diesel::sql_function!(fn now() -> Timestamptz); - -impl CandidateRegions { - fn new( - shuffled_candidate_datasets: &ShuffledCandidateDatasets, - volume_id: uuid::Uuid, - block_size: u64, - blocks_per_extent: u64, - extent_count: u64, - ) -> Self { - use schema::region; - use shuffled_candidate_datasets::dsl as shuffled_candidate_datasets_dsl; - - let volume_id = volume_id.into_sql::(); - let block_size = (block_size as i64).into_sql::(); - let blocks_per_extent = - (blocks_per_extent as i64).into_sql::(); - let extent_count = - (extent_count as i64).into_sql::(); - Self { - query: Box::new(shuffled_candidate_datasets.query_source().select( - ( - ExpressionAlias::new::(gen_random_uuid()), - ExpressionAlias::new::(now()), - ExpressionAlias::new::(now()), - ExpressionAlias::new::( - shuffled_candidate_datasets_dsl::id, - ), - 
ExpressionAlias::new::(volume_id), - ExpressionAlias::new::(block_size), - ExpressionAlias::new::( - blocks_per_extent, - ), - ExpressionAlias::new::(extent_count), - ), - )), - } - } -} - -/// A subquery which summarizes the changes we intend to make, showing: -/// -/// 1. Which datasets will have size adjustments -/// 2. Which pools those datasets belong to -/// 3. The delta in size-used -#[derive(Subquery, QueryId)] -#[subquery(name = proposed_dataset_changes)] -struct ProposedChanges { - query: Box>, +trait SqlQueryBinds { + fn add_bind(self, bind_counter: &BindParamCounter) -> Self; } -impl ProposedChanges { - fn new(candidate_regions: &CandidateRegions) -> Self { - use crate::db::schema::dataset::dsl as dataset_dsl; - use candidate_regions::dsl as candidate_regions_dsl; - Self { - query: Box::new( - candidate_regions.query_source() - .inner_join( - dataset_dsl::dataset.on(dataset_dsl::id.eq(candidate_regions_dsl::dataset_id)) - ) - .select(( - ExpressionAlias::new::(candidate_regions_dsl::dataset_id), - ExpressionAlias::new::(dataset_dsl::pool_id), - ExpressionAlias::new::( - candidate_regions_dsl::block_size * - candidate_regions_dsl::blocks_per_extent * - candidate_regions_dsl::extent_count - ), - )) - ), - } +impl<'a, Query> SqlQueryBinds + for diesel::query_builder::BoxedSqlQuery<'a, Pg, Query> +{ + fn add_bind(self, bind_counter: &BindParamCounter) -> Self { + self.sql("$").sql(bind_counter.next().to_string()) } } -/// A subquery which calculates the old size being used by zpools -/// under consideration as targets for region allocation. -#[derive(Subquery, QueryId)] -#[subquery(name = old_zpool_usage)] -struct OldPoolUsage { - query: Box>, +// A small wrapper around [diesel::query_builder::BoxedSqlQuery] which +// assists with counting bind parameters and recommends avoiding the usage of +// any non-static strings in query construction. 
+struct QueryBuilder { + query: diesel::query_builder::BoxedSqlQuery<'static, Pg, diesel::query_builder::SqlQuery>, + bind_counter: BindParamCounter, } -impl OldPoolUsage { +impl QueryBuilder { fn new() -> Self { - use crate::db::schema::dataset::dsl as dataset_dsl; Self { - query: Box::new( - dataset_dsl::dataset - .group_by(dataset_dsl::pool_id) - .filter(dataset_dsl::size_used.is_not_null()) - .filter(dataset_dsl::time_deleted.is_null()) - .select(( - dataset_dsl::pool_id, - ExpressionAlias::new::( - diesel::dsl::sum(dataset_dsl::size_used) - .assume_not_null(), - ), - )), - ), + query: diesel::sql_query("").into_boxed(), + bind_counter: BindParamCounter::new(), } } -} - -/// A subquery which identifies zpools with enough space for a region allocation. -#[derive(Subquery, QueryId)] -#[subquery(name = candidate_zpools)] -struct CandidateZpools { - query: Box>, -} - -impl CandidateZpools { - fn new( - old_zpool_usage: &OldPoolUsage, - zpool_size_delta: u64, - seed: u128, - distinct_sleds: bool, - ) -> Self { - use schema::sled::dsl as sled_dsl; - use schema::zpool::dsl as zpool_dsl; - - // Why are we using raw `diesel::dsl::sql` here? - // - // When SQL performs the "SUM" operation on "bigint" type, the result - // is promoted to "numeric" (see: old_zpool_usage::dsl::size_used). - // - // However, we'd like to compare that value with a different value - // (zpool_dsl::total_size) which is still a "bigint". This comparison - // is safe (after all, we basically want to promote "total_size" to a - // Numeric too) but Diesel demands that the input and output SQL types - // of expression methods like ".le" match exactly. - // - // For similar reasons, we use `diesel::dsl::sql` with zpool_size_delta. - // We would like to add it, but diesel only permits us to `to_sql()` it - // into a BigInt, not a Numeric. I welcome a better solution. 
- let it_will_fit = (old_zpool_usage::dsl::size_used - + diesel::dsl::sql(&zpool_size_delta.to_string())) - .le(diesel::dsl::sql(zpool_dsl::total_size::NAME)); - - // We need to join on the sled table to access provision_state. - let with_sled = sled_dsl::sled.on(zpool_dsl::sled_id.eq(sled_dsl::id)); - let with_zpool = zpool_dsl::zpool - .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id)) - .inner_join(with_sled); - - let sled_is_provisionable = sled_dsl::provision_state - .eq(crate::db::model::SledProvisionState::Provisionable); - - let base_query = old_zpool_usage - .query_source() - .inner_join(with_zpool) - .filter(it_will_fit) - .filter(sled_is_provisionable) - .select((old_zpool_usage::dsl::pool_id,)); - - let query = if distinct_sleds { - let seed_bytes = seed.to_le_bytes(); - - let query: Box> = - Box::new( - base_query - .order_by(( - zpool_dsl::sled_id, - cockroach_md5::dsl::md5( - CastUuidToBytea::new(zpool_dsl::id) - .concat(seed_bytes.to_vec()), - ), - )) - .distinct_on(zpool_dsl::sled_id), - ); - - query - } else { - let query: Box> = - Box::new(base_query); - - query - }; - - Self { query } - } -} - -diesel::sql_function! { - #[aggregate] - fn bool_and(b: sql_types::Bool) -> sql_types::Bool; -} - -/// A subquery which confirms whether or not the insertion and updates should -/// occur. -/// -/// This subquery additionally exits the CTE early with an error if either: -/// 1. Not enough datasets exist to provision regions with our required -/// redundancy, or -/// 2. Not enough space exists on zpools to perform the provisioning. 
-#[derive(Subquery, QueryId)] -#[subquery(name = do_insert)] -struct DoInsert { - query: Box>, -} - -impl DoInsert { - fn new( - old_regions: &OldRegions, - candidate_regions: &CandidateRegions, - candidate_zpools: &CandidateZpools, - ) -> Self { - let redundancy = REGION_REDUNDANCY_THRESHOLD as i64; - let not_allocated_yet = old_regions - .query_source() - .count() - .single_value() - .assume_not_null() - .lt(redundancy); - - let enough_candidate_zpools = candidate_zpools - .query_source() - .count() - .single_value() - .assume_not_null() - .ge(redundancy); - - let enough_candidate_regions = candidate_regions - .query_source() - .count() - .single_value() - .assume_not_null() - .ge(redundancy); - - // We want to ensure that we do not allocate on two datasets in the same - // zpool, for two reasons - // - Data redundancy: If a drive fails it should only take one of the 3 - // regions with it - // - Risk of overallocation: We only check that each zpool as enough - // room for one region, so we should not allocate more than one region - // to it. - // - // Selecting two datasets on the same zpool will not initially be - // possible, as at the time of writing each zpool only has one dataset. - // Additionally, we intend to modify the allocation strategy to select - // from 3 distinct sleds, removing the possibility entirely. But, if we - // introduce a change that adds another crucible dataset to zpools - // before we improve the allocation strategy, this check will make sure - // we don't violate drive redundancy, and generate an error instead. 
- use crate::db::schema::dataset::dsl as dataset_dsl; - use candidate_regions::dsl as candidate_dsl; - let enough_unique_candidate_zpools = candidate_regions - .query_source() - .inner_join( - dataset_dsl::dataset - .on(candidate_dsl::dataset_id.eq(dataset_dsl::id)), - ) - .select(diesel::dsl::count_distinct(dataset_dsl::pool_id)) - .single_value() - .assume_not_null() - .ge(redundancy); + // Identifies that a bind parameter should exist in this location within + // the SQL string. + fn param(self) -> Self { Self { - query: Box::new(diesel::select((ExpressionAlias::new::< - do_insert::insert, - >( - not_allocated_yet - .and(TrueOrCastError::new( - enough_candidate_zpools, - NOT_ENOUGH_ZPOOL_SPACE_SENTINEL, - )) - .and(TrueOrCastError::new( - enough_candidate_regions, - NOT_ENOUGH_DATASETS_SENTINEL, - )) - .and(TrueOrCastError::new( - enough_unique_candidate_zpools, - NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL, - )), - ),))), + query: self.query.sql("$").sql(self.bind_counter.next().to_string()), + bind_counter: self.bind_counter, } } -} - -/// A subquery which actually inserts the regions. -#[derive(Subquery, QueryId)] -#[subquery(name = inserted_regions)] -struct InsertRegions { - query: Box>, -} - -impl InsertRegions { - fn new(do_insert: &DoInsert, candidate_regions: &CandidateRegions) -> Self { - use crate::db::schema::region; + // Slightly more strict than the "sql" method of Diesel's SqlQuery. + // Only permits "&'static str" intentionally to limit susceptibility to + // SQL injection. + fn sql(self, s: &'static str) -> Self { Self { - query: Box::new( - candidate_regions - .query_source() - .select(candidate_regions::all_columns) - .filter( - do_insert - .query_source() - .select(do_insert::insert) - .single_value() - .assume_not_null(), - ) - .insert_into(region::table) - .returning(region::all_columns), - ), + query: self.query.sql(s), + bind_counter: self.bind_counter, } } -} - -/// A subquery which updates dataset size usage based on inserted regions. 
-#[derive(Subquery, QueryId)] -#[subquery(name = updated_datasets)] -struct UpdateDatasets { - query: Box>, -} - -impl UpdateDatasets { - fn new( - do_insert: &DoInsert, - proposed_dataset_changes: &ProposedChanges, - ) -> Self { - use crate::db::schema::dataset::dsl as dataset_dsl; - - let datasets_with_updates = proposed_dataset_changes - .query_source() - .select(proposed_dataset_changes::columns::id) - .into_boxed(); + fn bind(self, b: Value) -> Self + where + Pg: sql_types::HasSqlType, + Value: diesel::serialize::ToSql + Send + 'static, + BindSt: Send + 'static + { Self { - query: Box::new( - diesel::update( - dataset_dsl::dataset.filter( - dataset_dsl::id.eq_any(datasets_with_updates) - ) - ) - .filter( - do_insert.query_source() - .select(do_insert::insert) - .single_value() - .assume_not_null() - ) - .set( - dataset_dsl::size_used.eq( - dataset_dsl::size_used + proposed_dataset_changes.query_source() - .filter(proposed_dataset_changes::columns::id.eq(dataset_dsl::id)) - .select(proposed_dataset_changes::columns::size_used_delta) - .single_value() - ) - ) - .returning(crate::db::schema::dataset::all_columns) - ) + query: self.query.bind(b), + bind_counter: self.bind_counter, } } } -/// Constructs a CTE for allocating new regions, and updating the datasets to -/// which those regions belong. 
-#[derive(QueryId)] -pub struct RegionAllocate { - cte: Cte, -} - -impl RegionAllocate { - pub fn new( - volume_id: uuid::Uuid, - block_size: u64, - blocks_per_extent: u64, - extent_count: u64, - allocation_strategy: &RegionAllocationStrategy, - ) -> Self { - let (seed, distinct_sleds) = { - let (input_seed, distinct_sleds) = match allocation_strategy { - RegionAllocationStrategy::Random { seed } => (seed, false), - RegionAllocationStrategy::RandomWithDistinctSleds { seed } => { - (seed, true) - } - }; - ( - input_seed.map_or_else( - || { - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_nanos() - }, - |seed| seed as u128, - ), - distinct_sleds, - ) - }; - - let size_delta = block_size * blocks_per_extent * extent_count; - - let old_regions = OldRegions::new(volume_id); - - let old_pool_usage = OldPoolUsage::new(); - let candidate_zpools = CandidateZpools::new( - &old_pool_usage, - size_delta, - seed, - distinct_sleds, - ); - - let candidate_datasets = - CandidateDatasets::new(&candidate_zpools, seed); - - let shuffled_candidate_datasets = - ShuffledCandidateDatasets::new(&candidate_datasets, seed); - - let candidate_regions = CandidateRegions::new( - &shuffled_candidate_datasets, - volume_id, - block_size, - blocks_per_extent, - extent_count, - ); - let proposed_changes = ProposedChanges::new(&candidate_regions); - let do_insert = - DoInsert::new(&old_regions, &candidate_regions, &candidate_zpools); - let insert_regions = InsertRegions::new(&do_insert, &candidate_regions); - let updated_datasets = - UpdateDatasets::new(&do_insert, &proposed_changes); - - // Gather together all "(dataset, region)" rows for all regions which - // are allocated to the volume. 
- // - // This roughly translates to: - // - // old_regions INNER JOIN old_datasets - // UNION - // new_regions INNER JOIN updated_datasets - // - // Note that we cannot simply JOIN the old + new regions, and query for - // their associated datasets: doing so would return the pre-UPDATE - // values of datasets that are updated by this CTE. - let final_select = Box::new( - old_regions - .query_source() - .inner_join( - crate::db::schema::dataset::dsl::dataset - .on(old_regions::dataset_id - .eq(crate::db::schema::dataset::dsl::id)), - ) - .select(( - crate::db::schema::dataset::all_columns, - old_regions::all_columns, - )) - .union( - insert_regions - .query_source() - .inner_join( - updated_datasets::dsl::updated_datasets - .on(inserted_regions::dataset_id - .eq(updated_datasets::id)), - ) - .select(( - updated_datasets::all_columns, - inserted_regions::all_columns, - )), - ), - ); - - let cte = CteBuilder::new() - .add_subquery(old_regions) - .add_subquery(old_pool_usage) - .add_subquery(candidate_zpools) - .add_subquery(candidate_datasets) - .add_subquery(shuffled_candidate_datasets) - .add_subquery(candidate_regions) - .add_subquery(proposed_changes) - .add_subquery(do_insert) - .add_subquery(insert_regions) - .add_subquery(updated_datasets) - .build(final_select); - - Self { cte } - } -} - -impl QueryFragment for RegionAllocate { - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - out.unsafe_to_cache_prepared(); - - self.cte.walk_ast(out.reborrow())?; - Ok(()) - } -} - -type SelectableSql = < - >::SelectExpression as diesel::Expression ->::SqlType; - -impl Query for RegionAllocate { - type SqlType = (SelectableSql, SelectableSql); -} - -impl RunQueryDsl for RegionAllocate {} - -struct BindParamCounter(i32); -impl BindParamCounter { - fn new() -> Self { - Self(0) - } - fn next(&mut self) -> i32 { - self.0 += 1; - self.0 - } -} - -trait SqlQueryBinds { - fn add_bind(self, bind_counter: &mut BindParamCounter) -> Self; 
-} - -impl<'a, Query> SqlQueryBinds - for diesel::query_builder::BoxedSqlQuery<'a, Pg, Query> -{ - fn add_bind(self, bind_counter: &mut BindParamCounter) -> Self { - self.sql("$").sql(bind_counter.next().to_string()) - } -} - pub fn allocation_query( volume_id: uuid::Uuid, block_size: u64, @@ -737,107 +166,190 @@ pub fn allocation_query( let size_delta = block_size * blocks_per_extent * extent_count; let redunancy: i64 = i64::try_from(REGION_REDUNDANCY_THRESHOLD).unwrap(); - let mut binds = BindParamCounter::new(); - let query = diesel::sql_query( + + let builder = QueryBuilder::new().sql( + // Find all old regions associated with a particular volume "WITH old_regions AS - (SELECT region.id, region.time_created, region.time_modified, region.dataset_id, region.volume_id, region.block_size, region.blocks_per_extent, region.extent_count FROM region WHERE (region.volume_id = ").into_boxed().add_bind(&mut binds).sql(")),") + (SELECT + region.id, + region.time_created, + region.time_modified, + region.dataset_id, + region.volume_id, + region.block_size, + region.blocks_per_extent, + region.extent_count + FROM region WHERE (region.volume_id = ").param().sql(")),") .bind::(volume_id) - .sql( -" - old_zpool_usage AS - (SELECT dataset.pool_id, sum(dataset.size_used) AS size_used FROM dataset WHERE ((dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL)) GROUP BY dataset.pool_id), -"); - // We pick one of these branches, depending on whether or not the sleds - // should be distinct. + // Calculates the old size being used by zpools under consideration as targets for region + // allocation. + .sql(" + old_zpool_usage AS + (SELECT + dataset.pool_id, + sum(dataset.size_used) AS size_used + FROM dataset WHERE ((dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL)) GROUP BY dataset.pool_id),"); + + // Identifies zpools with enough space for region allocation. 
+ // + // NOTE: This changes the format of the underlying SQL query, as it uses + // distinct bind parameters depending on the conditional branch. if distinct_sleds { - query.sql(" + builder.sql(" candidate_zpools AS - (SELECT old_zpool_usage.pool_id FROM (old_zpool_usage INNER JOIN (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id)) WHERE (((old_zpool_usage.size_used + ").add_bind(&mut binds).sql(" ) <= total_size) AND (sled.provision_state = 'provisionable'))), -") + (SELECT + old_zpool_usage.pool_id + FROM (old_zpool_usage INNER JOIN (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id)) + WHERE (((old_zpool_usage.size_used + ").param().sql(" ) <= total_size) AND (sled.provision_state = 'provisionable'))),") .bind::(size_delta as i64) } else { - query.sql(" + builder.sql(" candidate_zpools AS - (SELECT DISTINCT ON (zpool.sled_id) old_zpool_usage.pool_id FROM (old_zpool_usage INNER JOIN (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id)) WHERE (((old_zpool_usage.size_used + ").add_bind(&mut binds).sql(" ) <= total_size) AND (sled.provision_state = 'provisionable')) ORDER BY zpool.sled_id, md5((CAST(zpool.id as BYTEA) || ").add_bind(&mut binds).sql("))), -") + (SELECT DISTINCT ON (zpool.sled_id) + old_zpool_usage.pool_id + FROM (old_zpool_usage INNER JOIN (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id)) + WHERE (((old_zpool_usage.size_used + ").param().sql(" ) <= total_size) AND (sled.provision_state = 'provisionable')) + ORDER BY zpool.sled_id, md5((CAST(zpool.id as BYTEA) || ").param().sql("))),") .bind::(size_delta as i64) .bind::(seed.clone()) } + // Find datasets which could be used for provisioning regions. + // + // We only consider datasets which are already allocated as "Crucible". 
+ // This implicitly distinguishes between "M.2s" and "U.2s" -- Nexus needs to + // determine during dataset provisioning which devices should be considered for + // usage as Crucible storage. + // + // We select only one dataset from each zpool. .sql( " candidate_datasets AS - (SELECT DISTINCT ON (dataset.pool_id) dataset.id, dataset.pool_id FROM (dataset INNER JOIN candidate_zpools ON (dataset.pool_id = candidate_zpools.pool_id)) WHERE (((dataset.time_deleted IS NULL) AND (dataset.size_used IS NOT NULL)) AND (dataset.kind = 'crucible')) ORDER BY dataset.pool_id, md5((CAST(dataset.id as BYTEA) || ").add_bind(&mut binds).sql("))), -" - ).bind::(seed.clone()) + (SELECT DISTINCT ON (dataset.pool_id) + dataset.id, + dataset.pool_id + FROM (dataset INNER JOIN candidate_zpools ON (dataset.pool_id = candidate_zpools.pool_id)) + WHERE (((dataset.time_deleted IS NULL) AND (dataset.size_used IS NOT NULL)) AND (dataset.kind = 'crucible')) + ORDER BY dataset.pool_id, md5((CAST(dataset.id as BYTEA) || ").param().sql("))),") + .bind::(seed.clone()) + // We order by md5 to shuffle the ordering of the datasets. + // md5 has a uniform output distribution so it does the job. .sql( " shuffled_candidate_datasets AS - (SELECT candidate_datasets.id, candidate_datasets.pool_id FROM candidate_datasets ORDER BY md5((CAST(candidate_datasets.id as BYTEA) || ").add_bind(&mut binds).sql(")) LIMIT ").add_bind(&mut binds).sql("), -" - ).bind::(seed) + (SELECT + candidate_datasets.id, + candidate_datasets.pool_id + FROM candidate_datasets + ORDER BY md5((CAST(candidate_datasets.id as BYTEA) || ").param().sql(")) LIMIT ").param().sql("),") + .bind::(seed) .bind::(redunancy) + // Create the regions-to-be-inserted for the volume. 
.sql( " candidate_regions AS - (SELECT gen_random_uuid() AS id, now() AS time_created, now() AS time_modified, shuffled_candidate_datasets.id AS dataset_id, ").add_bind(&mut binds).sql(" AS volume_id, ").add_bind(&mut binds).sql(" AS block_size, ").add_bind(&mut binds).sql(" AS blocks_per_extent, ").add_bind(&mut binds).sql(" AS extent_count FROM shuffled_candidate_datasets), -" - ).bind::(volume_id) + (SELECT + gen_random_uuid() AS id, + now() AS time_created, + now() AS time_modified, + shuffled_candidate_datasets.id AS dataset_id, + ").param().sql(" AS volume_id, + ").param().sql(" AS block_size, + ").param().sql(" AS blocks_per_extent, + ").param().sql(" AS extent_count + FROM shuffled_candidate_datasets),") + .bind::(volume_id) .bind::(block_size as i64) .bind::(blocks_per_extent as i64) .bind::(extent_count as i64) + // A subquery which summarizes the changes we intend to make, showing: + // + // 1. Which datasets will have size adjustments + // 2. Which pools those datasets belong to + // 3. The delta in size-used .sql( " proposed_dataset_changes AS - (SELECT candidate_regions.dataset_id AS id, dataset.pool_id AS pool_id, ((candidate_regions.block_size * candidate_regions.blocks_per_extent) * candidate_regions.extent_count) AS size_used_delta FROM (candidate_regions INNER JOIN dataset ON (dataset.id = candidate_regions.dataset_id))), + (SELECT + candidate_regions.dataset_id AS id, + dataset.pool_id AS pool_id, + ((candidate_regions.block_size * candidate_regions.blocks_per_extent) * candidate_regions.extent_count) AS size_used_delta + FROM (candidate_regions INNER JOIN dataset ON (dataset.id = candidate_regions.dataset_id))),") + // Confirms whether or not the insertion and updates should + // occur. + // + // This subquery additionally exits the CTE early with an error if either: + // 1. Not enough datasets exist to provision regions with our required + // redundancy, or + // 2. Not enough space exists on zpools to perform the provisioning. 
+      //
+      // We want to ensure that we do not allocate on two datasets in the same
+      // zpool, for two reasons
+      // - Data redundancy: If a drive fails it should only take one of the 3
+      //   regions with it
+      // - Risk of overallocation: We only check that each zpool has enough
+      //   room for one region, so we should not allocate more than one region
+      //   to it.
+      //
+      // Selecting two datasets on the same zpool will not initially be
+      // possible, as at the time of writing each zpool only has one dataset.
+      // Additionally, we intend to modify the allocation strategy to select
+      // from 3 distinct sleds, removing the possibility entirely. But, if we
+      // introduce a change that adds another crucible dataset to zpools
+      // before we improve the allocation strategy, this check will make sure
+      // we don't violate drive redundancy, and generate an error instead.
+      .sql("
  do_insert AS
-    (SELECT (((((SELECT COUNT(*) FROM old_regions LIMIT 1) < ").add_bind(&mut binds).sql(") AND CAST(IF(((SELECT COUNT(*) FROM candidate_zpools LIMIT 1) >= ").add_bind(&mut binds).sql("), 'TRUE', 'Not enough space') AS BOOL)) AND CAST(IF(((SELECT COUNT(*) FROM candidate_regions LIMIT 1) >= ").add_bind(&mut binds).sql("), 'TRUE', 'Not enough datasets') AS BOOL)) AND CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= ").add_bind(&mut binds).sql("), 'TRUE', 'Not enough unique zpools selected') AS BOOL)) AS insert),
-"
-    ).bind::(redunancy)
+    (SELECT
+      (((((SELECT COUNT(*) FROM old_regions LIMIT 1) < ").param().sql(") AND CAST(IF(((SELECT COUNT(*) FROM candidate_zpools LIMIT 1) >= ").param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_ZPOOL_SPACE_SENTINEL, "') AS BOOL)) AND CAST(IF(((SELECT COUNT(*) FROM candidate_regions LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_DATASETS_SENTINEL, "') AS BOOL)) AND CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER
JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL, "') AS BOOL)) AS insert),")) + .bind::(redunancy) .bind::(redunancy) .bind::(redunancy) .bind::(redunancy) .sql( " inserted_regions AS - (INSERT INTO region (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count) SELECT candidate_regions.id, candidate_regions.time_created, candidate_regions.time_modified, candidate_regions.dataset_id, candidate_regions.volume_id, candidate_regions.block_size, candidate_regions.blocks_per_extent, candidate_regions.extent_count FROM candidate_regions WHERE (SELECT do_insert.insert FROM do_insert LIMIT 1) RETURNING region.id, region.time_created, region.time_modified, region.dataset_id, region.volume_id, region.block_size, region.blocks_per_extent, region.extent_count), + (INSERT INTO region + (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count) + SELECT + candidate_regions.id, + candidate_regions.time_created, + candidate_regions.time_modified, + candidate_regions.dataset_id, + candidate_regions.volume_id, + candidate_regions.block_size, + candidate_regions.blocks_per_extent, + candidate_regions.extent_count + FROM candidate_regions + WHERE + (SELECT do_insert.insert FROM do_insert LIMIT 1) + RETURNING + region.id, + region.time_created, + region.time_modified, + region.dataset_id, + region.volume_id, + region.block_size, + region.blocks_per_extent, + region.extent_count + ), updated_datasets AS (UPDATE dataset SET size_used = (dataset.size_used + (SELECT proposed_dataset_changes.size_used_delta FROM proposed_dataset_changes WHERE (proposed_dataset_changes.id = dataset.id) LIMIT 1)) WHERE ((dataset.id = ANY(SELECT proposed_dataset_changes.id FROM proposed_dataset_changes)) AND (SELECT do_insert.insert FROM do_insert LIMIT 1)) RETURNING dataset.id, dataset.time_created, dataset.time_modified, 
dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used) (SELECT dataset.id, dataset.time_created, dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used, old_regions.id, old_regions.time_created, old_regions.time_modified, old_regions.dataset_id, old_regions.volume_id, old_regions.block_size, old_regions.blocks_per_extent, old_regions.extent_count FROM (old_regions INNER JOIN dataset ON (old_regions.dataset_id = dataset.id))) UNION (SELECT updated_datasets.id, updated_datasets.time_created, updated_datasets.time_modified, updated_datasets.time_deleted, updated_datasets.rcgen, updated_datasets.pool_id, updated_datasets.ip, updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, inserted_regions.dataset_id, inserted_regions.volume_id, inserted_regions.block_size, inserted_regions.blocks_per_extent, inserted_regions.extent_count FROM (inserted_regions INNER JOIN updated_datasets ON (inserted_regions.dataset_id = updated_datasets.id)))" - ) + ).query } #[cfg(test)] mod test { use super::*; use crate::db::explain::ExplainableAsync; - use omicron_test_utils::dev; use nexus_test_utils::db::test_setup_database; + use omicron_test_utils::dev; use uuid::Uuid; - #[test] - fn raw_sql() { - let volume_id = Uuid::new_v4(); - let block_size = 0; - let blocks_per_extent = 1; - let extent_count = 2; - let allocation_strategy = - RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }; - - let region_allocate = RegionAllocate::new( - volume_id, - block_size, - blocks_per_extent, - extent_count, - &allocation_strategy, - ); - - let query = diesel::debug_query::(®ion_allocate); - - assert_eq!(query.to_string(), "foobar"); - } - + // Explain the possible forms of the SQL query to ensure that it + // creates a valid SQL string. 
#[tokio::test] async fn explainable() { let logctx = dev::test_setup_log("explainable"); @@ -848,19 +360,33 @@ mod test { let conn = pool.pool().get().await.unwrap(); let volume_id = Uuid::new_v4(); - let block_size = 0; - let blocks_per_extent = 1; - let extent_count = 2; - let allocation_strategy = - RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }; + let block_size = 512; + let blocks_per_extent = 4; + let extent_count = 8; + + // First structure: Explain the query with "RandomWithDistinctSleds" + let region_allocate = allocation_query( volume_id, block_size, blocks_per_extent, extent_count, - &allocation_strategy, + &RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }, ); + let _ = region_allocate + .explain_async(&conn) + .await + .expect("Failed to explain query - is it valid SQL?"); + + // Second structure: Explain the query with "Random" + let region_allocate = allocation_query( + volume_id, + block_size, + blocks_per_extent, + extent_count, + &RegionAllocationStrategy::Random { seed: None }, + ); let _ = region_allocate .explain_async(&conn) .await From 1c74b04ba3f8b1e2fde652ae8a8379b60bc0883d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 13 Feb 2024 23:38:08 -0800 Subject: [PATCH 03/19] better formatting --- .../src/db/queries/region_allocation.rs | 61 ++++++++++++++++++- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 24ace6467e..8dbc23c712 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -335,8 +335,65 @@ pub fn allocation_query( region.extent_count ), updated_datasets AS - (UPDATE dataset SET size_used = (dataset.size_used + (SELECT proposed_dataset_changes.size_used_delta FROM proposed_dataset_changes WHERE (proposed_dataset_changes.id = dataset.id) LIMIT 1)) WHERE ((dataset.id = ANY(SELECT 
proposed_dataset_changes.id FROM proposed_dataset_changes)) AND (SELECT do_insert.insert FROM do_insert LIMIT 1)) RETURNING dataset.id, dataset.time_created, dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used) -(SELECT dataset.id, dataset.time_created, dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used, old_regions.id, old_regions.time_created, old_regions.time_modified, old_regions.dataset_id, old_regions.volume_id, old_regions.block_size, old_regions.blocks_per_extent, old_regions.extent_count FROM (old_regions INNER JOIN dataset ON (old_regions.dataset_id = dataset.id))) UNION (SELECT updated_datasets.id, updated_datasets.time_created, updated_datasets.time_modified, updated_datasets.time_deleted, updated_datasets.rcgen, updated_datasets.pool_id, updated_datasets.ip, updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, inserted_regions.dataset_id, inserted_regions.volume_id, inserted_regions.block_size, inserted_regions.blocks_per_extent, inserted_regions.extent_count FROM (inserted_regions INNER JOIN updated_datasets ON (inserted_regions.dataset_id = updated_datasets.id)))" + (UPDATE dataset SET + size_used = (dataset.size_used + (SELECT proposed_dataset_changes.size_used_delta FROM proposed_dataset_changes WHERE (proposed_dataset_changes.id = dataset.id) LIMIT 1)) + WHERE ( + (dataset.id = ANY(SELECT proposed_dataset_changes.id FROM proposed_dataset_changes)) AND + (SELECT do_insert.insert FROM do_insert LIMIT 1)) + RETURNING + dataset.id, + dataset.time_created, + dataset.time_modified, + dataset.time_deleted, + dataset.rcgen, + dataset.pool_id, + dataset.ip, + dataset.port, + dataset.kind, + dataset.size_used + ) +(SELECT + dataset.id, + dataset.time_created, + dataset.time_modified, + 
dataset.time_deleted, + dataset.rcgen, + dataset.pool_id, + dataset.ip, + dataset.port, + dataset.kind, + dataset.size_used, + old_regions.id, + old_regions.time_created, + old_regions.time_modified, + old_regions.dataset_id, + old_regions.volume_id, + old_regions.block_size, + old_regions.blocks_per_extent, + old_regions.extent_count +FROM + (old_regions INNER JOIN dataset ON (old_regions.dataset_id = dataset.id)) +) UNION +(SELECT + updated_datasets.id, + updated_datasets.time_created, + updated_datasets.time_modified, + updated_datasets.time_deleted, + updated_datasets.rcgen, + updated_datasets.pool_id, + updated_datasets.ip, + updated_datasets.port, + updated_datasets.kind, + updated_datasets.size_used, + inserted_regions.id, + inserted_regions.time_created, + inserted_regions.time_modified, + inserted_regions.dataset_id, + inserted_regions.volume_id, + inserted_regions.block_size, + inserted_regions.blocks_per_extent, + inserted_regions.extent_count +FROM (inserted_regions INNER JOIN updated_datasets ON (inserted_regions.dataset_id = updated_datasets.id)))" ).query } From 25820cc26882ce18d3ae92605e1ec5465d117d8f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 14 Feb 2024 00:04:31 -0800 Subject: [PATCH 04/19] Fix tests --- .../src/db/queries/region_allocation.rs | 113 +++++++++++------- 1 file changed, 69 insertions(+), 44 deletions(-) diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 8dbc23c712..d953e05b0e 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -85,7 +85,11 @@ impl<'a, Query> SqlQueryBinds // assists with counting bind parameters and recommends avoiding the usage of // any non-static strings in query construction. 
struct QueryBuilder { - query: diesel::query_builder::BoxedSqlQuery<'static, Pg, diesel::query_builder::SqlQuery>, + query: diesel::query_builder::BoxedSqlQuery< + 'static, + Pg, + diesel::query_builder::SqlQuery, + >, bind_counter: BindParamCounter, } @@ -101,7 +105,10 @@ impl QueryBuilder { // the SQL string. fn param(self) -> Self { Self { - query: self.query.sql("$").sql(self.bind_counter.next().to_string()), + query: self + .query + .sql("$") + .sql(self.bind_counter.next().to_string()), bind_counter: self.bind_counter, } } @@ -110,22 +117,16 @@ impl QueryBuilder { // Only permits "&'static str" intentionally to limit susceptibility to // SQL injection. fn sql(self, s: &'static str) -> Self { - Self { - query: self.query.sql(s), - bind_counter: self.bind_counter, - } + Self { query: self.query.sql(s), bind_counter: self.bind_counter } } fn bind(self, b: Value) -> Self where Pg: sql_types::HasSqlType, Value: diesel::serialize::ToSql + Send + 'static, - BindSt: Send + 'static + BindSt: Send + 'static, { - Self { - query: self.query.bind(b), - bind_counter: self.bind_counter, - } + Self { query: self.query.bind(b), bind_counter: self.bind_counter } } } @@ -166,12 +167,11 @@ pub fn allocation_query( let size_delta = block_size * blocks_per_extent * extent_count; let redunancy: i64 = i64::try_from(REGION_REDUNDANCY_THRESHOLD).unwrap(); - let builder = QueryBuilder::new().sql( // Find all old regions associated with a particular volume "WITH - old_regions AS - (SELECT + old_regions AS ( + SELECT region.id, region.time_created, region.time_modified, @@ -186,8 +186,8 @@ pub fn allocation_query( // Calculates the old size being used by zpools under consideration as targets for region // allocation. 
.sql(" - old_zpool_usage AS - (SELECT + old_zpool_usage AS ( + SELECT dataset.pool_id, sum(dataset.size_used) AS size_used FROM dataset WHERE ((dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL)) GROUP BY dataset.pool_id),"); @@ -198,22 +198,40 @@ pub fn allocation_query( // distinct bind parameters depending on the conditional branch. if distinct_sleds { builder.sql(" - candidate_zpools AS - (SELECT + candidate_zpools AS ( + SELECT DISTINCT ON (zpool.sled_id) old_zpool_usage.pool_id - FROM (old_zpool_usage INNER JOIN (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id)) - WHERE (((old_zpool_usage.size_used + ").param().sql(" ) <= total_size) AND (sled.provision_state = 'provisionable'))),") + FROM ( + old_zpool_usage + INNER JOIN + (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id) + ) + WHERE ( + ((old_zpool_usage.size_used + ").param().sql(" ) <= total_size) + AND + (sled.provision_state = 'provisionable') + ) + ORDER BY zpool.sled_id, md5((CAST(zpool.id as BYTEA) || ").param().sql("))),") .bind::(size_delta as i64) + .bind::(seed.clone()) } else { builder.sql(" - candidate_zpools AS - (SELECT DISTINCT ON (zpool.sled_id) + candidate_zpools AS ( + SELECT old_zpool_usage.pool_id - FROM (old_zpool_usage INNER JOIN (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id)) - WHERE (((old_zpool_usage.size_used + ").param().sql(" ) <= total_size) AND (sled.provision_state = 'provisionable')) - ORDER BY zpool.sled_id, md5((CAST(zpool.id as BYTEA) || ").param().sql("))),") + FROM ( + old_zpool_usage + INNER JOIN + (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) + ON (zpool.id = old_zpool_usage.pool_id) + ) + WHERE ( + ((old_zpool_usage.size_used + ").param().sql(" ) <= total_size) + AND + (sled.provision_state = 'provisionable') + ) + ),") .bind::(size_delta as i64) - .bind::(seed.clone()) } // Find datasets which could be used for 
provisioning regions. // @@ -225,20 +243,24 @@ pub fn allocation_query( // We select only one dataset from each zpool. .sql( " - candidate_datasets AS - (SELECT DISTINCT ON (dataset.pool_id) + candidate_datasets AS ( + SELECT DISTINCT ON (dataset.pool_id) dataset.id, dataset.pool_id FROM (dataset INNER JOIN candidate_zpools ON (dataset.pool_id = candidate_zpools.pool_id)) - WHERE (((dataset.time_deleted IS NULL) AND (dataset.size_used IS NOT NULL)) AND (dataset.kind = 'crucible')) + WHERE ( + ((dataset.time_deleted IS NULL) AND + (dataset.size_used IS NOT NULL)) AND + (dataset.kind = 'crucible') + ) ORDER BY dataset.pool_id, md5((CAST(dataset.id as BYTEA) || ").param().sql("))),") .bind::(seed.clone()) // We order by md5 to shuffle the ordering of the datasets. // md5 has a uniform output distribution so it does the job. .sql( " - shuffled_candidate_datasets AS - (SELECT + shuffled_candidate_datasets AS ( + SELECT candidate_datasets.id, candidate_datasets.pool_id FROM candidate_datasets @@ -248,8 +270,8 @@ pub fn allocation_query( // Create the regions-to-be-inserted for the volume. .sql( " - candidate_regions AS - (SELECT + candidate_regions AS ( + SELECT gen_random_uuid() AS id, now() AS time_created, now() AS time_modified, @@ -268,10 +290,9 @@ pub fn allocation_query( // 1. Which datasets will have size adjustments // 2. Which pools those datasets belong to // 3. The delta in size-used - .sql( -" - proposed_dataset_changes AS - (SELECT + .sql(" + proposed_dataset_changes AS ( + SELECT candidate_regions.dataset_id AS id, dataset.pool_id AS pool_id, ((candidate_regions.block_size * candidate_regions.blocks_per_extent) * candidate_regions.extent_count) AS size_used_delta @@ -300,17 +321,21 @@ pub fn allocation_query( // before we improve the allocation strategy, this check will make sure // we don't violate drive redundancy, and generate an error instead. 
.sql(" - do_insert AS - (SELECT - (((((SELECT COUNT(*) FROM old_regions LIMIT 1) < ").param().sql(") AND CAST(IF(((SELECT COUNT(*) FROM candidate_zpools LIMIT 1) >= ").param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_ZPOOL_SPACE_SENTINEL, "') AS BOOL)) AND CAST(IF(((SELECT COUNT(*) FROM candidate_regions LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_DATASETS_SENTINEL, "') AS BOOL)) AND CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL, "') AS BOOL)) AS insert),")) + do_insert AS ( + SELECT ((( + ((SELECT COUNT(*) FROM old_regions LIMIT 1) < ").param().sql(") AND + CAST(IF(((SELECT COUNT(*) FROM candidate_zpools LIMIT 1) >= ").param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_ZPOOL_SPACE_SENTINEL, "') AS BOOL)) AND + CAST(IF(((SELECT COUNT(*) FROM candidate_regions LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_DATASETS_SENTINEL, "') AS BOOL)) AND + CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL, "') AS BOOL) + ) AS insert),")) .bind::(redunancy) .bind::(redunancy) .bind::(redunancy) .bind::(redunancy) .sql( " - inserted_regions AS - (INSERT INTO region + inserted_regions AS ( + INSERT INTO region (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count) SELECT candidate_regions.id, @@ -334,8 +359,8 @@ pub fn allocation_query( region.blocks_per_extent, region.extent_count ), - updated_datasets AS - (UPDATE dataset SET + updated_datasets AS ( + UPDATE dataset SET size_used = (dataset.size_used + (SELECT proposed_dataset_changes.size_used_delta FROM proposed_dataset_changes WHERE (proposed_dataset_changes.id = dataset.id) LIMIT 1)) WHERE 
( (dataset.id = ANY(SELECT proposed_dataset_changes.id FROM proposed_dataset_changes)) AND From cd9b5480a02e2eb226f2ed3427866d437c128f2d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 14 Feb 2024 01:59:41 -0800 Subject: [PATCH 05/19] Support for 'allcolumns' --- nexus/db-queries/src/db/column_walker.rs | 27 ++- nexus/db-queries/src/db/mod.rs | 1 + .../src/db/queries/region_allocation.rs | 212 ++++-------------- nexus/db-queries/src/db/raw_query_builder.rs | 151 +++++++++++++ 4 files changed, 221 insertions(+), 170 deletions(-) create mode 100644 nexus/db-queries/src/db/raw_query_builder.rs diff --git a/nexus/db-queries/src/db/column_walker.rs b/nexus/db-queries/src/db/column_walker.rs index 64c3b450c8..6e21e0d899 100644 --- a/nexus/db-queries/src/db/column_walker.rs +++ b/nexus/db-queries/src/db/column_walker.rs @@ -4,6 +4,7 @@ //! CTE utility for iterating over all columns in a table. +use crate::db::raw_query_builder::TrustedStr; use diesel::prelude::*; use std::marker::PhantomData; @@ -17,14 +18,30 @@ pub(crate) struct ColumnWalker { remaining: PhantomData, } +pub type AllColumnsOf = ColumnWalker<::AllColumns>; + impl ColumnWalker { - pub fn new() -> Self { + pub const fn new() -> Self { Self { remaining: PhantomData } } } macro_rules! impl_column_walker { ( $len:literal $($column:ident)+ ) => ( + #[allow(dead_code)] + impl<$($column: Column),+> ColumnWalker<($($column,)+)> { + pub fn with_prefix(prefix: &'static str) -> TrustedStr { + // This string is derived from: + // - The "table" type, with associated columns, which + // are not controlled by an arbitrary user, and + // - The "prefix" type, which is a "&'static str" (AKA, + // hopefully known at compile-time, and not leaked). 
+ TrustedStr::this_string_wont_cause_sql_injections( + [$([prefix, $column::NAME].join("."),)+].join(", ") + ) + } + } + impl<$($column: Column),+> IntoIterator for ColumnWalker<($($column,)+)> { type Item = &'static str; type IntoIter = std::array::IntoIter; @@ -109,4 +126,12 @@ mod test { assert_eq!(iter.next(), Some("value")); assert_eq!(iter.next(), None); } + + #[test] + fn test_all_columns_with_prefix() { + assert_eq!( + AllColumnsOf::::with_prefix("foo").as_str(), + "foo.id, foo.value, foo.time_deleted" + ); + } } diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs index 6cd3911efe..86778fa2ec 100644 --- a/nexus/db-queries/src/db/mod.rs +++ b/nexus/db-queries/src/db/mod.rs @@ -27,6 +27,7 @@ mod pool_connection; // This is marked public because the error types are used elsewhere, e.g., in // sagas. pub mod queries; +mod raw_query_builder; mod saga_recovery; mod sec_store; pub mod subquery; diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index d953e05b0e..dd7431d723 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -4,7 +4,10 @@ //! Implementation of queries for provisioning regions. 
+use crate::db::column_walker::AllColumnsOf; use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD; +use crate::db::raw_query_builder::QueryBuilder; +use crate::db::schema; use crate::db::true_or_cast_error::matches_sentinel; use const_format::concatcp; use diesel::pg::Pg; @@ -12,7 +15,9 @@ use diesel::result::Error as DieselError; use diesel::sql_types; use omicron_common::api::external; use omicron_common::nexus_config::RegionAllocationStrategy; -use std::cell::Cell; + +type AllColumnsOfRegion = AllColumnsOf; +type AllColumnsOfDataset = AllColumnsOf; const NOT_ENOUGH_DATASETS_SENTINEL: &'static str = "Not enough datasets"; const NOT_ENOUGH_ZPOOL_SPACE_SENTINEL: &'static str = "Not enough space"; @@ -58,78 +63,6 @@ pub fn from_diesel(e: DieselError) -> external::Error { error::public_error_from_diesel(e, error::ErrorHandler::Server) } -struct BindParamCounter(Cell); -impl BindParamCounter { - fn new() -> Self { - Self(0.into()) - } - fn next(&self) -> i32 { - self.0.set(self.0.get() + 1); - self.0.get() - } -} - -trait SqlQueryBinds { - fn add_bind(self, bind_counter: &BindParamCounter) -> Self; -} - -impl<'a, Query> SqlQueryBinds - for diesel::query_builder::BoxedSqlQuery<'a, Pg, Query> -{ - fn add_bind(self, bind_counter: &BindParamCounter) -> Self { - self.sql("$").sql(bind_counter.next().to_string()) - } -} - -// A small wrapper around [diesel::query_builder::BoxedSqlQuery] which -// assists with counting bind parameters and recommends avoiding the usage of -// any non-static strings in query construction. -struct QueryBuilder { - query: diesel::query_builder::BoxedSqlQuery< - 'static, - Pg, - diesel::query_builder::SqlQuery, - >, - bind_counter: BindParamCounter, -} - -impl QueryBuilder { - fn new() -> Self { - Self { - query: diesel::sql_query("").into_boxed(), - bind_counter: BindParamCounter::new(), - } - } - - // Identifies that a bind parameter should exist in this location within - // the SQL string. 
- fn param(self) -> Self { - Self { - query: self - .query - .sql("$") - .sql(self.bind_counter.next().to_string()), - bind_counter: self.bind_counter, - } - } - - // Slightly more strict than the "sql" method of Diesel's SqlQuery. - // Only permits "&'static str" intentionally to limit susceptibility to - // SQL injection. - fn sql(self, s: &'static str) -> Self { - Self { query: self.query.sql(s), bind_counter: self.bind_counter } - } - - fn bind(self, b: Value) -> Self - where - Pg: sql_types::HasSqlType, - Value: diesel::serialize::ToSql + Send + 'static, - BindSt: Send + 'static, - { - Self { query: self.query.bind(b), bind_counter: self.bind_counter } - } -} - pub fn allocation_query( volume_id: uuid::Uuid, block_size: u64, @@ -171,15 +104,7 @@ pub fn allocation_query( // Find all old regions associated with a particular volume "WITH old_regions AS ( - SELECT - region.id, - region.time_created, - region.time_modified, - region.dataset_id, - region.volume_id, - region.block_size, - region.blocks_per_extent, - region.extent_count + SELECT ").sql(AllColumnsOfRegion::with_prefix("region")).sql(" FROM region WHERE (region.volume_id = ").param().sql(")),") .bind::(volume_id) @@ -241,8 +166,7 @@ pub fn allocation_query( // usage as Crucible storage. // // We select only one dataset from each zpool. - .sql( -" + .sql(" candidate_datasets AS ( SELECT DISTINCT ON (dataset.pool_id) dataset.id, @@ -253,23 +177,23 @@ pub fn allocation_query( (dataset.size_used IS NOT NULL)) AND (dataset.kind = 'crucible') ) - ORDER BY dataset.pool_id, md5((CAST(dataset.id as BYTEA) || ").param().sql("))),") + ORDER BY dataset.pool_id, md5((CAST(dataset.id as BYTEA) || ").param().sql(")) + ),") .bind::(seed.clone()) // We order by md5 to shuffle the ordering of the datasets. // md5 has a uniform output distribution so it does the job. 
- .sql( -" + .sql(" shuffled_candidate_datasets AS ( SELECT candidate_datasets.id, candidate_datasets.pool_id FROM candidate_datasets - ORDER BY md5((CAST(candidate_datasets.id as BYTEA) || ").param().sql(")) LIMIT ").param().sql("),") + ORDER BY md5((CAST(candidate_datasets.id as BYTEA) || ").param().sql(")) LIMIT ").param().sql(" + ),") .bind::(seed) .bind::(redunancy) // Create the regions-to-be-inserted for the volume. - .sql( -" + .sql(" candidate_regions AS ( SELECT gen_random_uuid() AS id, @@ -280,7 +204,8 @@ pub fn allocation_query( ").param().sql(" AS block_size, ").param().sql(" AS blocks_per_extent, ").param().sql(" AS extent_count - FROM shuffled_candidate_datasets),") + FROM shuffled_candidate_datasets + ),") .bind::(volume_id) .bind::(block_size as i64) .bind::(blocks_per_extent as i64) @@ -296,7 +221,8 @@ pub fn allocation_query( candidate_regions.dataset_id AS id, dataset.pool_id AS pool_id, ((candidate_regions.block_size * candidate_regions.blocks_per_extent) * candidate_regions.extent_count) AS size_used_delta - FROM (candidate_regions INNER JOIN dataset ON (dataset.id = candidate_regions.dataset_id))),") + FROM (candidate_regions INNER JOIN dataset ON (dataset.id = candidate_regions.dataset_id)) + ),") // Confirms whether or not the insertion and updates should // occur. 
// @@ -327,99 +253,47 @@ pub fn allocation_query( CAST(IF(((SELECT COUNT(*) FROM candidate_zpools LIMIT 1) >= ").param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_ZPOOL_SPACE_SENTINEL, "') AS BOOL)) AND CAST(IF(((SELECT COUNT(*) FROM candidate_regions LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_DATASETS_SENTINEL, "') AS BOOL)) AND CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL, "') AS BOOL) - ) AS insert),")) + ) AS insert + ),")) .bind::(redunancy) .bind::(redunancy) .bind::(redunancy) .bind::(redunancy) - .sql( -" + .sql(" inserted_regions AS ( INSERT INTO region (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count) - SELECT - candidate_regions.id, - candidate_regions.time_created, - candidate_regions.time_modified, - candidate_regions.dataset_id, - candidate_regions.volume_id, - candidate_regions.block_size, - candidate_regions.blocks_per_extent, - candidate_regions.extent_count + SELECT ").sql(AllColumnsOfRegion::with_prefix("candidate_regions")).sql(" FROM candidate_regions WHERE (SELECT do_insert.insert FROM do_insert LIMIT 1) - RETURNING - region.id, - region.time_created, - region.time_modified, - region.dataset_id, - region.volume_id, - region.block_size, - region.blocks_per_extent, - region.extent_count - ), + RETURNING ").sql(AllColumnsOfRegion::with_prefix("region")).sql(" + ), updated_datasets AS ( UPDATE dataset SET size_used = (dataset.size_used + (SELECT proposed_dataset_changes.size_used_delta FROM proposed_dataset_changes WHERE (proposed_dataset_changes.id = dataset.id) LIMIT 1)) WHERE ( (dataset.id = ANY(SELECT proposed_dataset_changes.id FROM proposed_dataset_changes)) AND (SELECT do_insert.insert FROM do_insert LIMIT 1)) - RETURNING - dataset.id, - dataset.time_created, - dataset.time_modified, - 
dataset.time_deleted, - dataset.rcgen, - dataset.pool_id, - dataset.ip, - dataset.port, - dataset.kind, - dataset.size_used - ) -(SELECT - dataset.id, - dataset.time_created, - dataset.time_modified, - dataset.time_deleted, - dataset.rcgen, - dataset.pool_id, - dataset.ip, - dataset.port, - dataset.kind, - dataset.size_used, - old_regions.id, - old_regions.time_created, - old_regions.time_modified, - old_regions.dataset_id, - old_regions.volume_id, - old_regions.block_size, - old_regions.blocks_per_extent, - old_regions.extent_count -FROM - (old_regions INNER JOIN dataset ON (old_regions.dataset_id = dataset.id)) -) UNION -(SELECT - updated_datasets.id, - updated_datasets.time_created, - updated_datasets.time_modified, - updated_datasets.time_deleted, - updated_datasets.rcgen, - updated_datasets.pool_id, - updated_datasets.ip, - updated_datasets.port, - updated_datasets.kind, - updated_datasets.size_used, - inserted_regions.id, - inserted_regions.time_created, - inserted_regions.time_modified, - inserted_regions.dataset_id, - inserted_regions.volume_id, - inserted_regions.block_size, - inserted_regions.blocks_per_extent, - inserted_regions.extent_count -FROM (inserted_regions INNER JOIN updated_datasets ON (inserted_regions.dataset_id = updated_datasets.id)))" - ).query + RETURNING ").sql(AllColumnsOfDataset::with_prefix("dataset")).sql(" + ) +( + SELECT ") + .sql(AllColumnsOfDataset::with_prefix("dataset")) + .sql(", ") + .sql(AllColumnsOfRegion::with_prefix("old_regions")).sql(" + FROM + (old_regions INNER JOIN dataset ON (old_regions.dataset_id = dataset.id)) +) +UNION +( + SELECT ") + .sql(AllColumnsOfDataset::with_prefix("updated_datasets")) + .sql(", ") + .sql(AllColumnsOfRegion::with_prefix("inserted_regions")).sql(" + FROM (inserted_regions INNER JOIN updated_datasets ON (inserted_regions.dataset_id = updated_datasets.id)) +)" + ).query() } #[cfg(test)] diff --git a/nexus/db-queries/src/db/raw_query_builder.rs b/nexus/db-queries/src/db/raw_query_builder.rs 
new file mode 100644 index 0000000000..a303378c0c --- /dev/null +++ b/nexus/db-queries/src/db/raw_query_builder.rs @@ -0,0 +1,151 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Utilities for building string-based queries. +//! +//! These largely side-step Diesel's type system, +//! and are recommended for more complex CTE + +use diesel::pg::Pg; +use diesel::sql_types; +use std::cell::Cell; + +// Keeps a counter to "how many bind parameters have been used" to +// aid in the construction of the query string. +struct BindParamCounter(Cell); +impl BindParamCounter { + fn new() -> Self { + Self(0.into()) + } + fn next(&self) -> i32 { + self.0.set(self.0.get() + 1); + self.0.get() + } +} + +/// A "trusted" string, which can be used to construct SQL queries even +/// though it isn't static. We use "trust" to refer to "protection from +/// SQL injections". +/// +/// This is basically a workaround for cases where we haven't yet been +/// able to construct a query at compile-time. +pub enum TrustedStr { + Static(&'static str), + ValidatedExplicitly(String), +} + +impl TrustedStr { + /// Explicitly constructs a string, with a name that hopefully + /// gives callers some pause when calling this API. + /// + /// If arbitrary user input is provided here, this string COULD + /// cause SQL injection attacks, so each call-site should have a + /// justification for "why it's safe". 
+ pub fn this_string_wont_cause_sql_injections(s: String) -> Self { + Self::ValidatedExplicitly(s) + } + + #[cfg(test)] + pub fn as_str(&self) -> &str { + match self { + Self::Static(s) => s, + Self::ValidatedExplicitly(s) => s.as_str(), + } + } +} + +impl From<&'static str> for TrustedStr { + fn from(s: &'static str) -> Self { + Self::Static(s) + } +} + +trait SqlQueryBinds { + fn add_bind(self, bind_counter: &BindParamCounter) -> Self; +} + +impl<'a, Query> SqlQueryBinds + for diesel::query_builder::BoxedSqlQuery<'a, Pg, Query> +{ + fn add_bind(self, bind_counter: &BindParamCounter) -> Self { + self.sql("$").sql(bind_counter.next().to_string()) + } +} + +type BoxedQuery = diesel::query_builder::BoxedSqlQuery< + 'static, + Pg, + diesel::query_builder::SqlQuery, +>; + +/// A small wrapper around [diesel::query_builder::BoxedSqlQuery] which +/// assists with counting bind parameters and recommends avoiding the usage of +/// any non-static strings in query construction. +// NOTE: I'd really like to eventually be able to construct SQL statements +// entirely at compile-time, but the combination of "const generics" and "const +// fns" in stable Rust just isn't there yet. +// +// It's definitely possible to create static string builders that operate +// entirely at compile-time, like: +// https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=26d0276648c3315f285372a19d0d492f +// +// But this relies on nightly features. +pub struct QueryBuilder { + query: BoxedQuery, + bind_counter: BindParamCounter, +} + +impl QueryBuilder { + pub fn new() -> Self { + Self { + query: diesel::sql_query("").into_boxed(), + bind_counter: BindParamCounter::new(), + } + } + + /// Identifies that a bind parameter should exist in this location within + /// the SQL string. + /// + /// This should be called the same number of times as [Self::bind]. 
It is, + /// however, a distinct method, as "identifying bind params" should be + /// decoupled from "using bind parameters" to have an efficient statement + /// cache. + pub fn param(self) -> Self { + Self { + query: self + .query + .sql("$") + .sql(self.bind_counter.next().to_string()), + bind_counter: self.bind_counter, + } + } + + /// Slightly more strict than the "sql" method of Diesel's SqlQuery. + /// Only permits strings which have been validated intentionally to limit + /// susceptibility to SQL injection. + /// + /// See the documentation of [TrustedStr] for more details. + pub fn sql>(self, s: S) -> Self { + let query = match s.into() { + TrustedStr::Static(s) => self.query.sql(s), + TrustedStr::ValidatedExplicitly(s) => self.query.sql(s), + }; + Self { query, bind_counter: self.bind_counter } + } + + /// A call-through function to [diesel::query_builder::BoxedSqlQuery]. + pub fn bind(self, b: Value) -> Self + where + Pg: sql_types::HasSqlType, + Value: diesel::serialize::ToSql + Send + 'static, + BindSt: Send + 'static, + { + Self { query: self.query.bind(b), bind_counter: self.bind_counter } + } + + /// Takes the final boxed query + pub fn query(self) -> BoxedQuery { + self.query + } +} From a756c26c5ebd44587abc2dd738041f5b909869c7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 14 Feb 2024 13:44:54 -0800 Subject: [PATCH 06/19] Improve logging to help me debug --- nexus/db-queries/src/db/datastore/region.rs | 6 +++++ nexus/src/app/sagas/mod.rs | 23 +++++++++++------ nexus/src/app/sagas/snapshot_create.rs | 28 +++++++++++++++++---- 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index 24298c4ede..b334ac1085 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -153,6 +153,12 @@ impl DataStore { .map(|DatasetAndRegion { dataset, region }| (dataset, region)) .collect::>(); + info!( + 
self.log, + "Allocated regions for volume"; + "volume_id" => %volume_id, + "datasets_and_regions" => ?dataset_and_regions, + ); Ok(dataset_and_regions) } diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index e9f800c61b..7edb3a10ec 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -358,8 +358,8 @@ where Err(progenitor_client::Error::CommunicationError(e)) => { warn!( log, - "saw transient communication error {}, retrying...", - e, + "saw transient communication error, retrying..."; + "error" => %e, ); Err(backoff::BackoffError::transient( @@ -382,16 +382,23 @@ where } // Anything else is a permanent error - _ => Err(backoff::BackoffError::Permanent( - progenitor_client::Error::ErrorResponse( - response_value, - ), - )), + _ => { + warn!( + log, + "saw response implying permanent error, aborting"; + "response" => ?response_value + ); + Err(backoff::BackoffError::Permanent( + progenitor_client::Error::ErrorResponse( + response_value, + ), + )) + }, } } Err(e) => { - warn!(log, "saw permanent error {}, aborting", e,); + warn!(log, "saw permanent error, aborting"; "error" => %e); Err(backoff::BackoffError::Permanent(e)) } diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index e017ab377b..3d2dfbe458 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -1175,7 +1175,7 @@ async fn ssc_start_running_snapshot( ); let snapshot_id = sagactx.lookup::("snapshot_id")?; - info!(log, "starting running snapshot for {snapshot_id}"); + info!(log, "starting running snapshot"; "snapshot_id" => %snapshot_id); let (.., disk) = LookupPath::new(&opctx, &osagactx.datastore()) .disk_id(params.disk_id) @@ -1198,7 +1198,13 @@ async fn ssc_start_running_snapshot( let url = format!("http://{}", dataset.address()); let client = CrucibleAgentClient::new(&url); - info!(log, "dataset {:?} region {:?} url {}", dataset, region, url); + info!( + log, + "contacting 
crucible agent to confirm region exists"; + "dataset" => ?dataset, + "region" => ?region, + "url" => url, + ); // Validate with the Crucible agent that the snapshot exists let crucible_region = retry_until_known_result(log, || async { @@ -1208,7 +1214,11 @@ async fn ssc_start_running_snapshot( .map_err(|e| e.to_string()) .map_err(ActionError::action_failed)?; - info!(log, "crucible region {:?}", crucible_region); + info!( + log, + "confirmed the region exists with crucible agent"; + "crucible region" => ?crucible_region + ); let crucible_snapshot = retry_until_known_result(log, || async { client @@ -1222,7 +1232,11 @@ async fn ssc_start_running_snapshot( .map_err(|e| e.to_string()) .map_err(ActionError::action_failed)?; - info!(log, "crucible snapshot {:?}", crucible_snapshot); + info!( + log, + "successfully accessed crucible snapshot"; + "crucible snapshot" => ?crucible_snapshot + ); // Start the snapshot running let crucible_running_snapshot = @@ -1238,7 +1252,11 @@ async fn ssc_start_running_snapshot( .map_err(|e| e.to_string()) .map_err(ActionError::action_failed)?; - info!(log, "crucible running snapshot {:?}", crucible_running_snapshot); + info!( + log, + "successfully started running region snapshot"; + "crucible running snapshot" => ?crucible_running_snapshot + ); // Map from the region to the snapshot let region_addr = format!( From 1ec1c8193c20748e95e0980cc840cecb8ea0a9c5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 14 Feb 2024 13:53:14 -0800 Subject: [PATCH 07/19] Add expectorate test --- .../src/db/queries/region_allocation.rs | 35 +++++++ .../output/region_allocate_distinct_sleds.sql | 96 ++++++++++++++++++ .../output/region_allocate_random_sleds.sql | 97 +++++++++++++++++++ 3 files changed, 228 insertions(+) create mode 100644 nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql create mode 100644 nexus/db-queries/tests/output/region_allocate_random_sleds.sql diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs 
b/nexus/db-queries/src/db/queries/region_allocation.rs index dd7431d723..9169f619a8 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -304,6 +304,41 @@ mod test { use omicron_test_utils::dev; use uuid::Uuid; + // This test is a bit of a "change detector", but it's here to help with + // debugging too. If you change this query, it can be useful to see exactly + // how the output SQL has been altered. + #[tokio::test] + async fn expectorate_query() { + let volume_id = Uuid::new_v4(); + let block_size = 512; + let blocks_per_extent = 4; + let extent_count = 8; + + // First structure: "RandomWithDistinctSleds" + + let region_allocate = allocation_query( + volume_id, + block_size, + blocks_per_extent, + extent_count, + &RegionAllocationStrategy::RandomWithDistinctSleds { seed: Some(1) }, + ); + let s = diesel::debug_query::(®ion_allocate).to_string(); + expectorate::assert_contents("tests/output/region_allocate_distinct_sleds.sql", &s); + + // Second structure: "Random" + + let region_allocate = allocation_query( + volume_id, + block_size, + blocks_per_extent, + extent_count, + &RegionAllocationStrategy::Random { seed: Some(1) }, + ); + let s = diesel::debug_query::(®ion_allocate).to_string(); + expectorate::assert_contents("tests/output/region_allocate_random_sleds.sql", &s); + } + // Explain the possible forms of the SQL query to ensure that it // creates a valid SQL string. 
#[tokio::test] diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql new file mode 100644 index 0000000000..50b1637753 --- /dev/null +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -0,0 +1,96 @@ +WITH + old_regions AS ( + SELECT region.id, region.time_created, region.time_modified, region.dataset_id, region.volume_id, region.block_size, region.blocks_per_extent, region.extent_count + FROM region WHERE (region.volume_id = $1)), + old_zpool_usage AS ( + SELECT + dataset.pool_id, + sum(dataset.size_used) AS size_used + FROM dataset WHERE ((dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL)) GROUP BY dataset.pool_id), + candidate_zpools AS ( + SELECT DISTINCT ON (zpool.sled_id) + old_zpool_usage.pool_id + FROM ( + old_zpool_usage + INNER JOIN + (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id) + ) + WHERE ( + ((old_zpool_usage.size_used + $2 ) <= total_size) + AND + (sled.provision_state = 'provisionable') + ) + ORDER BY zpool.sled_id, md5((CAST(zpool.id as BYTEA) || $3))), + candidate_datasets AS ( + SELECT DISTINCT ON (dataset.pool_id) + dataset.id, + dataset.pool_id + FROM (dataset INNER JOIN candidate_zpools ON (dataset.pool_id = candidate_zpools.pool_id)) + WHERE ( + ((dataset.time_deleted IS NULL) AND + (dataset.size_used IS NOT NULL)) AND + (dataset.kind = 'crucible') + ) + ORDER BY dataset.pool_id, md5((CAST(dataset.id as BYTEA) || $4)) + ), + shuffled_candidate_datasets AS ( + SELECT + candidate_datasets.id, + candidate_datasets.pool_id + FROM candidate_datasets + ORDER BY md5((CAST(candidate_datasets.id as BYTEA) || $5)) LIMIT $6 + ), + candidate_regions AS ( + SELECT + gen_random_uuid() AS id, + now() AS time_created, + now() AS time_modified, + shuffled_candidate_datasets.id AS dataset_id, + $7 AS volume_id, + $8 AS block_size, + $9 AS blocks_per_extent, + $10 AS extent_count + FROM 
shuffled_candidate_datasets + ), + proposed_dataset_changes AS ( + SELECT + candidate_regions.dataset_id AS id, + dataset.pool_id AS pool_id, + ((candidate_regions.block_size * candidate_regions.blocks_per_extent) * candidate_regions.extent_count) AS size_used_delta + FROM (candidate_regions INNER JOIN dataset ON (dataset.id = candidate_regions.dataset_id)) + ), + do_insert AS ( + SELECT ((( + ((SELECT COUNT(*) FROM old_regions LIMIT 1) < $11) AND + CAST(IF(((SELECT COUNT(*) FROM candidate_zpools LIMIT 1) >= $12), 'TRUE', 'Not enough space') AS BOOL)) AND + CAST(IF(((SELECT COUNT(*) FROM candidate_regions LIMIT 1) >= $13), 'TRUE', 'Not enough datasets') AS BOOL)) AND + CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= $14), 'TRUE', 'Not enough unique zpools selected') AS BOOL) + ) AS insert + ), + inserted_regions AS ( + INSERT INTO region + (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count) + SELECT candidate_regions.id, candidate_regions.time_created, candidate_regions.time_modified, candidate_regions.dataset_id, candidate_regions.volume_id, candidate_regions.block_size, candidate_regions.blocks_per_extent, candidate_regions.extent_count + FROM candidate_regions + WHERE + (SELECT do_insert.insert FROM do_insert LIMIT 1) + RETURNING region.id, region.time_created, region.time_modified, region.dataset_id, region.volume_id, region.block_size, region.blocks_per_extent, region.extent_count + ), + updated_datasets AS ( + UPDATE dataset SET + size_used = (dataset.size_used + (SELECT proposed_dataset_changes.size_used_delta FROM proposed_dataset_changes WHERE (proposed_dataset_changes.id = dataset.id) LIMIT 1)) + WHERE ( + (dataset.id = ANY(SELECT proposed_dataset_changes.id FROM proposed_dataset_changes)) AND + (SELECT do_insert.insert FROM do_insert LIMIT 1)) + RETURNING dataset.id, dataset.time_created, 
dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used + ) +( + SELECT dataset.id, dataset.time_created, dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used, old_regions.id, old_regions.time_created, old_regions.time_modified, old_regions.dataset_id, old_regions.volume_id, old_regions.block_size, old_regions.blocks_per_extent, old_regions.extent_count + FROM + (old_regions INNER JOIN dataset ON (old_regions.dataset_id = dataset.id)) +) +UNION +( + SELECT updated_datasets.id, updated_datasets.time_created, updated_datasets.time_modified, updated_datasets.time_deleted, updated_datasets.rcgen, updated_datasets.pool_id, updated_datasets.ip, updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, inserted_regions.dataset_id, inserted_regions.volume_id, inserted_regions.block_size, inserted_regions.blocks_per_extent, inserted_regions.extent_count + FROM (inserted_regions INNER JOIN updated_datasets ON (inserted_regions.dataset_id = updated_datasets.id)) +) -- binds: [b1aa0a63-9afa-4fc3-b61e-0ebdc7854840, 16384, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3, b1aa0a63-9afa-4fc3-b61e-0ebdc7854840, 512, 4, 8, 3, 3, 3, 3] \ No newline at end of file diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql new file mode 100644 index 0000000000..e40caee93e --- /dev/null +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -0,0 +1,97 @@ +WITH + old_regions AS ( + SELECT region.id, region.time_created, region.time_modified, region.dataset_id, region.volume_id, region.block_size, region.blocks_per_extent, 
region.extent_count + FROM region WHERE (region.volume_id = $1)), + old_zpool_usage AS ( + SELECT + dataset.pool_id, + sum(dataset.size_used) AS size_used + FROM dataset WHERE ((dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL)) GROUP BY dataset.pool_id), + candidate_zpools AS ( + SELECT + old_zpool_usage.pool_id + FROM ( + old_zpool_usage + INNER JOIN + (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) + ON (zpool.id = old_zpool_usage.pool_id) + ) + WHERE ( + ((old_zpool_usage.size_used + $2 ) <= total_size) + AND + (sled.provision_state = 'provisionable') + ) + ), + candidate_datasets AS ( + SELECT DISTINCT ON (dataset.pool_id) + dataset.id, + dataset.pool_id + FROM (dataset INNER JOIN candidate_zpools ON (dataset.pool_id = candidate_zpools.pool_id)) + WHERE ( + ((dataset.time_deleted IS NULL) AND + (dataset.size_used IS NOT NULL)) AND + (dataset.kind = 'crucible') + ) + ORDER BY dataset.pool_id, md5((CAST(dataset.id as BYTEA) || $3)) + ), + shuffled_candidate_datasets AS ( + SELECT + candidate_datasets.id, + candidate_datasets.pool_id + FROM candidate_datasets + ORDER BY md5((CAST(candidate_datasets.id as BYTEA) || $4)) LIMIT $5 + ), + candidate_regions AS ( + SELECT + gen_random_uuid() AS id, + now() AS time_created, + now() AS time_modified, + shuffled_candidate_datasets.id AS dataset_id, + $6 AS volume_id, + $7 AS block_size, + $8 AS blocks_per_extent, + $9 AS extent_count + FROM shuffled_candidate_datasets + ), + proposed_dataset_changes AS ( + SELECT + candidate_regions.dataset_id AS id, + dataset.pool_id AS pool_id, + ((candidate_regions.block_size * candidate_regions.blocks_per_extent) * candidate_regions.extent_count) AS size_used_delta + FROM (candidate_regions INNER JOIN dataset ON (dataset.id = candidate_regions.dataset_id)) + ), + do_insert AS ( + SELECT ((( + ((SELECT COUNT(*) FROM old_regions LIMIT 1) < $10) AND + CAST(IF(((SELECT COUNT(*) FROM candidate_zpools LIMIT 1) >= $11), 'TRUE', 'Not enough space') AS BOOL)) AND + 
CAST(IF(((SELECT COUNT(*) FROM candidate_regions LIMIT 1) >= $12), 'TRUE', 'Not enough datasets') AS BOOL)) AND + CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= $13), 'TRUE', 'Not enough unique zpools selected') AS BOOL) + ) AS insert + ), + inserted_regions AS ( + INSERT INTO region + (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count) + SELECT candidate_regions.id, candidate_regions.time_created, candidate_regions.time_modified, candidate_regions.dataset_id, candidate_regions.volume_id, candidate_regions.block_size, candidate_regions.blocks_per_extent, candidate_regions.extent_count + FROM candidate_regions + WHERE + (SELECT do_insert.insert FROM do_insert LIMIT 1) + RETURNING region.id, region.time_created, region.time_modified, region.dataset_id, region.volume_id, region.block_size, region.blocks_per_extent, region.extent_count + ), + updated_datasets AS ( + UPDATE dataset SET + size_used = (dataset.size_used + (SELECT proposed_dataset_changes.size_used_delta FROM proposed_dataset_changes WHERE (proposed_dataset_changes.id = dataset.id) LIMIT 1)) + WHERE ( + (dataset.id = ANY(SELECT proposed_dataset_changes.id FROM proposed_dataset_changes)) AND + (SELECT do_insert.insert FROM do_insert LIMIT 1)) + RETURNING dataset.id, dataset.time_created, dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used + ) +( + SELECT dataset.id, dataset.time_created, dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used, old_regions.id, old_regions.time_created, old_regions.time_modified, old_regions.dataset_id, old_regions.volume_id, old_regions.block_size, old_regions.blocks_per_extent, old_regions.extent_count + FROM + (old_regions INNER JOIN dataset ON 
(old_regions.dataset_id = dataset.id)) +) +UNION +( + SELECT updated_datasets.id, updated_datasets.time_created, updated_datasets.time_modified, updated_datasets.time_deleted, updated_datasets.rcgen, updated_datasets.pool_id, updated_datasets.ip, updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, inserted_regions.dataset_id, inserted_regions.volume_id, inserted_regions.block_size, inserted_regions.blocks_per_extent, inserted_regions.extent_count + FROM (inserted_regions INNER JOIN updated_datasets ON (inserted_regions.dataset_id = updated_datasets.id)) +) -- binds: [b1aa0a63-9afa-4fc3-b61e-0ebdc7854840, 16384, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3, b1aa0a63-9afa-4fc3-b61e-0ebdc7854840, 512, 4, 8, 3, 3, 3, 3] \ No newline at end of file From b2bdaff6d3f5bc1eda3d8c110ac11c285feb88dc Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 14 Feb 2024 14:47:40 -0800 Subject: [PATCH 08/19] Spelling, stable EXPECTORATE tests --- .../db-queries/src/db/queries/region_allocation.rs | 14 +++++++------- .../output/region_allocate_distinct_sleds.sql | 2 +- .../tests/output/region_allocate_random_sleds.sql | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 9169f619a8..7314c47591 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -98,7 +98,7 @@ pub fn allocation_query( let seed = seed.to_le_bytes().to_vec(); let size_delta = block_size * blocks_per_extent * extent_count; - let redunancy: i64 = i64::try_from(REGION_REDUNDANCY_THRESHOLD).unwrap(); + let redundancy: i64 = i64::try_from(REGION_REDUNDANCY_THRESHOLD).unwrap(); let builder = QueryBuilder::new().sql( // Find all old regions associated with a 
particular volume @@ -191,7 +191,7 @@ pub fn allocation_query( ORDER BY md5((CAST(candidate_datasets.id as BYTEA) || ").param().sql(")) LIMIT ").param().sql(" ),") .bind::(seed) - .bind::(redunancy) + .bind::(redundancy) // Create the regions-to-be-inserted for the volume. .sql(" candidate_regions AS ( @@ -255,10 +255,10 @@ pub fn allocation_query( CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL, "') AS BOOL) ) AS insert ),")) - .bind::(redunancy) - .bind::(redunancy) - .bind::(redunancy) - .bind::(redunancy) + .bind::(redundancy) + .bind::(redundancy) + .bind::(redundancy) + .bind::(redundancy) .sql(" inserted_regions AS ( INSERT INTO region @@ -309,7 +309,7 @@ mod test { // how the output SQL has been altered. #[tokio::test] async fn expectorate_query() { - let volume_id = Uuid::new_v4(); + let volume_id = Uuid::nil(); let block_size = 512; let blocks_per_extent = 4; let extent_count = 8; diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql index 50b1637753..c971d9bdf2 100644 --- a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -93,4 +93,4 @@ UNION ( SELECT updated_datasets.id, updated_datasets.time_created, updated_datasets.time_modified, updated_datasets.time_deleted, updated_datasets.rcgen, updated_datasets.pool_id, updated_datasets.ip, updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, inserted_regions.dataset_id, inserted_regions.volume_id, inserted_regions.block_size, inserted_regions.blocks_per_extent, inserted_regions.extent_count FROM (inserted_regions INNER JOIN updated_datasets ON 
(inserted_regions.dataset_id = updated_datasets.id)) -) -- binds: [b1aa0a63-9afa-4fc3-b61e-0ebdc7854840, 16384, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3, b1aa0a63-9afa-4fc3-b61e-0ebdc7854840, 512, 4, 8, 3, 3, 3, 3] \ No newline at end of file +) -- binds: [00000000-0000-0000-0000-000000000000, 16384, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3, 00000000-0000-0000-0000-000000000000, 512, 4, 8, 3, 3, 3, 3] \ No newline at end of file diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql index e40caee93e..d5b8e5f5fb 100644 --- a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -94,4 +94,4 @@ UNION ( SELECT updated_datasets.id, updated_datasets.time_created, updated_datasets.time_modified, updated_datasets.time_deleted, updated_datasets.rcgen, updated_datasets.pool_id, updated_datasets.ip, updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, inserted_regions.dataset_id, inserted_regions.volume_id, inserted_regions.block_size, inserted_regions.blocks_per_extent, inserted_regions.extent_count FROM (inserted_regions INNER JOIN updated_datasets ON (inserted_regions.dataset_id = updated_datasets.id)) -) -- binds: [b1aa0a63-9afa-4fc3-b61e-0ebdc7854840, 16384, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3, b1aa0a63-9afa-4fc3-b61e-0ebdc7854840, 512, 4, 8, 3, 3, 3, 3] \ No newline at end of file +) -- binds: [00000000-0000-0000-0000-000000000000, 16384, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0], 3, 00000000-0000-0000-0000-000000000000, 512, 4, 8, 3, 3, 3, 3] \ No newline at end of file From 31f697be7e549abab0905cf867ccb0bb2ad270f4 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 14 Feb 2024 14:57:06 -0800 Subject: [PATCH 09/19] Better string validation visibility --- nexus/db-queries/src/db/column_walker.rs | 2 +- nexus/db-queries/src/db/raw_query_builder.rs | 30 +++++++++++--------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/nexus/db-queries/src/db/column_walker.rs b/nexus/db-queries/src/db/column_walker.rs index 6e21e0d899..cace2ba5fb 100644 --- a/nexus/db-queries/src/db/column_walker.rs +++ b/nexus/db-queries/src/db/column_walker.rs @@ -36,7 +36,7 @@ macro_rules! impl_column_walker { // are not controlled by an arbitrary user, and // - The "prefix" type, which is a "&'static str" (AKA, // hopefully known at compile-time, and not leaked). - TrustedStr::this_string_wont_cause_sql_injections( + TrustedStr::i_take_responsibility_for_validating_this_string( [$([prefix, $column::NAME].join("."),)+].join(", ") ) } diff --git a/nexus/db-queries/src/db/raw_query_builder.rs b/nexus/db-queries/src/db/raw_query_builder.rs index a303378c0c..b0094f09ce 100644 --- a/nexus/db-queries/src/db/raw_query_builder.rs +++ b/nexus/db-queries/src/db/raw_query_builder.rs @@ -30,10 +30,7 @@ impl BindParamCounter { /// /// This is basically a workaround for cases where we haven't yet been /// able to construct a query at compile-time. -pub enum TrustedStr { - Static(&'static str), - ValidatedExplicitly(String), -} +pub struct TrustedStr(TrustedStrVariants); impl TrustedStr { /// Explicitly constructs a string, with a name that hopefully @@ -42,25 +39,32 @@ impl TrustedStr { /// If arbitrary user input is provided here, this string COULD /// cause SQL injection attacks, so each call-site should have a /// justification for "why it's safe". 
- pub fn this_string_wont_cause_sql_injections(s: String) -> Self { - Self::ValidatedExplicitly(s) + pub fn i_take_responsibility_for_validating_this_string(s: String) -> Self { + Self(TrustedStrVariants::ValidatedExplicitly(s)) } #[cfg(test)] pub fn as_str(&self) -> &str { - match self { - Self::Static(s) => s, - Self::ValidatedExplicitly(s) => s.as_str(), + match &self.0 { + TrustedStrVariants::Static(s) => s, + TrustedStrVariants::ValidatedExplicitly(s) => s.as_str(), } } } impl From<&'static str> for TrustedStr { fn from(s: &'static str) -> Self { - Self::Static(s) + Self(TrustedStrVariants::Static(s)) } } +// This enum should be kept non-pub to make it harder to accidentally +// construct a "ValidatedExplicitly" variant. +enum TrustedStrVariants { + Static(&'static str), + ValidatedExplicitly(String), +} + trait SqlQueryBinds { fn add_bind(self, bind_counter: &BindParamCounter) -> Self; } @@ -127,9 +131,9 @@ impl QueryBuilder { /// /// See the documentation of [TrustedStr] for more details. 
pub fn sql>(self, s: S) -> Self { - let query = match s.into() { - TrustedStr::Static(s) => self.query.sql(s), - TrustedStr::ValidatedExplicitly(s) => self.query.sql(s), + let query = match s.into().0 { + TrustedStrVariants::Static(s) => self.query.sql(s), + TrustedStrVariants::ValidatedExplicitly(s) => self.query.sql(s), }; Self { query, bind_counter: self.bind_counter } } From 2060750c2adbf0a8c45cd65431d97fa86e5db48a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 14 Feb 2024 15:41:57 -0800 Subject: [PATCH 10/19] fmt --- .../db-queries/src/db/queries/region_allocation.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 7314c47591..407f2df865 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -321,10 +321,15 @@ mod test { block_size, blocks_per_extent, extent_count, - &RegionAllocationStrategy::RandomWithDistinctSleds { seed: Some(1) }, + &RegionAllocationStrategy::RandomWithDistinctSleds { + seed: Some(1), + }, ); let s = diesel::debug_query::(®ion_allocate).to_string(); - expectorate::assert_contents("tests/output/region_allocate_distinct_sleds.sql", &s); + expectorate::assert_contents( + "tests/output/region_allocate_distinct_sleds.sql", + &s, + ); // Second structure: "Random" @@ -336,7 +341,10 @@ mod test { &RegionAllocationStrategy::Random { seed: Some(1) }, ); let s = diesel::debug_query::(®ion_allocate).to_string(); - expectorate::assert_contents("tests/output/region_allocate_random_sleds.sql", &s); + expectorate::assert_contents( + "tests/output/region_allocate_random_sleds.sql", + &s, + ); } // Explain the possible forms of the SQL query to ensure that it From 291791deba716e796f3e0837ad05cbb483e74736 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 14 Feb 2024 16:27:27 -0800 Subject: [PATCH 11/19] Add test which is breaking 
in a more succinct way --- nexus/db-queries/src/db/datastore/mod.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index f9e0be81c1..bc20d7a6cc 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -877,10 +877,21 @@ mod test { assert_eq!(expected_region_count, dataset_and_regions.len()); let mut disk_datasets = HashSet::new(); let mut disk_zpools = HashSet::new(); + let mut regions = HashSet::new(); for (dataset, region) in dataset_and_regions { // Must be 3 unique datasets assert!(disk_datasets.insert(dataset.id())); + // All regions should be unique + assert!(regions.insert(region.id())); + + // Check there's no cross contamination between returned UUIDs + // + // This is a little goofy, but it catches a bug that has + // happened before. The returned columns share names (like + // "id"), so we need to process them in-order. + assert!(regions.get(&dataset.id()).is_none()); + assert!(disk_datasets.get(®ion.id()).is_none()); // Dataset must not be non-provisionable. 
assert_ne!(dataset.id(), non_provisionable_dataset_id); From c868f715ca284b6d3d61a1cfe09c82447ab67b02 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 14 Feb 2024 16:58:14 -0800 Subject: [PATCH 12/19] arcane chicanery which bends the type system to our will --- nexus/db-macros/src/lib.rs | 4 +- nexus/db-model/src/dataset.rs | 1 - nexus/db-model/src/region.rs | 1 - nexus/db-queries/src/db/datastore/region.rs | 21 +++------ .../src/db/queries/region_allocation.rs | 13 +++--- nexus/db-queries/src/db/raw_query_builder.rs | 44 ++++++++++++++++++- 6 files changed, 56 insertions(+), 28 deletions(-) diff --git a/nexus/db-macros/src/lib.rs b/nexus/db-macros/src/lib.rs index 30208412a1..fd15b59128 100644 --- a/nexus/db-macros/src/lib.rs +++ b/nexus/db-macros/src/lib.rs @@ -280,7 +280,7 @@ fn build_resource_identity( let identity_name = format_ident!("{}Identity", struct_name); quote! { #[doc = #identity_doc] - #[derive(Clone, Debug, PartialEq, Eq, Selectable, Queryable, QueryableByName, Insertable, serde::Serialize, serde::Deserialize)] + #[derive(Clone, Debug, PartialEq, Eq, Selectable, Queryable, Insertable, serde::Serialize, serde::Deserialize)] #[diesel(table_name = #table_name) ] pub struct #identity_name { pub id: ::uuid::Uuid, @@ -322,7 +322,7 @@ fn build_asset_identity( let identity_name = format_ident!("{}Identity", struct_name); quote! { #[doc = #identity_doc] - #[derive(Clone, Debug, PartialEq, Selectable, Queryable, QueryableByName, Insertable, serde::Serialize, serde::Deserialize)] + #[derive(Clone, Debug, PartialEq, Selectable, Queryable, Insertable, serde::Serialize, serde::Deserialize)] #[diesel(table_name = #table_name) ] pub struct #identity_name { pub id: ::uuid::Uuid, diff --git a/nexus/db-model/src/dataset.rs b/nexus/db-model/src/dataset.rs index 0ea465bebc..65c0070509 100644 --- a/nexus/db-model/src/dataset.rs +++ b/nexus/db-model/src/dataset.rs @@ -18,7 +18,6 @@ use uuid::Uuid; /// available to a service on the Sled. 
#[derive( Queryable, - QueryableByName, Insertable, Debug, Clone, diff --git a/nexus/db-model/src/region.rs b/nexus/db-model/src/region.rs index 0ea6f82e65..fefc4f4fce 100644 --- a/nexus/db-model/src/region.rs +++ b/nexus/db-model/src/region.rs @@ -15,7 +15,6 @@ use uuid::Uuid; /// allocated within a volume. #[derive( Queryable, - QueryableByName, Insertable, Debug, Clone, diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index b334ac1085..c7b98b9a82 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -128,15 +128,7 @@ impl DataStore { let (blocks_per_extent, extent_count) = Self::get_crucible_allocation(&block_size, size); - #[derive(diesel::QueryableByName, Queryable)] - struct DatasetAndRegion { - #[diesel(embed)] - dataset: Dataset, - #[diesel(embed)] - region: Region, - } - - let dataset_and_regions: Vec<_> = + let dataset_and_regions: Vec<(Dataset, Region)> = crate::db::queries::region_allocation::allocation_query( volume_id, block_size.to_bytes() as u64, @@ -144,14 +136,11 @@ impl DataStore { extent_count, allocation_strategy, ) - .get_results_async::( - &*self.pool_connection_authorized(&opctx).await?, - ) + .get_results_async(&*self.pool_connection_authorized(&opctx).await?) .await - .map_err(|e| crate::db::queries::region_allocation::from_diesel(e))? 
- .into_iter() - .map(|DatasetAndRegion { dataset, region }| (dataset, region)) - .collect::>(); + .map_err(|e| { + crate::db::queries::region_allocation::from_diesel(e) + })?; info!( self.log, diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 407f2df865..3d39808831 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -6,7 +6,8 @@ use crate::db::column_walker::AllColumnsOf; use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD; -use crate::db::raw_query_builder::QueryBuilder; +use crate::db::model::{Dataset, Region}; +use crate::db::raw_query_builder::{QueryBuilder, TypedSqlQuery}; use crate::db::schema; use crate::db::true_or_cast_error::matches_sentinel; use const_format::concatcp; @@ -63,17 +64,17 @@ pub fn from_diesel(e: DieselError) -> external::Error { error::public_error_from_diesel(e, error::ErrorHandler::Server) } +type SelectableSql = < + >::SelectExpression as diesel::Expression +>::SqlType; + pub fn allocation_query( volume_id: uuid::Uuid, block_size: u64, blocks_per_extent: u64, extent_count: u64, allocation_strategy: &RegionAllocationStrategy, -) -> diesel::query_builder::BoxedSqlQuery< - 'static, - Pg, - diesel::query_builder::SqlQuery, -> { +) -> TypedSqlQuery<(SelectableSql, SelectableSql)> { let (seed, distinct_sleds) = { let (input_seed, distinct_sleds) = match allocation_strategy { RegionAllocationStrategy::Random { seed } => (seed, false), diff --git a/nexus/db-queries/src/db/raw_query_builder.rs b/nexus/db-queries/src/db/raw_query_builder.rs index b0094f09ce..65ff22f86f 100644 --- a/nexus/db-queries/src/db/raw_query_builder.rs +++ b/nexus/db-queries/src/db/raw_query_builder.rs @@ -7,9 +7,13 @@ //! These largely side-step Diesel's type system, //! 
and are recommended for more complex CTE +use crate::db::pool::DbConnection; use diesel::pg::Pg; +use diesel::query_builder::{AstPass, Query, QueryFragment, QueryId}; use diesel::sql_types; +use diesel::RunQueryDsl; use std::cell::Cell; +use std::marker::PhantomData; // Keeps a counter to "how many bind parameters have been used" to // aid in the construction of the query string. @@ -149,7 +153,43 @@ impl QueryBuilder { } /// Takes the final boxed query - pub fn query(self) -> BoxedQuery { - self.query + pub fn query(self) -> TypedSqlQuery { + TypedSqlQuery { inner: self.query, _phantom: PhantomData } } } + +/// Diesel's [diesel::query_builder::BoxedSqlQuery] has a few drawbacks that +/// make wrapper more palatable: +/// +/// - It always implements "Query" with SqlType = Untyped, so a caller could try to +/// execute this query and get back any type. +/// - It forces the usage of "QueryableByName", which acts wrong if we're +/// returning multiple columns with the same name (this is normal! If you want +/// to UNION two objects that both have "id" columns, this happens). 
+#[derive(QueryId)] +pub struct TypedSqlQuery { + inner: diesel::query_builder::BoxedSqlQuery< + 'static, + Pg, + diesel::query_builder::SqlQuery, + >, + _phantom: PhantomData, +} + +impl QueryFragment for TypedSqlQuery { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + out.unsafe_to_cache_prepared(); + + self.inner.walk_ast(out.reborrow())?; + Ok(()) + } +} + +impl RunQueryDsl for TypedSqlQuery {} + +impl Query for TypedSqlQuery { + type SqlType = T; +} From e7c71d7b63caac7046fc1eae3e0f11b71272bf9d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 15 Feb 2024 12:32:36 -0800 Subject: [PATCH 13/19] Auto-format SQL --- .../src/db/queries/region_allocation.rs | 12 +- .../output/region_allocate_distinct_sleds.sql | 335 ++++++++++++----- .../output/region_allocate_random_sleds.sql | 336 +++++++++++++----- test-utils/src/dev/db.rs | 49 +++ 4 files changed, 550 insertions(+), 182 deletions(-) diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 3d39808831..31bb277af6 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -326,7 +326,11 @@ mod test { seed: Some(1), }, ); - let s = diesel::debug_query::(®ion_allocate).to_string(); + let s = dev::db::format_sql( + &diesel::debug_query::(®ion_allocate).to_string(), + ) + .await + .unwrap(); expectorate::assert_contents( "tests/output/region_allocate_distinct_sleds.sql", &s, @@ -341,7 +345,11 @@ mod test { extent_count, &RegionAllocationStrategy::Random { seed: Some(1) }, ); - let s = diesel::debug_query::(®ion_allocate).to_string(); + let s = dev::db::format_sql( + &diesel::debug_query::(®ion_allocate).to_string(), + ) + .await + .unwrap(); expectorate::assert_contents( "tests/output/region_allocate_random_sleds.sql", &s, diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql 
b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql index c971d9bdf2..be22f92ed1 100644 --- a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -1,96 +1,253 @@ WITH - old_regions AS ( - SELECT region.id, region.time_created, region.time_modified, region.dataset_id, region.volume_id, region.block_size, region.blocks_per_extent, region.extent_count - FROM region WHERE (region.volume_id = $1)), - old_zpool_usage AS ( - SELECT - dataset.pool_id, - sum(dataset.size_used) AS size_used - FROM dataset WHERE ((dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL)) GROUP BY dataset.pool_id), - candidate_zpools AS ( - SELECT DISTINCT ON (zpool.sled_id) - old_zpool_usage.pool_id - FROM ( + old_regions + AS ( + SELECT + region.id, + region.time_created, + region.time_modified, + region.dataset_id, + region.volume_id, + region.block_size, + region.blocks_per_extent, + region.extent_count + FROM + region + WHERE + region.volume_id = $1 + ), + old_zpool_usage + AS ( + SELECT + dataset.pool_id, sum(dataset.size_used) AS size_used + FROM + dataset + WHERE + (dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL) + GROUP BY + dataset.pool_id + ), + candidate_zpools + AS ( + SELECT + DISTINCT ON (zpool.sled_id) old_zpool_usage.pool_id + FROM old_zpool_usage - INNER JOIN - (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id) - ) - WHERE ( - ((old_zpool_usage.size_used + $2 ) <= total_size) - AND - (sled.provision_state = 'provisionable') - ) - ORDER BY zpool.sled_id, md5((CAST(zpool.id as BYTEA) || $3))), - candidate_datasets AS ( - SELECT DISTINCT ON (dataset.pool_id) - dataset.id, - dataset.pool_id - FROM (dataset INNER JOIN candidate_zpools ON (dataset.pool_id = candidate_zpools.pool_id)) - WHERE ( - ((dataset.time_deleted IS NULL) AND - (dataset.size_used IS NOT NULL)) AND - (dataset.kind = 'crucible') + INNER JOIN 
(zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON + zpool.id = old_zpool_usage.pool_id + WHERE + (old_zpool_usage.size_used + $2) <= total_size AND sled.provision_state = 'provisionable' + ORDER BY + zpool.sled_id, md5(CAST(zpool.id AS BYTES) || $3) + ), + candidate_datasets + AS ( + SELECT + DISTINCT ON (dataset.pool_id) dataset.id, dataset.pool_id + FROM + dataset INNER JOIN candidate_zpools ON dataset.pool_id = candidate_zpools.pool_id + WHERE + ((dataset.time_deleted IS NULL) AND (dataset.size_used IS NOT NULL)) + AND dataset.kind = 'crucible' + ORDER BY + dataset.pool_id, md5(CAST(dataset.id AS BYTES) || $4) + ), + shuffled_candidate_datasets + AS ( + SELECT + candidate_datasets.id, candidate_datasets.pool_id + FROM + candidate_datasets + ORDER BY + md5(CAST(candidate_datasets.id AS BYTES) || $5) + LIMIT + $6 + ), + candidate_regions + AS ( + SELECT + gen_random_uuid() AS id, + now() AS time_created, + now() AS time_modified, + shuffled_candidate_datasets.id AS dataset_id, + $7 AS volume_id, + $8 AS block_size, + $9 AS blocks_per_extent, + $10 AS extent_count + FROM + shuffled_candidate_datasets + ), + proposed_dataset_changes + AS ( + SELECT + candidate_regions.dataset_id AS id, + dataset.pool_id AS pool_id, + candidate_regions.block_size + * candidate_regions.blocks_per_extent + * candidate_regions.extent_count + AS size_used_delta + FROM + candidate_regions INNER JOIN dataset ON dataset.id = candidate_regions.dataset_id + ), + do_insert + AS ( + SELECT + ( + ( + (SELECT count(*) FROM old_regions LIMIT 1) < $11 + AND CAST( + IF( + ((SELECT count(*) FROM candidate_zpools LIMIT 1) >= $12), + 'TRUE', + 'Not enough space' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ((SELECT count(*) FROM candidate_regions LIMIT 1) >= $13), + 'TRUE', + 'Not enough datasets' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ( + ( + SELECT + count(DISTINCT dataset.pool_id) + FROM + candidate_regions + INNER JOIN dataset ON candidate_regions.dataset_id = dataset.id + LIMIT + 1 + ) + >= 
$14 + ), + 'TRUE', + 'Not enough unique zpools selected' + ) + AS BOOL + ) + AS insert + ), + inserted_regions + AS ( + INSERT + INTO + region + ( + id, + time_created, + time_modified, + dataset_id, + volume_id, + block_size, + blocks_per_extent, + extent_count + ) + SELECT + candidate_regions.id, + candidate_regions.time_created, + candidate_regions.time_modified, + candidate_regions.dataset_id, + candidate_regions.volume_id, + candidate_regions.block_size, + candidate_regions.blocks_per_extent, + candidate_regions.extent_count + FROM + candidate_regions + WHERE + (SELECT do_insert.insert FROM do_insert LIMIT 1) + RETURNING + region.id, + region.time_created, + region.time_modified, + region.dataset_id, + region.volume_id, + region.block_size, + region.blocks_per_extent, + region.extent_count + ), + updated_datasets + AS ( + UPDATE + dataset + SET + size_used + = dataset.size_used + + ( + SELECT + proposed_dataset_changes.size_used_delta + FROM + proposed_dataset_changes + WHERE + proposed_dataset_changes.id = dataset.id + LIMIT + 1 + ) + WHERE + dataset.id = ANY (SELECT proposed_dataset_changes.id FROM proposed_dataset_changes) + AND (SELECT do_insert.insert FROM do_insert LIMIT 1) + RETURNING + dataset.id, + dataset.time_created, + dataset.time_modified, + dataset.time_deleted, + dataset.rcgen, + dataset.pool_id, + dataset.ip, + dataset.port, + dataset.kind, + dataset.size_used ) - ORDER BY dataset.pool_id, md5((CAST(dataset.id as BYTEA) || $4)) - ), - shuffled_candidate_datasets AS ( - SELECT - candidate_datasets.id, - candidate_datasets.pool_id - FROM candidate_datasets - ORDER BY md5((CAST(candidate_datasets.id as BYTEA) || $5)) LIMIT $6 - ), - candidate_regions AS ( - SELECT - gen_random_uuid() AS id, - now() AS time_created, - now() AS time_modified, - shuffled_candidate_datasets.id AS dataset_id, - $7 AS volume_id, - $8 AS block_size, - $9 AS blocks_per_extent, - $10 AS extent_count - FROM shuffled_candidate_datasets - ), - proposed_dataset_changes AS ( - 
SELECT - candidate_regions.dataset_id AS id, - dataset.pool_id AS pool_id, - ((candidate_regions.block_size * candidate_regions.blocks_per_extent) * candidate_regions.extent_count) AS size_used_delta - FROM (candidate_regions INNER JOIN dataset ON (dataset.id = candidate_regions.dataset_id)) - ), - do_insert AS ( - SELECT ((( - ((SELECT COUNT(*) FROM old_regions LIMIT 1) < $11) AND - CAST(IF(((SELECT COUNT(*) FROM candidate_zpools LIMIT 1) >= $12), 'TRUE', 'Not enough space') AS BOOL)) AND - CAST(IF(((SELECT COUNT(*) FROM candidate_regions LIMIT 1) >= $13), 'TRUE', 'Not enough datasets') AS BOOL)) AND - CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= $14), 'TRUE', 'Not enough unique zpools selected') AS BOOL) - ) AS insert - ), - inserted_regions AS ( - INSERT INTO region - (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count) - SELECT candidate_regions.id, candidate_regions.time_created, candidate_regions.time_modified, candidate_regions.dataset_id, candidate_regions.volume_id, candidate_regions.block_size, candidate_regions.blocks_per_extent, candidate_regions.extent_count - FROM candidate_regions - WHERE - (SELECT do_insert.insert FROM do_insert LIMIT 1) - RETURNING region.id, region.time_created, region.time_modified, region.dataset_id, region.volume_id, region.block_size, region.blocks_per_extent, region.extent_count - ), - updated_datasets AS ( - UPDATE dataset SET - size_used = (dataset.size_used + (SELECT proposed_dataset_changes.size_used_delta FROM proposed_dataset_changes WHERE (proposed_dataset_changes.id = dataset.id) LIMIT 1)) - WHERE ( - (dataset.id = ANY(SELECT proposed_dataset_changes.id FROM proposed_dataset_changes)) AND - (SELECT do_insert.insert FROM do_insert LIMIT 1)) - RETURNING dataset.id, dataset.time_created, dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, 
dataset.ip, dataset.port, dataset.kind, dataset.size_used - ) ( - SELECT dataset.id, dataset.time_created, dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used, old_regions.id, old_regions.time_created, old_regions.time_modified, old_regions.dataset_id, old_regions.volume_id, old_regions.block_size, old_regions.blocks_per_extent, old_regions.extent_count + SELECT + dataset.id, + dataset.time_created, + dataset.time_modified, + dataset.time_deleted, + dataset.rcgen, + dataset.pool_id, + dataset.ip, + dataset.port, + dataset.kind, + dataset.size_used, + old_regions.id, + old_regions.time_created, + old_regions.time_modified, + old_regions.dataset_id, + old_regions.volume_id, + old_regions.block_size, + old_regions.blocks_per_extent, + old_regions.extent_count FROM - (old_regions INNER JOIN dataset ON (old_regions.dataset_id = dataset.id)) + old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id ) UNION -( - SELECT updated_datasets.id, updated_datasets.time_created, updated_datasets.time_modified, updated_datasets.time_deleted, updated_datasets.rcgen, updated_datasets.pool_id, updated_datasets.ip, updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, inserted_regions.dataset_id, inserted_regions.volume_id, inserted_regions.block_size, inserted_regions.blocks_per_extent, inserted_regions.extent_count - FROM (inserted_regions INNER JOIN updated_datasets ON (inserted_regions.dataset_id = updated_datasets.id)) -) -- binds: [00000000-0000-0000-0000-000000000000, 16384, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3, 00000000-0000-0000-0000-000000000000, 512, 4, 8, 3, 3, 3, 3] \ No newline at end of file + ( + SELECT + updated_datasets.id, + updated_datasets.time_created, + 
updated_datasets.time_modified, + updated_datasets.time_deleted, + updated_datasets.rcgen, + updated_datasets.pool_id, + updated_datasets.ip, + updated_datasets.port, + updated_datasets.kind, + updated_datasets.size_used, + inserted_regions.id, + inserted_regions.time_created, + inserted_regions.time_modified, + inserted_regions.dataset_id, + inserted_regions.volume_id, + inserted_regions.block_size, + inserted_regions.blocks_per_extent, + inserted_regions.extent_count + FROM + inserted_regions + INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id + ) diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql index d5b8e5f5fb..c5b97df72d 100644 --- a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -1,97 +1,251 @@ WITH - old_regions AS ( - SELECT region.id, region.time_created, region.time_modified, region.dataset_id, region.volume_id, region.block_size, region.blocks_per_extent, region.extent_count - FROM region WHERE (region.volume_id = $1)), - old_zpool_usage AS ( - SELECT - dataset.pool_id, - sum(dataset.size_used) AS size_used - FROM dataset WHERE ((dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL)) GROUP BY dataset.pool_id), - candidate_zpools AS ( - SELECT - old_zpool_usage.pool_id - FROM ( - old_zpool_usage - INNER JOIN - (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) - ON (zpool.id = old_zpool_usage.pool_id) - ) - WHERE ( - ((old_zpool_usage.size_used + $2 ) <= total_size) - AND - (sled.provision_state = 'provisionable') + old_regions + AS ( + SELECT + region.id, + region.time_created, + region.time_modified, + region.dataset_id, + region.volume_id, + region.block_size, + region.blocks_per_extent, + region.extent_count + FROM + region + WHERE + region.volume_id = $1 + ), + old_zpool_usage + AS ( + SELECT + dataset.pool_id, 
sum(dataset.size_used) AS size_used + FROM + dataset + WHERE + (dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL) + GROUP BY + dataset.pool_id + ), + candidate_zpools + AS ( + SELECT + old_zpool_usage.pool_id + FROM + old_zpool_usage + INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON + zpool.id = old_zpool_usage.pool_id + WHERE + (old_zpool_usage.size_used + $2) <= total_size AND sled.provision_state = 'provisionable' + ), + candidate_datasets + AS ( + SELECT + DISTINCT ON (dataset.pool_id) dataset.id, dataset.pool_id + FROM + dataset INNER JOIN candidate_zpools ON dataset.pool_id = candidate_zpools.pool_id + WHERE + ((dataset.time_deleted IS NULL) AND (dataset.size_used IS NOT NULL)) + AND dataset.kind = 'crucible' + ORDER BY + dataset.pool_id, md5(CAST(dataset.id AS BYTES) || $3) + ), + shuffled_candidate_datasets + AS ( + SELECT + candidate_datasets.id, candidate_datasets.pool_id + FROM + candidate_datasets + ORDER BY + md5(CAST(candidate_datasets.id AS BYTES) || $4) + LIMIT + $5 + ), + candidate_regions + AS ( + SELECT + gen_random_uuid() AS id, + now() AS time_created, + now() AS time_modified, + shuffled_candidate_datasets.id AS dataset_id, + $6 AS volume_id, + $7 AS block_size, + $8 AS blocks_per_extent, + $9 AS extent_count + FROM + shuffled_candidate_datasets + ), + proposed_dataset_changes + AS ( + SELECT + candidate_regions.dataset_id AS id, + dataset.pool_id AS pool_id, + candidate_regions.block_size + * candidate_regions.blocks_per_extent + * candidate_regions.extent_count + AS size_used_delta + FROM + candidate_regions INNER JOIN dataset ON dataset.id = candidate_regions.dataset_id + ), + do_insert + AS ( + SELECT + ( + ( + (SELECT count(*) FROM old_regions LIMIT 1) < $10 + AND CAST( + IF( + ((SELECT count(*) FROM candidate_zpools LIMIT 1) >= $11), + 'TRUE', + 'Not enough space' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ((SELECT count(*) FROM candidate_regions LIMIT 1) >= $12), + 'TRUE', + 'Not enough datasets' + ) + AS 
BOOL + ) + ) + AND CAST( + IF( + ( + ( + SELECT + count(DISTINCT dataset.pool_id) + FROM + candidate_regions + INNER JOIN dataset ON candidate_regions.dataset_id = dataset.id + LIMIT + 1 + ) + >= $13 + ), + 'TRUE', + 'Not enough unique zpools selected' + ) + AS BOOL + ) + AS insert + ), + inserted_regions + AS ( + INSERT + INTO + region + ( + id, + time_created, + time_modified, + dataset_id, + volume_id, + block_size, + blocks_per_extent, + extent_count + ) + SELECT + candidate_regions.id, + candidate_regions.time_created, + candidate_regions.time_modified, + candidate_regions.dataset_id, + candidate_regions.volume_id, + candidate_regions.block_size, + candidate_regions.blocks_per_extent, + candidate_regions.extent_count + FROM + candidate_regions + WHERE + (SELECT do_insert.insert FROM do_insert LIMIT 1) + RETURNING + region.id, + region.time_created, + region.time_modified, + region.dataset_id, + region.volume_id, + region.block_size, + region.blocks_per_extent, + region.extent_count + ), + updated_datasets + AS ( + UPDATE + dataset + SET + size_used + = dataset.size_used + + ( + SELECT + proposed_dataset_changes.size_used_delta + FROM + proposed_dataset_changes + WHERE + proposed_dataset_changes.id = dataset.id + LIMIT + 1 + ) + WHERE + dataset.id = ANY (SELECT proposed_dataset_changes.id FROM proposed_dataset_changes) + AND (SELECT do_insert.insert FROM do_insert LIMIT 1) + RETURNING + dataset.id, + dataset.time_created, + dataset.time_modified, + dataset.time_deleted, + dataset.rcgen, + dataset.pool_id, + dataset.ip, + dataset.port, + dataset.kind, + dataset.size_used ) - ), - candidate_datasets AS ( - SELECT DISTINCT ON (dataset.pool_id) - dataset.id, - dataset.pool_id - FROM (dataset INNER JOIN candidate_zpools ON (dataset.pool_id = candidate_zpools.pool_id)) - WHERE ( - ((dataset.time_deleted IS NULL) AND - (dataset.size_used IS NOT NULL)) AND - (dataset.kind = 'crucible') - ) - ORDER BY dataset.pool_id, md5((CAST(dataset.id as BYTEA) || $3)) - ), - 
shuffled_candidate_datasets AS ( - SELECT - candidate_datasets.id, - candidate_datasets.pool_id - FROM candidate_datasets - ORDER BY md5((CAST(candidate_datasets.id as BYTEA) || $4)) LIMIT $5 - ), - candidate_regions AS ( - SELECT - gen_random_uuid() AS id, - now() AS time_created, - now() AS time_modified, - shuffled_candidate_datasets.id AS dataset_id, - $6 AS volume_id, - $7 AS block_size, - $8 AS blocks_per_extent, - $9 AS extent_count - FROM shuffled_candidate_datasets - ), - proposed_dataset_changes AS ( - SELECT - candidate_regions.dataset_id AS id, - dataset.pool_id AS pool_id, - ((candidate_regions.block_size * candidate_regions.blocks_per_extent) * candidate_regions.extent_count) AS size_used_delta - FROM (candidate_regions INNER JOIN dataset ON (dataset.id = candidate_regions.dataset_id)) - ), - do_insert AS ( - SELECT ((( - ((SELECT COUNT(*) FROM old_regions LIMIT 1) < $10) AND - CAST(IF(((SELECT COUNT(*) FROM candidate_zpools LIMIT 1) >= $11), 'TRUE', 'Not enough space') AS BOOL)) AND - CAST(IF(((SELECT COUNT(*) FROM candidate_regions LIMIT 1) >= $12), 'TRUE', 'Not enough datasets') AS BOOL)) AND - CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= $13), 'TRUE', 'Not enough unique zpools selected') AS BOOL) - ) AS insert - ), - inserted_regions AS ( - INSERT INTO region - (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count) - SELECT candidate_regions.id, candidate_regions.time_created, candidate_regions.time_modified, candidate_regions.dataset_id, candidate_regions.volume_id, candidate_regions.block_size, candidate_regions.blocks_per_extent, candidate_regions.extent_count - FROM candidate_regions - WHERE - (SELECT do_insert.insert FROM do_insert LIMIT 1) - RETURNING region.id, region.time_created, region.time_modified, region.dataset_id, region.volume_id, region.block_size, region.blocks_per_extent, 
region.extent_count - ), - updated_datasets AS ( - UPDATE dataset SET - size_used = (dataset.size_used + (SELECT proposed_dataset_changes.size_used_delta FROM proposed_dataset_changes WHERE (proposed_dataset_changes.id = dataset.id) LIMIT 1)) - WHERE ( - (dataset.id = ANY(SELECT proposed_dataset_changes.id FROM proposed_dataset_changes)) AND - (SELECT do_insert.insert FROM do_insert LIMIT 1)) - RETURNING dataset.id, dataset.time_created, dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used - ) ( - SELECT dataset.id, dataset.time_created, dataset.time_modified, dataset.time_deleted, dataset.rcgen, dataset.pool_id, dataset.ip, dataset.port, dataset.kind, dataset.size_used, old_regions.id, old_regions.time_created, old_regions.time_modified, old_regions.dataset_id, old_regions.volume_id, old_regions.block_size, old_regions.blocks_per_extent, old_regions.extent_count + SELECT + dataset.id, + dataset.time_created, + dataset.time_modified, + dataset.time_deleted, + dataset.rcgen, + dataset.pool_id, + dataset.ip, + dataset.port, + dataset.kind, + dataset.size_used, + old_regions.id, + old_regions.time_created, + old_regions.time_modified, + old_regions.dataset_id, + old_regions.volume_id, + old_regions.block_size, + old_regions.blocks_per_extent, + old_regions.extent_count FROM - (old_regions INNER JOIN dataset ON (old_regions.dataset_id = dataset.id)) + old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id ) UNION -( - SELECT updated_datasets.id, updated_datasets.time_created, updated_datasets.time_modified, updated_datasets.time_deleted, updated_datasets.rcgen, updated_datasets.pool_id, updated_datasets.ip, updated_datasets.port, updated_datasets.kind, updated_datasets.size_used, inserted_regions.id, inserted_regions.time_created, inserted_regions.time_modified, inserted_regions.dataset_id, inserted_regions.volume_id, inserted_regions.block_size, 
inserted_regions.blocks_per_extent, inserted_regions.extent_count - FROM (inserted_regions INNER JOIN updated_datasets ON (inserted_regions.dataset_id = updated_datasets.id)) -) -- binds: [00000000-0000-0000-0000-000000000000, 16384, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3, 00000000-0000-0000-0000-000000000000, 512, 4, 8, 3, 3, 3, 3] \ No newline at end of file + ( + SELECT + updated_datasets.id, + updated_datasets.time_created, + updated_datasets.time_modified, + updated_datasets.time_deleted, + updated_datasets.rcgen, + updated_datasets.pool_id, + updated_datasets.ip, + updated_datasets.port, + updated_datasets.kind, + updated_datasets.size_used, + inserted_regions.id, + inserted_regions.time_created, + inserted_regions.time_modified, + inserted_regions.dataset_id, + inserted_regions.volume_id, + inserted_regions.block_size, + inserted_regions.blocks_per_extent, + inserted_regions.extent_count + FROM + inserted_regions + INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id + ) diff --git a/test-utils/src/dev/db.rs b/test-utils/src/dev/db.rs index da6cc90b03..5766402f7f 100644 --- a/test-utils/src/dev/db.rs +++ b/test-utils/src/dev/db.rs @@ -21,6 +21,7 @@ use std::time::Duration; use tempfile::tempdir; use tempfile::TempDir; use thiserror::Error; +use tokio::io::AsyncWriteExt; use tokio_postgres::config::Host; use tokio_postgres::config::SslMode; @@ -497,6 +498,15 @@ pub enum CockroachStartError { )] TimedOut { pid: u32, time_waited: Duration }, + #[error("failed to write input to cockroachdb")] + FailedToWrite(#[source] std::io::Error), + + #[error("failed to await cockroachdb completing")] + FailedToWait(#[source] std::io::Error), + + #[error("Invalid cockroachdb output")] + InvalidOutput(#[from] std::string::FromUtf8Error), + #[error("unknown error waiting for cockroach to start")] Unknown { #[source] @@ -648,6 +658,45 @@ impl Drop for CockroachInstance { } } +/// Uses 
cockroachdb to run the "sqlfmt" command. +pub async fn format_sql(input: &str) -> Result { + let mut cmd = tokio::process::Command::new(COCKROACHDB_BIN); + let mut child = cmd + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .args(&[ + "sqlfmt", + "--tab-width", + "2", + "--use-spaces", + "true", + "--print-width", + "100", + ]) + .spawn() + .map_err(|source| CockroachStartError::BadCmd { + cmd: COCKROACHDB_BIN.to_string(), + source, + })?; + let stdin = child.stdin.as_mut().unwrap(); + stdin + .write_all(input.as_bytes()) + .await + .map_err(CockroachStartError::FailedToWrite)?; + let output = child + .wait_with_output() + .await + .map_err(CockroachStartError::FailedToWait)?; + + if !output.status.success() { + return Err(CockroachStartError::Exited { + exit_code: output.status.code().unwrap_or_else(|| -1), + }); + } + + Ok(String::from_utf8(output.stdout)?) +} + /// Verify that CockroachDB has the correct version pub async fn check_db_version() -> Result<(), CockroachStartError> { let mut cmd = tokio::process::Command::new(COCKROACHDB_BIN); From 7f46a0aa3d0857f26f283523b728a4eac358b1c7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 15 Feb 2024 13:58:14 -0800 Subject: [PATCH 14/19] Add EXPECTORATE tests for virtual_provisioning_collection CTE --- .../src/db/queries/region_allocation.rs | 25 +-- .../virtual_provisioning_collection_update.rs | 68 ++++++++ nexus/db-queries/src/db/raw_query_builder.rs | 15 ++ ...ning_collection_update_delete_instance.sql | 97 +++++++++++ ...oning_collection_update_delete_storage.sql | 86 ++++++++++ ...ning_collection_update_insert_instance.sql | 154 ++++++++++++++++++ ...oning_collection_update_insert_storage.sql | 154 ++++++++++++++++++ 7 files changed, 583 insertions(+), 16 deletions(-) create mode 100644 nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql create mode 100644 nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql create mode 100644 
nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql create mode 100644 nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 31bb277af6..1a0b058614 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -301,6 +301,7 @@ UNION mod test { use super::*; use crate::db::explain::ExplainableAsync; + use crate::db::raw_query_builder::expectorate_query_contents; use nexus_test_utils::db::test_setup_database; use omicron_test_utils::dev; use uuid::Uuid; @@ -326,15 +327,11 @@ mod test { seed: Some(1), }, ); - let s = dev::db::format_sql( - &diesel::debug_query::(®ion_allocate).to_string(), - ) - .await - .unwrap(); - expectorate::assert_contents( + expectorate_query_contents( + ®ion_allocate, "tests/output/region_allocate_distinct_sleds.sql", - &s, - ); + ) + .await; // Second structure: "Random" @@ -345,15 +342,11 @@ mod test { extent_count, &RegionAllocationStrategy::Random { seed: Some(1) }, ); - let s = dev::db::format_sql( - &diesel::debug_query::(®ion_allocate).to_string(), - ) - .await - .unwrap(); - expectorate::assert_contents( + expectorate_query_contents( + ®ion_allocate, "tests/output/region_allocate_random_sleds.sql", - &s, - ); + ) + .await; } // Explain the possible forms of the SQL query to ensure that it diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs index 7672d5af9a..934f1ce078 100644 --- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs +++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs @@ -646,3 +646,71 @@ impl Query for VirtualProvisioningCollectionUpdate { } impl RunQueryDsl for VirtualProvisioningCollectionUpdate {} + 
+#[cfg(test)] +mod test { + use super::*; + use crate::db::raw_query_builder::expectorate_query_contents; + use uuid::Uuid; + + // This test is a bit of a "change detector", but it's here to help with + // debugging too. If you change this query, it can be useful to see exactly + // how the output SQL has been altered. + #[tokio::test] + async fn expectorate_query() { + let id = Uuid::nil(); + let project_id = Uuid::nil(); + let disk_byte_diff = 2048.try_into().unwrap(); + let storage_type = crate::db::datastore::StorageType::Disk; + + let query = VirtualProvisioningCollectionUpdate::new_insert_storage( + id, + disk_byte_diff, + project_id, + storage_type, + ); + + expectorate_query_contents( + &query, + "tests/output/virtual_provisioning_collection_update_insert_storage.sql", + ).await; + + let query = VirtualProvisioningCollectionUpdate::new_delete_storage( + id, + disk_byte_diff, + project_id, + ); + + expectorate_query_contents( + &query, + "tests/output/virtual_provisioning_collection_update_delete_storage.sql", + ).await; + + let cpus_diff = 4; + let ram_diff = 2048.try_into().unwrap(); + + let query = VirtualProvisioningCollectionUpdate::new_insert_instance( + id, cpus_diff, ram_diff, project_id, + ); + + expectorate_query_contents( + &query, + "tests/output/virtual_provisioning_collection_update_insert_instance.sql", + ).await; + + let max_instance_gen = 0; + + let query = VirtualProvisioningCollectionUpdate::new_delete_instance( + id, + max_instance_gen, + cpus_diff, + ram_diff, + project_id, + ); + + expectorate_query_contents( + &query, + "tests/output/virtual_provisioning_collection_update_delete_instance.sql", + ).await; + } +} diff --git a/nexus/db-queries/src/db/raw_query_builder.rs b/nexus/db-queries/src/db/raw_query_builder.rs index 65ff22f86f..d0decc8551 100644 --- a/nexus/db-queries/src/db/raw_query_builder.rs +++ b/nexus/db-queries/src/db/raw_query_builder.rs @@ -193,3 +193,18 @@ impl RunQueryDsl for TypedSqlQuery {} impl Query for TypedSqlQuery 
{ type SqlType = T; } + +#[cfg(test)] +pub async fn expectorate_query_contents>( + query: T, + path: &str, +) { + use omicron_test_utils::dev; + + let s = + dev::db::format_sql(&diesel::debug_query::(&query).to_string()) + .await + .expect("Failed to format SQL"); + + expectorate::assert_contents(path, &s); +} diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql new file mode 100644 index 0000000000..fcabefef26 --- /dev/null +++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql @@ -0,0 +1,97 @@ +WITH + parent_silo AS (SELECT project.silo_id AS id FROM project WHERE project.id = $1), + all_collections + AS ( + ((SELECT $2 AS id) UNION (SELECT parent_silo.id AS id FROM parent_silo)) + UNION (SELECT $3 AS id) + ), + quotas + AS ( + SELECT + silo_quotas.silo_id, + silo_quotas.cpus, + silo_quotas.memory_bytes AS memory, + silo_quotas.storage_bytes AS storage + FROM + silo_quotas INNER JOIN parent_silo ON silo_quotas.silo_id = parent_silo.id + ), + silo_provisioned + AS ( + SELECT + virtual_provisioning_collection.id, + virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned, + virtual_provisioning_collection.virtual_disk_bytes_provisioned + FROM + virtual_provisioning_collection + INNER JOIN parent_silo ON virtual_provisioning_collection.id = parent_silo.id + ), + do_update + AS ( + SELECT + ( + SELECT + count(*) + FROM + virtual_provisioning_resource + WHERE + virtual_provisioning_resource.id = $4 + LIMIT + $5 + ) + = $6 + AS update + ), + unused_cte_arm + AS ( + DELETE FROM + virtual_provisioning_resource + WHERE + virtual_provisioning_resource.id = $7 + AND virtual_provisioning_resource.id + = ( + SELECT + instance.id + FROM + instance + WHERE + instance.id = $8 AND instance.state_generation < $9 + LIMIT + $10 + ) + RETURNING + 
virtual_provisioning_resource.id, + virtual_provisioning_resource.time_modified, + virtual_provisioning_resource.resource_type, + virtual_provisioning_resource.virtual_disk_bytes_provisioned, + virtual_provisioning_resource.cpus_provisioned, + virtual_provisioning_resource.ram_provisioned + ), + virtual_provisioning_collection + AS ( + UPDATE + virtual_provisioning_collection + SET + time_modified = current_timestamp(), + cpus_provisioned = virtual_provisioning_collection.cpus_provisioned - $11, + ram_provisioned = virtual_provisioning_collection.ram_provisioned - $12 + WHERE + virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections) + AND (SELECT do_update.update FROM do_update LIMIT $13) + RETURNING + virtual_provisioning_collection.id, + virtual_provisioning_collection.time_modified, + virtual_provisioning_collection.collection_type, + virtual_provisioning_collection.virtual_disk_bytes_provisioned, + virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned + ) +SELECT + virtual_provisioning_collection.id, + virtual_provisioning_collection.time_modified, + virtual_provisioning_collection.collection_type, + virtual_provisioning_collection.virtual_disk_bytes_provisioned, + virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned +FROM + virtual_provisioning_collection diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql new file mode 100644 index 0000000000..72c0b81e15 --- /dev/null +++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql @@ -0,0 +1,86 @@ +WITH + parent_silo AS (SELECT project.silo_id AS id FROM project WHERE project.id = $1), + all_collections + AS ( + ((SELECT $2 AS id) UNION (SELECT parent_silo.id AS id FROM parent_silo)) + UNION (SELECT $3 AS id) + ), + 
quotas + AS ( + SELECT + silo_quotas.silo_id, + silo_quotas.cpus, + silo_quotas.memory_bytes AS memory, + silo_quotas.storage_bytes AS storage + FROM + silo_quotas INNER JOIN parent_silo ON silo_quotas.silo_id = parent_silo.id + ), + silo_provisioned + AS ( + SELECT + virtual_provisioning_collection.id, + virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned, + virtual_provisioning_collection.virtual_disk_bytes_provisioned + FROM + virtual_provisioning_collection + INNER JOIN parent_silo ON virtual_provisioning_collection.id = parent_silo.id + ), + do_update + AS ( + SELECT + ( + SELECT + count(*) + FROM + virtual_provisioning_resource + WHERE + virtual_provisioning_resource.id = $4 + LIMIT + $5 + ) + = $6 + AS update + ), + unused_cte_arm + AS ( + DELETE FROM + virtual_provisioning_resource + WHERE + virtual_provisioning_resource.id = $7 + RETURNING + virtual_provisioning_resource.id, + virtual_provisioning_resource.time_modified, + virtual_provisioning_resource.resource_type, + virtual_provisioning_resource.virtual_disk_bytes_provisioned, + virtual_provisioning_resource.cpus_provisioned, + virtual_provisioning_resource.ram_provisioned + ), + virtual_provisioning_collection + AS ( + UPDATE + virtual_provisioning_collection + SET + time_modified = current_timestamp(), + virtual_disk_bytes_provisioned + = virtual_provisioning_collection.virtual_disk_bytes_provisioned - $8 + WHERE + virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections) + AND (SELECT do_update.update FROM do_update LIMIT $9) + RETURNING + virtual_provisioning_collection.id, + virtual_provisioning_collection.time_modified, + virtual_provisioning_collection.collection_type, + virtual_provisioning_collection.virtual_disk_bytes_provisioned, + virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned + ) +SELECT + virtual_provisioning_collection.id, + 
virtual_provisioning_collection.time_modified, + virtual_provisioning_collection.collection_type, + virtual_provisioning_collection.virtual_disk_bytes_provisioned, + virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned +FROM + virtual_provisioning_collection diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql new file mode 100644 index 0000000000..753b7f09f3 --- /dev/null +++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql @@ -0,0 +1,154 @@ +WITH + parent_silo AS (SELECT project.silo_id AS id FROM project WHERE project.id = $1), + all_collections + AS ( + ((SELECT $2 AS id) UNION (SELECT parent_silo.id AS id FROM parent_silo)) + UNION (SELECT $3 AS id) + ), + quotas + AS ( + SELECT + silo_quotas.silo_id, + silo_quotas.cpus, + silo_quotas.memory_bytes AS memory, + silo_quotas.storage_bytes AS storage + FROM + silo_quotas INNER JOIN parent_silo ON silo_quotas.silo_id = parent_silo.id + ), + silo_provisioned + AS ( + SELECT + virtual_provisioning_collection.id, + virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned, + virtual_provisioning_collection.virtual_disk_bytes_provisioned + FROM + virtual_provisioning_collection + INNER JOIN parent_silo ON virtual_provisioning_collection.id = parent_silo.id + ), + do_update + AS ( + SELECT + ( + ( + ( + SELECT + count(*) + FROM + virtual_provisioning_resource + WHERE + virtual_provisioning_resource.id = $4 + LIMIT + $5 + ) + = $6 + AND CAST( + IF( + ( + $7 = $8 + OR (SELECT quotas.cpus FROM quotas LIMIT $9) + >= ( + (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT $10) + + $11 + ) + ), + 'TRUE', + 'Not enough cpus' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ( + $12 = $13 + OR (SELECT quotas.memory FROM quotas LIMIT $14) + >= ( + 
(SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT $15) + + $16 + ) + ), + 'TRUE', + 'Not enough memory' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ( + $17 = $18 + OR (SELECT quotas.storage FROM quotas LIMIT $19) + >= ( + ( + SELECT + silo_provisioned.virtual_disk_bytes_provisioned + FROM + silo_provisioned + LIMIT + $20 + ) + + $21 + ) + ), + 'TRUE', + 'Not enough storage' + ) + AS BOOL + ) + AS update + ), + unused_cte_arm + AS ( + INSERT + INTO + virtual_provisioning_resource + ( + id, + time_modified, + resource_type, + virtual_disk_bytes_provisioned, + cpus_provisioned, + ram_provisioned + ) + VALUES + ($22, DEFAULT, $23, $24, $25, $26) + ON CONFLICT + DO + NOTHING + RETURNING + virtual_provisioning_resource.id, + virtual_provisioning_resource.time_modified, + virtual_provisioning_resource.resource_type, + virtual_provisioning_resource.virtual_disk_bytes_provisioned, + virtual_provisioning_resource.cpus_provisioned, + virtual_provisioning_resource.ram_provisioned + ), + virtual_provisioning_collection + AS ( + UPDATE + virtual_provisioning_collection + SET + time_modified = current_timestamp(), + cpus_provisioned = virtual_provisioning_collection.cpus_provisioned + $27, + ram_provisioned = virtual_provisioning_collection.ram_provisioned + $28 + WHERE + virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections) + AND (SELECT do_update.update FROM do_update LIMIT $29) + RETURNING + virtual_provisioning_collection.id, + virtual_provisioning_collection.time_modified, + virtual_provisioning_collection.collection_type, + virtual_provisioning_collection.virtual_disk_bytes_provisioned, + virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned + ) +SELECT + virtual_provisioning_collection.id, + virtual_provisioning_collection.time_modified, + virtual_provisioning_collection.collection_type, + virtual_provisioning_collection.virtual_disk_bytes_provisioned, + 
virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned +FROM + virtual_provisioning_collection diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql new file mode 100644 index 0000000000..040a5dc20c --- /dev/null +++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql @@ -0,0 +1,154 @@ +WITH + parent_silo AS (SELECT project.silo_id AS id FROM project WHERE project.id = $1), + all_collections + AS ( + ((SELECT $2 AS id) UNION (SELECT parent_silo.id AS id FROM parent_silo)) + UNION (SELECT $3 AS id) + ), + quotas + AS ( + SELECT + silo_quotas.silo_id, + silo_quotas.cpus, + silo_quotas.memory_bytes AS memory, + silo_quotas.storage_bytes AS storage + FROM + silo_quotas INNER JOIN parent_silo ON silo_quotas.silo_id = parent_silo.id + ), + silo_provisioned + AS ( + SELECT + virtual_provisioning_collection.id, + virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned, + virtual_provisioning_collection.virtual_disk_bytes_provisioned + FROM + virtual_provisioning_collection + INNER JOIN parent_silo ON virtual_provisioning_collection.id = parent_silo.id + ), + do_update + AS ( + SELECT + ( + ( + ( + SELECT + count(*) + FROM + virtual_provisioning_resource + WHERE + virtual_provisioning_resource.id = $4 + LIMIT + $5 + ) + = $6 + AND CAST( + IF( + ( + $7 = $8 + OR (SELECT quotas.cpus FROM quotas LIMIT $9) + >= ( + (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT $10) + + $11 + ) + ), + 'TRUE', + 'Not enough cpus' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ( + $12 = $13 + OR (SELECT quotas.memory FROM quotas LIMIT $14) + >= ( + (SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT $15) + + $16 + ) + ), + 'TRUE', + 'Not enough memory' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ( + $17 = 
$18 + OR (SELECT quotas.storage FROM quotas LIMIT $19) + >= ( + ( + SELECT + silo_provisioned.virtual_disk_bytes_provisioned + FROM + silo_provisioned + LIMIT + $20 + ) + + $21 + ) + ), + 'TRUE', + 'Not enough storage' + ) + AS BOOL + ) + AS update + ), + unused_cte_arm + AS ( + INSERT + INTO + virtual_provisioning_resource + ( + id, + time_modified, + resource_type, + virtual_disk_bytes_provisioned, + cpus_provisioned, + ram_provisioned + ) + VALUES + ($22, DEFAULT, $23, $24, $25, $26) + ON CONFLICT + DO + NOTHING + RETURNING + virtual_provisioning_resource.id, + virtual_provisioning_resource.time_modified, + virtual_provisioning_resource.resource_type, + virtual_provisioning_resource.virtual_disk_bytes_provisioned, + virtual_provisioning_resource.cpus_provisioned, + virtual_provisioning_resource.ram_provisioned + ), + virtual_provisioning_collection + AS ( + UPDATE + virtual_provisioning_collection + SET + time_modified = current_timestamp(), + virtual_disk_bytes_provisioned + = virtual_provisioning_collection.virtual_disk_bytes_provisioned + $27 + WHERE + virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections) + AND (SELECT do_update.update FROM do_update LIMIT $28) + RETURNING + virtual_provisioning_collection.id, + virtual_provisioning_collection.time_modified, + virtual_provisioning_collection.collection_type, + virtual_provisioning_collection.virtual_disk_bytes_provisioned, + virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned + ) +SELECT + virtual_provisioning_collection.id, + virtual_provisioning_collection.time_modified, + virtual_provisioning_collection.collection_type, + virtual_provisioning_collection.virtual_disk_bytes_provisioned, + virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned +FROM + virtual_provisioning_collection From 96983eca2181999972e78cc30a9002269ce3c768 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 16 
Feb 2024 12:32:32 -0800 Subject: [PATCH 15/19] Convert virtual provisioning CTE to use raw SQL --- nexus/db-model/src/lib.rs | 16 +- nexus/db-model/src/queries/mod.rs | 7 - .../virtual_provisioning_collection_update.rs | 60 -- nexus/db-queries/src/db/alias.rs | 84 -- nexus/db-queries/src/db/mod.rs | 1 - .../virtual_provisioning_collection_update.rs | 939 ++++++++---------- ...ning_collection_update_delete_instance.sql | 16 +- ...oning_collection_update_delete_storage.sql | 10 +- ...ning_collection_update_insert_instance.sql | 35 +- ...oning_collection_update_insert_storage.sql | 33 +- 10 files changed, 481 insertions(+), 720 deletions(-) delete mode 100644 nexus/db-model/src/queries/mod.rs delete mode 100644 nexus/db-model/src/queries/virtual_provisioning_collection_update.rs delete mode 100644 nexus/db-queries/src/db/alias.rs diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index b77d56059e..35dce8ac5a 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -21,6 +21,7 @@ mod console_session; mod dataset; mod dataset_kind; mod db_metadata; +mod deployment; mod device_auth; mod digest; mod disk; @@ -35,6 +36,7 @@ mod instance_cpu_count; mod instance_state; mod inventory; mod ip_pool; +mod ipv4_nat_entry; mod ipv4net; pub mod ipv6; mod ipv6net; @@ -42,21 +44,12 @@ mod l4_port_range; mod macaddr; mod name; mod network_interface; +mod omicron_zone_config; mod oximeter_info; mod physical_disk; mod physical_disk_kind; mod producer_endpoint; mod project; -mod semver_version; -mod switch_interface; -mod switch_port; -// These actually represent subqueries, not real table. -// However, they must be defined in the same crate as our tables -// for join-based marker trait generation. 
-mod deployment; -mod ipv4_nat_entry; -mod omicron_zone_config; -pub mod queries; mod quota; mod rack; mod region; @@ -65,6 +58,7 @@ mod role_assignment; mod role_builtin; pub mod saga_types; pub mod schema; +mod semver_version; mod service; mod service_kind; mod silo; @@ -80,6 +74,8 @@ mod sled_underlay_subnet_allocation; mod snapshot; mod ssh_key; mod switch; +mod switch_interface; +mod switch_port; mod tuf_repo; mod unsigned; mod user_builtin; diff --git a/nexus/db-model/src/queries/mod.rs b/nexus/db-model/src/queries/mod.rs deleted file mode 100644 index e138508f84..0000000000 --- a/nexus/db-model/src/queries/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Subqueries used in CTEs. - -pub mod virtual_provisioning_collection_update; diff --git a/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs b/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs deleted file mode 100644 index 124ffe4db6..0000000000 --- a/nexus/db-model/src/queries/virtual_provisioning_collection_update.rs +++ /dev/null @@ -1,60 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Describes the resource provisioning update CTE -//! -//! Refer to -//! for the construction of this query. - -use crate::schema::silo; -use crate::schema::silo_quotas; -use crate::schema::virtual_provisioning_collection; - -table! { - parent_silo { - id -> Uuid, - } -} - -table! { - all_collections { - id -> Uuid, - } -} - -table! { - do_update (update) { - update -> Bool, - } -} - -table! { - quotas (silo_id) { - silo_id -> Uuid, - cpus -> Int8, - memory -> Int8, - storage -> Int8, - } -} - -table! 
{ - silo_provisioned { - id -> Uuid, - virtual_disk_bytes_provisioned -> Int8, - cpus_provisioned -> Int8, - ram_provisioned -> Int8, - } -} - -diesel::allow_tables_to_appear_in_same_query!(silo, parent_silo,); - -diesel::allow_tables_to_appear_in_same_query!( - virtual_provisioning_collection, - silo_quotas, - parent_silo, - all_collections, - do_update, - quotas, - silo_provisioned -); diff --git a/nexus/db-queries/src/db/alias.rs b/nexus/db-queries/src/db/alias.rs deleted file mode 100644 index 0a5bcca743..0000000000 --- a/nexus/db-queries/src/db/alias.rs +++ /dev/null @@ -1,84 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Tools for creating aliases in diesel. - -use diesel::pg::Pg; -use diesel::query_builder::AstPass; -use diesel::query_builder::QueryFragment; -use diesel::Expression; -use diesel::SelectableExpression; - -/// Allows an [`diesel::Expression`] to be referenced by a new name. -/// -/// This generates an ` AS ` SQL fragment. 
-/// -/// -/// For example: -/// -/// ```ignore -/// diesel::sql_function!(fn gen_random_uuid() -> Uuid); -/// -/// let query = sleds.select( -/// ( -/// ExpressionAlias::(gen_random_uuid()), -/// ExpressionAlias::(gen_random_uuid()), -/// ), -/// ); -/// ``` -/// -/// Produces the following SQL: -/// -/// ```sql -/// SELECT -/// gen_random_uuid() as id, -/// gen_random_uuid() as sled_id, -/// FROM sleds -/// ``` -#[derive(diesel::expression::ValidGrouping, diesel::query_builder::QueryId)] -pub struct ExpressionAlias { - expr: E, - name: &'static str, -} - -impl ExpressionAlias -where - E: Expression, -{ - pub fn new(expr: E) -> Self { - Self { expr, name: C::NAME } - } -} - -impl Expression for ExpressionAlias -where - E: Expression, -{ - type SqlType = E::SqlType; -} - -impl diesel::AppearsOnTable for ExpressionAlias where - E: diesel::AppearsOnTable -{ -} - -impl SelectableExpression for ExpressionAlias where - E: SelectableExpression -{ -} - -impl QueryFragment for ExpressionAlias -where - E: QueryFragment, -{ - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - self.expr.walk_ast(out.reborrow())?; - out.push_sql(" AS "); - out.push_sql(&self.name); - Ok(()) - } -} diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs index 25b5620801..db3c4f214c 100644 --- a/nexus/db-queries/src/db/mod.rs +++ b/nexus/db-queries/src/db/mod.rs @@ -4,7 +4,6 @@ //! 
Facilities for working with the Omicron database -pub(crate) mod alias; // This is not intended to be public, but this is necessary to use it from // doctests pub mod collection_attach; diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs index 934f1ce078..479361d974 100644 --- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs +++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs @@ -4,32 +4,27 @@ //! Implementation of queries for updating resource provisioning info. -use crate::db::alias::ExpressionAlias; +use crate::db::column_walker::AllColumnsOf; use crate::db::model::ByteCount; use crate::db::model::ResourceTypeProvisioned; use crate::db::model::VirtualProvisioningCollection; use crate::db::model::VirtualProvisioningResource; -use crate::db::pool::DbConnection; +use crate::db::raw_query_builder::{QueryBuilder, TypedSqlQuery}; use crate::db::schema::virtual_provisioning_collection; use crate::db::schema::virtual_provisioning_resource; -use crate::db::subquery::{AsQuerySource, Cte, CteBuilder, CteQuery}; use crate::db::true_or_cast_error::matches_sentinel; -use crate::db::true_or_cast_error::TrueOrCastError; -use db_macros::Subquery; +use const_format::concatcp; use diesel::pg::Pg; -use diesel::query_builder::{AstPass, Query, QueryFragment, QueryId}; use diesel::result::Error as DieselError; -use diesel::{ - sql_types, BoolExpressionMethods, CombineDsl, ExpressionMethods, IntoSql, - JoinOnDsl, NullableExpressionMethods, QueryDsl, RunQueryDsl, - SelectableHelper, -}; -use nexus_db_model::queries::virtual_provisioning_collection_update::{ - all_collections, do_update, parent_silo, quotas, silo_provisioned, -}; +use diesel::sql_types; use omicron_common::api::external; use omicron_common::api::external::MessagePair; +type AllColumnsOfVirtualResource = + AllColumnsOf; +type AllColumnsOfVirtualCollection = + 
AllColumnsOf; + const NOT_ENOUGH_CPUS_SENTINEL: &'static str = "Not enough cpus"; const NOT_ENOUGH_MEMORY_SENTINEL: &'static str = "Not enough memory"; const NOT_ENOUGH_STORAGE_SENTINEL: &'static str = "Not enough storage"; @@ -77,319 +72,33 @@ pub fn from_diesel(e: DieselError) -> external::Error { error::public_error_from_diesel(e, error::ErrorHandler::Server) } -#[derive(Subquery, QueryId)] -#[subquery(name = parent_silo)] -struct ParentSilo { - query: Box>, -} - -impl ParentSilo { - fn new(project_id: uuid::Uuid) -> Self { - use crate::db::schema::project::dsl; - Self { - query: Box::new( - dsl::project.filter(dsl::id.eq(project_id)).select(( - ExpressionAlias::new::(dsl::silo_id), - )), - ), - } - } -} - -#[derive(Subquery, QueryId)] -#[subquery(name = all_collections)] -struct AllCollections { - query: Box>, -} - -impl AllCollections { - fn new( - project_id: uuid::Uuid, - parent_silo: &ParentSilo, - fleet_id: uuid::Uuid, - ) -> Self { - let project_id = project_id.into_sql::(); - let fleet_id = fleet_id.into_sql::(); - Self { - query: Box::new( - diesel::select((ExpressionAlias::new::< - all_collections::dsl::id, - >(project_id),)) - .union(parent_silo.query_source().select(( - ExpressionAlias::new::( - parent_silo::id, - ), - ))) - .union(diesel::select((ExpressionAlias::new::< - all_collections::dsl::id, - >(fleet_id),))), - ), - } - } -} - -#[derive(Subquery, QueryId)] -#[subquery(name = do_update)] -struct DoUpdate { - query: Box>, -} - -impl DoUpdate { - fn new_for_insert( - silo_provisioned: &SiloProvisioned, - quotas: &Quotas, - resource: VirtualProvisioningResource, - ) -> Self { - use virtual_provisioning_resource::dsl; - - let cpus_provisioned_delta = - resource.cpus_provisioned.into_sql::(); - let memory_provisioned_delta = - i64::from(resource.ram_provisioned).into_sql::(); - let storage_provisioned_delta = - i64::from(resource.virtual_disk_bytes_provisioned) - .into_sql::(); - - let not_allocted = dsl::virtual_provisioning_resource - 
.find(resource.id) - .count() - .single_value() - .assume_not_null() - .eq(0); - - let has_sufficient_cpus = quotas - .query_source() - .select(quotas::cpus) - .single_value() - .assume_not_null() - .ge(silo_provisioned - .query_source() - .select(silo_provisioned::cpus_provisioned) - .single_value() - .assume_not_null() - + cpus_provisioned_delta); - - let has_sufficient_memory = quotas - .query_source() - .select(quotas::memory) - .single_value() - .assume_not_null() - .ge(silo_provisioned - .query_source() - .select(silo_provisioned::ram_provisioned) - .single_value() - .assume_not_null() - + memory_provisioned_delta); - - let has_sufficient_storage = quotas - .query_source() - .select(quotas::storage) - .single_value() - .assume_not_null() - .ge(silo_provisioned - .query_source() - .select(silo_provisioned::virtual_disk_bytes_provisioned) - .single_value() - .assume_not_null() - + storage_provisioned_delta); - - Self { - query: Box::new(diesel::select((ExpressionAlias::new::< - do_update::update, - >( - not_allocted - .and(TrueOrCastError::new( - cpus_provisioned_delta.eq(0).or(has_sufficient_cpus), - NOT_ENOUGH_CPUS_SENTINEL, - )) - .and(TrueOrCastError::new( - memory_provisioned_delta - .eq(0) - .or(has_sufficient_memory), - NOT_ENOUGH_MEMORY_SENTINEL, - )) - .and(TrueOrCastError::new( - storage_provisioned_delta - .eq(0) - .or(has_sufficient_storage), - NOT_ENOUGH_STORAGE_SENTINEL, - )), - ),))), - } - } - - fn new_for_delete(id: uuid::Uuid) -> Self { - use virtual_provisioning_resource::dsl; - - let already_allocated = dsl::virtual_provisioning_resource - .find(id) - .count() - .single_value() - .assume_not_null() - .eq(1); - - Self { - query: Box::new(diesel::select((ExpressionAlias::new::< - do_update::update, - >(already_allocated),))), - } - } -} - -#[derive(Subquery, QueryId)] -#[subquery(name = virtual_provisioning_collection)] -struct UpdatedProvisions { - query: - Box>, -} - -impl UpdatedProvisions { - fn new( - all_collections: &AllCollections, - 
do_update: &DoUpdate, - values: V, - ) -> Self - where - V: diesel::AsChangeset, - ::Changeset: - QueryFragment + Send + 'static, - { - use virtual_provisioning_collection::dsl; - - Self { - query: Box::new( - diesel::update(dsl::virtual_provisioning_collection) - .set(values) - .filter( - dsl::id.eq_any( - all_collections - .query_source() - .select(all_collections::id), - ), - ) - .filter( - do_update - .query_source() - .select(do_update::update) - .single_value() - .assume_not_null(), - ) - .returning(virtual_provisioning_collection::all_columns), - ), - } - } -} - -#[derive(Subquery, QueryId)] -#[subquery(name = quotas)] -struct Quotas { - query: Box>, -} - -impl Quotas { - // TODO: We could potentially skip this in cases where we know we're removing a resource instead of inserting - fn new(parent_silo: &ParentSilo) -> Self { - use crate::db::schema::silo_quotas::dsl; - Self { - query: Box::new( - dsl::silo_quotas - .inner_join( - parent_silo - .query_source() - .on(dsl::silo_id.eq(parent_silo::id)), - ) - .select(( - dsl::silo_id, - dsl::cpus, - ExpressionAlias::new::( - dsl::memory_bytes, - ), - ExpressionAlias::new::( - dsl::storage_bytes, - ), - )), - ), - } - } -} - -#[derive(Subquery, QueryId)] -#[subquery(name = silo_provisioned)] -struct SiloProvisioned { - query: Box>, -} - -impl SiloProvisioned { - fn new(parent_silo: &ParentSilo) -> Self { - use virtual_provisioning_collection::dsl; - Self { - query: Box::new( - dsl::virtual_provisioning_collection - .inner_join( - parent_silo - .query_source() - .on(dsl::id.eq(parent_silo::id)), - ) - .select(( - dsl::id, - dsl::cpus_provisioned, - dsl::ram_provisioned, - dsl::virtual_disk_bytes_provisioned, - )), - ), - } - } -} - -// This structure wraps a query, such that it can be used within a CTE. -// -// It generates a name that can be used by the "CteBuilder", but does not -// implement "AsQuerySource". 
This basically means: -// - It can be used to add data-modifying statements to the CTE -// - The result of the query cannot be referenced by subsequent queries -// -// NOTE: The name for each CTE arm should be unique, so this shouldn't be used -// multiple times within a single CTE. This restriction could be removed by -// generating unique identifiers. -struct UnreferenceableSubquery(Q); - -impl QueryFragment for UnreferenceableSubquery -where - Q: QueryFragment + Send + 'static, -{ - fn walk_ast<'a>( - &'a self, - mut out: diesel::query_builder::AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - out.push_identifier("unused_cte_arm")?; - Ok(()) - } -} - -impl crate::db::subquery::Subquery for UnreferenceableSubquery -where - Q: QueryFragment + Send + 'static, -{ - fn query(&self) -> &dyn QueryFragment { - &self.0 - } -} - /// The virtual resource collection is only updated when a resource is inserted /// or deleted from the resource provisioning table. By probing for the presence /// or absence of a resource, we can update collections at the same time as we /// create or destroy the resource, which helps make the operation idempotent. +#[derive(Clone)] enum UpdateKind { - Insert(VirtualProvisioningResource), - Delete(uuid::Uuid), + InsertStorage(VirtualProvisioningResource), + DeleteStorage { + id: uuid::Uuid, + disk_byte_diff: ByteCount, + }, + InsertInstance(VirtualProvisioningResource), + DeleteInstance { + id: uuid::Uuid, + max_instance_gen: i64, + cpus_diff: i64, + ram_diff: ByteCount, + }, } +type SelectableSql = < + >::SelectExpression as diesel::Expression +>::SqlType; + /// Constructs a CTE for updating resource provisioning information in all /// collections for a particular object. 
-#[derive(QueryId)] -pub struct VirtualProvisioningCollectionUpdate { - cte: Cte, -} +pub struct VirtualProvisioningCollectionUpdate {} impl VirtualProvisioningCollectionUpdate { // Generic utility for updating all collections including this resource, @@ -399,66 +108,336 @@ impl VirtualProvisioningCollectionUpdate { // - Project // - Silo // - Fleet - // - // Arguments: - // - do_update: A boolean SQL query to answer the question: "Should this update - // be applied"? This query is necessary for idempotency. - // - update: A SQL query to actually modify the resource record. Generally - // this is an "INSERT", "UPDATE", or "DELETE". - // - project_id: The project to which the resource belongs. - // - values: The updated values to propagate through collections (iff - // "do_update" evaluates to "true"). - fn apply_update( + fn apply_update( update_kind: UpdateKind, - update: U, project_id: uuid::Uuid, - values: V, - ) -> Self - where - U: QueryFragment + crate::db::subquery::Subquery + Send + 'static, - V: diesel::AsChangeset, - ::Changeset: - QueryFragment + Send + 'static, - { - let parent_silo = ParentSilo::new(project_id); - let all_collections = AllCollections::new( - project_id, - &parent_silo, - *crate::db::fixed_data::FLEET_ID, - ); - - let quotas = Quotas::new(&parent_silo); - let silo_provisioned = SiloProvisioned::new(&parent_silo); + ) -> TypedSqlQuery> { + let query = QueryBuilder::new().sql(" +WITH + parent_silo AS (SELECT project.silo_id AS id FROM project WHERE project.id = ").param().sql("),") + .bind::(project_id).sql(" + all_collections + AS ( + ((SELECT ").param().sql(" AS id) UNION (SELECT parent_silo.id AS id FROM parent_silo)) + UNION (SELECT ").param().sql(" AS id) + ),") + .bind::(project_id) + .bind::(*crate::db::fixed_data::FLEET_ID) + .sql(" + quotas + AS ( + SELECT + silo_quotas.silo_id, + silo_quotas.cpus, + silo_quotas.memory_bytes AS memory, + silo_quotas.storage_bytes AS storage + FROM + silo_quotas INNER JOIN parent_silo ON 
silo_quotas.silo_id = parent_silo.id + ), + silo_provisioned + AS ( + SELECT + virtual_provisioning_collection.id, + virtual_provisioning_collection.cpus_provisioned, + virtual_provisioning_collection.ram_provisioned, + virtual_provisioning_collection.virtual_disk_bytes_provisioned + FROM + virtual_provisioning_collection + INNER JOIN parent_silo ON virtual_provisioning_collection.id = parent_silo.id + ),"); + + let query = match update_kind.clone() { + UpdateKind::InsertInstance(resource) | UpdateKind::InsertStorage(resource) => { + query.sql(" + do_update + AS ( + SELECT + ( + ( + ( + SELECT count(*) + FROM virtual_provisioning_resource + WHERE virtual_provisioning_resource.id = ").param().sql(" + LIMIT 1 + ) + = 0 + AND CAST( + IF( + ( + ").param().sql(" = 0 + OR (SELECT quotas.cpus FROM quotas LIMIT 1) + >= ( + (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT 1) + + ").param().sql(concatcp!(" + ) + ), + 'TRUE', + '", NOT_ENOUGH_CPUS_SENTINEL, "' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ( + ")).param().sql(" = 0 + OR (SELECT quotas.memory FROM quotas LIMIT 1) + >= ( + (SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT 1) + + ").param().sql(concatcp!(" + ) + ), + 'TRUE', + '", NOT_ENOUGH_MEMORY_SENTINEL, "' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ( + ")).param().sql(" = 0 + OR (SELECT quotas.storage FROM quotas LIMIT 1) + >= ( + ( + SELECT + silo_provisioned.virtual_disk_bytes_provisioned + FROM + silo_provisioned + LIMIT + 1 + ) + + ").param().sql(concatcp!(" + ) + ), + 'TRUE', + '", NOT_ENOUGH_STORAGE_SENTINEL, "' + ) + AS BOOL + ) + AS update + ),")) + .bind::(resource.id) + .bind::(resource.cpus_provisioned) + .bind::(resource.cpus_provisioned) + .bind::(resource.ram_provisioned) + .bind::(resource.ram_provisioned) + .bind::(resource.virtual_disk_bytes_provisioned) + .bind::(resource.virtual_disk_bytes_provisioned) + }, + UpdateKind::DeleteInstance { id, .. } | UpdateKind::DeleteStorage { id, .. 
} => { + query.sql(" + do_update + AS ( + SELECT + ( + SELECT + count(*) + FROM + virtual_provisioning_resource + WHERE + virtual_provisioning_resource.id = ").param().sql(" + LIMIT + 1 + ) + = 1 + AS update + ),") + .bind::(id) + }, + }; - let do_update = match update_kind { - UpdateKind::Insert(resource) => { - DoUpdate::new_for_insert(&silo_provisioned, "as, resource) + let query = match update_kind.clone() { + UpdateKind::InsertInstance(resource) + | UpdateKind::InsertStorage(resource) => query + .sql( + " + unused_cte_arm + AS ( + INSERT + INTO + virtual_provisioning_resource + ( + id, + time_modified, + resource_type, + virtual_disk_bytes_provisioned, + cpus_provisioned, + ram_provisioned + ) + VALUES + (", + ) + .param() + .sql(", DEFAULT, ") + .param() + .sql(", ") + .param() + .sql(", ") + .param() + .sql(", ") + .param() + .sql( + ") + ON CONFLICT + DO + NOTHING + RETURNING ", + ) + .sql(AllColumnsOfVirtualResource::with_prefix( + "virtual_provisioning_resource", + )) + .sql("),") + .bind::(resource.id) + .bind::(resource.resource_type) + .bind::( + resource.virtual_disk_bytes_provisioned, + ) + .bind::(resource.cpus_provisioned) + .bind::(resource.ram_provisioned), + UpdateKind::DeleteInstance { id, max_instance_gen, .. } => { + // The filter condition here ensures that the provisioning record is + // only deleted if the corresponding instance has a generation + // number less than the supplied `max_instance_gen`. This allows a + // caller that is about to apply an instance update that will stop + // the instance and that bears generation G to avoid deleting + // resources if the instance generation was already advanced to or + // past G. 
+ // + // If the relevant instance ID is not in the database, then some + // other operation must have ensured the instance was previously + // stopped (because that's the only way it could have been deleted), + // and that operation should have cleaned up the resources already, + // in which case there's nothing to do here. + query + .sql( + " + unused_cte_arm + AS ( + DELETE FROM + virtual_provisioning_resource + WHERE + virtual_provisioning_resource.id = ", + ) + .param() + .sql( + " + AND + virtual_provisioning_resource.id = ( + SELECT instance.id FROM instance WHERE + instance.id = ", + ) + .param() + .sql( + " AND + instance.state_generation < ", + ) + .param() + .sql( + " LIMIT 1) + RETURNING ", + ) + .sql(AllColumnsOfVirtualResource::with_prefix( + "virtual_provisioning_resource", + )) + .sql("),") + .bind::(id) + .bind::(id) + .bind::(max_instance_gen) } - UpdateKind::Delete(id) => DoUpdate::new_for_delete(id), + UpdateKind::DeleteStorage { id, .. } => query + .sql( + " + unused_cte_arm + AS ( + DELETE FROM + virtual_provisioning_resource + WHERE + virtual_provisioning_resource.id = ", + ) + .param() + .sql( + " + RETURNING ", + ) + .sql(AllColumnsOfVirtualResource::with_prefix( + "virtual_provisioning_resource", + )) + .sql("),") + .bind::(id), }; - let updated_collections = - UpdatedProvisions::new(&all_collections, &do_update, values); - - // TODO: Do we want to select from "all_collections" instead? Seems more - // idempotent; it'll work even when we don't update anything... 
- let final_select = Box::new( - updated_collections - .query_source() - .select(VirtualProvisioningCollection::as_select()), + let query = query.sql( + " + virtual_provisioning_collection + AS ( + UPDATE + virtual_provisioning_collection + SET", ); + let query = match update_kind.clone() { + UpdateKind::InsertInstance(resource) => query + .sql( + " + time_modified = current_timestamp(), + cpus_provisioned = virtual_provisioning_collection.cpus_provisioned + ", + ) + .param() + .sql( + ", + ram_provisioned = virtual_provisioning_collection.ram_provisioned + ", + ) + .param() + .bind::(resource.cpus_provisioned) + .bind::(resource.ram_provisioned), + UpdateKind::InsertStorage(resource) => query + .sql( + " + time_modified = current_timestamp(), + virtual_disk_bytes_provisioned + = virtual_provisioning_collection.virtual_disk_bytes_provisioned + ", + ) + .param() + .bind::( + resource.virtual_disk_bytes_provisioned, + ), + UpdateKind::DeleteInstance { cpus_diff, ram_diff, .. } => query + .sql( + " + time_modified = current_timestamp(), + cpus_provisioned = virtual_provisioning_collection.cpus_provisioned - ", + ) + .param() + .sql( + ", + ram_provisioned = virtual_provisioning_collection.ram_provisioned - ", + ) + .param() + .bind::(cpus_diff) + .bind::(ram_diff), + UpdateKind::DeleteStorage { disk_byte_diff, .. 
} => query + .sql( + " + time_modified = current_timestamp(), + virtual_disk_bytes_provisioned + = virtual_provisioning_collection.virtual_disk_bytes_provisioned - ", + ) + .param() + .bind::(disk_byte_diff), + }; - let cte = CteBuilder::new() - .add_subquery(parent_silo) - .add_subquery(all_collections) - .add_subquery(quotas) - .add_subquery(silo_provisioned) - .add_subquery(do_update) - .add_subquery(update) - .add_subquery(updated_collections) - .build(final_select); - - Self { cte } + query.sql(" + WHERE + virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections) + AND (SELECT do_update.update FROM do_update LIMIT 1) + RETURNING " + ).sql(AllColumnsOfVirtualCollection::with_prefix("virtual_provisioning_collection")).sql(" + ) +SELECT " + ).sql(AllColumnsOfVirtualCollection::with_prefix("virtual_provisioning_collection")).sql(" +FROM + virtual_provisioning_collection +").query() } pub fn new_insert_storage( @@ -466,62 +445,22 @@ impl VirtualProvisioningCollectionUpdate { disk_byte_diff: ByteCount, project_id: uuid::Uuid, storage_type: crate::db::datastore::StorageType, - ) -> Self { - use virtual_provisioning_collection::dsl as collection_dsl; - use virtual_provisioning_resource::dsl as resource_dsl; - + ) -> TypedSqlQuery> { let mut provision = VirtualProvisioningResource::new(id, storage_type.into()); provision.virtual_disk_bytes_provisioned = disk_byte_diff; - Self::apply_update( - UpdateKind::Insert(provision.clone()), - // The query to actually insert the record. - UnreferenceableSubquery( - diesel::insert_into( - resource_dsl::virtual_provisioning_resource, - ) - .values(provision) - .on_conflict_do_nothing() - .returning(virtual_provisioning_resource::all_columns), - ), - // Within this project, silo, fleet... - project_id, - // ... We add the disk usage. 
- ( - collection_dsl::time_modified.eq(diesel::dsl::now), - collection_dsl::virtual_disk_bytes_provisioned - .eq(collection_dsl::virtual_disk_bytes_provisioned - + disk_byte_diff), - ), - ) + Self::apply_update(UpdateKind::InsertStorage(provision), project_id) } pub fn new_delete_storage( id: uuid::Uuid, disk_byte_diff: ByteCount, project_id: uuid::Uuid, - ) -> Self { - use virtual_provisioning_collection::dsl as collection_dsl; - use virtual_provisioning_resource::dsl as resource_dsl; - + ) -> TypedSqlQuery> { Self::apply_update( - UpdateKind::Delete(id), - // The query to actually delete the record. - UnreferenceableSubquery( - diesel::delete(resource_dsl::virtual_provisioning_resource) - .filter(resource_dsl::id.eq(id)) - .returning(virtual_provisioning_resource::all_columns), - ), - // Within this project, silo, fleet... + UpdateKind::DeleteStorage { id, disk_byte_diff }, project_id, - // ... We subtract the disk usage. - ( - collection_dsl::time_modified.eq(diesel::dsl::now), - collection_dsl::virtual_disk_bytes_provisioned - .eq(collection_dsl::virtual_disk_bytes_provisioned - - disk_byte_diff), - ), ) } @@ -530,10 +469,7 @@ impl VirtualProvisioningCollectionUpdate { cpus_diff: i64, ram_diff: ByteCount, project_id: uuid::Uuid, - ) -> Self { - use virtual_provisioning_collection::dsl as collection_dsl; - use virtual_provisioning_resource::dsl as resource_dsl; - + ) -> TypedSqlQuery> { let mut provision = VirtualProvisioningResource::new( id, ResourceTypeProvisioned::Instance, @@ -541,28 +477,7 @@ impl VirtualProvisioningCollectionUpdate { provision.cpus_provisioned = cpus_diff; provision.ram_provisioned = ram_diff; - Self::apply_update( - UpdateKind::Insert(provision.clone()), - // The query to actually insert the record. 
- UnreferenceableSubquery( - diesel::insert_into( - resource_dsl::virtual_provisioning_resource, - ) - .values(provision) - .on_conflict_do_nothing() - .returning(virtual_provisioning_resource::all_columns), - ), - // Within this project, silo, fleet... - project_id, - // ... We update the resource usage. - ( - collection_dsl::time_modified.eq(diesel::dsl::now), - collection_dsl::cpus_provisioned - .eq(collection_dsl::cpus_provisioned + cpus_diff), - collection_dsl::ram_provisioned - .eq(collection_dsl::ram_provisioned + ram_diff), - ), - ) + Self::apply_update(UpdateKind::InsertInstance(provision), project_id) } pub fn new_delete_instance( @@ -571,86 +486,26 @@ impl VirtualProvisioningCollectionUpdate { cpus_diff: i64, ram_diff: ByteCount, project_id: uuid::Uuid, - ) -> Self { - use crate::db::schema::instance::dsl as instance_dsl; - use virtual_provisioning_collection::dsl as collection_dsl; - use virtual_provisioning_resource::dsl as resource_dsl; - + ) -> TypedSqlQuery> { Self::apply_update( - UpdateKind::Delete(id), - // The query to actually delete the record. - // - // The filter condition here ensures that the provisioning record is - // only deleted if the corresponding instance has a generation - // number less than the supplied `max_instance_gen`. This allows a - // caller that is about to apply an instance update that will stop - // the instance and that bears generation G to avoid deleting - // resources if the instance generation was already advanced to or - // past G. - // - // If the relevant instance ID is not in the database, then some - // other operation must have ensured the instance was previously - // stopped (because that's the only way it could have been deleted), - // and that operation should have cleaned up the resources already, - // in which case there's nothing to do here. - // - // There is an additional "direct" filter on the target resource ID - // to avoid a full scan of the resource table. 
- UnreferenceableSubquery( - diesel::delete(resource_dsl::virtual_provisioning_resource) - .filter(resource_dsl::id.eq(id)) - .filter( - resource_dsl::id.nullable().eq(instance_dsl::instance - .filter(instance_dsl::id.eq(id)) - .filter( - instance_dsl::state_generation - .lt(max_instance_gen), - ) - .select(instance_dsl::id) - .single_value()), - ) - .returning(virtual_provisioning_resource::all_columns), - ), - // Within this project, silo, fleet... + UpdateKind::DeleteInstance { + id, + max_instance_gen, + cpus_diff, + ram_diff, + }, project_id, - // ... We update the resource usage. - ( - collection_dsl::time_modified.eq(diesel::dsl::now), - collection_dsl::cpus_provisioned - .eq(collection_dsl::cpus_provisioned - cpus_diff), - collection_dsl::ram_provisioned - .eq(collection_dsl::ram_provisioned - ram_diff), - ), ) } } -impl QueryFragment for VirtualProvisioningCollectionUpdate { - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - out.unsafe_to_cache_prepared(); - - self.cte.walk_ast(out.reborrow())?; - Ok(()) - } -} - -type SelectableSql = < - >::SelectExpression as diesel::Expression ->::SqlType; - -impl Query for VirtualProvisioningCollectionUpdate { - type SqlType = SelectableSql; -} - -impl RunQueryDsl for VirtualProvisioningCollectionUpdate {} - #[cfg(test)] mod test { use super::*; + use crate::db::explain::ExplainableAsync; use crate::db::raw_query_builder::expectorate_query_contents; + use nexus_test_utils::db::test_setup_database; + use omicron_test_utils::dev; use uuid::Uuid; // This test is a bit of a "change detector", but it's here to help with @@ -713,4 +568,68 @@ mod test { "tests/output/virtual_provisioning_collection_update_delete_instance.sql", ).await; } + + // Explain the possible forms of the SQL query to ensure that it + // creates a valid SQL string. 
+ #[tokio::test] + async fn explainable() { + let logctx = dev::test_setup_log("explainable"); + let log = logctx.log.new(o!()); + let mut db = test_setup_database(&log).await; + let cfg = crate::db::Config { url: db.pg_config().clone() }; + let pool = crate::db::Pool::new(&logctx.log, &cfg); + let conn = pool.pool().get().await.unwrap(); + + let id = Uuid::nil(); + let max_instance_gen = 0; + let project_id = Uuid::nil(); + let cpus_diff = 16.try_into().unwrap(); + let ram_diff = 2048.try_into().unwrap(); + let disk_byte_diff = 2048.try_into().unwrap(); + let storage_type = crate::db::datastore::StorageType::Disk; + + let query = VirtualProvisioningCollectionUpdate::new_insert_storage( + id, + disk_byte_diff, + project_id, + storage_type, + ); + let _ = query + .explain_async(&conn) + .await + .expect("Failed to explain query - is it valid SQL?"); + + let query = VirtualProvisioningCollectionUpdate::new_delete_storage( + id, + disk_byte_diff, + project_id, + ); + let _ = query + .explain_async(&conn) + .await + .expect("Failed to explain query - is it valid SQL?"); + + let query = VirtualProvisioningCollectionUpdate::new_insert_instance( + id, cpus_diff, ram_diff, project_id, + ); + let _ = query + .explain_async(&conn) + .await + .expect("Failed to explain query - is it valid SQL?"); + + let query = VirtualProvisioningCollectionUpdate::new_delete_instance( + id, + max_instance_gen, + cpus_diff, + ram_diff, + project_id, + ); + let _ = query + .explain_async(&conn) + .await + .expect("Failed to explain query - is it valid SQL?"); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql index fcabefef26..48094a8371 100644 --- a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql +++ 
b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_instance.sql @@ -37,9 +37,9 @@ WITH WHERE virtual_provisioning_resource.id = $4 LIMIT - $5 + 1 ) - = $6 + = 1 AS update ), unused_cte_arm @@ -47,7 +47,7 @@ WITH DELETE FROM virtual_provisioning_resource WHERE - virtual_provisioning_resource.id = $7 + virtual_provisioning_resource.id = $5 AND virtual_provisioning_resource.id = ( SELECT @@ -55,9 +55,9 @@ WITH FROM instance WHERE - instance.id = $8 AND instance.state_generation < $9 + instance.id = $6 AND instance.state_generation < $7 LIMIT - $10 + 1 ) RETURNING virtual_provisioning_resource.id, @@ -73,11 +73,11 @@ WITH virtual_provisioning_collection SET time_modified = current_timestamp(), - cpus_provisioned = virtual_provisioning_collection.cpus_provisioned - $11, - ram_provisioned = virtual_provisioning_collection.ram_provisioned - $12 + cpus_provisioned = virtual_provisioning_collection.cpus_provisioned - $8, + ram_provisioned = virtual_provisioning_collection.ram_provisioned - $9 WHERE virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections) - AND (SELECT do_update.update FROM do_update LIMIT $13) + AND (SELECT do_update.update FROM do_update LIMIT 1) RETURNING virtual_provisioning_collection.id, virtual_provisioning_collection.time_modified, diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql index 72c0b81e15..b607ac4185 100644 --- a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql +++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_delete_storage.sql @@ -37,9 +37,9 @@ WITH WHERE virtual_provisioning_resource.id = $4 LIMIT - $5 + 1 ) - = $6 + = 1 AS update ), unused_cte_arm @@ -47,7 +47,7 @@ WITH DELETE FROM virtual_provisioning_resource WHERE - virtual_provisioning_resource.id = $7 + 
virtual_provisioning_resource.id = $5 RETURNING virtual_provisioning_resource.id, virtual_provisioning_resource.time_modified, @@ -63,10 +63,10 @@ WITH SET time_modified = current_timestamp(), virtual_disk_bytes_provisioned - = virtual_provisioning_collection.virtual_disk_bytes_provisioned - $8 + = virtual_provisioning_collection.virtual_disk_bytes_provisioned - $6 WHERE virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections) - AND (SELECT do_update.update FROM do_update LIMIT $9) + AND (SELECT do_update.update FROM do_update LIMIT 1) RETURNING virtual_provisioning_collection.id, virtual_provisioning_collection.time_modified, diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql index 753b7f09f3..38f10a7148 100644 --- a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql +++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_instance.sql @@ -39,17 +39,17 @@ WITH WHERE virtual_provisioning_resource.id = $4 LIMIT - $5 + 1 ) - = $6 + = 0 AND CAST( IF( ( - $7 = $8 - OR (SELECT quotas.cpus FROM quotas LIMIT $9) + $5 = 0 + OR (SELECT quotas.cpus FROM quotas LIMIT 1) >= ( - (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT $10) - + $11 + (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT 1) + + $6 ) ), 'TRUE', @@ -61,11 +61,10 @@ WITH AND CAST( IF( ( - $12 = $13 - OR (SELECT quotas.memory FROM quotas LIMIT $14) + $7 = 0 + OR (SELECT quotas.memory FROM quotas LIMIT 1) >= ( - (SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT $15) - + $16 + (SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT 1) + $8 ) ), 'TRUE', @@ -77,8 +76,8 @@ WITH AND CAST( IF( ( - $17 = $18 - OR (SELECT quotas.storage FROM quotas LIMIT $19) + $9 = 0 + OR (SELECT quotas.storage FROM quotas LIMIT 1) 
>= ( ( SELECT @@ -86,9 +85,9 @@ WITH FROM silo_provisioned LIMIT - $20 + 1 ) - + $21 + + $10 ) ), 'TRUE', @@ -112,7 +111,7 @@ WITH ram_provisioned ) VALUES - ($22, DEFAULT, $23, $24, $25, $26) + ($11, DEFAULT, $12, $13, $14, $15) ON CONFLICT DO NOTHING @@ -130,11 +129,11 @@ WITH virtual_provisioning_collection SET time_modified = current_timestamp(), - cpus_provisioned = virtual_provisioning_collection.cpus_provisioned + $27, - ram_provisioned = virtual_provisioning_collection.ram_provisioned + $28 + cpus_provisioned = virtual_provisioning_collection.cpus_provisioned + $16, + ram_provisioned = virtual_provisioning_collection.ram_provisioned + $17 WHERE virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections) - AND (SELECT do_update.update FROM do_update LIMIT $29) + AND (SELECT do_update.update FROM do_update LIMIT 1) RETURNING virtual_provisioning_collection.id, virtual_provisioning_collection.time_modified, diff --git a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql index 040a5dc20c..87cd227ed9 100644 --- a/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql +++ b/nexus/db-queries/tests/output/virtual_provisioning_collection_update_insert_storage.sql @@ -39,17 +39,17 @@ WITH WHERE virtual_provisioning_resource.id = $4 LIMIT - $5 + 1 ) - = $6 + = 0 AND CAST( IF( ( - $7 = $8 - OR (SELECT quotas.cpus FROM quotas LIMIT $9) + $5 = 0 + OR (SELECT quotas.cpus FROM quotas LIMIT 1) >= ( - (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT $10) - + $11 + (SELECT silo_provisioned.cpus_provisioned FROM silo_provisioned LIMIT 1) + + $6 ) ), 'TRUE', @@ -61,11 +61,10 @@ WITH AND CAST( IF( ( - $12 = $13 - OR (SELECT quotas.memory FROM quotas LIMIT $14) + $7 = 0 + OR (SELECT quotas.memory FROM quotas LIMIT 1) >= ( - (SELECT silo_provisioned.ram_provisioned FROM 
silo_provisioned LIMIT $15) - + $16 + (SELECT silo_provisioned.ram_provisioned FROM silo_provisioned LIMIT 1) + $8 ) ), 'TRUE', @@ -77,8 +76,8 @@ WITH AND CAST( IF( ( - $17 = $18 - OR (SELECT quotas.storage FROM quotas LIMIT $19) + $9 = 0 + OR (SELECT quotas.storage FROM quotas LIMIT 1) >= ( ( SELECT @@ -86,9 +85,9 @@ WITH FROM silo_provisioned LIMIT - $20 + 1 ) - + $21 + + $10 ) ), 'TRUE', @@ -112,7 +111,7 @@ WITH ram_provisioned ) VALUES - ($22, DEFAULT, $23, $24, $25, $26) + ($11, DEFAULT, $12, $13, $14, $15) ON CONFLICT DO NOTHING @@ -131,10 +130,10 @@ WITH SET time_modified = current_timestamp(), virtual_disk_bytes_provisioned - = virtual_provisioning_collection.virtual_disk_bytes_provisioned + $27 + = virtual_provisioning_collection.virtual_disk_bytes_provisioned + $16 WHERE virtual_provisioning_collection.id = ANY (SELECT all_collections.id FROM all_collections) - AND (SELECT do_update.update FROM do_update LIMIT $28) + AND (SELECT do_update.update FROM do_update LIMIT 1) RETURNING virtual_provisioning_collection.id, virtual_provisioning_collection.time_modified, From c77a16760f38ff69364720e43bd42e74ce8899b8 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 26 Mar 2024 14:25:29 -0700 Subject: [PATCH 16/19] feedback --- nexus/db-queries/src/db/queries/region_allocation.rs | 11 ++++++----- nexus/db-queries/src/db/raw_query_builder.rs | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 776fc6c34b..2e4f4cd776 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -243,11 +243,12 @@ pub fn allocation_query( // // Selecting two datasets on the same zpool will not initially be // possible, as at the time of writing each zpool only has one dataset. 
- // Additionally, we intend to modify the allocation strategy to select - // from 3 distinct sleds, removing the possibility entirely. But, if we - // introduce a change that adds another crucible dataset to zpools - // before we improve the allocation strategy, this check will make sure - // we don't violate drive redundancy, and generate an error instead. + // Additionally, provide a configuration option ("distinct_sleds") to modify + // the allocation strategy to select from 3 distinct sleds, removing the + // possibility entirely. But, if we introduce a change that adds another + // crucible dataset to zpools before we improve the allocation strategy, + // this check will make sure we don't violate drive redundancy, and generate + // an error instead. .sql(" do_insert AS ( SELECT ((( diff --git a/nexus/db-queries/src/db/raw_query_builder.rs b/nexus/db-queries/src/db/raw_query_builder.rs index 65ff22f86f..5c803e20ac 100644 --- a/nexus/db-queries/src/db/raw_query_builder.rs +++ b/nexus/db-queries/src/db/raw_query_builder.rs @@ -159,7 +159,7 @@ impl QueryBuilder { } /// Diesel's [diesel::query_builder::BoxedSqlQuery] has a few drawbacks that -/// make wrapper more palatable: +/// make this wrapper more palatable: /// /// - It always implements "Query" with SqlType = Untyped, so a caller could try to /// execute this query and get back any type. 
From f2937a3a0785a96345809ff55a971a0af125d0d9 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 28 May 2024 11:26:22 -0700 Subject: [PATCH 17/19] merging --- nexus/db-model/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index c57836a567..51fd0f6c9e 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -62,7 +62,6 @@ mod v2p_mapping; mod deployment; mod ipv4_nat_entry; mod omicron_zone_config; -pub mod queries; mod quota; mod rack; mod region; From 6dda946f0f00aa8402798bcfda32bf18264696d5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 29 May 2024 11:15:30 -0700 Subject: [PATCH 18/19] Split tests --- .../virtual_provisioning_collection_update.rs | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs index 934f1ce078..09798e4e5d 100644 --- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs +++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs @@ -653,11 +653,12 @@ mod test { use crate::db::raw_query_builder::expectorate_query_contents; use uuid::Uuid; - // This test is a bit of a "change detector", but it's here to help with - // debugging too. If you change this query, it can be useful to see exactly - // how the output SQL has been altered. + // These tests are a bit of a "change detector", but they're here to help + // with debugging too. If you change this query, it can be useful to see + // exactly how the output SQL has been altered. 
+ #[tokio::test] - async fn expectorate_query() { + async fn expectorate_query_insert_storage() { let id = Uuid::nil(); let project_id = Uuid::nil(); let disk_byte_diff = 2048.try_into().unwrap(); @@ -669,11 +670,17 @@ mod test { project_id, storage_type, ); - expectorate_query_contents( &query, "tests/output/virtual_provisioning_collection_update_insert_storage.sql", ).await; + } + + #[tokio::test] + async fn expectorate_query_delete_storage() { + let id = Uuid::nil(); + let project_id = Uuid::nil(); + let disk_byte_diff = 2048.try_into().unwrap(); let query = VirtualProvisioningCollectionUpdate::new_delete_storage( id, @@ -685,7 +692,12 @@ mod test { &query, "tests/output/virtual_provisioning_collection_update_delete_storage.sql", ).await; + } + #[tokio::test] + async fn expectorate_query_insert_instance() { + let id = Uuid::nil(); + let project_id = Uuid::nil(); let cpus_diff = 4; let ram_diff = 2048.try_into().unwrap(); @@ -697,7 +709,14 @@ mod test { &query, "tests/output/virtual_provisioning_collection_update_insert_instance.sql", ).await; + } + #[tokio::test] + async fn expectorate_query_delete_instance() { + let id = Uuid::nil(); + let project_id = Uuid::nil(); + let cpus_diff = 4; + let ram_diff = 2048.try_into().unwrap(); let max_instance_gen = 0; let query = VirtualProvisioningCollectionUpdate::new_delete_instance( From 7b7571e95f610fd51f798c8441a812aa59c73022 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 29 May 2024 12:17:39 -0700 Subject: [PATCH 19/19] Split up explain tests --- .../virtual_provisioning_collection_update.rs | 62 +++++++++++++++++-- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs index a18481ff09..156691866e 100644 --- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs +++ 
b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs @@ -590,9 +590,10 @@ mod test { // Explain the possible forms of the SQL query to ensure that it // creates a valid SQL string. + #[tokio::test] - async fn explainable() { - let logctx = dev::test_setup_log("explainable"); + async fn explain_insert_storage() { + let logctx = dev::test_setup_log("explain_insert_storage"); let log = logctx.log.new(o!()); let mut db = test_setup_database(&log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; @@ -600,10 +601,7 @@ mod test { let conn = pool.pool().get().await.unwrap(); let id = Uuid::nil(); - let max_instance_gen = 0; let project_id = Uuid::nil(); - let cpus_diff = 16.try_into().unwrap(); - let ram_diff = 2048.try_into().unwrap(); let disk_byte_diff = 2048.try_into().unwrap(); let storage_type = crate::db::datastore::StorageType::Disk; @@ -618,6 +616,23 @@ mod test { .await .expect("Failed to explain query - is it valid SQL?"); + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn explain_delete_storage() { + let logctx = dev::test_setup_log("explain_delete_storage"); + let log = logctx.log.new(o!()); + let mut db = test_setup_database(&log).await; + let cfg = crate::db::Config { url: db.pg_config().clone() }; + let pool = crate::db::Pool::new(&logctx.log, &cfg); + let conn = pool.pool().get().await.unwrap(); + + let id = Uuid::nil(); + let project_id = Uuid::nil(); + let disk_byte_diff = 2048.try_into().unwrap(); + let query = VirtualProvisioningCollectionUpdate::new_delete_storage( id, disk_byte_diff, @@ -628,6 +643,24 @@ mod test { .await .expect("Failed to explain query - is it valid SQL?"); + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn explain_insert_instance() { + let logctx = dev::test_setup_log("explain_insert_instance"); + let log = logctx.log.new(o!()); + let mut db = test_setup_database(&log).await; + let cfg = 
crate::db::Config { url: db.pg_config().clone() }; + let pool = crate::db::Pool::new(&logctx.log, &cfg); + let conn = pool.pool().get().await.unwrap(); + + let id = Uuid::nil(); + let project_id = Uuid::nil(); + let cpus_diff = 16.try_into().unwrap(); + let ram_diff = 2048.try_into().unwrap(); + let query = VirtualProvisioningCollectionUpdate::new_insert_instance( id, cpus_diff, ram_diff, project_id, ); @@ -636,6 +669,25 @@ mod test { .await .expect("Failed to explain query - is it valid SQL?"); + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn explain_delete_instance() { + let logctx = dev::test_setup_log("explain_delete_instance"); + let log = logctx.log.new(o!()); + let mut db = test_setup_database(&log).await; + let cfg = crate::db::Config { url: db.pg_config().clone() }; + let pool = crate::db::Pool::new(&logctx.log, &cfg); + let conn = pool.pool().get().await.unwrap(); + + let id = Uuid::nil(); + let max_instance_gen = 0; + let project_id = Uuid::nil(); + let cpus_diff = 16.try_into().unwrap(); + let ram_diff = 2048.try_into().unwrap(); + let query = VirtualProvisioningCollectionUpdate::new_delete_instance( id, max_instance_gen,