Include enums in dbinit_equal_sum_of_all_up test (#4472)
While working on #4261 I ~~ran into~~ created a situation where the
order of items in a DB enum matters because I want to sort by it. In
this case it's about specificity — the type is `silo` or `fleet` and I
want to sort by it to ensure I get the most "specific" thing first.
@zephraph noticed I had an order mismatch between my migration adding
the enum and the copy in `dbinit.sql`, and it wasn't caught by the
tests.

So, here I've added the list of user-defined enums to the set of things
we compare when we ask whether the result of our migrations matches the
contents of `dbinit.sql`. There were two existing enums that failed the
test because the copy in `dbinit.sql` was sorted alphabetically, but a
migration added a new item at the end of the list. Very easy to fix,
though now our enums aren't sorted nicely, which is sad.

The question is: should we enforce enum order? Seems like we might as
well if it doesn't cost us much. But should we write application logic
like #4261 that relies on enum order? In that case, at least, I have a
reasonable alternative because the list of things I'm sorting is at most
2 long, so I can easily get the whole list and pick the element I want
in app code instead of doing an `order by` + `limit 1` in SQL. The
reliance on enum order is made quite clear in a comment, but it still
feels a little dicey.
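
For illustration, this is the shape of the pattern in question (a hypothetical sketch, not the actual query from #4261; the table and column names are made up):

```sql
-- Hypothetical: relies on 'silo' being declared before 'fleet' in the enum,
-- so ascending order puts the most specific scope first.
SELECT *
FROM instance_quota            -- made-up table for illustration
WHERE name = 'default'
ORDER BY scope_kind ASC        -- enum column with members 'silo', 'fleet'
LIMIT 1;
```

The app-code alternative drops the `ORDER BY` + `LIMIT 1`, fetches both rows (there are at most two), and picks the `silo` one when present.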
david-crespo authored Nov 10, 2023
1 parent fae8f46 commit 0315715
Showing 16 changed files with 98 additions and 32 deletions.
2 changes: 1 addition & 1 deletion nexus/db-model/src/schema.rs
@@ -1243,7 +1243,7 @@ table! {
///
/// This should be updated whenever the schema is changed. For more details,
/// refer to: schema/crdb/README.adoc
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(9, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(10, 0, 0);

allow_tables_to_appear_in_same_query!(
system_update,
86 changes: 56 additions & 30 deletions nexus/tests/integration_tests/schema.rs
@@ -168,13 +168,15 @@ async fn query_crdb_schema_version(crdb: &CockroachInstance) -> String {
//
// Note that for the purposes of schema comparisons, we don't care about parsing
// the contents of the database, merely the schema and equality of contained data.
#[derive(Eq, PartialEq, Clone, Debug)]
#[derive(PartialEq, Clone, Debug)]
enum AnySqlType {
DateTime,
String(String),
Bool(bool),
Uuid(Uuid),
Int8(i64),
Float4(f32),
TextArray(Vec<String>),
// TODO: This isn't exhaustive, feel free to add more.
//
// These should only be necessary for rows where the database schema changes also choose to
@@ -213,6 +215,14 @@ impl<'a> tokio_postgres::types::FromSql<'a> for AnySqlType {
if i64::accepts(ty) {
return Ok(AnySqlType::Int8(i64::from_sql(ty, raw)?));
}
if f32::accepts(ty) {
return Ok(AnySqlType::Float4(f32::from_sql(ty, raw)?));
}
if Vec::<String>::accepts(ty) {
return Ok(AnySqlType::TextArray(Vec::<String>::from_sql(
ty, raw,
)?));
}
Err(anyhow::anyhow!(
"Cannot parse type {ty}. If you're trying to use this type in a table which is populated \
during a schema migration, consider adding it to `AnySqlType`."
@@ -224,7 +234,7 @@ during a schema migration, consider adding it to `AnySqlType`."
}
}

#[derive(Eq, PartialEq, Debug)]
#[derive(PartialEq, Debug)]
struct NamedSqlValue {
// It's a little redundant to include the column name alongside each value,
// but it results in a prettier diff.
@@ -240,7 +250,7 @@ impl NamedSqlValue {
}

// A generic representation of a row of SQL data
#[derive(Eq, PartialEq, Debug)]
#[derive(PartialEq, Debug)]
struct Row {
values: Vec<NamedSqlValue>,
}
@@ -262,19 +272,7 @@ impl<'a> From<&'a [&'static str]> for ColumnSelector<'a> {
}
}

async fn crdb_show_constraints(
crdb: &CockroachInstance,
table: &str,
) -> Vec<Row> {
let client = crdb.connect().await.expect("failed to connect");

let sql = format!("SHOW CONSTRAINTS FROM {table}");
let rows = client
.query(&sql, &[])
.await
.unwrap_or_else(|_| panic!("failed to query {table}"));
client.cleanup().await.expect("cleaning up after wipe");

fn process_rows(rows: &Vec<tokio_postgres::Row>) -> Vec<Row> {
let mut result = vec![];
for row in rows {
let mut row_result = Row::new();
@@ -290,6 +288,22 @@ async fn crdb_show_constraints(
result
}

async fn crdb_show_constraints(
crdb: &CockroachInstance,
table: &str,
) -> Vec<Row> {
let client = crdb.connect().await.expect("failed to connect");

let sql = format!("SHOW CONSTRAINTS FROM {table}");
let rows = client
.query(&sql, &[])
.await
.unwrap_or_else(|_| panic!("failed to query {table}"));
client.cleanup().await.expect("cleaning up after wipe");

process_rows(&rows)
}

async fn crdb_select(
crdb: &CockroachInstance,
columns: ColumnSelector<'_>,
@@ -324,19 +338,20 @@ async fn crdb_select(
.unwrap_or_else(|_| panic!("failed to query {table}"));
client.cleanup().await.expect("cleaning up after wipe");

let mut result = vec![];
for row in rows {
let mut row_result = Row::new();
for i in 0..row.len() {
let column_name = row.columns()[i].name();
row_result.values.push(NamedSqlValue {
column: column_name.to_string(),
value: row.get(i),
});
}
result.push(row_result);
}
result
process_rows(&rows)
}

async fn crdb_list_enums(crdb: &CockroachInstance) -> Vec<Row> {
let client = crdb.connect().await.expect("failed to connect");

// https://www.cockroachlabs.com/docs/stable/show-enums
let rows = client
.query("show enums;", &[])
.await
.unwrap_or_else(|_| panic!("failed to list enums"));
client.cleanup().await.expect("cleaning up after wipe");

process_rows(&rows)
}

async fn read_all_schema_versions() -> BTreeSet<SemverVersion> {
@@ -569,10 +584,11 @@ const PG_INDEXES: [&'static str; 5] =
const TABLES: [&'static str; 4] =
["table_catalog", "table_schema", "table_name", "table_type"];

#[derive(Eq, PartialEq, Debug)]
#[derive(PartialEq, Debug)]
struct InformationSchema {
columns: Vec<Row>,
constraint_column_usage: Vec<Row>,
enums: Vec<Row>,
key_column_usage: Vec<Row>,
referential_constraints: Vec<Row>,
views: Vec<Row>,
@@ -589,6 +605,13 @@ impl InformationSchema {
// the columns diff especially needs this: it can be 20k lines otherwise
similar_asserts::assert_eq!(self.tables, other.tables);
similar_asserts::assert_eq!(self.columns, other.columns);
similar_asserts::assert_eq!(
self.enums,
other.enums,
"Enums did not match. Members must have the same order in dbinit.sql and \
migrations. If a migration adds a member, it should use BEFORE or AFTER \
to add it in the same order as dbinit.sql."
);
similar_asserts::assert_eq!(self.views, other.views);
similar_asserts::assert_eq!(
self.table_constraints,
@@ -624,6 +647,8 @@ impl InformationSchema {
)
.await;

let enums = crdb_list_enums(crdb).await;

let constraint_column_usage = crdb_select(
crdb,
CONSTRAINT_COLUMN_USAGE.as_slice().into(),
@@ -694,6 +719,7 @@ impl InformationSchema {
Self {
columns,
constraint_column_usage,
enums,
key_column_usage,
referential_constraints,
views,
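
As a reference for the `crdb_list_enums` helper added above: `SHOW ENUMS` lists every user-defined enum together with its members in declared order, which is what makes ordering drift visible in the test diff. A minimal sketch (column names per the CockroachDB docs linked in the code; output formatting is illustrative only):

```sql
-- Lists user-defined enums; the test captures these rows for both the
-- migrated database and a fresh one seeded from dbinit.sql, then diffs them.
SHOW ENUMS;
-- Returns one row per enum with columns: schema, name, values, owner.
-- The 'values' column holds the members in their declared order, so a
-- member added at the wrong position shows up as a mismatch.
```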
12 changes: 12 additions & 0 deletions schema/crdb/10.0.0/README.md
@@ -0,0 +1,12 @@
# Why?

This migration is part of a PR that adds a check to the schema tests ensuring that the order of enum members is the same when starting from scratch with `dbinit.sql` as it is when building up from existing deployments by running the migrations. The problem: there were already two enums, `dataset_kind` and `service_kind`, where the order did not match, so we have to fix that by putting the enums in the "right" order even on an existing deployment where the order is wrong. To do that, for each of those enums, we:

1. add `clickhouse_keeper2` member
1. change existing uses of `clickhouse_keeper` to `clickhouse_keeper2`
1. drop `clickhouse_keeper` member
1. add `clickhouse_keeper` back in the right order using `AFTER 'clickhouse'`
1. change uses of `clickhouse_keeper2` back to `clickhouse_keeper`
1. drop `clickhouse_keeper2`

As there are 6 steps here and two different enums to do them for, there are 12 `up*.sql` files.
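
Condensed, the `dataset_kind` half of that sequence looks like the sketch below (the `service_kind` half is identical; the actual migration keeps one statement per `up*.sql` file, as shown in the files that follow). The detour through a temporary member is needed because an existing enum member's position can't be changed in place, and a member can't be dropped while rows still reference it:

```sql
-- Sketch only; see up01.sql through up12.sql below for the real migration.
ALTER TYPE omicron.public.dataset_kind
    ADD VALUE IF NOT EXISTS 'clickhouse_keeper2' AFTER 'clickhouse';
UPDATE omicron.public.dataset
    SET kind = 'clickhouse_keeper2' WHERE kind = 'clickhouse_keeper';
ALTER TYPE omicron.public.dataset_kind DROP VALUE 'clickhouse_keeper';
ALTER TYPE omicron.public.dataset_kind
    ADD VALUE 'clickhouse_keeper' AFTER 'clickhouse';
UPDATE omicron.public.dataset
    SET kind = 'clickhouse_keeper' WHERE kind = 'clickhouse_keeper2';
ALTER TYPE omicron.public.dataset_kind DROP VALUE 'clickhouse_keeper2';
```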
1 change: 1 addition & 0 deletions schema/crdb/10.0.0/up01.sql
@@ -0,0 +1 @@
ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'clickhouse_keeper2' AFTER 'clickhouse';
1 change: 1 addition & 0 deletions schema/crdb/10.0.0/up02.sql
@@ -0,0 +1 @@
ALTER TYPE omicron.public.service_kind ADD VALUE IF NOT EXISTS 'clickhouse_keeper2' AFTER 'clickhouse';
5 changes: 5 additions & 0 deletions schema/crdb/10.0.0/up03.sql
@@ -0,0 +1,5 @@
set local disallow_full_table_scans = off;

UPDATE omicron.public.dataset
SET kind = 'clickhouse_keeper2'
WHERE kind = 'clickhouse_keeper';
5 changes: 5 additions & 0 deletions schema/crdb/10.0.0/up04.sql
@@ -0,0 +1,5 @@
set local disallow_full_table_scans = off;

UPDATE omicron.public.service
SET kind = 'clickhouse_keeper2'
WHERE kind = 'clickhouse_keeper';
1 change: 1 addition & 0 deletions schema/crdb/10.0.0/up05.sql
@@ -0,0 +1 @@
ALTER TYPE omicron.public.dataset_kind DROP VALUE 'clickhouse_keeper';
1 change: 1 addition & 0 deletions schema/crdb/10.0.0/up06.sql
@@ -0,0 +1 @@
ALTER TYPE omicron.public.service_kind DROP VALUE 'clickhouse_keeper';
1 change: 1 addition & 0 deletions schema/crdb/10.0.0/up07.sql
@@ -0,0 +1 @@
ALTER TYPE omicron.public.dataset_kind ADD VALUE 'clickhouse_keeper' AFTER 'clickhouse';
1 change: 1 addition & 0 deletions schema/crdb/10.0.0/up08.sql
@@ -0,0 +1 @@
ALTER TYPE omicron.public.service_kind ADD VALUE 'clickhouse_keeper' AFTER 'clickhouse';
5 changes: 5 additions & 0 deletions schema/crdb/10.0.0/up09.sql
@@ -0,0 +1,5 @@
set local disallow_full_table_scans = off;

UPDATE omicron.public.dataset
SET kind = 'clickhouse_keeper'
WHERE kind = 'clickhouse_keeper2';
5 changes: 5 additions & 0 deletions schema/crdb/10.0.0/up10.sql
@@ -0,0 +1,5 @@
set local disallow_full_table_scans = off;

UPDATE omicron.public.service
SET kind = 'clickhouse_keeper'
WHERE kind = 'clickhouse_keeper2';
1 change: 1 addition & 0 deletions schema/crdb/10.0.0/up11.sql
@@ -0,0 +1 @@
ALTER TYPE omicron.public.dataset_kind DROP VALUE 'clickhouse_keeper2';
1 change: 1 addition & 0 deletions schema/crdb/10.0.0/up12.sql
@@ -0,0 +1 @@
ALTER TYPE omicron.public.service_kind DROP VALUE 'clickhouse_keeper2';
2 changes: 1 addition & 1 deletion schema/crdb/dbinit.sql
@@ -2838,7 +2838,7 @@ INSERT INTO omicron.public.db_metadata (
version,
target_version
) VALUES
( TRUE, NOW(), NOW(), '9.0.0', NULL)
( TRUE, NOW(), NOW(), '10.0.0', NULL)
ON CONFLICT DO NOTHING;

COMMIT;
