Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make dbinit.sql slightly less order-dependent #4288

Merged
merged 5 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 115 additions & 60 deletions nexus/tests/integration_tests/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,35 @@ impl<'a> From<&'a [&'static str]> for ColumnSelector<'a> {
}
}

async fn query_crdb_for_rows_of_strings(
/// Runs `SHOW CONSTRAINTS FROM <table>` against CockroachDB and converts
/// each returned row into our comparable [`Row`] representation.
///
/// This is test-only code: any connection, query, or cleanup failure
/// panics so the enclosing test aborts immediately.
async fn crdb_show_constraints(
    crdb: &CockroachInstance,
    table: &str,
) -> Vec<Row> {
    let client = crdb.connect().await.expect("failed to connect");

    let sql = format!("SHOW CONSTRAINTS FROM {table}");
    let rows = client
        .query(&sql, &[])
        .await
        // Include the underlying error so a failing test names the cause,
        // not just the table being queried.
        .unwrap_or_else(|e| panic!("failed to query {table}: {e}"));
    client.cleanup().await.expect("cleaning up after wipe");

    // One output Row per result row; size the Vec up front.
    let mut result = Vec::with_capacity(rows.len());
    for row in rows {
        let mut row_result = Row::new();
        for i in 0..row.len() {
            let column_name = row.columns()[i].name();
            row_result.values.push(NamedSqlValue {
                column: column_name.to_string(),
                value: row.get(i),
            });
        }
        result.push(row_result);
    }
    result
}

async fn crdb_select(
crdb: &CockroachInstance,
columns: ColumnSelector<'_>,
table: &str,
Expand Down Expand Up @@ -453,22 +481,16 @@ async fn versions_have_idempotent_up() {
logctx.cleanup_successful();
}

const COLUMNS: [&'static str; 6] = [
/// Columns selected from `information_schema.columns` when comparing
/// database schemas.
// NOTE: `'static` is implied for references in a `const` item; spelling it
// out trips clippy's `redundant_static_lifetimes` lint.
const COLUMNS: [&str; 7] = [
    "table_catalog",
    "table_schema",
    "table_name",
    "column_name",
    "column_default",
    "is_nullable",
    "data_type",
];

const CHECK_CONSTRAINTS: [&'static str; 4] = [
"constraint_catalog",
"constraint_schema",
"constraint_name",
"check_clause",
];

const CONSTRAINT_COLUMN_USAGE: [&'static str; 7] = [
"table_catalog",
"table_schema",
Expand Down Expand Up @@ -538,22 +560,9 @@ const PG_INDEXES: [&'static str; 5] =
/// Columns selected from `information_schema.tables`.
///
/// The order here matters: `show_constraints_all_tables` indexes result
/// rows positionally (0 = table_catalog, 1 = table_schema, 2 = table_name,
/// 3 = table_type).
// `'static` is implied in a `const` item (clippy: redundant_static_lifetimes).
const TABLES: [&str; 4] =
    ["table_catalog", "table_schema", "table_name", "table_type"];

const TABLE_CONSTRAINTS: [&'static str; 9] = [
"constraint_catalog",
"constraint_schema",
"constraint_name",
"table_catalog",
"table_schema",
"table_name",
"constraint_type",
"is_deferrable",
"initially_deferred",
];

#[derive(Eq, PartialEq, Debug)]
struct InformationSchema {
columns: Vec<Row>,
check_constraints: Vec<Row>,
constraint_column_usage: Vec<Row>,
key_column_usage: Vec<Row>,
referential_constraints: Vec<Row>,
Expand All @@ -562,7 +571,7 @@ struct InformationSchema {
sequences: Vec<Row>,
pg_indexes: Vec<Row>,
tables: Vec<Row>,
table_constraints: Vec<Row>,
table_constraints: BTreeMap<String, Vec<Row>>,
}

impl InformationSchema {
Expand All @@ -576,10 +585,6 @@ impl InformationSchema {
self.table_constraints,
other.table_constraints
);
similar_asserts::assert_eq!(
self.check_constraints,
other.check_constraints
);
similar_asserts::assert_eq!(
self.constraint_column_usage,
other.constraint_column_usage
Expand All @@ -602,97 +607,83 @@ impl InformationSchema {
// https://www.cockroachlabs.com/docs/v23.1/information-schema
//
// For details on each of these tables.
let columns = query_crdb_for_rows_of_strings(
let columns = crdb_select(
crdb,
COLUMNS.as_slice().into(),
"information_schema.columns",
Some("table_schema = 'public'"),
)
.await;

let check_constraints = query_crdb_for_rows_of_strings(
crdb,
CHECK_CONSTRAINTS.as_slice().into(),
"information_schema.check_constraints",
None,
)
.await;

let constraint_column_usage = query_crdb_for_rows_of_strings(
let constraint_column_usage = crdb_select(
crdb,
CONSTRAINT_COLUMN_USAGE.as_slice().into(),
"information_schema.constraint_column_usage",
None,
)
.await;

let key_column_usage = query_crdb_for_rows_of_strings(
let key_column_usage = crdb_select(
crdb,
KEY_COLUMN_USAGE.as_slice().into(),
"information_schema.key_column_usage",
None,
)
.await;

let referential_constraints = query_crdb_for_rows_of_strings(
let referential_constraints = crdb_select(
crdb,
REFERENTIAL_CONSTRAINTS.as_slice().into(),
"information_schema.referential_constraints",
None,
)
.await;

let views = query_crdb_for_rows_of_strings(
let views = crdb_select(
crdb,
VIEWS.as_slice().into(),
"information_schema.views",
None,
)
.await;

let statistics = query_crdb_for_rows_of_strings(
let statistics = crdb_select(
crdb,
STATISTICS.as_slice().into(),
"information_schema.statistics",
None,
)
.await;

let sequences = query_crdb_for_rows_of_strings(
let sequences = crdb_select(
crdb,
SEQUENCES.as_slice().into(),
"information_schema.sequences",
None,
)
.await;

let pg_indexes = query_crdb_for_rows_of_strings(
let pg_indexes = crdb_select(
crdb,
PG_INDEXES.as_slice().into(),
"pg_indexes",
Some("schemaname = 'public'"),
)
.await;

let tables = query_crdb_for_rows_of_strings(
let tables = crdb_select(
crdb,
TABLES.as_slice().into(),
"information_schema.tables",
Some("table_schema = 'public'"),
)
.await;

let table_constraints = query_crdb_for_rows_of_strings(
crdb,
TABLE_CONSTRAINTS.as_slice().into(),
"information_schema.table_constraints",
Some("table_schema = 'public'"),
)
.await;
let table_constraints =
Self::show_constraints_all_tables(&tables, crdb).await;

Self {
columns,
check_constraints,
constraint_column_usage,
key_column_usage,
referential_constraints,
Expand All @@ -705,6 +696,33 @@ impl InformationSchema {
}
}

/// Runs `SHOW CONSTRAINTS` for every base table in `tables`, returning a
/// map from fully-qualified table name (`catalog.schema.name`) to that
/// table's constraint rows.
///
/// `tables` must contain rows selected with the [`TABLES`] column list, as
/// the values are accessed positionally below.
// Take `&[Row]` rather than `&Vec<Row>`: callers passing `&vec` coerce
// automatically, and the slice form is the idiomatic, more general API.
async fn show_constraints_all_tables(
    tables: &[Row],
    crdb: &CockroachInstance,
) -> BTreeMap<String, Vec<Row>> {
    let mut map = BTreeMap::new();

    for table in tables {
        let table = &table.values;
        let table_catalog =
            table[0].expect("table_catalog").unwrap().as_str();
        let table_schema =
            table[1].expect("table_schema").unwrap().as_str();
        let table_name = table[2].expect("table_name").unwrap().as_str();
        let table_type = table[3].expect("table_type").unwrap().as_str();

        // Views and other non-table objects have no constraints to show.
        if table_type != "BASE TABLE" {
            continue;
        }

        let table_name =
            format!("{}.{}.{}", table_catalog, table_schema, table_name);
        let rows = crdb_show_constraints(crdb, &table_name).await;
        map.insert(table_name, rows);
    }
    map
}

// This would normally be quite an expensive operation, but we expect it'll
// at least be slightly cheaper for the freshly populated DB, which
// shouldn't have that many records yet.
Expand All @@ -731,13 +749,9 @@ impl InformationSchema {
let table_name =
format!("{}.{}.{}", table_catalog, table_schema, table_name);
info!(log, "Querying table: {table_name}");
let rows = query_crdb_for_rows_of_strings(
crdb,
ColumnSelector::Star,
&table_name,
None,
)
.await;
let rows =
crdb_select(crdb, ColumnSelector::Star, &table_name, None)
.await;
info!(log, "Saw data: {rows:?}");
map.insert(table_name, rows);
}
Expand Down Expand Up @@ -1012,6 +1026,47 @@ async fn compare_table_differing_constraint() {
)
.await;

assert_ne!(schema1.check_constraints, schema2.check_constraints);
assert_ne!(schema1.table_constraints, schema2.table_constraints);
logctx.cleanup_successful();
}

// Regression test: creating the same tables in a different order must not
// change how the schemas compare. CRDB derives some internal values from
// object-creation order (presumably including generated NOT NULL
// constraint names — see schema/crdb/README.adoc), and the comparison is
// expected to ignore them.
#[tokio::test]
async fn compare_table_differing_not_null_order() {
let config = load_test_config();
let logctx = LogContext::new(
"compare_table_differing_not_null_order",
&config.pkg.log,
);
let log = &logctx.log;

// Schema 1: "pet" created before "employee".
let schema1 = get_information_schema(
log,
"
CREATE DATABASE omicron;
CREATE TABLE omicron.public.pet ( id UUID PRIMARY KEY );
CREATE TABLE omicron.public.employee (
id UUID PRIMARY KEY,
name TEXT NOT NULL,
hobbies TEXT
);
",
)
.await;

// Schema 2: identical tables, opposite creation order.
let schema2 = get_information_schema(
log,
"
CREATE DATABASE omicron;
CREATE TABLE omicron.public.employee (
id UUID PRIMARY KEY,
name TEXT NOT NULL,
hobbies TEXT
);
CREATE TABLE omicron.public.pet ( id UUID PRIMARY KEY );
",
)
.await;

// Despite the differing creation order, the schemas must compare equal.
schema1.pretty_assert_eq(&schema2);
logctx.cleanup_successful();
}
41 changes: 9 additions & 32 deletions schema/crdb/README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -73,38 +73,15 @@ SQL Validation, via Automated Tests:

==== Handling common schema changes

CockroachDB's schema includes a description of all of the database's CHECK
constraints. If a CHECK constraint is anonymous (i.e. it is written simply as
`CHECK <expression>` and not `CONSTRAINT <name> CHECK expression`), CRDB
assigns it a name based on the table and column to which the constraint applies.
The challenge is that CRDB identifies tables and columns using opaque
identifiers whose values depend on the order in which tables and views were
defined in the current database. This means that adding, removing, or renaming
objects needs to be done carefully to preserve the relative ordering of objects
in new databases created by `dbinit.sql` and upgraded databases created by
applying `up.sql` transformations.

===== Adding new columns with constraints

Strongly consider naming new constraints (`CONSTRAINT <name> <expression>`) to
avoid the problems with anonymous constraints described above.

===== Adding tables and views

New tables and views must be added to the end of `dbinit.sql` so that the order
of preceding `CREATE` statements is left unchanged. If your changes fail the
`CHECK` constraints test and you get a constraint name diff like this...

```
NamedSqlValue {
column: "constraint_name",
value: Some(
String(
< "4101115737_149_10_not_null",
> "4101115737_148_10_not_null",
```

...then you've probably inadvertently added a table or view in the wrong place.
Although CockroachDB's schema includes some opaque, internally generated
fields that are order-dependent — such as the names of anonymous CHECK
constraints — our schema comparison tools intentionally ignore these values.
As a result, when performing schema changes, the order in which new tables
and constraints are added generally does not matter.

As a convention, however, we recommend keeping the `db_metadata` population
(presumably a table insert — confirm) at the end of `dbinit.sql`, so that
the database does not contain a version until it is fully populated.

==== Adding new source tables to an existing view

Expand Down
Loading
Loading