Skip to content

Commit

Permalink
Fix IP pools data migration (#4903)
Browse files Browse the repository at this point in the history
Closes #4875

## Problem

After the IP pools migrations on the dogfood rack, the `default` pool
was not marked `is_default=true` for the `oxide` silo when it should
have been.

## Diagnosis

When checking for silo-scoped default pools overriding a fleet-scoped
default, I neglected to require that the silo-scoped defaults in
question were non-deleted. This means that if there was a deleted pool
with `silo_id=<oxide silo id>` and `is_default=true`, that would be
considered an overriding default and leave us with `is_default=false` on
the `default` pool.

Well, I can't check `silo_id` and `is_default` on the pools because
those columns have been dropped, but there is a deleted pool called
`oxide-default` that says in the description it was meant as the default
pool for only the `oxide` silo.

```
oot@[fd00:1122:3344:105::3]:32221/omicron> select * from omicron.public.ip_pool;
                   id                  |        name        |          description           |         time_created          |         time_modified         |         time_deleted          | rcgen
---------------------------------------+--------------------+--------------------------------+-------------------------------+-------------------------------+-------------------------------+--------
  1efa49a2-3f3a-43ab-97ac-d38658069c39 | oxide-default      | oxide silo-only pool - default | 2023-08-31 05:33:00.11079+00  | 2023-08-31 05:33:00.11079+00  | 2023-08-31 06:03:22.426488+00 |     1
```

I think we can be pretty confident this is what got us.

## Fix

Add `AND time_deleted IS NULL` to the subquery.

## Mitigation in existing systems

Already done. Dogfood is the only long-running system where the bad
migration ran, and all I had to do there was use the API to set
`is_default=true` for the (`default` pool, `oxide` silo) link.
  • Loading branch information
david-crespo authored Jan 26, 2024
1 parent 4c15cc0 commit d5dace6
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 50 deletions.
90 changes: 42 additions & 48 deletions nexus/tests/integration_tests/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -951,9 +951,11 @@ const SILO1: Uuid = Uuid::from_u128(0x111151F0_5c3d_4647_83b0_8f3515da7be1);
const SILO2: Uuid = Uuid::from_u128(0x222251F0_5c3d_4647_83b0_8f3515da7be1);

// "6001" -> "Pool"
const POOL0: Uuid = Uuid::from_u128(0x00006001_5c3d_4647_83b0_8f3515da7be1);
const POOL1: Uuid = Uuid::from_u128(0x11116001_5c3d_4647_83b0_8f3515da7be1);
const POOL2: Uuid = Uuid::from_u128(0x22226001_5c3d_4647_83b0_8f3515da7be1);
const POOL3: Uuid = Uuid::from_u128(0x33336001_5c3d_4647_83b0_8f3515da7be1);
const POOL4: Uuid = Uuid::from_u128(0x44446001_5c3d_4647_83b0_8f3515da7be1);

// "513D" -> "Sled"
const SLED1: Uuid = Uuid::from_u128(0x1111513d_5c3d_4647_83b0_8f3515da7be1);
Expand All @@ -975,9 +977,11 @@ fn before_23_0_0(client: &Client) -> BoxFuture<'_, ()> {
// no corresponding silo.
client.batch_execute(&format!("INSERT INTO ip_pool
(id, name, description, time_created, time_modified, time_deleted, rcgen, silo_id, is_default) VALUES
('{POOL0}', 'pool2', '', now(), now(), now(), 1, '{SILO2}', true),
('{POOL1}', 'pool1', '', now(), now(), NULL, 1, '{SILO1}', true),
('{POOL2}', 'pool2', '', now(), now(), NULL, 1, '{SILO2}', false),
('{POOL3}', 'pool3', '', now(), now(), NULL, 1, null, true);
('{POOL3}', 'pool3', '', now(), now(), NULL, 1, null, true),
('{POOL4}', 'pool4', '', now(), now(), NULL, 1, null, false);
")).await.expect("Failed to create IP Pool");
})
}
Expand All @@ -992,56 +996,46 @@ fn after_23_0_0(client: &Client) -> BoxFuture<'_, ()> {
.expect("Failed to query ip pool resource");
let ip_pool_resources = process_rows(&rows);

assert_eq!(ip_pool_resources.len(), 4);
assert_eq!(ip_pool_resources.len(), 6);

fn assert_row(
row: &Vec<ColumnValue>,
ip_pool_id: Uuid,
silo_id: Uuid,
is_default: bool,
) {
let type_silo = SqlEnum::from(("ip_pool_resource_type", "silo"));
assert_eq!(
row,
&vec![
ColumnValue::new("ip_pool_id", ip_pool_id),
ColumnValue::new("resource_type", type_silo),
ColumnValue::new("resource_id", silo_id),
ColumnValue::new("is_default", is_default),
],
);
}

let type_silo = SqlEnum::from(("ip_pool_resource_type", "silo"));
// pool1 was default on silo1, so gets an entry in the join table
// reflecting that
assert_row(&ip_pool_resources[0].values, POOL1, SILO1, true);

// pool1, which referenced silo1 in the "ip_pool" table, has a newly
// created resource.
//
// The same relationship is true for pool2 / silo2.
assert_eq!(
ip_pool_resources[0].values,
vec![
ColumnValue::new("ip_pool_id", POOL1),
ColumnValue::new("resource_type", type_silo.clone()),
ColumnValue::new("resource_id", SILO1),
ColumnValue::new("is_default", true),
],
);
assert_eq!(
ip_pool_resources[1].values,
vec![
ColumnValue::new("ip_pool_id", POOL2),
ColumnValue::new("resource_type", type_silo.clone()),
ColumnValue::new("resource_id", SILO2),
ColumnValue::new("is_default", false),
],
);
// pool1 was default on silo1, so gets an entry in the join table
// reflecting that
assert_row(&ip_pool_resources[1].values, POOL2, SILO2, false);

// pool3 did not previously have a corresponding silo, so now it's associated
// with both silos as a new resource in each.
//
// Additionally, silo1 already had a default pool (pool1), but silo2 did
// not have one. As a result, pool3 becomes the new default pool for silo2.
assert_eq!(
ip_pool_resources[2].values,
vec![
ColumnValue::new("ip_pool_id", POOL3),
ColumnValue::new("resource_type", type_silo.clone()),
ColumnValue::new("resource_id", SILO1),
ColumnValue::new("is_default", false),
],
);
assert_eq!(
ip_pool_resources[3].values,
vec![
ColumnValue::new("ip_pool_id", POOL3),
ColumnValue::new("resource_type", type_silo.clone()),
ColumnValue::new("resource_id", SILO2),
ColumnValue::new("is_default", true),
],
);
// fleet-scoped silos are a little more complicated

// pool3 was a fleet-level default, so now it's associated with both
// silos. silo1 had its own default pool as well (pool1), so pool3
// cannot also be default for silo1. silo2 did not have its own default,
// so pool3 is default for silo2.
assert_row(&ip_pool_resources[2].values, POOL3, SILO1, false);
assert_row(&ip_pool_resources[3].values, POOL3, SILO2, true);

// fleet-level pool that was not default becomes non-default on all silos
assert_row(&ip_pool_resources[4].values, POOL4, SILO1, false);
assert_row(&ip_pool_resources[5].values, POOL4, SILO2, false);
})
}

Expand Down
7 changes: 5 additions & 2 deletions schema/crdb/23.0.0/up4.sql
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ SELECT
-- AND NOT EXISTS here causes is_default to be false in row 1 if there is a
-- conflicting silo default pool. row 2 is inserted in up5.
p.is_default AND NOT EXISTS (
SELECT 1 FROM omicron.public.ip_pool
WHERE silo_id = s.id AND is_default
SELECT 1
FROM omicron.public.ip_pool p0
WHERE p0.silo_id = s.id
AND p0.is_default
AND p0.time_deleted IS NULL
)
FROM omicron.public.ip_pool AS p
-- cross join means we are looking at the cartesian product of all fleet-scoped
Expand Down

0 comments on commit d5dace6

Please sign in to comment.