Skip to content

Commit

Permalink
Rename disks when un-deleting and faulting
Browse files Browse the repository at this point in the history
When un-deleting phantom disks and setting them to faulted, use a new
name that includes the disk's UUID: this ensures that even if a user
created another disk with the same name in the project, the phantom disk
can still be un-deleted and faulted, and eventually cleaned up.

Fixes oxidecomputer#4673
  • Loading branch information
jmpesp committed Dec 12, 2023
1 parent 4ad7325 commit 51d475b
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 2 deletions.
12 changes: 11 additions & 1 deletion nexus/db-queries/src/db/datastore/disk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,10 @@ impl DataStore {
/// If the disk delete saga unwinds, then the disk should _not_ remain
/// deleted: disk delete saga should be triggered again in order to fully
/// complete, and the only way to do that is to un-delete the disk. Set it
/// to faulted to ensure that it won't be used.
/// to faulted to ensure that it won't be used. Use the disk's UUID as part
/// of its new name to ensure that even if a user created another disk that
/// shadows this "phantom" disk the original can still be un-deleted and
/// faulted.
pub async fn project_undelete_disk_set_faulted_no_auth(
&self,
disk_id: &Uuid,
Expand All @@ -667,12 +670,19 @@ impl DataStore {

let faulted = api::external::DiskState::Faulted.label();

// If only the UUID is used, you will hit "name cannot be a UUID to
// avoid ambiguity with IDs". Add a small prefix to avoid this, and use
// "deleted" to be unambigious to the user about what they should do
// with this disk.
let new_name = format!("deleted-{disk_id}");

let result = diesel::update(dsl::disk)
.filter(dsl::time_deleted.is_not_null())
.filter(dsl::id.eq(*disk_id))
.set((
dsl::time_deleted.eq(None::<DateTime<Utc>>),
dsl::disk_state.eq(faulted),
dsl::name.eq(new_name),
))
.check_if_exists::<Disk>(*disk_id)
.execute_and_check(&conn)
Expand Down
126 changes: 125 additions & 1 deletion nexus/tests/integration_tests/disks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1245,7 +1245,7 @@ async fn test_disk_virtual_provisioning_collection(
async fn test_disk_virtual_provisioning_collection_failed_delete(
cptestctx: &ControlPlaneTestContext,
) {
// Confirm that there's a panic deleting a project if a disk deletion fails
// Confirm that there's no panic deleting a project if a disk deletion fails
let client = &cptestctx.external_client;
let nexus = &cptestctx.server.apictx().nexus;
let datastore = nexus.datastore();
Expand Down Expand Up @@ -1373,6 +1373,130 @@ async fn test_disk_virtual_provisioning_collection_failed_delete(
.expect("unexpected failure deleting project");
}

#[nexus_test]
async fn test_phantom_disk_rename(cptestctx: &ControlPlaneTestContext) {
// Confirm that phantom disks are renamed when they are un-deleted and
// faulted

let client = &cptestctx.external_client;
let nexus = &cptestctx.server.apictx().nexus;
let datastore = nexus.datastore();

let _disk_test = DiskTest::new(&cptestctx).await;

populate_ip_pool(&client, "default", None).await;
let _project_id1 = create_project(client, PROJECT_NAME).await.identity.id;

// Create a 1 GB disk
let disk_size = ByteCount::from_gibibytes_u32(1);
let disks_url = get_disks_url();
let disk_one = params::DiskCreate {
identity: IdentityMetadataCreateParams {
name: "disk-one".parse().unwrap(),
description: String::from("sells rainsticks"),
},
disk_source: params::DiskSource::Blank {
block_size: params::BlockSize::try_from(512).unwrap(),
},
size: disk_size,
};

NexusRequest::new(
RequestBuilder::new(client, Method::POST, &disks_url)
.body(Some(&disk_one))
.expect_status(Some(StatusCode::CREATED)),
)
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.expect("unexpected failure creating 1 GiB disk");

let disk_url = format!("/v1/disks/{}?project={}", "disk-one", PROJECT_NAME);

// Confirm it's there
let disk = disk_get(&client, &disk_url).await;
assert_eq!(disk.state, DiskState::Detached);

let original_disk_id = disk.identity.id;

// Now, request disk delete
NexusRequest::new(
RequestBuilder::new(client, Method::DELETE, &disk_url)
.expect_status(Some(StatusCode::NO_CONTENT)),
)
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.expect("unexpected failure deleting 1 GiB disk");

// It's gone!
NexusRequest::new(
RequestBuilder::new(client, Method::GET, &disk_url)
.expect_status(Some(StatusCode::NOT_FOUND)),
)
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.expect("unexpected success finding 1 GiB disk");

// Create a new disk with the same name
NexusRequest::new(
RequestBuilder::new(client, Method::POST, &disks_url)
.body(Some(&disk_one))
.expect_status(Some(StatusCode::CREATED)),
)
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.expect("unexpected failure creating 1 GiB disk");

// Confirm it's there
let disk = disk_get(&client, &disk_url).await;
assert_eq!(disk.state, DiskState::Detached);

// Confirm it's not the same disk
let new_disk_id = disk.identity.id;
assert_ne!(original_disk_id, new_disk_id);

// Un-delete the original and set it to faulted
datastore
.project_undelete_disk_set_faulted_no_auth(&original_disk_id)
.await
.unwrap();

// The original disk is now faulted
let disk_url = format!(
"/v1/disks/deleted-{}?project={}",
original_disk_id, PROJECT_NAME
);
let disk = disk_get(&client, &disk_url).await;
assert_eq!(disk.state, DiskState::Faulted);

// Make sure original can still be deleted
NexusRequest::new(
RequestBuilder::new(client, Method::DELETE, &disk_url)
.expect_status(Some(StatusCode::NO_CONTENT)),
)
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.expect("unexpected failure deleting 1 GiB disk");

// Make sure new can be deleted too
let disk_url = format!("/v1/disks/{}?project={}", "disk-one", PROJECT_NAME);
let disk = disk_get(&client, &disk_url).await;
assert_eq!(disk.state, DiskState::Detached);

NexusRequest::new(
RequestBuilder::new(client, Method::DELETE, &disk_url)
.expect_status(Some(StatusCode::NO_CONTENT)),
)
.authn_as(AuthnMode::PrivilegedUser)
.execute()
.await
.expect("unexpected failure deleting 1 GiB disk");
}

// Test disk size accounting
#[nexus_test]
async fn test_disk_size_accounting(cptestctx: &ControlPlaneTestContext) {
Expand Down

0 comments on commit 51d475b

Please sign in to comment.