Skip to content

Commit

Permalink
feat(NNS): allow execution of proposals that remove non-existing nodes (
Browse files Browse the repository at this point in the history
#3339)

Prevent that race conditions such as the one in the following proposal
prevent proposal execution:
https://dashboard.internetcomputer.org/proposal/134665

It's impossible to completely prevent race conditions such as this one
so we should handle them gracefully when possible.

### Refactor Node Removal Logic

- Extracted the node retrieval logic into a new function `get_node` that
returns an `Option<NodeRecord>`.
- Simplified the `get_node_or_panic` function to leverage `get_node` for
node retrieval.

### Improvements in Node Management

- Updated the `do_remove_nodes` logic to skip nodes that aren't found in
the registry, thus handling potential race conditions gracefully.

Also added several tests for the above
  • Loading branch information
sasa-tomic authored Jan 8, 2025
1 parent ea8cf53 commit fc935aa
Show file tree
Hide file tree
Showing 3 changed files with 212 additions and 38 deletions.
31 changes: 15 additions & 16 deletions rs/registry/canister/src/mutations/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,21 @@ use prost::Message;
impl Registry {
/// Get the Node record or panic on error with a message.
pub fn get_node_or_panic(&self, node_id: NodeId) -> NodeRecord {
let RegistryValue {
value: node_record_vec,
version: _,
deletion_marker: _,
} = self
.get(
&make_node_record_key(node_id).into_bytes(),
self.latest_version(),
)
.unwrap_or_else(|| {
panic!(
"{}node record for {:} not found in the registry.",
LOG_PREFIX, node_id
)
});
self.get_node(node_id).unwrap_or_else(|| {
panic!(
"{}node record for {:} not found in the registry.",
LOG_PREFIX, node_id
);
})
}

/// Get the Node record if it exists in the Registry.
pub fn get_node(&self, node_id: NodeId) -> Option<NodeRecord> {
let reg_value: &RegistryValue = self.get(
&make_node_record_key(node_id).into_bytes(),
self.latest_version(),
)?;

NodeRecord::decode(node_record_vec.as_slice()).unwrap()
Some(NodeRecord::decode(reg_value.value.as_slice()).unwrap())
}
}
186 changes: 182 additions & 4 deletions rs/registry/canister/src/mutations/node_management/do_remove_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,21 @@ impl Registry {
// 3. Loop through each node
let mutations = nodes_to_be_removed
.into_iter().flat_map(|node_to_remove| {
// 4. Skip nodes that are not in the registry.
// This tackles the race condition where a node is removed from the registry
// by another transaction before this transaction is processed.
if self.get_node(node_to_remove).is_none() {
println!("{}do_remove_nodes: node {} not found in registry, skipping", LOG_PREFIX, node_to_remove);
return vec![];
};

// 4. Find the node operator id for this record
// 5. Find the node operator id for this record
// and abort if the node record is not found
let node_operator_id = get_node_operator_id_for_node(self, node_to_remove)
.map_err(|e| format!("{}do_remove_nodes: Aborting node removal: {}", LOG_PREFIX, e))
.unwrap();

// 5. Ensure node is not in a subnet
// 6. Ensure node is not in a subnet
let is_node_in_subnet = find_subnet_for_node(self, node_to_remove, &subnet_list_record);
if let Some(subnet_id) = is_node_in_subnet {
panic!("{}do_remove_nodes: Cannot remove a node that is a member of a subnet. This node is a member of Subnet: {}",
Expand All @@ -49,7 +56,7 @@ impl Registry {
);
}

// 6. Retrieve the NO record and increment its node allowance by 1
// 7. Retrieve the NO record and increment its node allowance by 1
let mut new_node_operator_record = get_node_operator_record(self, node_operator_id)
.map_err(|err| {
format!(
Expand All @@ -70,7 +77,7 @@ impl Registry {
};
node_operator_hmap.insert(node_operator_id.to_string(), new_node_operator_record.node_allowance);

// 7. Finally, generate the following mutations:
// 8. Finally, generate the following mutations:
// * Delete the node
// * Delete entries for node encryption keys
// * Increment NO's allowance by 1
Expand All @@ -97,3 +104,174 @@ pub struct RemoveNodesPayload {
/// The list of Node IDs that will be removed
pub node_ids: Vec<NodeId>,
}

#[cfg(test)]
mod tests {
use super::*;
use crate::common::test_helpers::{invariant_compliant_registry, prepare_registry_with_nodes};
use ic_base_types::PrincipalId;
use ic_protobuf::registry::node_operator::v1::NodeOperatorRecord;
use ic_registry_keys::{make_node_operator_record_key, make_node_record_key};
use ic_registry_transport::insert;
use prost::Message;

#[test]
fn test_remove_nonexistent_node() {
let mut registry = invariant_compliant_registry(0);

let nonexistent_node_id = NodeId::from(PrincipalId::new_user_test_id(999));
let payload = RemoveNodesPayload {
node_ids: vec![nonexistent_node_id],
};

// Should not panic, just skip the nonexistent node
registry.do_remove_nodes(payload);
}

#[test]
fn test_remove_single_node() {
let mut registry = invariant_compliant_registry(0);

// Add a node to the registry
let (mutate_request, node_ids) = prepare_registry_with_nodes(1, 1);
registry.maybe_apply_mutation_internal(mutate_request.mutations);

let node_id = *node_ids.keys().next().unwrap();
let node_operator_id =
PrincipalId::try_from(registry.get_node_or_panic(node_id).node_operator_id).unwrap();

// Allow this node operator to onboard 1 more node; the initial value is not important in this test.
// We just want to later see that node_allowance gets incremented by `do_remove_nodes`.
let initial_allowance = 1;
let node_operator_record = NodeOperatorRecord {
node_allowance: initial_allowance,
..Default::default()
};

registry.maybe_apply_mutation_internal(vec![insert(
make_node_operator_record_key(node_operator_id),
node_operator_record.encode_to_vec(),
)]);

// Remove the node
let payload = RemoveNodesPayload {
node_ids: vec![node_id],
};
registry.do_remove_nodes(payload);
// Verify node is removed
assert!(registry
.get(
make_node_record_key(node_id).as_bytes(),
registry.latest_version()
)
.is_none());

// Verify node operator allowance was incremented
let updated_operator = get_node_operator_record(&registry, node_operator_id).unwrap();
assert_eq!(updated_operator.node_allowance, initial_allowance + 1);
}
#[test]
fn test_remove_multiple_nodes_same_operator() {
let mut registry = invariant_compliant_registry(0);

// Add multiple nodes to the registry
let (mutate_request, node_ids) = prepare_registry_with_nodes(1, 3);
registry.maybe_apply_mutation_internal(mutate_request.mutations);

let node_ids: Vec<NodeId> = node_ids.keys().cloned().collect();
let node_operator_id =
PrincipalId::try_from(registry.get_node_or_panic(node_ids[0]).node_operator_id)
.unwrap();

// Add node operator record
let initial_allowance = 0;
let node_operator_record = NodeOperatorRecord {
node_allowance: initial_allowance,
..Default::default()
};

registry.maybe_apply_mutation_internal(vec![insert(
make_node_operator_record_key(node_operator_id),
node_operator_record.encode_to_vec(),
)]);

// Remove two nodes
let payload = RemoveNodesPayload {
node_ids: node_ids[..2].to_vec(),
};

registry.do_remove_nodes(payload);

// Verify the two nodes are removed
for node_id in &node_ids[..2] {
assert!(registry
.get(
make_node_record_key(*node_id).as_bytes(),
registry.latest_version()
)
.is_none());
}

// Verify the third node is still present
assert!(registry.get_node(node_ids[2]).is_some());

// Verify node operator allowance was incremented by 2
let updated_operator = get_node_operator_record(&registry, node_operator_id).unwrap();
assert_eq!(updated_operator.node_allowance, initial_allowance + 2);
}

#[test]
fn test_remove_duplicate_and_nonexistent_node_ids() {
let mut registry = invariant_compliant_registry(0);

// Add a node to the registry
let (mutate_request, node_ids) = prepare_registry_with_nodes(1, 1);
registry.maybe_apply_mutation_internal(mutate_request.mutations);

let node_id = node_ids.keys().next().unwrap().to_owned();
let node_operator_id =
PrincipalId::try_from(registry.get_node_or_panic(node_id).node_operator_id).unwrap();

// Add node operator record
let initial_allowance = 0;
let node_operator_record = NodeOperatorRecord {
node_allowance: initial_allowance,
..Default::default()
};

registry.maybe_apply_mutation_internal(vec![insert(
make_node_operator_record_key(node_operator_id),
node_operator_record.encode_to_vec(),
)]);

// Try to remove the same node multiple times
let payload = RemoveNodesPayload {
node_ids: vec![
node_id,
NodeId::from(PrincipalId::new_node_test_id(111)),
node_id,
NodeId::from(PrincipalId::new_node_test_id(222)),
node_id,
],
};

registry.do_remove_nodes(payload);

// Verify node is removed
assert!(registry
.get(
make_node_record_key(node_id).as_bytes(),
registry.latest_version()
)
.is_none());

// Verify other node_ids are still in the registry
for other_node_id in node_ids.keys().skip(1) {
assert!(registry.get_node(*other_node_id).is_some());
}

// Verify node operator allowance was incremented only once
let updated_operator = get_node_operator_record(&registry, node_operator_id).unwrap();
assert_eq!(updated_operator.node_allowance, initial_allowance + 1);
}
}
33 changes: 15 additions & 18 deletions rs/tests/nns/node_removal_from_registry_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,23 +116,20 @@ pub fn test(env: TestEnv) {
)
.await;
vote_execute_proposal_assert_executed(&governance_canister, proposal_id).await;
// Confirm that the node was indeed removed by sending the proposal again and asserting failure.
let proposal_id = submit_external_proposal_with_test_id(
&governance_canister,
NnsFunction::RemoveNodes,
RemoveNodesPayload {
node_ids: vec![unassigned_node_id],
},
)
.await;
vote_execute_proposal_assert_failed(
&governance_canister,
proposal_id,
format!(
"Aborting node removal: Node Id {} not found in the registry",
unassigned_node_id
),
)
.await;

// Confirm that the node was indeed removed by checking the unassigned node list in the registry.
topology
.block_for_newer_registry_version()
.await
.expect("Could not obtain updated registry.");
let topology = env.topology_snapshot();
assert_eq!(
topology
.unassigned_nodes()
.filter(|node| node.node_id == unassigned_node_id)
.map(|node| node.node_id)
.collect::<Vec<_>>(),
vec![]
);
});
}

0 comments on commit fc935aa

Please sign in to comment.