diff --git a/scylla/src/transport/cluster.rs b/scylla/src/transport/cluster.rs index d88ee233e8..db2daed443 100644 --- a/scylla/src/transport/cluster.rs +++ b/scylla/src/transport/cluster.rs @@ -20,7 +20,7 @@ use itertools::Itertools; use scylla_cql::errors::{BadQuery, NewSessionError}; use scylla_cql::frame::response::result::TableSpec; use scylla_cql::types::serialize::row::SerializedValues; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; @@ -285,7 +285,7 @@ impl ClusterData { known_peers: &HashMap>, used_keyspace: &Option, host_filter: Option<&dyn HostFilter>, - tablets: TabletsInfo, + mut tablets: TabletsInfo, ) -> Self { // Create new updated known_peers and ring let mut new_known_peers: HashMap> = @@ -345,6 +345,47 @@ impl ClusterData { } } + { + let removed_nodes = { + let mut removed_nodes = HashSet::new(); + for old_peer in known_peers { + if !new_known_peers.contains_key(old_peer.0) { + removed_nodes.insert(*old_peer.0); + } + } + + removed_nodes + }; + + let table_predicate = |spec: &TableSpec| { + if let Some(ks) = metadata.keyspaces.get(spec.ks_name()) { + ks.tables.contains_key(spec.table_name()) + } else { + false + } + }; + + let recreated_nodes = { + let mut recreated_nodes = HashMap::new(); + for (old_peer_id, old_peer_node) in known_peers { + if let Some(new_peer_node) = new_known_peers.get(old_peer_id) { + if !Arc::ptr_eq(old_peer_node, new_peer_node) { + recreated_nodes.insert(*old_peer_id, Arc::clone(new_peer_node)); + } + } + } + + recreated_nodes + }; + + tablets.perform_maintanance( + &table_predicate, + &removed_nodes, + &new_known_peers, + &recreated_nodes, + ) + } + Self::update_rack_count(&mut datacenters); let keyspaces = metadata.keyspaces; @@ -491,7 +532,22 @@ impl ClusterData { let replica_translator = |uuid: Uuid| self.known_peers.get(&uuid).cloned(); for (table, raw_tablet) in raw_tablets.into_iter() { - let tablet = Tablet::from_raw_tablet(&raw_tablet, replica_translator); + // Should we skip tablets that belong to a keyspace not present in + // self.keyspaces? The keyspace could have been, without driver's knowledge: + // 1. Dropped - in which case we'll remove its info soon (when refreshing + // topology) anyway. + // 2. Created - no harm in storing the info now. + // + // So I think we can safely skip checking keyspace presence. + let tablet = match Tablet::from_raw_tablet(raw_tablet, replica_translator) { + Ok(t) => t, + Err((t, f)) => { + debug!("Nodes ({}) that are replicas for a tablet {{ks: {}, table: {}, range: [{}. {}]}} not present in current ClusterData.known_peers. \ + Skipping these replicas until topology refresh", + f.iter().format(", "), table.ks_name(), table.table_name(), t.range().0.value(), t.range().1.value()); + t + } + }; self.locator.tablets.add_tablet(table, tablet); } } diff --git a/scylla/src/transport/locator/tablets.rs b/scylla/src/transport/locator/tablets.rs index b019c14a23..d52b6cffc7 100644 --- a/scylla/src/transport/locator/tablets.rs +++ b/scylla/src/transport/locator/tablets.rs @@ -1,4 +1,5 @@ #![allow(dead_code)] +use itertools::Itertools; use lazy_static::lazy_static; use scylla_cql::cql_to_rust::FromCqlVal; use scylla_cql::frame::frame_errors::ParseError; @@ -10,7 +11,7 @@ use uuid::Uuid; use crate::routing::{Shard, Token}; use crate::transport::Node; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; #[derive(Error, Debug)] @@ -21,7 +22,7 @@ pub(crate) enum TabletParsingError { ShardNum(i32), } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone)] struct RawTabletReplicas { replicas: Vec<(Uuid, Shard)>, } @@ -95,18 +96,25 @@ struct TabletReplicas { } impl TabletReplicas { + /// Gets raw replica list (which is an array of (Uuid, Shard)), retrieves + /// `Node` objects and groups node replicas by DC to make life easier for LBP. + /// In case of failure this function returns Self, but with the problematic nodes skipped, + /// and a list of skipped uuids - so that the caller can e.g. do some logging. pub(crate) fn from_raw_replicas( raw_replicas: &RawTabletReplicas, replica_translator: impl Fn(Uuid) -> Option>, - ) -> Self { - let all: Vec<_> = raw_replicas.replicas + ) -> Result)> { + let mut failed = Vec::new(); + let all: Vec<_> = raw_replicas + .replicas .iter() - .filter_map(|(replica, shard)| if let Some(r) = replica_translator(*replica) { - Some((r, *shard as Shard)) - } else { - // TODO: Should this be an error? When can this happen? - warn!("Node {replica} from system.tablets not present in ClusterData.known_peers. Skipping this replica"); - None + .filter_map(|(replica, shard)| { + if let Some(r) = replica_translator(*replica) { + Some((r, *shard as Shard)) + } else { + failed.push(*replica); + None + } }) .collect(); @@ -121,6 +129,27 @@ impl TabletReplicas { } }); + if failed.is_empty() { + Ok(Self { all, per_dc }) + } else { + Err((Self { all, per_dc }, failed)) + } + } + + #[cfg(test)] + fn new_for_test(replicas: Vec>) -> Self { + let all = replicas.into_iter().map(|r| (r, 0)).collect::>(); + let mut per_dc: HashMap, Shard)>> = HashMap::new(); + all.iter().for_each(|(replica, shard)| { + if let Some(dc) = replica.datacenter.as_ref() { + if let Some(replicas) = per_dc.get_mut(dc) { + replicas.push((Arc::clone(replica), *shard)); + } else { + per_dc.insert(dc.to_string(), vec![(Arc::clone(replica), *shard)]); + } + } + }); + Self { all, per_dc } } } @@ -176,17 +205,95 @@ pub(crate) struct Tablet { /// Last token belonging to the tablet, inclusive last_token: Token, replicas: TabletReplicas, + /// If any of the replicas failed to resolve to a Node, + /// then this field will contain the original list of replicas. + failed: Option, } impl Tablet { pub(crate) fn from_raw_tablet( - raw_tablet: &RawTablet, + raw_tablet: RawTablet, + replica_translator: impl Fn(Uuid) -> Option>, + ) -> Result)> { + let replicas_result = + TabletReplicas::from_raw_replicas(&raw_tablet.replicas, replica_translator); + match replicas_result { + Ok(replicas) => Ok(Self { + first_token: raw_tablet.first_token, + last_token: raw_tablet.last_token, + replicas, + failed: None, + }), + Err((replicas, failed_replicas)) => Err(( + Self { + first_token: raw_tablet.first_token, + last_token: raw_tablet.last_token, + replicas, + failed: Some(raw_tablet.replicas), + }, + failed_replicas, + )), + } + } + + pub(crate) fn range(&self) -> (Token, Token) { + (self.first_token, self.last_token) + } + + // Returns `Ok(())` if after the operation Tablet replicas are fully resolved. + // Return `Err(replicas)` if some replicas failed to resolve. `replicas` is a + // list of Uuids that failed to resolve. + fn re_resolve_replicas( + &mut self, replica_translator: impl Fn(Uuid) -> Option>, - ) -> Self { + ) -> Result<(), Vec> { + if let Some(failed) = self.failed.as_ref() { + match TabletReplicas::from_raw_replicas(failed, replica_translator) { + Ok(resolved_replicas) => { + // We managed to successfully resolve all replicas, all is well. + self.replicas = resolved_replicas; + self.failed = None; + Ok(()) + } + Err((_, failed)) => Err(failed), + } + } else { + Ok(()) + } + } + + fn update_stale_nodes(&mut self, recreated_nodes: &HashMap>) { + let mut any_updated = false; + for (node, _) in self.replicas.all.iter_mut() { + if let Some(new_node) = recreated_nodes.get(&node.host_id) { + assert!(!Arc::ptr_eq(new_node, node)); + any_updated = true; + *node = Arc::clone(new_node); + } + } + + if any_updated { + // Now that we know we have some nodes to update we need to go over + // per-dc nodes and update them too. + for (_, dc_nodes) in self.replicas.per_dc.iter_mut() { + for (node, _) in dc_nodes.iter_mut() { + if let Some(new_node) = recreated_nodes.get(&node.host_id) { + *node = Arc::clone(new_node); + } + } + } + } + } + + #[cfg(test)] + fn new_for_test(token: i64, replicas: Vec>, failed: Option>) -> Self { Self { - first_token: raw_tablet.first_token, - last_token: raw_tablet.last_token, - replicas: TabletReplicas::from_raw_replicas(&raw_tablet.replicas, replica_translator), + first_token: Token::new(token), + last_token: Token::new(token), + replicas: TabletReplicas::new_for_test(replicas), + failed: failed.map(|vec| RawTabletReplicas { + replicas: vec.into_iter().map(|id| (id, 0)).collect::>(), + }), } } } @@ -204,6 +311,12 @@ impl Tablet { pub(crate) struct TableTablets { table_spec: TableSpec<'static>, tablet_list: Vec, + /// In order to make typical tablet maintance faster + /// we remember if there were any tablets that have unrecognized uuids in replica list. + /// If there were none, and a few other conditions are satisfied, we can skip nearly whole maintanace. + /// This flag may be falsely true: if we add tablet with unknown replica but later + /// overwrite it with some other tablet. + has_unknown_replicas: bool, } impl TableTablets { @@ -211,6 +324,7 @@ impl TableTablets { Self { table_spec, tablet_list: Default::default(), + has_unknown_replicas: false, } } @@ -248,6 +362,9 @@ impl TableTablets { /// /// This preserves the invariant that all tablets in `self` are non-overlapping. fn add_tablet(&mut self, tablet: Tablet) { + if tablet.failed.is_some() { + self.has_unknown_replicas = true; + } // Smallest `left_idx` for which `tablet.first_token` is LESS OR EQUAL to `tablet_list[left_idx].last_token`. // It implies that `tablet_list[left_idx]` overlaps with `tablet` iff `tablet.last_token` // is GREATER OR EQUAL to `tablet_list[left_idx].first_token`. @@ -265,6 +382,54 @@ impl TableTablets { self.tablet_list.insert(left_idx, tablet); } + fn perform_maintanance( + &mut self, + removed_nodes: &HashSet, + all_current_nodes: &HashMap>, + recreated_nodes: &HashMap>, + ) { + // First we need to re-resolve unknown replicas or remove their tablets. + // It will make later checks easier because we'll know that `failed` field + // is `None` for all tablets. + if self.has_unknown_replicas { + self.tablet_list.retain_mut(|tablet| { + let r = tablet.re_resolve_replicas(|id: Uuid| all_current_nodes.get(&id).cloned()); + if let Err(failed) = &r { + warn!("Nodes ({}) listed as replicas for a tablet {{ks: {}, table: {}, range: [{}. {}]}} are not present in ClusterData.known_peers, \ + despite topology refresh. Removing problematic tablet.", + failed.iter().format(", "), self.table_spec.ks_name(), self.table_spec.table_name(), tablet.first_token.value(), tablet.last_token.value()); + } + + r.is_ok() + }); + } + + // Now we remove all tablets that have replicas on removed nodes. + if !removed_nodes.is_empty() { + self.tablet_list.retain(|tablet| { + tablet + .replicas + .all + .iter() + .all(|node| !removed_nodes.contains(&node.0.host_id)) + }); + } + + // The last thing to do is to replace all old `Node` objects. + // Situations where driver requires this don't happen often: + // - Node IP change + // - Node DC change / Rack change + // so I don't think we should be too concerned about performance of this code. + if !recreated_nodes.is_empty() { + for tablet in self.tablet_list.iter_mut() { + tablet.update_stale_nodes(recreated_nodes); + } + } + + // All unknown replicas were either resolved or whole tablets removed. + self.has_unknown_replicas = false; + } + #[cfg(test)] fn new_for_test() -> Self { Self::new(TableSpec::borrowed("test_ks", "test_table")) @@ -272,6 +437,7 @@ impl TableTablets { } #[derive(Clone, Debug)] +#[cfg_attr(test, derive(PartialEq, Eq))] pub(crate) struct TabletsInfo { // We use hashbrown hashmap instead of std hashmap because with // std one it is not possible to query map with key `TableSpec<'static>` @@ -282,12 +448,16 @@ pub(crate) struct TabletsInfo { // HashBrown on the other hand requires only `Q: Hash + Equivalent + ?Sized`, // and it is easy to create a wrapper type with required `Equivalent` impl. tablets: hashbrown::HashMap, TableTablets>, + /// See `has_unknown_replicas` field in `TableTablets`. + /// The field here will be true if it is true for any TableTablets. + has_unknown_replicas: bool, } impl TabletsInfo { pub(crate) fn new() -> Self { Self { tablets: hashbrown::HashMap::new(), + has_unknown_replicas: false, } } @@ -314,31 +484,115 @@ impl TabletsInfo { } pub(crate) fn add_tablet(&mut self, table_spec: TableSpec<'static>, tablet: Tablet) { + if tablet.failed.is_some() { + self.has_unknown_replicas = true; + } self.tablets .entry(table_spec) .or_insert_with_key(|k| TableTablets::new(k.clone())) .add_tablet(tablet) } + + /// This method is supposed to be called when topology is updated. + /// It goes through tablet info and adjusts it to topology changes, to prevent + /// a situation where local tablet info and a real one are permanently different. + /// What is updated: + /// 1. Info for dropped tables is removed. + /// 2. Tablets where a removed node was one of replicas are removed. + /// Can be skipped if no nodes were removed. + /// 3. Tablets with unrecognized uuids in replica list are resolved again. + /// If this is unsuccessful again then the tablet is removed. + /// This can be skipped if we know we have no such tablets. + /// 4. Rarely, the driver may need to re-create `Node` object for a given node. + /// The old object is replaced with the new one in replica lists. + /// This can be skipped if there were no re-created `Node` objects. + /// + /// In order to not perform unnecessary work during typical schema refresh + /// we avoid iterating through tablets at all if steps 2-4 can be skipped. + /// + /// * `removed_nodes`: Nodes that previously were present in ClusterData but are not anymore. + /// For any such node we should remove all tablets that have it in replica list. + /// This is because otherwise: + /// 1. We would keep old `Node` objects, not allowing them to release memory. + /// 2. We would return removed nodes in LBP + /// 3. When a new node joins and becomes replica for this tablet, we would + /// not use it - instead we would keep querying a subset of replicas. + /// + /// * `all_current_nodes`: Map of all nodes. Required to remap unknown replicas. + /// If we didn't try to remap them and instead just skipped them, + /// then we would only query subset of replicas for the tablet, + /// potentially increasing load on those replicas. + /// The alternative is dropping the tablet immediately, but if there are a lot + /// of requests to a range belonging to this tablet, then we would get a + /// lot of unnecessary feedbacks sent. Thus the current solution: + /// skipping unknown replicas and dropping the tablet if we still can't resolve + /// them after topology refresh. + /// + /// * `recreated_nodes`: There are some situations (IP change, DC / Rack change) where the driver + /// will create a new `Node` object for some node and drop the old one. + /// Tablet info would still contain the old object, so the driver would not use + /// new connections. That means if there were such nodes then we need to go over + /// tablets and replace `Arc` objects for recreated nodes. + /// + /// There are some situations not handled by this maintanance procedure that could + /// still result in permanent difference between local and real tablet info: + /// + /// * Extending replica list for a tablet: If a new replica is added to replica list, + /// then we won't learn about it, because we'll keep querying current replicas, which are + /// still replicas. I'm not sure if this can happen. The only scenario where this seems + /// possible is increasing RF - I'm not sure if this would only add replicas or make more changes. + /// We could probably discover it by comparing replication strategy pre and post topology referesh + /// and if it changed then remove tablet info for this keyspace. + /// + /// * Removing the keyspace and recreating it immediately without tablets. This seems so absurd + /// that we most likely don't need to worry about it, but I'm putting it here as a potential problem + /// for completeness. + + pub(crate) fn perform_maintanance( + &mut self, + table_predicate: &impl Fn(&TableSpec) -> bool, + removed_nodes: &HashSet, + all_current_nodes: &HashMap>, + recreated_nodes: &HashMap>, + ) { + // First we remove info for all tables that are no longer present. + self.tablets.retain(|k, _| table_predicate(k)); + + if !removed_nodes.is_empty() || !recreated_nodes.is_empty() || self.has_unknown_replicas { + for (_, table_tablets) in self.tablets.iter_mut() { + table_tablets.perform_maintanance( + removed_nodes, + all_current_nodes, + recreated_nodes, + ); + } + } + + // All unknown replicas were either resolved or whole tablets removed. + self.has_unknown_replicas = false; + } } #[cfg(test)] mod tests { - use std::collections::HashMap; + use std::collections::{HashMap, HashSet}; use std::sync::Arc; - use scylla_cql::frame::response::result::{ColumnType, CqlValue}; - use scylla_cql::types::serialize::{value::SerializeCql, CellWriter}; + use scylla_cql::frame::response::result::{ColumnType, CqlValue, TableSpec}; + use scylla_cql::types::serialize::value::SerializeCql; + use scylla_cql::types::serialize::CellWriter; use tracing::debug; use uuid::Uuid; use crate::routing::Token; + use crate::test_utils::setup_tracing; use crate::transport::locator::tablets::{ RawTablet, RawTabletReplicas, TabletParsingError, CUSTOM_PAYLOAD_TABLETS_V1_KEY, RAW_TABLETS_CQL_TYPE, }; use crate::transport::Node; - use super::{TableTablets, Tablet, TabletReplicas}; + use super::{TableTablets, Tablet, TabletReplicas, TabletsInfo}; const DC1: &str = "dc1"; const DC2: &str = "dc2"; @@ -527,14 +781,14 @@ mod tests { assert_eq!( replicas, - TabletReplicas { + Ok(TabletReplicas { all: replicas_uids .iter() .cloned() .map(|replica| (translator(replica).unwrap(), 1)) .collect(), per_dc - } + }) ); } @@ -560,6 +814,7 @@ mod tests { first_token: Token::new(*first), last_token: Token::new(*last), replicas: Default::default(), + failed: None, }); } } @@ -716,4 +971,410 @@ mod tests { ], ); } + + const SOME_KS: TableSpec<'static> = TableSpec::borrowed("ks", "tbl"); + + fn node_map(nodes: &[&Arc]) -> HashMap> { + nodes.iter().map(|n| (n.host_id, Arc::clone(n))).collect() + } + + #[test] + fn table_maintenance_tests() { + setup_tracing(); + + let node1 = Arc::new(Node::new_for_test( + Some(Uuid::from_u128(1)), + None, + Some(DC1.to_owned()), + None, + )); + let node2 = Arc::new(Node::new_for_test( + Some(Uuid::from_u128(2)), + None, + Some(DC2.to_owned()), + None, + )); + let node2_v2 = Arc::new(Node::new_for_test( + Some(Uuid::from_u128(2)), + None, + Some(DC2.to_owned()), + None, + )); + let node3 = Arc::new(Node::new_for_test( + Some(Uuid::from_u128(3)), + None, + Some(DC3.to_owned()), + None, + )); + let node3_v2 = Arc::new(Node::new_for_test( + Some(Uuid::from_u128(3)), + None, + Some(DC3.to_owned()), + None, + )); + + type MaintenanceArgs = ( + HashSet, + HashMap>, + HashMap>, + ); + let tests: &mut [(TableTablets, MaintenanceArgs, TableTablets)] = &mut [ + ( + // [Case 0] Nothing changes, no maintenance required + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(0, vec![node1.clone(), node2.clone()], None), + Tablet::new_for_test(1, vec![node2.clone(), node3.clone()], None), + Tablet::new_for_test(2, vec![node3.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: false, + }, + ( + HashSet::new(), + node_map(&[&node1, &node2, &node3]), + HashMap::new(), + ), + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(0, vec![node1.clone(), node2.clone()], None), + Tablet::new_for_test(1, vec![node2.clone(), node3.clone()], None), + Tablet::new_for_test(2, vec![node3.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: false, + }, + ), + ( + // [Case 1] Removed node + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(0, vec![node1.clone(), node2.clone()], None), + Tablet::new_for_test(1, vec![node2.clone(), node3.clone()], None), + Tablet::new_for_test(2, vec![node3.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: false, + }, + ( + HashSet::from([node1.host_id]), + node_map(&[&node2, &node3]), + HashMap::new(), + ), + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(1, vec![node2.clone(), node3.clone()], None), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: false, + }, + ), + ( + // [Case 2] Multiple removed nodes + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(0, vec![node1.clone(), node2.clone()], None), + Tablet::new_for_test(1, vec![node2.clone(), node3.clone()], None), + Tablet::new_for_test(2, vec![node3.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: false, + }, + ( + HashSet::from([node1.host_id, node2.host_id]), + node_map(&[&node3]), + HashMap::new(), + ), + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![Tablet::new_for_test(3, vec![node3.clone()], None)], + has_unknown_replicas: false, + }, + ), + ( + // [Case 3] Nodes with unresolved replicas + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test( + 0, + vec![node2.clone()], + Some(vec![node1.host_id, node2.host_id]), + ), + Tablet::new_for_test( + 1, + vec![node2.clone()], + Some(vec![node2.host_id, node3.host_id]), + ), + Tablet::new_for_test(2, vec![node3.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: true, + }, + ( + HashSet::new(), + node_map(&[&node1, &node2, &node3]), + HashMap::new(), + ), + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(0, vec![node1.clone(), node2.clone()], None), + Tablet::new_for_test(1, vec![node2.clone(), node3.clone()], None), + Tablet::new_for_test(2, vec![node3.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: false, + }, + ), + ( + // [Case 4] Some replicas still unresolved + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test( + 0, + vec![node2.clone()], + Some(vec![node1.host_id, node2.host_id]), + ), + Tablet::new_for_test( + 1, + vec![node2.clone()], + Some(vec![node2.host_id, node3.host_id]), + ), + Tablet::new_for_test(2, vec![], Some(vec![node3.host_id, node1.host_id])), + Tablet::new_for_test(3, vec![node2.clone()], None), + Tablet::new_for_test(4, vec![], Some(vec![node3.host_id])), + ], + has_unknown_replicas: true, + }, + (HashSet::new(), node_map(&[&node2, &node3]), HashMap::new()), + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(1, vec![node2.clone(), node3.clone()], None), + Tablet::new_for_test(3, vec![node2.clone()], None), + Tablet::new_for_test(4, vec![node3.clone()], None), + ], + has_unknown_replicas: false, + }, + ), + ( + // [Case 5] Incorrectly set "has_unknown_replicas" - unknown replicas should be ignored, + // because this stip of the maintenance is skipped. + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test( + 0, + vec![node2.clone()], + Some(vec![node1.host_id, node2.host_id]), + ), + Tablet::new_for_test( + 1, + vec![node2.clone()], + Some(vec![node2.host_id, node3.host_id]), + ), + Tablet::new_for_test(2, vec![node3.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: false, + }, + ( + HashSet::new(), + node_map(&[&node1, &node2, &node3]), + HashMap::new(), + ), + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test( + 0, + vec![node2.clone()], + Some(vec![node1.host_id, node2.host_id]), + ), + Tablet::new_for_test( + 1, + vec![node2.clone()], + Some(vec![node2.host_id, node3.host_id]), + ), + Tablet::new_for_test(2, vec![node3.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: false, + }, + ), + ( + // [Case 6] Recreated one of the nodes + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(0, vec![node1.clone(), node2.clone()], None), + Tablet::new_for_test(1, vec![node2.clone(), node3.clone()], None), + Tablet::new_for_test(2, vec![node3.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: false, + }, + ( + HashSet::new(), + node_map(&[&node1, &node2, &node3_v2]), + HashMap::from([(node3_v2.host_id, node3_v2.clone())]), + ), + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(0, vec![node1.clone(), node2.clone()], None), + Tablet::new_for_test(1, vec![node2.clone(), node3_v2.clone()], None), + Tablet::new_for_test(2, vec![node3_v2.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3_v2.clone()], None), + ], + has_unknown_replicas: false, + }, + ), + ( + // [Case 7] Recreated multiple nodes + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(0, vec![node1.clone(), node2.clone()], None), + Tablet::new_for_test(1, vec![node2.clone(), node3.clone()], None), + Tablet::new_for_test(2, vec![node3.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: false, + }, + ( + HashSet::new(), + node_map(&[&node1, &node2, &node3_v2]), + HashMap::from([ + (node3_v2.host_id, node3_v2.clone()), + (node2_v2.host_id, node2_v2.clone()), + ]), + ), + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(0, vec![node1.clone(), node2_v2.clone()], None), + Tablet::new_for_test(1, vec![node2_v2.clone(), node3_v2.clone()], None), + Tablet::new_for_test(2, vec![node3_v2.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3_v2.clone()], None), + ], + has_unknown_replicas: false, + }, + ), + ( + // [Case 8] Unknown replica and removed node + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test( + 0, + vec![node2.clone()], + Some(vec![node1.host_id, node2.host_id]), + ), + Tablet::new_for_test(1, vec![node2.clone(), node3.clone()], None), + Tablet::new_for_test( + 2, + vec![node3.clone()], + Some(vec![node3.host_id, node1.host_id]), + ), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: true, + }, + ( + HashSet::from([node2.host_id]), + node_map(&[&node1, &node3]), + HashMap::new(), + ), + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(2, vec![node3.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: false, + }, + ), + ( + // [Case 9] Unknown replica, removed node and recreated node + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test( + 0, + vec![node2.clone()], + Some(vec![node1.host_id, node2.host_id]), + ), + Tablet::new_for_test(1, vec![node2.clone(), node3.clone()], None), + Tablet::new_for_test( + 2, + vec![node3.clone()], + Some(vec![node3.host_id, node1.host_id]), + ), + Tablet::new_for_test(3, vec![node3.clone()], None), + ], + has_unknown_replicas: true, + }, + ( + HashSet::from([node2.host_id]), + node_map(&[&node1, &node3]), + HashMap::from([(node3_v2.host_id, node3_v2.clone())]), + ), + TableTablets { + table_spec: SOME_KS, + tablet_list: vec![ + Tablet::new_for_test(2, vec![node3_v2.clone(), node1.clone()], None), + Tablet::new_for_test(3, vec![node3_v2.clone()], None), + ], + has_unknown_replicas: false, + }, + ), + ]; + + for (i, (pre, (removed, all, recreated), post)) in tests.iter_mut().enumerate() { + tracing::info!("Test case {}", i); + pre.perform_maintanance(removed, all, recreated); + assert_eq!(pre, post); + } + } + + #[test] + fn maintenance_keyspace_remove_test() { + const TABLE_1: TableSpec<'static> = TableSpec::borrowed("ks_1", "table_1"); + const TABLE_2: TableSpec<'static> = TableSpec::borrowed("ks_2", "table_2"); + const TABLE_DROP: TableSpec<'static> = TableSpec::borrowed("ks_drop", "table_drop"); + + let mut pre = TabletsInfo { + tablets: hashbrown::HashMap::from([ + (TABLE_1.clone(), TableTablets::new(TABLE_1.clone())), + (TABLE_DROP.clone(), TableTablets::new(TABLE_DROP.clone())), + (TABLE_2.clone(), TableTablets::new(TABLE_2.clone())), + ]), + has_unknown_replicas: false, + }; + + let expected_after = TabletsInfo { + tablets: hashbrown::HashMap::from([ + (TABLE_1.clone(), TableTablets::new(TABLE_1.clone())), + (TABLE_2.clone(), TableTablets::new(TABLE_2.clone())), + ]), + has_unknown_replicas: false, + }; + + pre.perform_maintanance( + &|spec| *spec != TABLE_DROP, + &HashSet::new(), + &HashMap::new(), + &HashMap::new(), + ); + + assert_eq!(pre, expected_after); + } }