From dac6fb33aa084bda957f9a33a35a0e34e1601839 Mon Sep 17 00:00:00 2001 From: qima Date: Fri, 13 Dec 2024 20:19:30 +0800 Subject: [PATCH 1/2] chore: not to carry out connection pruning --- ant-networking/src/driver.rs | 3 -- ant-networking/src/event/swarm.rs | 66 ----------------------------- ant-networking/src/relay_manager.rs | 6 --- 3 files changed, 75 deletions(-) diff --git a/ant-networking/src/driver.rs b/ant-networking/src/driver.rs index 4534b49110..dd99108ad2 100644 --- a/ant-networking/src/driver.rs +++ b/ant-networking/src/driver.rs @@ -759,7 +759,6 @@ impl NetworkBuilder { quotes_history: Default::default(), replication_targets: Default::default(), last_replication: None, - last_connection_pruning_time: Instant::now(), network_density_samples: FifoRegister::new(100), }; @@ -867,8 +866,6 @@ pub struct SwarmDriver { /// when was the last replication event /// This allows us to throttle replication no matter how it is triggered pub(crate) last_replication: Option, - /// when was the last outdated connection prunning undertaken. - pub(crate) last_connection_pruning_time: Instant, /// FIFO cache for the network density samples pub(crate) network_density_samples: FifoRegister, } diff --git a/ant-networking/src/event/swarm.rs b/ant-networking/src/event/swarm.rs index 3bf65eb6d9..cf4fd1a51a 100644 --- a/ant-networking/src/event/swarm.rs +++ b/ant-networking/src/event/swarm.rs @@ -612,7 +612,6 @@ impl SwarmDriver { debug!("SwarmEvent has been ignored: {other:?}") } } - self.remove_outdated_connections(); self.log_handling(event_string.to_string(), start.elapsed()); @@ -652,71 +651,6 @@ impl SwarmDriver { } } - // Remove outdated connection to a peer if it is not in the RT. - // Optionally force remove all the connections for a provided peer. - fn remove_outdated_connections(&mut self) { - // To avoid this being called too frequenctly, only carry out prunning intervally. - if Instant::now() < self.last_connection_pruning_time + Duration::from_secs(30) { - return; - } - self.last_connection_pruning_time = Instant::now(); - - let mut removed_conns = 0; - self.live_connected_peers.retain(|connection_id, (peer_id, _addr, timeout_time)| { - - // skip if timeout isn't reached yet - if Instant::now() < *timeout_time { - return true; // retain peer - } - - // ignore if peer is present in our RT - if let Some(kbucket) = self.swarm.behaviour_mut().kademlia.kbucket(*peer_id) { - if kbucket - .iter() - .any(|peer_entry| *peer_id == *peer_entry.node.key.preimage()) - { - return true; // retain peer - } - } - - // skip if the peer is a relay server that we're connected to - if let Some(relay_manager) = self.relay_manager.as_ref() { - if relay_manager.keep_alive_peer(peer_id) { - return true; // retain peer - } - } - - // skip if the peer is a node that is being relayed through us - if self.connected_relay_clients.contains(peer_id) { - return true; // retain peer - } - - // actually remove connection - let result = self.swarm.close_connection(*connection_id); - debug!("Removed outdated connection {connection_id:?} to {peer_id:?} with result: {result:?}"); - - removed_conns += 1; - - // do not retain this connection as it has been closed - false - }); - - if removed_conns == 0 { - return; - } - - self.record_connection_metrics(); - - debug!( - "Current libp2p peers pool stats is {:?}", - self.swarm.network_info() - ); - debug!( - "Removed {removed_conns} outdated live connections, still have {} left.", - self.live_connected_peers.len() - ); - } - /// Record the metrics on update of connection state. fn record_connection_metrics(&self) { #[cfg(feature = "open-metrics")] diff --git a/ant-networking/src/relay_manager.rs b/ant-networking/src/relay_manager.rs index 92a1fb8888..171c196625 100644 --- a/ant-networking/src/relay_manager.rs +++ b/ant-networking/src/relay_manager.rs @@ -47,12 +47,6 @@ impl RelayManager { } } - /// Should we keep this peer alive? Closing a connection to that peer would remove that server from the listen addr. - pub(crate) fn keep_alive_peer(&self, peer_id: &PeerId) -> bool { - self.connected_relays.contains_key(peer_id) - || self.waiting_for_reservation.contains_key(peer_id) - } - /// Add a potential candidate to the list if it satisfies all the identify checks and also supports the relay server /// protocol. pub(crate) fn add_potential_candidates( From 629489666696a1c41158df4aabff345fd9d504ca Mon Sep 17 00:00:00 2001 From: qima Date: Sat, 14 Dec 2024 02:59:14 +0800 Subject: [PATCH 2/2] fix: bootstrap node replacement onlly to be carried out once --- ant-networking/src/event/swarm.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ant-networking/src/event/swarm.rs b/ant-networking/src/event/swarm.rs index cf4fd1a51a..b144b3a801 100644 --- a/ant-networking/src/event/swarm.rs +++ b/ant-networking/src/event/swarm.rs @@ -626,9 +626,12 @@ impl SwarmDriver { fn remove_bootstrap_from_full(&mut self, peer_id: PeerId) { let mut shall_removed = None; + let mut bucket_index = Some(0); + if let Some(kbucket) = self.swarm.behaviour_mut().kademlia.kbucket(peer_id) { if kbucket.num_entries() >= K_VALUE.into() { - if let Some(peers) = self.bootstrap_peers.get(&kbucket.range().0.ilog2()) { + bucket_index = kbucket.range().0.ilog2(); + if let Some(peers) = self.bootstrap_peers.get(&bucket_index) { for peer_entry in kbucket.iter() { if peers.contains(peer_entry.node.key.preimage()) { shall_removed = Some(*peer_entry.node.key.preimage()); @@ -648,6 +651,13 @@ impl SwarmDriver { if let Some(removed_peer) = entry { self.update_on_peer_removal(*removed_peer.node.key.preimage()); } + + // With the switch to using bootstrap cache, workload is distributed already. + // to avoid peers keeps being replaced by each other, + // there shall be just one time of removal to be undertaken. + if let Some(peers) = self.bootstrap_peers.get_mut(&bucket_index) { + let _ = peers.remove(&peer_id); + } } }