Skip to content

Commit

Permalink
fix(udp_traversal): prevent hanging by notifying the other side of wi…
Browse files Browse the repository at this point in the history
…nner status
  • Loading branch information
tbraun96 committed Nov 24, 2024
1 parent 3cbaef8 commit e57488b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 2 deletions.
18 changes: 17 additions & 1 deletion citadel_wire/src/udp_traversal/multi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pub(crate) struct DualStackUdpHolePuncher {
#[derive(Serialize, Deserialize, Debug)]
#[allow(variant_size_differences)]
enum DualStackCandidateSignal {
Winner(HolePunchID, HolePunchID),
WinnerCanEnd,
AllFailed,
}
Expand Down Expand Up @@ -230,7 +231,7 @@ async fn drive(

if let Some((local_id, peer_id)) = *commanded_winner.lock().await {
log::trace!(target: "citadel", "Local {local_id:?} has been commanded to use {peer_id:?}");
let receivers = kill_signal_tx.send((local_id, peer_id))?;
let receivers = kill_signal_tx.send((local_id, peer_id)).unwrap_or(0);
log::trace!(target: "citadel", "Sent kill signal to {receivers} hole-punchers");

'pop: while let Some(current_enqueued) = current_enqueued.lock().await.pop() {
Expand Down Expand Up @@ -364,6 +365,17 @@ async fn drive(
} else {
// We are the winner
log::trace!(target: "citadel", "*** Local won! Will command other side to use ({:?}, {:?})", peer_unique_id, local_id);
// Tell the other side we won, that way the rebuilder background process for the other
// side can respond. If we don't send this message, then, it's possible hanging occurs
// on the loser end because the winner combo isn't obtained until this futures
// resolver received a completed future; since in variable NAT setups, the adjacent side may fail
// entirely, it could never finish, thus never trigger the code that sets the commanded_winner
// and thus prompts the background code to return the socket on the adjacent node.
send(
DualStackCandidateSignal::Winner(peer_unique_id, local_id),
conn,
)
.await?;
while let Some(socket) = current_enqueued.lock().await.pop() {
if socket.local_id != local_id {
log::warn!(target: "citadel", "*** Winner: socket ID mismatch. Expected {local_id:?}, got {:?}. Looping ...", socket.local_id);
Expand Down Expand Up @@ -417,6 +429,10 @@ async fn drive(
let reader = async move {
loop {
match receive::<DualStackCandidateSignal, _>(conn).await? {
DualStackCandidateSignal::Winner(local_id, peer_id) => {
log::trace!(target: "citadel", "[READER] Remote commanded local to use peer={peer_id:?} and local={local_id:?}");
*commanded_winner.lock().await = Some((local_id, peer_id));
}
DualStackCandidateSignal::AllFailed => {
// All failed locally, but, remote may claim that it has a valid socket/
// Run the function below to exit if remote already set_failure_occurred
Expand Down
2 changes: 1 addition & 1 deletion netbeam/src/sync/primitives/net_mutex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ impl<S: Subscribable + 'static, T: NetObject> NetMutex<T, S> {
passive_background_handler::<S, T>(channel, shared_state, stop_rx, active_to_bg_rx)
.await
{
log::error!(target: "citadel", "[NetMutex Passive Background Handler] Err: {:?}", err.to_string());
log::warn!(target: "citadel", "[NetMutex Passive Background Handler] Err: {:?}", err.to_string());
}

log::trace!(target: "citadel", "[NetMutex] Passive background handler ending")
Expand Down

0 comments on commit e57488b

Please sign in to comment.