From 27d3bb79863f56ec936f73510999f47ce5f2ff28 Mon Sep 17 00:00:00 2001 From: Aaron Stannard Date: Wed, 30 Oct 2024 12:45:36 -0500 Subject: [PATCH] fixed bugs with multiple `Member`s with same `Address` crashing `ClusterDaemon` close #7370 --- src/core/Akka.Cluster/ClusterDaemon.cs | 77 +++++++++++++------------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/src/core/Akka.Cluster/ClusterDaemon.cs b/src/core/Akka.Cluster/ClusterDaemon.cs index da7031ae17a..c86d0ef6c0e 100644 --- a/src/core/Akka.Cluster/ClusterDaemon.cs +++ b/src/core/Akka.Cluster/ClusterDaemon.cs @@ -1635,22 +1635,22 @@ public void Welcome(Address joinWith, UniqueAddress from, Gossip gossip) public void Leaving(Address address) { // only try to update if the node is available (in the member ring) - if (LatestGossip.Members.Any(m => m.Address.Equals(address) && m.Status is MemberStatus.Joining or MemberStatus.WeaklyUp or MemberStatus.Up)) + foreach(var mem in LatestGossip.Members.Where(m => m.Address.Equals(address))) { - // mark node as LEAVING - var newMembers = LatestGossip.Members.Select(m => + if (mem.Status is MemberStatus.Joining or MemberStatus.WeaklyUp or MemberStatus.Up) { - if (m.Address == address) return m.Copy(status: MemberStatus.Leaving); - return m; - }).ToImmutableSortedSet(); // mark node as LEAVING - var newGossip = LatestGossip.Copy(members: newMembers); - - UpdateLatestGossip(newGossip); + // mark node as LEAVING + var newMembers = LatestGossip.Members + .Remove(mem).Add(mem.Copy(status: MemberStatus.Leaving)); + var newGossip = LatestGossip.Copy(members: newMembers); + + UpdateLatestGossip(newGossip); - _cluster.LogInfo("Marked address [{0}] as [{1}]", address, MemberStatus.Leaving); - PublishMembershipState(); - // immediate gossip to speed up the leaving process - SendGossip(); + _cluster.LogInfo("Marked address [{0}] as [{1}]", address, MemberStatus.Leaving); + PublishMembershipState(); + // immediate gossip to speed up the leaving process + SendGossip(); + } } } @@ -1674,40 +1674,43 @@ public void Downing(Address address) var localGossip = LatestGossip; var localMembers = localGossip.Members; var localOverview = localGossip.Overview; - var localSeen = localOverview.Seen; var localReachability = _membershipState.DcReachability; // check if the node to DOWN is in the 'members' set - var member = localMembers.FirstOrDefault(m => m.Address == address); - if (member != null && member.Status != MemberStatus.Down) + var found = false; + foreach (var member in localMembers.Where(m => m.Address == address)) { - if (localReachability.IsReachable(member.UniqueAddress)) - _cluster.LogInfo("Marking node [{0}] as [{1}]", member.Address, MemberStatus.Down); - else - _cluster.LogInfo("Marking unreachable node [{0}] as [{1}]", member.Address, MemberStatus.Down); + found = true; + if (member.Status != MemberStatus.Down) + { + if (localReachability.IsReachable(member.UniqueAddress)) + _cluster.LogInfo("Marking node [{0}] as [{1}]", member.Address, MemberStatus.Down); + else + _cluster.LogInfo("Marking unreachable node [{0}] as [{1}]", member.Address, MemberStatus.Down); - var newGossip = localGossip.MarkAsDown(member); //update gossip - UpdateLatestGossip(newGossip); + var newGossip = localGossip.MarkAsDown(member); //update gossip + UpdateLatestGossip(newGossip); - PublishMembershipState(); + PublishMembershipState(); - if (address == _cluster.SelfAddress) - { - // spread the word quickly, without waiting for next gossip tick - SendGossipRandom(MaxGossipsBeforeShuttingDownMyself); - } - else - { - // try to gossip immediately to downed node, as a STONITH signal - GossipTo(member.UniqueAddress); + if (address == _cluster.SelfAddress) + { + // spread the word quickly, without waiting for next gossip tick + SendGossipRandom(MaxGossipsBeforeShuttingDownMyself); + } + else + { + // try to gossip immediately to downed node, as a STONITH signal + GossipTo(member.UniqueAddress); + } } + + // if the previous statement did not evaluate to true, then this node is already being downed + } - else if (member != null) - { - // already down - } - else + + if (!found) { _cluster.LogInfo("Ignoring down of unknown node [{0}]", address); }