From be49257aa3783e38c9115ae92fb409d7dad98f76 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 28 Oct 2024 14:58:02 +0100 Subject: [PATCH 1/3] tests: decom before waiting for cluster health in admin_uuid_operations_test The cluster can't become healthy until ghost nodes are decommissioned. (cherry picked from commit 12bb4ecd43b0dbb3601c390b01f895a1faea0801) --- .../rptest/tests/admin_uuid_operations_test.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tests/rptest/tests/admin_uuid_operations_test.py b/tests/rptest/tests/admin_uuid_operations_test.py index 80261467b976e..49a9fb5785e9a 100644 --- a/tests/rptest/tests/admin_uuid_operations_test.py +++ b/tests/rptest/tests/admin_uuid_operations_test.py @@ -276,14 +276,12 @@ def test_force_uuid_override(self, mode): backoff_sec=2, err_msg=f"{to_stop.name} did not take the UUID override") - self.logger.debug(f"Wait for the cluster to become healthy...") + self.logger.debug(f"Decommission ghost node [{ghost_node_id}]...") + self._decommission(ghost_node_id) + self.logger.debug(f"...and wait for the cluster to become healthy.") self.wait_until_cluster_healthy(timeout_sec=30) - self.logger.debug( - f".. and decommission ghost node [{ghost_node_id}]...") - self._decommission(ghost_node_id) - self.logger.debug( "Check that all this state sticks across a rolling restart") @@ -373,14 +371,11 @@ def test_force_uuid_override_multinode(self, mode): auto_assign_node_id=True, ) - self.logger.debug("Wait for the cluster to become healthy...") + self.logger.debug(f"Decommission ghost node [{ghost_node_id}]...") + self._decommission(ghost_node_id) + self.logger.debug("...and wait for the cluster to become healthy.") controller_leader = self.wait_until_cluster_healthy(timeout_sec=30) assert controller_leader is not None, "Didn't elect a controller leader" assert controller_leader not in to_stop, f"Unexpected controller leader {controller_leader.account.hostname}" - - self.logger.debug( - f"...and decommission ghost node [{ghost_node_id}]...") - - self._decommission(ghost_node_id) From 44ee1d7eb5da103aa5eb403a8d2d61f063b0c896 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Fri, 25 Oct 2024 13:29:46 +0200 Subject: [PATCH 2/3] raft: reply to heartbeats with group_unavailable if addressed to wrong node Previously, we replied with "failure", which updated the last heartbeat timestamp and gave the leader an impression that the group is actually there. Reply with group_unavailable, which is more appropriate here. (cherry picked from commit 25541ce99da197e8dbc048562301b5d9d0128433) --- src/v/raft/consensus.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/v/raft/consensus.cc b/src/v/raft/consensus.cc index 51851a1705f1b..676231c2b9c1d 100644 --- a/src/v/raft/consensus.cc +++ b/src/v/raft/consensus.cc @@ -3930,7 +3930,7 @@ reply_result consensus::lightweight_heartbeat( target_node, _self, source_node); - return reply_result::failure; + return reply_result::group_unavailable; } /** @@ -3985,7 +3985,7 @@ ss::future consensus::full_heartbeat( target_vnode, _self, source_vnode); - reply.result = reply_result::failure; + reply.result = reply_result::group_unavailable; co_return reply; } /** From cb8a4c0eb2bd7b033317d5a94c1f09d066be46ee Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Fri, 25 Oct 2024 13:34:02 +0200 Subject: [PATCH 3/3] raft: ignore heartbeat replies from unexpected node ids Usually this is a new node replying to heartbeats addressed to the corresponding ghost node. (cherry picked from commit 1c23c3566c317316bc7e46b69f41bd137c395e5e) --- src/v/raft/heartbeat_manager.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/v/raft/heartbeat_manager.cc b/src/v/raft/heartbeat_manager.cc index 4297030447717..ce9bfe71fe03f 100644 --- a/src/v/raft/heartbeat_manager.cc +++ b/src/v/raft/heartbeat_manager.cc @@ -455,6 +455,17 @@ void heartbeat_manager::process_reply( return; } auto& reply = r.value(); + + if (reply.source() != n) { + vlog( + raftlog.warn, + "got heartbeat reply from a different node id {} (expected {}), " + "ignoring", + reply.source(), + n); + return; + } + reply.for_each_lw_reply([this, n, target = reply.target(), &groups]( group_id group, reply_result result) { auto it = _consensus_groups.find(group);