From dbb7c26e087398924f7139789963c45a36d51df9 Mon Sep 17 00:00:00 2001
From: Binbin
Date: Thu, 8 Aug 2024 19:58:32 +0800
Subject: [PATCH] Fix CLUSTER SETSLOT block and unblock error when all
 replicas are offline

In the CLUSTER SETSLOT propagation logic, if the replicas are down, the
client gets blocked during command processing and is later unblocked
with `NOREPLICAS Not enough good replicas to write`.

The reason is that some (or all) of the replicas are down, but
myself->num_replicas counts every replica, so the client blocks waiting
for acks that can never arrive and always times out.

We should only wait for the replicas that are actually online;
otherwise the propagation wait will always time out since there are
never enough replicas to ack.

Signed-off-by: Binbin
---
 src/cluster_legacy.c                  | 13 +++++++----
 tests/unit/cluster/slot-migration.tcl | 33 +++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 5 deletions(-)
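Reviewer note (not part of the commit): the essence of the fix is to size
the replica-ack quorum from the replicas that are actually online instead
of from myself->num_replicas. A minimal sketch of that counting logic is
below; countOnlineReplicas is a hypothetical helper name, and the sketch
assumes the list iteration helpers and replica client fields used by the
surrounding cluster_legacy.c code:

    /* Count only replicas that have completed the initial sync; only
     * these can ack a repl offset, so only they may be counted toward
     * the blockClientForReplicaAck() quorum. Counting offline replicas
     * makes the quorum unreachable and the block can only time out. */
    static int countOnlineReplicas(void) {
        listIter li;
        listNode *ln;
        int online = 0;
        listRewind(server.replicas, &li);
        while ((ln = listNext(&li))) {
            client *r = ln->value;
            if (r->repl_state == REPLICA_STATE_ONLINE) online++;
        }
        return online;
    }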
diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index b35b0b3b8e..c86ecdf91e 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -6259,7 +6259,7 @@ void clusterCommandSetSlot(client *c) {
      * To mitigate this issue, the following order needs to be enforced for slot
      * migration finalization such that the replicas finalize the slot ownership
      * before the primary:
-     . *
+     *
      * 1. Client C issues SETSLOT n NODE B against node B.
      * 2. Primary B replicates `SETSLOT n NODE B` to all of its replicas (e.g., B', B'').
      * 3. Upon replication completion, primary B executes `SETSLOT n NODE B` and
@@ -6280,26 +6280,29 @@ void clusterCommandSetSlot(client *c) {
     listIter li;
     listNode *ln;
     int legacy_replica_found = 0;
+    int online_replica_found = 0;
     listRewind(server.replicas, &li);
     while ((ln = listNext(&li))) {
         client *r = ln->value;
         if (r->replica_version < 0x702ff /* 7.2.255 */) {
             legacy_replica_found++;
-            break;
+        }
+        if (r->repl_state == REPLICA_STATE_ONLINE) {
+            online_replica_found++;
         }
     }
 
-    if (!legacy_replica_found) {
+    if (!legacy_replica_found && online_replica_found) {
         forceCommandPropagation(c, PROPAGATE_REPL);
         /* We are a primary and this is the first time we see this `SETSLOT`
          * command. Force-replicate the command to all of our replicas
-         * first and only on success will we handle the command.
+         * first and only on success we will handle the command.
          * Note that
          * 1. All replicas are expected to ack the replication within the given timeout
          * 2. The repl offset target is set to the primary's current repl offset + 1.
          * There is no concern of partial replication because replicas always
          * ack the repl offset at the command boundary. */
-        blockClientForReplicaAck(c, timeout_ms, server.primary_repl_offset + 1, myself->num_replicas, 0);
+        blockClientForReplicaAck(c, timeout_ms, server.primary_repl_offset + 1, online_replica_found, 0);
         /* Mark client as pending command for execution after replication to replicas. */
         c->flag.pending_command = 1;
         replicationRequestAckFromReplicas();
diff --git a/tests/unit/cluster/slot-migration.tcl b/tests/unit/cluster/slot-migration.tcl
index 030404dfde..dd7a2391b6 100644
--- a/tests/unit/cluster/slot-migration.tcl
+++ b/tests/unit/cluster/slot-migration.tcl
@@ -435,3 +435,36 @@ start_cluster 2 0 {tags {tls:skip external:skip cluster regression} overrides {c
         R 0 MIGRATE 127.0.0.1 [lindex [R 1 CONFIG GET port] 1] $stream_name 0 5000
     }
 }
+
+start_cluster 3 6 {tags {external:skip cluster} overrides {cluster-node-timeout 1000} } {
+    test "Killing all replicas in primary 0" {
+        assert_equal 2 [s 0 connected_slaves]
+        catch {R 3 shutdown nosave}
+        catch {R 6 shutdown nosave}
+        wait_for_condition 50 100 {
+            [s 0 connected_slaves] == 0
+        } else {
+            fail "The replicas in primary 0 are still connected"
+        }
+    }
+
+    test "Killing one replica in primary 1" {
+        assert_equal 2 [s 1 connected_slaves]
+        catch {R 4 shutdown nosave}
+        wait_for_condition 50 100 {
+            [s 1 connected_slaves] == 1
+        } else {
+            fail "The replica in primary 1 is still connected"
+        }
+    }
+
+    test "Slot migration is ok when the replicas are down" {
+        migrate_slot 0 1 0
+        migrate_slot 0 2 1
+        assert_equal {OK} [R 0 CLUSTER SETSLOT 0 NODE [R 1 CLUSTER MYID]]
+        assert_equal {OK} [R 0 CLUSTER SETSLOT 1 NODE [R 2 CLUSTER MYID]]
+        wait_for_slot_state 0 ""
+        wait_for_slot_state 1 ""
+        wait_for_slot_state 2 ""
+    }
+}
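Reviewer note (not part of the commit): assuming the repository's standard
Tcl test runner, the new regression test can be exercised on its own with
an invocation along the lines of:

    ./runtest --single unit/cluster/slot-migration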