From 131857e80a42d3059214beb24a9c2937ba72559e Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Wed, 14 Aug 2024 14:04:29 +0800 Subject: [PATCH] To avoid bouncing -REDIRECT during FAILOVER (#871) Fix #821 During the `FAILOVER` process, when conditions are met (such as when the force time is reached or the primary and replica offsets are consistent), the primary actively becomes the replica and transitions to the `FAILOVER_IN_PROGRESS` state. After the primary becomes the replica, and after handshaking and other operations, it will eventually send the `PSYNC FAILOVER` command to the replica, after which the replica will become the primary. This means that the upgrade of the replica to the primary is an asynchronous operation, which implies that during the `FAILOVER_IN_PROGRESS` state, there may be a period of time where both nodes are replicas. In this scenario, if a `-REDIRECT` is returned, the request will be redirected to the replica and then redirected back, causing back and forth redirection. To avoid this situation, during the `FAILOVER_IN_PROGRESS state`, we temporarily suspend the clients that need to be redirected until the replica truly becomes the primary, and then resume the execution. --------- Signed-off-by: zhaozhao.zz --- src/server.c | 25 ++++++++++++++- tests/integration/replica-redirect.tcl | 42 ++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index 2316afb7bf..3cd9ec3d9f 100644 --- a/src/server.c +++ b/src/server.c @@ -3912,7 +3912,30 @@ int processCommand(client *c) { if (!server.cluster_enabled && c->capa & CLIENT_CAPA_REDIRECT && server.primary_host && !mustObeyClient(c) && (is_write_command || (is_read_command && !c->flag.readonly))) { - addReplyErrorSds(c, sdscatprintf(sdsempty(), "-REDIRECT %s:%d", server.primary_host, server.primary_port)); + if (server.failover_state == FAILOVER_IN_PROGRESS) { + /* During the FAILOVER process, when conditions are met (such as + * when the force time is reached or the primary and replica offsets + * are consistent), the primary actively becomes the replica and + * transitions to the FAILOVER_IN_PROGRESS state. + * + * After the primary becomes the replica, and after handshaking + * and other operations, it will eventually send the PSYNC FAILOVER + * command to the replica, then the replica will become the primary. + * This means that the upgrade of the replica to the primary is an + * asynchronous operation, which implies that during the + * FAILOVER_IN_PROGRESS state, there may be a period of time where + * both nodes are replicas. + * + * In this scenario, if a -REDIRECT is returned, the request will be + * redirected to the replica and then redirected back, causing back + * and forth redirection. To avoid this situation, during the + * FAILOVER_IN_PROGRESS state, we temporarily suspend the clients + * that need to be redirected until the replica truly becomes the primary, + * and then resume the execution. */ + blockPostponeClient(c); + } else { + addReplyErrorSds(c, sdscatprintf(sdsempty(), "-REDIRECT %s:%d", server.primary_host, server.primary_port)); + } return C_OK; } diff --git a/tests/integration/replica-redirect.tcl b/tests/integration/replica-redirect.tcl index 0db51dd3ff..050cf0368c 100644 --- a/tests/integration/replica-redirect.tcl +++ b/tests/integration/replica-redirect.tcl @@ -2,6 +2,11 @@ start_server {tags {needs:repl external:skip}} { start_server {} { set primary_host [srv -1 host] set primary_port [srv -1 port] + set primary_pid [srv -1 pid] + + set replica_host [srv 0 host] + set replica_port [srv 0 port] + set replica_pid [srv 0 pid] r replicaof $primary_host $primary_port wait_for_condition 50 100 { @@ -32,5 +37,42 @@ start_server {tags {needs:repl external:skip}} { r readonly r get foo } {} + + test {client paused during failover-in-progress} { + pause_process $replica_pid + # replica will never acknowledge this write + r -1 set foo bar + r -1 failover to $replica_host $replica_port TIMEOUT 100 FORCE + + # Wait for primary to give up on sync attempt and start failover + wait_for_condition 50 100 { + [s -1 master_failover_state] == "failover-in-progress" + } else { + fail "Failover from primary to replica did not timeout" + } + + set rd [valkey_deferring_client -1] + $rd client capa redirect + assert_match "OK" [$rd read] + $rd set foo bar + + # Client paused during failover-in-progress, see more details in PR #871 + wait_for_blocked_clients_count 1 100 10 -1 + + resume_process $replica_pid + + # Wait for failover to end + wait_for_condition 50 100 { + [s -1 master_failover_state] == "no-failover" + } else { + fail "Failover from primary to replica did not finish" + } + + assert_match *master* [r role] + assert_match *slave* [r -1 role] + + assert_error "REDIRECT $replica_host:$replica_port" {$rd read} + $rd close + } } }