From c97ac6cec6c6cbf3c49ac3ee4ae6010e9e14491b Mon Sep 17 00:00:00 2001 From: Ping Xie Date: Fri, 13 Sep 2024 20:55:43 -0700 Subject: [PATCH] To avoid bouncing -REDIRECT during FAILOVER (#871) Fix #821 During the `FAILOVER` process, when conditions are met (such as when the force time is reached or the primary and replica offsets are consistent), the primary actively becomes the replica and transitions to the `FAILOVER_IN_PROGRESS` state. After the primary becomes the replica, and after handshaking and other operations, it will eventually send the `PSYNC FAILOVER` command to the replica, after which the replica will become the primary. This means that the upgrade of the replica to the primary is an asynchronous operation, which implies that during the `FAILOVER_IN_PROGRESS` state, there may be a period of time where both nodes are replicas. In this scenario, if a `-REDIRECT` is returned, the request will be redirected to the replica and then redirected back, causing back and forth redirection. To avoid this situation, during the `FAILOVER_IN_PROGRESS` state, we temporarily suspend the clients that need to be redirected until the replica truly becomes the primary, and then resume the execution. --------- Signed-off-by: zhaozhao.zz Signed-off-by: Ping Xie --- src/server.c | 7 ------- tests/integration/replica-redirect.tcl | 4 ++++ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/server.c b/src/server.c index 0a148ae334..43d79aa2bc 100644 --- a/src/server.c +++ b/src/server.c @@ -3963,13 +3963,6 @@ int processCommand(client *c) { * and then resume the execution. 
*/ blockPostponeClient(c); } else { - if (c->cmd->proc == execCommand) { - discardTransaction(c); - } else { - flagTransaction(c); - } - c->duration = 0; - c->cmd->rejected_calls++; addReplyErrorSds(c, sdscatprintf(sdsempty(), "-REDIRECT %s:%d", server.primary_host, server.primary_port)); } return C_OK; diff --git a/tests/integration/replica-redirect.tcl b/tests/integration/replica-redirect.tcl index b4e5a74b66..579880c69b 100644 --- a/tests/integration/replica-redirect.tcl +++ b/tests/integration/replica-redirect.tcl @@ -9,6 +9,10 @@ start_server {tags {needs:repl external:skip}} { set replica_port [srv 0 port] set replica_pid [srv 0 pid] + set replica_host [srv 0 host] + set replica_port [srv 0 port] + set replica_pid [srv 0 pid] + test {write command inside MULTI is QUEUED, EXEC should be REDIRECT} { set rr [valkey_client] $rr client capa redirect