From a81c32079cfd63ae022a3d1a330904f8d9fd1858 Mon Sep 17 00:00:00 2001
From: Sankar <1890648+srgsanky@users.noreply.github.com>
Date: Sun, 16 Jun 2024 20:37:09 -0700
Subject: [PATCH 01/53] Make cluster meet reliable under link failures (#461)

When there is a link failure while a MEET request is in flight, the
sending node stops sending MEETs and starts sending PINGs. Since
every node responds to PINGs from unknown nodes with a PONG, the
receiving node never adds the sending node, but the sending node adds
the receiving node when it sees a PONG. This can lead to asymmetry in
cluster membership. This change makes the sender keep sending MEET
until it sees a PONG, avoiding the asymmetry.

---------

Signed-off-by: Sankar <1890648+srgsanky@users.noreply.github.com>
---
 src/cluster_legacy.c                          | 35 ++++++--
 src/debug.c                                   |  6 ++
 src/server.h                                  |  2 +
 tests/unit/cluster/cluster-multiple-meets.tcl | 83 +++++++++++++++++++
 tests/unit/cluster/cluster-reliable-meet.tcl  | 71 ++++++++++++++++
 5 files changed, 190 insertions(+), 7 deletions(-)
 create mode 100644 tests/unit/cluster/cluster-multiple-meets.tcl
 create mode 100644 tests/unit/cluster/cluster-reliable-meet.tcl

diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index f566cf5a35..9104a76d87 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -2844,7 +2844,16 @@ int clusterIsValidPacket(clusterLink *link) {
  * received from the wrong sender ID). */
 int clusterProcessPacket(clusterLink *link) {
     /* Validate that the packet is well-formed */
-    if (!clusterIsValidPacket(link)) return 1;
+    if (!clusterIsValidPacket(link)) {
+        clusterMsg *hdr = (clusterMsg *)link->rcvbuf;
+        uint16_t type = ntohs(hdr->type);
+        if (server.debug_cluster_close_link_on_packet_drop && type == server.cluster_drop_packet_filter) {
+            freeClusterLink(link);
+            serverLog(LL_WARNING, "Closing link for matching packet type %hu", type);
+            return 0;
+        }
+        return 1;
+    }
 
     clusterMsg *hdr = (clusterMsg *)link->rcvbuf;
     uint16_t type = ntohs(hdr->type);
@@ -2942,6 +2951,13 @@ int clusterProcessPacket(clusterLink *link) {
     if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG || type == CLUSTERMSG_TYPE_MEET) {
         serverLog(LL_DEBUG, "%s packet received: %.40s", clusterGetMessageTypeString(type),
                   link->node ? link->node->name : "NULL");
+
+        if (sender && (sender->flags & CLUSTER_NODE_MEET)) {
+            /* Once we get a response for MEET from the sender, we can stop sending more MEET. */
+            sender->flags &= ~CLUSTER_NODE_MEET;
+            serverLog(LL_NOTICE, "Successfully completed handshake with %.40s (%s)", sender->name,
+                      sender->human_nodename);
+        }
         if (!link->inbound) {
             if (nodeInHandshake(link->node)) {
                 /* If we already have this node, try to change the
@@ -3376,12 +3392,17 @@ void clusterLinkConnectHandler(connection *conn) {
          * replaced by the clusterSendPing() call. */
         node->ping_sent = old_ping_sent;
     }
-    /* We can clear the flag after the first packet is sent.
-     * If we'll never receive a PONG, we'll never send new packets
-     * to this node. Instead after the PONG is received and we
-     * are no longer in meet/handshake status, we want to send
-     * normal PING packets. */
-    node->flags &= ~CLUSTER_NODE_MEET;
+    /* NOTE: Assume the current node is A and is asked to MEET another node B.
+     * Once A sends MEET to B, it cannot clear the MEET flag for B until it
+     * gets a response from B. If the MEET packet is not accepted by B due to
+     * link failure, A must continue sending MEET. If A doesn't continue sending
+     * MEET, A will know about B, but B will never add A. Every node always
+     * responds to PINGs from unknown nodes with a PONG, so A will know about B
+     * and continue sending PINGs. But B won't add A until it sees a MEET (or it
+     * gets to know about A from a trusted third node C). In this case, clearing
+     * the MEET flag here leads to asymmetry in the cluster membership. So, we
+     * clear the MEET flag in clusterProcessPacket.
+     */
 
     serverLog(LL_DEBUG, "Connecting with Node %.40s at %s:%d", node->name, node->ip, node->cport);
 }
diff --git a/src/debug.c b/src/debug.c
index 6394e3f0f4..d9fe93c7d4 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -429,6 +429,9 @@ void debugCommand(client *c) {
             "    Show low level info about `key` and associated value.",
             "DROP-CLUSTER-PACKET-FILTER <packet-type>",
             "    Drop all packets that match the filtered type. Set to -1 allow all packets.",
+            "CLOSE-CLUSTER-LINK-ON-PACKET-DROP <0|1>",
+            "    This is valid only when DROP-CLUSTER-PACKET-FILTER is set to a valid packet type.",
+            "    When set to 1, the cluster link is closed after dropping a packet based on the filter.",
             "OOM",
             "    Crash the server simulating an out-of-memory error.",
             "PANIC",
@@ -593,6 +596,9 @@ void debugCommand(client *c) {
         if (getLongFromObjectOrReply(c, c->argv[2], &packet_type, NULL) != C_OK) return;
         server.cluster_drop_packet_filter = packet_type;
         addReply(c, shared.ok);
+    } else if (!strcasecmp(c->argv[1]->ptr, "close-cluster-link-on-packet-drop") && c->argc == 3) {
+        server.debug_cluster_close_link_on_packet_drop = atoi(c->argv[2]->ptr);
+        addReply(c, shared.ok);
     } else if (!strcasecmp(c->argv[1]->ptr, "object") && c->argc == 3) {
         dictEntry *de;
         robj *val;
diff --git a/src/server.h b/src/server.h
index ae2d23b99f..c4ce6f655e 100644
--- a/src/server.h
+++ b/src/server.h
@@ -2069,6 +2069,8 @@ struct valkeyServer {
     unsigned long long cluster_link_msg_queue_limit_bytes; /* Memory usage limit on individual link msg queue */
     int cluster_drop_packet_filter;                        /* Debug config that allows tactically
                                                             * dropping packets of a specific type */
+    /* Debug config that goes along with cluster_drop_packet_filter. When set, the link is closed on packet drop. */
+    uint32_t debug_cluster_close_link_on_packet_drop : 1;
     sds cached_cluster_slot_info[CACHE_CONN_TYPE_MAX];
     /* Scripting */
     mstime_t busy_reply_threshold;  /* Script / module timeout in milliseconds */
diff --git a/tests/unit/cluster/cluster-multiple-meets.tcl b/tests/unit/cluster/cluster-multiple-meets.tcl
new file mode 100644
index 0000000000..07a2582133
--- /dev/null
+++ b/tests/unit/cluster/cluster-multiple-meets.tcl
@@ -0,0 +1,83 @@
+# make sure the test infra won't use SELECT
+set old_singledb $::singledb
+set ::singledb 1
+
+tags {tls:skip external:skip cluster} {
+    set base_conf [list cluster-enabled yes]
+    start_multiple_servers 2 [list overrides $base_conf] {
+        test "Cluster nodes are reachable" {
+            for {set id 0} {$id < [llength $::servers]} {incr id} {
+                # Every node should be reachable.
+                wait_for_condition 1000 50 {
+                    ([catch {R $id ping} ping_reply] == 0) &&
+                    ($ping_reply eq {PONG})
+                } else {
+                    catch {R $id ping} err
+                    fail "Node #$id keeps replying '$err' to PING."
+                }
+            }
+        }
+
+        test "Before slots allocation, all nodes report cluster failure" {
+            wait_for_cluster_state fail
+        }
+
+        set CLUSTER_PACKET_TYPE_PONG 1
+        set CLUSTER_PACKET_TYPE_NONE -1
+
+        test "Cluster nodes haven't met each other" {
+            assert {[llength [get_cluster_nodes 1]] == 1}
+            assert {[llength [get_cluster_nodes 0]] == 1}
+        }
+
+        test "Allocate slots" {
+            cluster_allocate_slots 2 0;# primaries replicas
+        }
+
+        test "Multiple MEETs from Node 1 to Node 0 should work" {
+            # Make 1 drop the PONG responses to MEET
+            R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_PONG
+            # It is important to close the connection on drop, otherwise a subsequent MEET won't be sent
+            R 1 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1
+
+            R 1 CLUSTER MEET 127.0.0.1 [srv 0 port]
+
+            # Wait for at least a few MEETs to be sent so that we are sure that 1 is dropping the response to MEET.
+            wait_for_condition 1000 50 {
+                [CI 0 cluster_stats_messages_meet_received] > 1 &&
+                [CI 1 cluster_state] eq {fail} && [CI 0 cluster_state] eq {ok}
+            } else {
+                fail "Cluster node 1 never sent multiple MEETs to 0"
+            }
+
+            # 0 will be connected to 1, but 1 won't see that 0 is connected
+            assert {[llength [get_cluster_nodes 1 connected]] == 1}
+            assert {[llength [get_cluster_nodes 0 connected]] == 2}
+
+            # Drop incoming and outgoing links from/to 1
+            R 0 DEBUG CLUSTERLINK KILL ALL [R 1 CLUSTER MYID]
+
+            # Wait for 0 to know about 1 again after 1 sends a MEET
+            wait_for_condition 1000 50 {
+                [llength [get_cluster_nodes 0 connected]] == 2
+            } else {
+                fail "Cluster node 1 never sent multiple MEETs to 0"
+            }
+
+            # Undo packet drop
+            R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE
+            R 1 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 0
+
+            # Both nodes (0 and 1) will turn to cluster state ok
+            wait_for_condition 1000 50 {
+                [CI 1 cluster_state] eq {ok} && [CI 0 cluster_state] eq {ok} &&
+                [CI 1 cluster_stats_messages_meet_sent] == [CI 0 cluster_stats_messages_meet_received]
+            } else {
+                fail "1 cluster_state:[CI 1 cluster_state], 0 cluster_state: [CI 0 cluster_state]"
+            }
+        }
+    } ;# stop servers
+} ;# tags
+
+set ::singledb $old_singledb
+
diff --git a/tests/unit/cluster/cluster-reliable-meet.tcl b/tests/unit/cluster/cluster-reliable-meet.tcl
new file mode 100644
index 0000000000..41da97ab9b
--- /dev/null
+++ b/tests/unit/cluster/cluster-reliable-meet.tcl
@@ -0,0 +1,71 @@
+# make sure the test infra won't use SELECT
+set old_singledb $::singledb
+set ::singledb 1
+
+tags {tls:skip external:skip cluster} {
+    set base_conf [list cluster-enabled yes]
+    start_multiple_servers 2 [list overrides $base_conf] {
+        test "Cluster nodes are reachable" {
+            for {set id 0} {$id < [llength $::servers]} {incr id} {
+                # Every node should be reachable.
+                wait_for_condition 1000 50 {
+                    ([catch {R $id ping} ping_reply] == 0) &&
+                    ($ping_reply eq {PONG})
+                } else {
+                    catch {R $id ping} err
+                    fail "Node #$id keeps replying '$err' to PING."
+                }
+            }
+        }
+
+        test "Before slots allocation, all nodes report cluster failure" {
+            wait_for_cluster_state fail
+        }
+
+        set CLUSTER_PACKET_TYPE_MEET 2
+        set CLUSTER_PACKET_TYPE_NONE -1
+
+        test "Cluster nodes haven't met each other" {
+            assert {[llength [get_cluster_nodes 1]] == 1}
+            assert {[llength [get_cluster_nodes 0]] == 1}
+        }
+
+        test "Allocate slots" {
+            cluster_allocate_slots 2 0
+        }
+
+        test "MEET is reliable when target drops the initial MEETs" {
+            # Make 0 drop the initial MEET messages due to link failure
+            R 0 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_MEET
+            R 0 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1
+
+            R 1 CLUSTER MEET 127.0.0.1 [srv 0 port]
+
+            # Wait for at least a few MEETs to be sent so that we are sure that 0 is
+            # dropping them.
+            wait_for_condition 1000 50 {
+                [CI 0 cluster_stats_messages_meet_received] >= 3
+            } else {
+                fail "Cluster node 1 never sent multiple MEETs to 0"
+            }
+
+            # Make sure the nodes still don't know about each other
+            assert {[llength [get_cluster_nodes 1 connected]] == 1}
+            assert {[llength [get_cluster_nodes 0 connected]] == 1}
+
+            R 0 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE
+
+            # If the MEET is reliable, both nodes (0 and 1) will turn to cluster state ok
+            wait_for_condition 1000 50 {
+                [CI 1 cluster_state] eq {ok} && [CI 0 cluster_state] eq {ok} &&
+                [CI 0 cluster_stats_messages_meet_received] >= 4 &&
+                [CI 1 cluster_stats_messages_meet_sent] == [CI 0 cluster_stats_messages_meet_received]
+            } else {
+                fail "1 cluster_state:[CI 1 cluster_state], 0 cluster_state: [CI 0 cluster_state]"
+            }
+        }
+    } ;# stop servers
+} ;# tags
+
+set ::singledb $old_singledb
+

From db6d3c1138695947412a745146ca29dbdf2e91c6 Mon Sep 17 00:00:00 2001
From: Binbin <binloveplay1314@qq.com>
Date: Mon, 17 Jun 2024 11:46:08 +0800
Subject: [PATCH 02/53] Only primary with slots has the right to mark a node as
 failed (#634)

In markNodeAsFailingIfNeeded we count needed_quorum and failures.
needed_quorum is half of cluster->size plus one, and cluster->size
is the number of primary nodes that own slots, but when counting
failures we did not check whether the primary has slots.

Only primaries that own slots have the right to vote. Add a new
clusterNodeIsVotingPrimary helper to formalize this concept.

Release notes:

bugfix where nodes not in the quorum group might spuriously mark nodes
as failed

---------

Signed-off-by: Binbin <binloveplay1314@qq.com>
Co-authored-by: Ping Xie <pingxie@outlook.com>
---
 src/cluster_legacy.c                   | 23 +++++----
 tests/unit/cluster/failure-marking.tcl | 68 ++++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 9 deletions(-)

diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index 9104a76d87..21aa620dd9 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -116,6 +116,12 @@ int verifyClusterNodeId(const char *name, int length);
 sds clusterEncodeOpenSlotsAuxField(int rdbflags);
 int clusterDecodeOpenSlotsAuxField(int rdbflags, sds s);
 
+/* Only primaries that own slots have voting rights.
+ * Returns 1 if the node has voting rights, otherwise returns 0. */
+static inline int clusterNodeIsVotingPrimary(clusterNode *n) {
+    return (n->flags & CLUSTER_NODE_PRIMARY) && n->numslots;
+}
+
 int getNodeDefaultClientPort(clusterNode *n) {
     return server.tls_cluster ? n->tls_port : n->tcp_port;
 }
@@ -1867,8 +1873,8 @@ void markNodeAsFailingIfNeeded(clusterNode *node) {
     if (nodeFailed(node)) return;    /* Already FAILing. */
 
     failures = clusterNodeFailureReportsCount(node);
-    /* Also count myself as a voter if I'm a primary. */
-    if (clusterNodeIsPrimary(myself)) failures++;
+    /* Also count myself as a voter if I'm a voting primary. */
+    if (clusterNodeIsVotingPrimary(myself)) failures++;
     if (failures < needed_quorum) return; /* No weak agreement from primaries. */
 
     serverLog(LL_NOTICE, "Marking node %.40s (%s) as failing (quorum reached).", node->name, node->human_nodename);
@@ -1908,7 +1914,7 @@ void clearNodeFailureIfNeeded(clusterNode *node) {
      * 1) The FAIL state is old enough.
      * 2) It is yet serving slots from our point of view (not failed over).
      * Apparently no one is going to fix these slots, clear the FAIL flag. */
-    if (clusterNodeIsPrimary(node) && node->numslots > 0 &&
+    if (clusterNodeIsVotingPrimary(node) &&
         (now - node->fail_time) > (server.cluster_node_timeout * CLUSTER_FAIL_UNDO_TIME_MULT)) {
         serverLog(
             LL_NOTICE,
@@ -2090,8 +2096,8 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
         /* Ignore gossips about self. */
         if (node && node != myself) {
             /* We already know this node.
-               Handle failure reports, only when the sender is a primary. */
-            if (sender && clusterNodeIsPrimary(sender)) {
+               Handle failure reports, only when the sender is a voting primary. */
+            if (sender && clusterNodeIsVotingPrimary(sender)) {
                 if (flags & (CLUSTER_NODE_FAIL | CLUSTER_NODE_PFAIL)) {
                     if (clusterNodeAddFailureReport(node, sender)) {
                         serverLog(LL_VERBOSE, "Node %.40s (%s) reported node %.40s (%s) as not reachable.",
@@ -3250,8 +3256,7 @@ int clusterProcessPacket(clusterLink *link) {
         /* We consider this vote only if the sender is a primary serving
          * a non zero number of slots, and its currentEpoch is greater or
          * equal to epoch where this node started the election. */
-        if (clusterNodeIsPrimary(sender) && sender->numslots > 0 &&
-            senderCurrentEpoch >= server.cluster->failover_auth_epoch) {
+        if (clusterNodeIsVotingPrimary(sender) && senderCurrentEpoch >= server.cluster->failover_auth_epoch) {
             server.cluster->failover_auth_count++;
             /* Maybe we reached a quorum here, set a flag to make sure
              * we check ASAP. */
@@ -4768,7 +4773,7 @@ void clusterCron(void) {
             if (!(node->flags & (CLUSTER_NODE_PFAIL | CLUSTER_NODE_FAIL))) {
                 node->flags |= CLUSTER_NODE_PFAIL;
                 update_state = 1;
-                if (clusterNodeIsPrimary(myself) && server.cluster->size == 1) {
+                if (server.cluster->size == 1 && clusterNodeIsVotingPrimary(myself)) {
                     markNodeAsFailingIfNeeded(node);
                 } else {
                     serverLog(LL_DEBUG, "*** NODE %.40s possibly failing", node->name);
@@ -5038,7 +5043,7 @@ void clusterUpdateState(void) {
         while ((de = dictNext(di)) != NULL) {
             clusterNode *node = dictGetVal(de);
 
-            if (clusterNodeIsPrimary(node) && node->numslots) {
+            if (clusterNodeIsVotingPrimary(node)) {
                 server.cluster->size++;
                 if ((node->flags & (CLUSTER_NODE_FAIL | CLUSTER_NODE_PFAIL)) == 0) reachable_primaries++;
             }
diff --git a/tests/unit/cluster/failure-marking.tcl b/tests/unit/cluster/failure-marking.tcl
index c4746c8264..cfed7fff0f 100644
--- a/tests/unit/cluster/failure-marking.tcl
+++ b/tests/unit/cluster/failure-marking.tcl
@@ -16,6 +16,8 @@ start_cluster 1 1 {tags {external:skip cluster}} {
         pause_process $replica1_pid
 
         wait_node_marked_fail 0 $replica1_instance_id
+
+        resume_process $replica1_pid
     }
 }
 
@@ -49,5 +51,71 @@ start_cluster 2 1 {tags {external:skip cluster}} {
         resume_process $primary2_pid
 
         wait_node_marked_fail 0 $replica1_instance_id
+
+        resume_process $replica1_pid
+    }
+}
+
+set old_singledb $::singledb
+set ::singledb 1
+
+tags {external:skip tls:skip cluster} {
+    set base_conf [list cluster-enabled yes cluster-ping-interval 100 cluster-node-timeout 3000 save ""]
+    start_multiple_servers 5 [list overrides $base_conf] {
+        test "Only primary with slots has the right to mark a node as failed" {
+            set primary_host [srv 0 host]
+            set primary_port [srv 0 port]
+            set primary_pid [srv 0 pid]
+            set primary_id [R 0 CLUSTER MYID]
+            set replica_id [R 1 CLUSTER MYID]
+            set replica_pid [srv -1 pid]
+
+            # Meet the other nodes.
+            R 1 CLUSTER MEET $primary_host $primary_port
+            R 2 CLUSTER MEET $primary_host $primary_port
+            R 3 CLUSTER MEET $primary_host $primary_port
+            R 4 CLUSTER MEET $primary_host $primary_port
+
+            # Build a single primary cluster.
+            cluster_allocate_slots 1 1
+            wait_for_cluster_propagation
+            R 1 CLUSTER REPLICATE $primary_id
+            wait_for_cluster_propagation
+            wait_for_cluster_state "ok"
+
+            # Pause the primary, marking the primary as pfail.
+            pause_process $primary_pid
+            wait_node_marked_pfail 1 $primary_id
+            wait_node_marked_pfail 2 $primary_id
+            wait_node_marked_pfail 3 $primary_id
+            wait_node_marked_pfail 4 $primary_id
+
+            # Pause the replica, marking the replica as pfail.
+            pause_process $replica_pid
+            wait_node_marked_pfail 2 $replica_id
+            wait_node_marked_pfail 3 $replica_id
+            wait_node_marked_pfail 4 $replica_id
+
+            # Resume the primary, marking the replica as fail.
+            resume_process $primary_pid
+            wait_node_marked_fail 0 $replica_id
+            wait_node_marked_fail 2 $replica_id
+            wait_node_marked_fail 3 $replica_id
+            wait_node_marked_fail 4 $replica_id
+
+            # Check if we got the right failure reports.
+            wait_for_condition 1000 50 {
+                [R 0 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 0 &&
+                [R 2 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 1 &&
+                [R 3 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 1 &&
+                [R 4 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 1
+            } else {
+                fail "Cluster COUNT-FAILURE-REPORTS is not right."
+            }
+
+            resume_process $replica_pid
+        }
     }
 }
+
+set ::singledb $old_singledb

From 5a51bf5045d1713f5ebf58011dcb8900805be164 Mon Sep 17 00:00:00 2001
From: Andy Pan <i@andypan.me>
Date: Mon, 17 Jun 2024 12:18:20 +0800
Subject: [PATCH 03/53] Combine events to eliminate redundant kevent(2) calls
 (#638)

Combine events to eliminate redundant kevent(2) calls
to improve performance.

---------

Signed-off-by: Andy Pan <i@andypan.me>
---
 src/ae_kqueue.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/src/ae_kqueue.c b/src/ae_kqueue.c
index 3cb6fbae4a..4159f25744 100644
--- a/src/ae_kqueue.c
+++ b/src/ae_kqueue.c
@@ -101,31 +101,24 @@ static void aeApiFree(aeEventLoop *eventLoop) {
 
 static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
     aeApiState *state = eventLoop->apidata;
-    struct kevent ke;
+    struct kevent evs[2];
+    int nch = 0;
 
-    if (mask & AE_READABLE) {
-        EV_SET(&ke, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
-        if (kevent(state->kqfd, &ke, 1, NULL, 0, NULL) == -1) return -1;
-    }
-    if (mask & AE_WRITABLE) {
-        EV_SET(&ke, fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
-        if (kevent(state->kqfd, &ke, 1, NULL, 0, NULL) == -1) return -1;
-    }
-    return 0;
+    if (mask & AE_READABLE) EV_SET(evs + nch++, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+    if (mask & AE_WRITABLE) EV_SET(evs + nch++, fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
+
+    return kevent(state->kqfd, evs, nch, NULL, 0, NULL);
 }
 
 static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) {
     aeApiState *state = eventLoop->apidata;
-    struct kevent ke;
+    struct kevent evs[2];
+    int nch = 0;
 
-    if (mask & AE_READABLE) {
-        EV_SET(&ke, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
-        kevent(state->kqfd, &ke, 1, NULL, 0, NULL);
-    }
-    if (mask & AE_WRITABLE) {
-        EV_SET(&ke, fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
-        kevent(state->kqfd, &ke, 1, NULL, 0, NULL);
-    }
+    if (mask & AE_READABLE) EV_SET(evs + nch++, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
+    if (mask & AE_WRITABLE) EV_SET(evs + nch++, fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
+
+    kevent(state->kqfd, evs, nch, NULL, 0, NULL);
 }
 
 static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {

From 495a121f1938ccba6a249bd44df7d963fd32139a Mon Sep 17 00:00:00 2001
From: Binbin <binloveplay1314@qq.com>
Date: Tue, 18 Jun 2024 10:46:56 +0800
Subject: [PATCH 04/53] Adjust the log level of some logs in the cluster (#633)

I think logging pfail status changes will be very useful.
The other cluster logs were also reviewed, and some of their levels were adjusted.

Changes:
1. Changing pfail status related logs from VERBOSE to NOTICE.
2. Changing configEpoch collision log from VERBOSE(warning) to NOTICE.
3. Changing some logs from DEBUG to NOTICE.

Signed-off-by: Binbin <binloveplay1314@qq.com>
Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
---
 src/cluster_legacy.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index 21aa620dd9..0803e5039a 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -1761,10 +1761,8 @@ void clusterHandleConfigEpochCollision(clusterNode *sender) {
     server.cluster->currentEpoch++;
     myself->configEpoch = server.cluster->currentEpoch;
     clusterSaveConfigOrDie(1);
-    serverLog(LL_VERBOSE,
-              "WARNING: configEpoch collision with node %.40s (%s)."
-              " configEpoch set to %llu",
-              sender->name, sender->human_nodename, (unsigned long long)myself->configEpoch);
+    serverLog(LL_NOTICE, "configEpoch collision with node %.40s (%s). configEpoch set to %llu", sender->name,
+              sender->human_nodename, (unsigned long long)myself->configEpoch);
 }
 
 /* -----------------------------------------------------------------------------
@@ -2100,13 +2098,13 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
             if (sender && clusterNodeIsVotingPrimary(sender)) {
                 if (flags & (CLUSTER_NODE_FAIL | CLUSTER_NODE_PFAIL)) {
                     if (clusterNodeAddFailureReport(node, sender)) {
-                        serverLog(LL_VERBOSE, "Node %.40s (%s) reported node %.40s (%s) as not reachable.",
-                                  sender->name, sender->human_nodename, node->name, node->human_nodename);
+                        serverLog(LL_NOTICE, "Node %.40s (%s) reported node %.40s (%s) as not reachable.", sender->name,
+                                  sender->human_nodename, node->name, node->human_nodename);
                     }
                     markNodeAsFailingIfNeeded(node);
                 } else {
                     if (clusterNodeDelFailureReport(node, sender)) {
-                        serverLog(LL_VERBOSE, "Node %.40s (%s) reported node %.40s (%s) is back online.", sender->name,
+                        serverLog(LL_NOTICE, "Node %.40s (%s) reported node %.40s (%s) is back online.", sender->name,
                                   sender->human_nodename, node->name, node->human_nodename);
                     }
                 }
@@ -2993,7 +2991,7 @@ int clusterProcessPacket(clusterLink *link) {
                 /* If the reply has a non matching node ID we
                  * disconnect this node and set it as not having an associated
                  * address. */
-                serverLog(LL_DEBUG,
+                serverLog(LL_NOTICE,
                           "PONG contains mismatching sender ID. About node %.40s (%s) in shard %.40s added %d ms ago, "
                           "having flags %d",
                           link->node->name, link->node->human_nodename, link->node->shard_id,
@@ -4776,7 +4774,7 @@ void clusterCron(void) {
                 if (server.cluster->size == 1 && clusterNodeIsVotingPrimary(myself)) {
                     markNodeAsFailingIfNeeded(node);
                 } else {
-                    serverLog(LL_DEBUG, "*** NODE %.40s possibly failing", node->name);
+                    serverLog(LL_NOTICE, "NODE %.40s (%s) possibly failing.", node->name, node->human_nodename);
                 }
             }
         }

From 4135894a5d40f7270a2127d601fbf4f81ca4ab26 Mon Sep 17 00:00:00 2001
From: Ping Xie <pingxie@google.com>
Date: Mon, 17 Jun 2024 20:31:15 -0700
Subject: [PATCH 05/53] Update remaining `master` references to `primary`
 (#660)

Signed-off-by: Ping Xie <pingxie@google.com>
---
 src/blocked.c                       |   6 +-
 src/cluster.c                       |   4 +-
 src/cluster_legacy.c                |   8 +-
 src/commands.def                    |  19 +--
 src/commands/client-kill.json       |  10 ++
 src/commands/client-list.json       |   4 +
 src/commands/cluster-failover.json  |   2 +-
 src/commands/cluster-replicas.json  |   4 +-
 src/commands/cluster-replicate.json |   2 +-
 src/commands/cluster-slaves.json    |   4 +-
 src/commands/cluster-slots.json     |   2 +-
 src/commands/replicaof.json         |   2 +-
 src/commands/role.json              |  20 +--
 src/config.c                        |   2 +-
 src/networking.c                    |  13 +-
 src/object.c                        |   2 +-
 src/replication.c                   | 182 ++++++++++++++--------------
 src/rio.c                           |   2 +-
 src/valkey-benchmark.c              |   8 +-
 src/valkey-cli.c                    |  73 ++++++-----
 src/valkeymodule.h                  |   2 +-
 tests/integration/replication.tcl   |   4 +-
 tests/unit/auth.tcl                 |   2 +-
 tests/unit/introspection.tcl        |   4 +-
 24 files changed, 199 insertions(+), 182 deletions(-)

diff --git a/src/blocked.c b/src/blocked.c
index 6d8d4fbc7c..08abac15e3 100644
--- a/src/blocked.c
+++ b/src/blocked.c
@@ -86,7 +86,7 @@ void initClientBlockingState(client *c) {
  * flag is set client query buffer is not longer processed, but accumulated,
  * and will be processed when the client is unblocked. */
 void blockClient(client *c, int btype) {
-    /* Master client should never be blocked unless pause or module */
+    /* Primary client should never be blocked unless pause or module */
     serverAssert(!(c->flags & CLIENT_PRIMARY && btype != BLOCKED_MODULE && btype != BLOCKED_POSTPONE));
 
     c->flags |= CLIENT_BLOCKED;
@@ -265,8 +265,8 @@ void replyToClientsBlockedOnShutdown(void) {
 
 /* Mass-unblock clients because something changed in the instance that makes
  * blocking no longer safe. For example clients blocked in list operations
- * in an instance which turns from master to replica is unsafe, so this function
- * is called when a master turns into a replica.
+ * in an instance which turns from primary to replica is unsafe, so this function
+ * is called when a primary turns into a replica.
  *
  * The semantics is to send an -UNBLOCKED error to the client, disconnecting
  * it at the same time. */
diff --git a/src/cluster.c b/src/cluster.c
index 00f3c2d889..8aa6793ba8 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -813,12 +813,12 @@ void clusterCommandHelp(client *c) {
         "    Return the node's shard id.",
         "NODES",
         "    Return cluster configuration seen by node. Output format:",
-        "    <id> <ip:port@bus-port[,hostname]> <flags> <master> <pings> <pongs> <epoch> <link> <slot> ...",
+        "    <id> <ip:port@bus-port[,hostname]> <flags> <primary> <pings> <pongs> <epoch> <link> <slot> ...",
         "REPLICAS <node-id>",
         "    Return <node-id> replicas.",
         "SLOTS",
         "    Return information about slots range mappings. Each range is made of:",
-        "    start, end, master and replicas IP addresses, ports and ids",
+        "    start, end, primary and replicas IP addresses, ports and ids",
         "SHARDS",
         "    Return information about slot range mappings and the nodes associated with them.",
         NULL};
diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index 0803e5039a..e9816d52a1 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -533,9 +533,9 @@ int clusterLoadConfig(char *filename) {
                 serverAssert(server.cluster->myself == NULL);
                 myself = server.cluster->myself = n;
                 n->flags |= CLUSTER_NODE_MYSELF;
-            } else if (!strcasecmp(s, "master")) {
+            } else if (!strcasecmp(s, "master") || !strcasecmp(s, "primary")) {
                 n->flags |= CLUSTER_NODE_PRIMARY;
-            } else if (!strcasecmp(s, "slave")) {
+            } else if (!strcasecmp(s, "slave") || !strcasecmp(s, "replica")) {
                 n->flags |= CLUSTER_NODE_REPLICA;
             } else if (!strcasecmp(s, "fail?")) {
                 n->flags |= CLUSTER_NODE_PFAIL;
@@ -1903,7 +1903,7 @@ void clearNodeFailureIfNeeded(clusterNode *node) {
      * node again. */
     if (nodeIsReplica(node) || node->numslots == 0) {
         serverLog(LL_NOTICE, "Clear FAIL state for node %.40s (%s):%s is reachable again.", node->name,
-                  node->human_nodename, nodeIsReplica(node) ? "replica" : "master without slots");
+                  node->human_nodename, nodeIsReplica(node) ? "replica" : "primary without slots");
         node->flags &= ~CLUSTER_NODE_FAIL;
         clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_SAVE_CONFIG);
     }
@@ -4154,7 +4154,7 @@ void clusterLogCantFailover(int reason) {
 
     switch (reason) {
     case CLUSTER_CANT_FAILOVER_DATA_AGE:
-        msg = "Disconnected from master for longer than allowed. "
+        msg = "Disconnected from primary for longer than allowed. "
               "Please check the 'cluster-replica-validity-factor' configuration "
               "option.";
         break;
diff --git a/src/commands.def b/src/commands.def
index 06cdb4b87e..cb7fd73cc5 100644
--- a/src/commands.def
+++ b/src/commands.def
@@ -961,7 +961,7 @@ struct COMMAND_STRUCT CLUSTER_Subcommands[] = {
 {MAKE_CMD("countkeysinslot","Returns the number of keys in a hash slot.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_COUNTKEYSINSLOT_History,0,CLUSTER_COUNTKEYSINSLOT_Tips,0,clusterCommand,3,CMD_STALE,0,CLUSTER_COUNTKEYSINSLOT_Keyspecs,0,NULL,1),.args=CLUSTER_COUNTKEYSINSLOT_Args},
 {MAKE_CMD("delslots","Sets hash slots as unbound for a node.","O(N) where N is the total number of hash slot arguments","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_DELSLOTS_History,0,CLUSTER_DELSLOTS_Tips,0,clusterCommand,-3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_DELSLOTS_Keyspecs,0,NULL,1),.args=CLUSTER_DELSLOTS_Args},
 {MAKE_CMD("delslotsrange","Sets hash slot ranges as unbound for a node.","O(N) where N is the total number of the slots between the start slot and end slot arguments.","7.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_DELSLOTSRANGE_History,0,CLUSTER_DELSLOTSRANGE_Tips,0,clusterCommand,-4,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_DELSLOTSRANGE_Keyspecs,0,NULL,1),.args=CLUSTER_DELSLOTSRANGE_Args},
-{MAKE_CMD("failover","Forces a replica to perform a manual failover of its master.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_FAILOVER_History,0,CLUSTER_FAILOVER_Tips,0,clusterCommand,-2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_FAILOVER_Keyspecs,0,NULL,1),.args=CLUSTER_FAILOVER_Args},
+{MAKE_CMD("failover","Forces a replica to perform a manual failover of its primary.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_FAILOVER_History,0,CLUSTER_FAILOVER_Tips,0,clusterCommand,-2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_FAILOVER_Keyspecs,0,NULL,1),.args=CLUSTER_FAILOVER_Args},
 {MAKE_CMD("flushslots","Deletes all slots information from a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_FLUSHSLOTS_History,0,CLUSTER_FLUSHSLOTS_Tips,0,clusterCommand,2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_FLUSHSLOTS_Keyspecs,0,NULL,0)},
 {MAKE_CMD("forget","Removes a node from the nodes table.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_FORGET_History,0,CLUSTER_FORGET_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_FORGET_Keyspecs,0,NULL,1),.args=CLUSTER_FORGET_Args},
 {MAKE_CMD("getkeysinslot","Returns the key names in a hash slot.","O(N) where N is the number of requested keys","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_GETKEYSINSLOT_History,0,CLUSTER_GETKEYSINSLOT_Tips,1,clusterCommand,4,CMD_STALE,0,CLUSTER_GETKEYSINSLOT_Keyspecs,0,NULL,2),.args=CLUSTER_GETKEYSINSLOT_Args},
@@ -973,14 +973,14 @@ struct COMMAND_STRUCT CLUSTER_Subcommands[] = {
 {MAKE_CMD("myid","Returns the ID of a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_MYID_History,0,CLUSTER_MYID_Tips,0,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_MYID_Keyspecs,0,NULL,0)},
 {MAKE_CMD("myshardid","Returns the shard ID of a node.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_MYSHARDID_History,0,CLUSTER_MYSHARDID_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_MYSHARDID_Keyspecs,0,NULL,0)},
 {MAKE_CMD("nodes","Returns the cluster configuration for a node.","O(N) where N is the total number of Cluster nodes","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_NODES_History,0,CLUSTER_NODES_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_NODES_Keyspecs,0,NULL,0)},
-{MAKE_CMD("replicas","Lists the replica nodes of a master node.","O(N) where N is the number of replicas.","5.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICAS_History,0,CLUSTER_REPLICAS_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICAS_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICAS_Args},
-{MAKE_CMD("replicate","Configure a node as replica of a master node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICATE_History,0,CLUSTER_REPLICATE_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICATE_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICATE_Args},
+{MAKE_CMD("replicas","Lists the replica nodes of a primary node.","O(N) where N is the number of replicas.","5.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICAS_History,0,CLUSTER_REPLICAS_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICAS_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICAS_Args},
+{MAKE_CMD("replicate","Configure a node as replica of a primary node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICATE_History,0,CLUSTER_REPLICATE_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICATE_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICATE_Args},
 {MAKE_CMD("reset","Resets a node.","O(N) where N is the number of known nodes. The command may execute a FLUSHALL as a side effect.","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_RESET_History,0,CLUSTER_RESET_Tips,0,clusterCommand,-2,CMD_ADMIN|CMD_STALE|CMD_NOSCRIPT,0,CLUSTER_RESET_Keyspecs,0,NULL,1),.args=CLUSTER_RESET_Args},
 {MAKE_CMD("saveconfig","Forces a node to save the cluster configuration to disk.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SAVECONFIG_History,0,CLUSTER_SAVECONFIG_Tips,0,clusterCommand,2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_SAVECONFIG_Keyspecs,0,NULL,0)},
 {MAKE_CMD("set-config-epoch","Sets the configuration epoch for a new node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SET_CONFIG_EPOCH_History,0,CLUSTER_SET_CONFIG_EPOCH_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_SET_CONFIG_EPOCH_Keyspecs,0,NULL,1),.args=CLUSTER_SET_CONFIG_EPOCH_Args},
 {MAKE_CMD("setslot","Binds a hash slot to a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SETSLOT_History,1,CLUSTER_SETSLOT_Tips,0,clusterCommand,-4,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE|CMD_MAY_REPLICATE,0,CLUSTER_SETSLOT_Keyspecs,0,NULL,3),.args=CLUSTER_SETSLOT_Args},
 {MAKE_CMD("shards","Returns the mapping of cluster slots to shards.","O(N) where N is the total number of cluster nodes","7.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SHARDS_History,0,CLUSTER_SHARDS_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_SHARDS_Keyspecs,0,NULL,0)},
-{MAKE_CMD("slaves","Lists the replica nodes of a master node.","O(N) where N is the number of replicas.","3.0.0",CMD_DOC_DEPRECATED,"`CLUSTER REPLICAS`","5.0.0","cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLAVES_History,0,CLUSTER_SLAVES_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_SLAVES_Keyspecs,0,NULL,1),.args=CLUSTER_SLAVES_Args},
+{MAKE_CMD("slaves","Lists the replica nodes of a primary node.","O(N) where N is the number of replicas.","3.0.0",CMD_DOC_DEPRECATED,"`CLUSTER REPLICAS`","5.0.0","cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLAVES_History,0,CLUSTER_SLAVES_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_SLAVES_Keyspecs,0,NULL,1),.args=CLUSTER_SLAVES_Args},
 {MAKE_CMD("slots","Returns the mapping of cluster slots to nodes.","O(N) where N is the total number of Cluster nodes","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLOTS_History,2,CLUSTER_SLOTS_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_SLOTS_Keyspecs,0,NULL,0)},
 {0}
 };
@@ -1187,6 +1187,7 @@ commandHistory CLIENT_KILL_History[] = {
 {"5.0.0","Replaced `slave` `TYPE` with `replica`. `slave` still supported for backward compatibility."},
 {"6.2.0","`LADDR` option."},
 {"8.0.0","`MAXAGE` option."},
+{"8.0.0","Replaced `master` `TYPE` with `primary`. `master` still supported for backward compatibility."},
 };
 #endif
 
@@ -1204,6 +1205,7 @@ commandHistory CLIENT_KILL_History[] = {
 struct COMMAND_ARG CLIENT_KILL_filter_new_format_client_type_Subargs[] = {
 {MAKE_ARG("normal",ARG_TYPE_PURE_TOKEN,-1,"NORMAL",NULL,NULL,CMD_ARG_NONE,0,NULL)},
 {MAKE_ARG("master",ARG_TYPE_PURE_TOKEN,-1,"MASTER",NULL,"3.2.0",CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("primary",ARG_TYPE_PURE_TOKEN,-1,"PRIMARY",NULL,"8.0.0",CMD_ARG_NONE,0,NULL)},
 {MAKE_ARG("slave",ARG_TYPE_PURE_TOKEN,-1,"SLAVE",NULL,NULL,CMD_ARG_NONE,0,NULL)},
 {MAKE_ARG("replica",ARG_TYPE_PURE_TOKEN,-1,"REPLICA",NULL,"5.0.0",CMD_ARG_NONE,0,NULL)},
 {MAKE_ARG("pubsub",ARG_TYPE_PURE_TOKEN,-1,"PUBSUB",NULL,NULL,CMD_ARG_NONE,0,NULL)},
@@ -1218,7 +1220,7 @@ struct COMMAND_ARG CLIENT_KILL_filter_new_format_skipme_Subargs[] = {
 /* CLIENT KILL filter new_format argument table */
 struct COMMAND_ARG CLIENT_KILL_filter_new_format_Subargs[] = {
 {MAKE_ARG("client-id",ARG_TYPE_INTEGER,-1,"ID",NULL,"2.8.12",CMD_ARG_OPTIONAL,0,NULL)},
-{MAKE_ARG("client-type",ARG_TYPE_ONEOF,-1,"TYPE",NULL,"2.8.12",CMD_ARG_OPTIONAL,5,NULL),.subargs=CLIENT_KILL_filter_new_format_client_type_Subargs},
+{MAKE_ARG("client-type",ARG_TYPE_ONEOF,-1,"TYPE",NULL,"2.8.12",CMD_ARG_OPTIONAL,6,NULL),.subargs=CLIENT_KILL_filter_new_format_client_type_Subargs},
 {MAKE_ARG("username",ARG_TYPE_STRING,-1,"USER",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
 {MAKE_ARG("addr",ARG_TYPE_STRING,-1,"ADDR",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL),.display_text="ip:port"},
 {MAKE_ARG("laddr",ARG_TYPE_STRING,-1,"LADDR",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL),.display_text="ip:port"},
@@ -1248,6 +1250,7 @@ commandHistory CLIENT_LIST_History[] = {
 {"6.2.0","Added `argv-mem`, `tot-mem`, `laddr` and `redir` fields and the optional `ID` filter."},
 {"7.0.0","Added `resp`, `multi-mem`, `rbs` and `rbp` fields."},
 {"7.0.3","Added `ssub` field."},
+{"8.0.0","Replaced `master` `TYPE` with `primary`. `master` still supported for backward compatibility."},
 };
 #endif
 
@@ -1554,8 +1557,8 @@ struct COMMAND_STRUCT CLIENT_Subcommands[] = {
 {MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_HELP_History,0,CLIENT_HELP_Tips,0,clientCommand,2,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_HELP_Keyspecs,0,NULL,0)},
 {MAKE_CMD("id","Returns the unique client ID of the connection.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_ID_History,0,CLIENT_ID_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_ID_Keyspecs,0,NULL,0)},
 {MAKE_CMD("info","Returns information about the connection.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_INFO_History,0,CLIENT_INFO_Tips,1,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_INFO_Keyspecs,0,NULL,0)},
-{MAKE_CMD("kill","Terminates open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_KILL_History,6,CLIENT_KILL_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_KILL_Keyspecs,0,NULL,1),.args=CLIENT_KILL_Args},
-{MAKE_CMD("list","Lists open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,6,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args},
+{MAKE_CMD("kill","Terminates open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_KILL_History,7,CLIENT_KILL_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_KILL_Keyspecs,0,NULL,1),.args=CLIENT_KILL_Args},
+{MAKE_CMD("list","Lists open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,7,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args},
 {MAKE_CMD("no-evict","Sets the client eviction mode of the connection.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_EVICT_History,0,CLIENT_NO_EVICT_Tips,0,clientCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_NO_EVICT_Keyspecs,0,NULL,1),.args=CLIENT_NO_EVICT_Args},
 {MAKE_CMD("no-touch","Controls whether commands sent by the client affect the LRU/LFU of accessed keys.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_TOUCH_History,0,CLIENT_NO_TOUCH_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,CLIENT_NO_TOUCH_Keyspecs,0,NULL,1),.args=CLIENT_NO_TOUCH_Args},
 {MAKE_CMD("pause","Suspends commands processing.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_PAUSE_History,1,CLIENT_PAUSE_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_PAUSE_Keyspecs,0,NULL,2),.args=CLIENT_PAUSE_Args},
@@ -10816,7 +10819,7 @@ struct COMMAND_STRUCT serverCommandTable[] = {
 {MAKE_CMD("monitor","Listens for all requests received by the server in real-time.",NULL,"1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MONITOR_History,0,MONITOR_Tips,0,monitorCommand,1,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,MONITOR_Keyspecs,0,NULL,0)},
 {MAKE_CMD("psync","An internal command used in replication.",NULL,"2.8.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,PSYNC_History,0,PSYNC_Tips,0,syncCommand,-3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NO_MULTI|CMD_NOSCRIPT,0,PSYNC_Keyspecs,0,NULL,2),.args=PSYNC_Args},
 {MAKE_CMD("replconf","An internal command for configuring the replication stream.","O(1)","3.0.0",CMD_DOC_SYSCMD,NULL,NULL,"server",COMMAND_GROUP_SERVER,REPLCONF_History,0,REPLCONF_Tips,0,replconfCommand,-1,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_ALLOW_BUSY,0,REPLCONF_Keyspecs,0,NULL,0)},
-{MAKE_CMD("replicaof","Configures a server as replica of another, or promotes it to a master.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,REPLICAOF_History,0,REPLICAOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,REPLICAOF_Keyspecs,0,NULL,1),.args=REPLICAOF_Args},
+{MAKE_CMD("replicaof","Configures a server as replica of another, or promotes it to a primary.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,REPLICAOF_History,0,REPLICAOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,REPLICAOF_Keyspecs,0,NULL,1),.args=REPLICAOF_Args},
 {MAKE_CMD("restore-asking","An internal command for migrating keys in a cluster.","O(1) to create the new key and additional O(N*M) to reconstruct the serialized value, where N is the number of objects composing the value and M their average size. For small string values the time complexity is thus O(1)+O(1*M) where M is small, so simply O(1). However for sorted set values the complexity is O(N*M*log(N)) because inserting values into sorted sets is O(log(N)).","3.0.0",CMD_DOC_SYSCMD,NULL,NULL,"server",COMMAND_GROUP_SERVER,RESTORE_ASKING_History,3,RESTORE_ASKING_Tips,0,restoreCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_ASKING,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,RESTORE_ASKING_Keyspecs,1,NULL,7),.args=RESTORE_ASKING_Args},
 {MAKE_CMD("role","Returns the replication role.","O(1)","2.8.12",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ROLE_History,0,ROLE_Tips,0,roleCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_SENTINEL,ACL_CATEGORY_ADMIN|ACL_CATEGORY_DANGEROUS,ROLE_Keyspecs,0,NULL,0)},
 {MAKE_CMD("save","Synchronously saves the database(s) to disk.","O(N) where N is the total number of keys in all databases","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SAVE_History,0,SAVE_Tips,0,saveCommand,1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_NO_MULTI,0,SAVE_Keyspecs,0,NULL,0)},
diff --git a/src/commands/client-kill.json b/src/commands/client-kill.json
index 01079ad993..97fa932cd8 100644
--- a/src/commands/client-kill.json
+++ b/src/commands/client-kill.json
@@ -31,6 +31,10 @@
             [
                 "8.0.0",
                 "`MAXAGE` option."
+            ],
+            [
+                "8.0.0",
+                "Replaced `master` `TYPE` with `primary`. `master` still supported for backward compatibility."
             ]
         ],
         "command_flags": [
@@ -84,6 +88,12 @@
                                         "token": "master",
                                         "since": "3.2.0"
                                     },
+                                    {
+                                        "name": "primary",
+                                        "type": "pure-token",
+                                        "token": "primary",
+                                        "since": "8.0.0"
+                                    },
                                     {
                                         "name": "slave",
                                         "type": "pure-token",
diff --git a/src/commands/client-list.json b/src/commands/client-list.json
index f72ffaf40a..d9c0054e60 100644
--- a/src/commands/client-list.json
+++ b/src/commands/client-list.json
@@ -31,6 +31,10 @@
             [
                 "7.0.3",
                 "Added `ssub` field."
+            ],
+            [
+                "8.0.0",
+                "Replaced `master` `TYPE` with `primary`. `master` still supported for backward compatibility."
             ]
         ],
         "command_flags": [
diff --git a/src/commands/cluster-failover.json b/src/commands/cluster-failover.json
index f58fd562a7..9b31e310eb 100644
--- a/src/commands/cluster-failover.json
+++ b/src/commands/cluster-failover.json
@@ -1,6 +1,6 @@
 {
     "FAILOVER": {
-        "summary": "Forces a replica to perform a manual failover of its master.",
+        "summary": "Forces a replica to perform a manual failover of its primary.",
         "complexity": "O(1)",
         "group": "cluster",
         "since": "3.0.0",
diff --git a/src/commands/cluster-replicas.json b/src/commands/cluster-replicas.json
index 4e8bd4204c..2fb47afea4 100644
--- a/src/commands/cluster-replicas.json
+++ b/src/commands/cluster-replicas.json
@@ -1,6 +1,6 @@
 {
     "REPLICAS": {
-        "summary": "Lists the replica nodes of a master node.",
+        "summary": "Lists the replica nodes of a primary node.",
         "complexity": "O(N) where N is the number of replicas.",
         "group": "cluster",
         "since": "5.0.0",
@@ -21,7 +21,7 @@
             }
         ],
         "reply_schema": {
-            "description": "A list of replica nodes replicating from the specified master node provided in the same format used by CLUSTER NODES.",
+            "description": "A list of replica nodes replicating from the specified primary node provided in the same format used by CLUSTER NODES.",
             "type": "array",
             "items": {
                 "type": "string",
diff --git a/src/commands/cluster-replicate.json b/src/commands/cluster-replicate.json
index 060d4af190..857a8022b8 100644
--- a/src/commands/cluster-replicate.json
+++ b/src/commands/cluster-replicate.json
@@ -1,6 +1,6 @@
 {
     "REPLICATE": {
-        "summary": "Configure a node as replica of a master node.",
+        "summary": "Configure a node as replica of a primary node.",
         "complexity": "O(1)",
         "group": "cluster",
         "since": "3.0.0",
diff --git a/src/commands/cluster-slaves.json b/src/commands/cluster-slaves.json
index db66a1c1db..7059e544bb 100644
--- a/src/commands/cluster-slaves.json
+++ b/src/commands/cluster-slaves.json
@@ -1,6 +1,6 @@
 {
     "SLAVES": {
-        "summary": "Lists the replica nodes of a master node.",
+        "summary": "Lists the replica nodes of a primary node.",
         "complexity": "O(N) where N is the number of replicas.",
         "group": "cluster",
         "since": "3.0.0",
@@ -26,7 +26,7 @@
             }
         ],
         "reply_schema": {
-            "description": "A list of replica nodes replicating from the specified master node provided in the same format used by CLUSTER NODES.",
+            "description": "A list of replica nodes replicating from the specified primary node provided in the same format used by CLUSTER NODES.",
             "type": "array",
             "items": {
                 "type": "string",
diff --git a/src/commands/cluster-slots.json b/src/commands/cluster-slots.json
index ca48f371ea..5d00280f15 100644
--- a/src/commands/cluster-slots.json
+++ b/src/commands/cluster-slots.json
@@ -42,7 +42,7 @@
                     },
                     {
                         "type": "array",
-                        "description": "Master node for the slot range.",
+                        "description": "Primary node for the slot range.",
                         "minItems": 4,
                         "maxItems": 4,
                         "items": [
diff --git a/src/commands/replicaof.json b/src/commands/replicaof.json
index 6ddedf2d68..cd5102171c 100644
--- a/src/commands/replicaof.json
+++ b/src/commands/replicaof.json
@@ -1,6 +1,6 @@
 {
     "REPLICAOF": {
-        "summary": "Configures a server as replica of another, or promotes it to a master.",
+        "summary": "Configures a server as replica of another, or promotes it to a primary.",
         "complexity": "O(1)",
         "group": "server",
         "since": "5.0.0",
diff --git a/src/commands/role.json b/src/commands/role.json
index 1c3a4490ca..d31396faf6 100644
--- a/src/commands/role.json
+++ b/src/commands/role.json
@@ -28,7 +28,7 @@
                             "const": "master"
                         },
                         {
-                            "description": "Current replication master offset.",
+                            "description": "Current replication primary offset.",
                             "type": "integer"
                         },
                         {
@@ -65,18 +65,18 @@
                             "const": "slave"
                         },
                         {
-                            "description": "IP of master.",
+                            "description": "IP of primary.",
                             "type": "string"
                         },
                         {
-                            "description": "Port number of master.",
+                            "description": "Port number of primary.",
                             "type": "integer"
                         },
                         {
-                            "description": "State of the replication from the point of view of the master.",
+                            "description": "State of the replication from the point of view of the primary.",
                             "oneOf": [
                                 {
-                                    "description": "The instance is in handshake with its master.",
+                                    "description": "The instance is in handshake with its primary.",
                                     "const": "handshake"
                                 },
                                 {
@@ -84,15 +84,15 @@
                                     "const": "none"
                                 },
                                 {
-                                    "description": "The instance needs to connect to its master.",
+                                    "description": "The instance needs to connect to its primary.",
                                     "const": "connect"
                                 },
                                 {
-                                    "description": "The master-replica connection is in progress.",
+                                    "description": "The primary-replica connection is in progress.",
                                     "const": "connecting"
                                 },
                                 {
-                                    "description": "The master and replica are trying to perform the synchronization.",
+                                    "description": "The primary and replica are trying to perform the synchronization.",
                                     "const": "sync"
                                 },
                                 {
@@ -106,7 +106,7 @@
                             ]
                         },
                         {
-                            "description": "The amount of data received from the replica so far in terms of master replication offset.",
+                            "description": "The amount of data received from the replica so far in terms of primary replication offset.",
                             "type": "integer"
                         }
                     ]
@@ -120,7 +120,7 @@
                             "const": "sentinel"
                         },
                         {
-                            "description": "List of master names monitored by this sentinel instance.",
+                            "description": "List of primary names monitored by this sentinel instance.",
                             "type": "array",
                             "items": {
                                 "type": "string"
diff --git a/src/config.c b/src/config.c
index 83e2a51db1..2a692ac8fa 100644
--- a/src/config.c
+++ b/src/config.c
@@ -2891,7 +2891,7 @@ static int setConfigReplicaOfOption(standardConfig *config, sds *argv, int argc,
     char *ptr;
     server.primary_port = strtol(argv[1], &ptr, 10);
     if (server.primary_port < 0 || server.primary_port > 65535 || *ptr != '\0') {
-        *err = "Invalid master port";
+        *err = "Invalid primary port";
         return 0;
     }
     server.primary_host = sdsnew(argv[0]);
diff --git a/src/networking.c b/src/networking.c
index ecdeeb6588..d6d3d4fece 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -32,6 +32,7 @@
 #include "script.h"
 #include "fpconv_dtoa.h"
 #include "fmtargs.h"
+#include <strings.h>
 #include <sys/socket.h>
 #include <sys/uio.h>
 #include <math.h>
@@ -586,11 +587,11 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) {
             to = "AOF-loading-client";
             from = "server";
         } else if (ctype == CLIENT_TYPE_PRIMARY) {
-            to = "master";
+            to = "primary";
             from = "replica";
         } else {
             to = "replica";
-            from = "master";
+            from = "primary";
         }
 
         if (len > 4096) len = 4096;
@@ -2232,7 +2233,7 @@ int processInlineBuffer(client *c) {
         sdsfreesplitres(argv, argc);
         serverLog(LL_WARNING, "WARNING: Receiving inline protocol from primary, primary stream corruption? Closing the "
                               "primary connection and discarding the cached primary.");
-        setProtocolError("Master using the inline protocol. Desync?", c);
+        setProtocolError("Primary using the inline protocol. Desync?", c);
         return C_ERR;
     }
 
@@ -3075,7 +3076,7 @@ void clientCommand(client *c) {
 "      Kill connections made from the specified address",
 "    * LADDR (<ip:port>|<unixsocket>:0)",
 "      Kill connections made to specified local address",
-"    * TYPE (NORMAL|MASTER|REPLICA|PUBSUB)",
+"    * TYPE (NORMAL|PRIMARY|REPLICA|PUBSUB)",
 "      Kill connections by type.",
 "    * USER <username>",
 "      Kill connections authenticated by <username>.",
@@ -3087,7 +3088,7 @@ void clientCommand(client *c) {
 "      Kill connections older than the specified age.",
 "LIST [options ...]",
 "    Return information about client connections. Options:",
-"    * TYPE (NORMAL|MASTER|REPLICA|PUBSUB)",
+"    * TYPE (NORMAL|PRIMARY|REPLICA|PUBSUB)",
 "      Return clients of specified type.",
 "UNPAUSE",
 "    Stop the current client pause, resuming traffic.",
@@ -3898,7 +3899,7 @@ int getClientTypeByName(char *name) {
         return CLIENT_TYPE_REPLICA;
     else if (!strcasecmp(name, "pubsub"))
         return CLIENT_TYPE_PUBSUB;
-    else if (!strcasecmp(name, "master"))
+    else if (!strcasecmp(name, "master") || !strcasecmp(name, "primary"))
         return CLIENT_TYPE_PRIMARY;
     else
         return -1;
diff --git a/src/object.c b/src/object.c
index 7f93c3768d..73c3de55dd 100644
--- a/src/object.c
+++ b/src/object.c
@@ -1385,7 +1385,7 @@ sds getMemoryDoctorReport(void) {
                        " * Big replica buffers: The replica output buffers in this instance are greater than 10MB for "
                        "each replica (on average). This likely means that there is some replica instance that is "
                        "struggling receiving data, either because it is too slow or because of networking issues. As a "
-                       "result, data piles on the master output buffers. Please try to identify what replica is not "
+                       "result, data piles on the primary output buffers. Please try to identify what replica is not "
                        "receiving data correctly and why. You can use the INFO output in order to check the replicas "
                        "delays and the CLIENT LIST command to check the output buffers of each replica.\n\n");
         }
diff --git a/src/replication.c b/src/replication.c
index 4fe8470371..e74f66a67c 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -43,8 +43,8 @@
 #include <sys/socket.h>
 #include <sys/stat.h>
 
-void replicationDiscardCachedMaster(void);
-void replicationResurrectCachedMaster(connection *conn);
+void replicationDiscardCachedPrimary(void);
+void replicationResurrectCachedPrimary(connection *conn);
 void replicationSendAck(void);
 int replicaPutOnline(client *replica);
 void replicaStartCommandStream(client *replica);
@@ -114,7 +114,7 @@ int bg_unlink(const char *filename) {
     }
 }
 
-/* ---------------------------------- MASTER -------------------------------- */
+/* ---------------------------------- PRIMARY -------------------------------- */
 
 void createReplicationBacklog(void) {
     serverAssert(server.repl_backlog == NULL);
@@ -420,7 +420,7 @@ void feedReplicationBuffer(char *s, size_t len) {
  * This function is used if the instance is a primary: we use the commands
  * received by our clients in order to create the replication stream.
  * Instead if the instance is a replica and has sub-replicas attached, we use
- * replicationFeedStreamFromMasterStream() */
+ * replicationFeedStreamFromPrimaryStream() */
 void replicationFeedReplicas(int dictid, robj **argv, int argc) {
     int j, len;
     char llstr[LONG_STR_SIZE];
@@ -925,7 +925,7 @@ void syncCommand(client *c) {
                 replicationUnsetPrimary();
             }
             sds client = catClientInfoString(sdsempty(), c);
-            serverLog(LL_NOTICE, "MASTER MODE enabled (failover request from '%s')", client);
+            serverLog(LL_NOTICE, "PRIMARY MODE enabled (failover request from '%s')", client);
             sdsfree(client);
         } else {
             addReplyError(c, "PSYNC FAILOVER replid must match my replid.");
@@ -1706,7 +1706,7 @@ void replicationEmptyDbCallback(dict *d) {
 /* Once we have a link with the primary and the synchronization was
  * performed, this function materializes the primary client we store
  * at server.primary, starting from the specified file descriptor. */
-void replicationCreateMasterClient(connection *conn, int dbid) {
+void replicationCreatePrimaryClient(connection *conn, int dbid) {
     server.primary = createClient(conn);
     if (conn) connSetReadHandler(server.primary->conn, readQueryFromClient);
 
@@ -1793,11 +1793,11 @@ void disklessLoadDiscardTempDb(serverDb *tempDb) {
  * we have no way to incrementally feed our replicas after that.
  * We want our replicas to resync with us as well, if we have any sub-replicas.
  * This is useful on readSyncBulkPayload in places where we just finished transferring db. */
-void replicationAttachToNewMaster(void) {
+void replicationAttachToNewPrimary(void) {
     /* Replica starts to apply data from new primary, we must discard the cached
      * primary structure. */
     serverAssert(server.primary == NULL);
-    replicationDiscardCachedMaster();
+    replicationDiscardCachedPrimary();
 
     disconnectReplicas();     /* Force our replicas to resync with us as well. */
     freeReplicationBacklog(); /* Don't allow our chained replicas to PSYNC. */
@@ -1825,7 +1825,7 @@ void readSyncBulkPayload(connection *conn) {
     if (server.repl_transfer_size == -1) {
         nread = connSyncReadLine(conn, buf, 1024, server.repl_syncio_timeout * 1000);
         if (nread == -1) {
-            serverLog(LL_WARNING, "I/O error reading bulk count from MASTER: %s", connGetLastError(conn));
+            serverLog(LL_WARNING, "I/O error reading bulk count from PRIMARY: %s", connGetLastError(conn));
             goto error;
         } else {
             /* nread here is returned by connSyncReadLine(), which calls syncReadLine() and
@@ -1834,7 +1834,7 @@ void readSyncBulkPayload(connection *conn) {
         }
 
         if (buf[0] == '-') {
-            serverLog(LL_WARNING, "MASTER aborted replication with an error: %s", buf + 1);
+            serverLog(LL_WARNING, "PRIMARY aborted replication with an error: %s", buf + 1);
             goto error;
         } else if (buf[0] == '\0') {
             /* At this stage just a newline works as a PING in order to take
@@ -1844,7 +1844,7 @@ void readSyncBulkPayload(connection *conn) {
             return;
         } else if (buf[0] != '$') {
             serverLog(LL_WARNING,
-                      "Bad protocol from MASTER, the first byte is not '$' (we received '%s'), are you sure the host "
+                      "Bad protocol from PRIMARY, the first byte is not '$' (we received '%s'), are you sure the host "
                       "and port are right?",
                       buf);
             goto error;
@@ -1867,12 +1867,12 @@ void readSyncBulkPayload(connection *conn) {
             /* Set any repl_transfer_size to avoid entering this code path
              * at the next call. */
             server.repl_transfer_size = 0;
-            serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: receiving streamed RDB from primary with EOF %s",
+            serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: receiving streamed RDB from primary with EOF %s",
                       use_diskless_load ? "to parser" : "to disk");
         } else {
             usemark = 0;
             server.repl_transfer_size = strtol(buf + 1, NULL, 10);
-            serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: receiving %lld bytes from primary %s",
+            serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: receiving %lld bytes from primary %s",
                       (long long)server.repl_transfer_size, use_diskless_load ? "to parser" : "to disk");
         }
         return;
@@ -1894,7 +1894,7 @@ void readSyncBulkPayload(connection *conn) {
                 /* equivalent to EAGAIN */
                 return;
             }
-            serverLog(LL_WARNING, "I/O error trying to sync with MASTER: %s",
+            serverLog(LL_WARNING, "I/O error trying to sync with PRIMARY: %s",
                       (nread == -1) ? connGetLastError(conn) : "connection lost");
             cancelReplicationHandshake(1);
             return;
@@ -1925,7 +1925,7 @@ void readSyncBulkPayload(connection *conn) {
         if ((nwritten = write(server.repl_transfer_fd, buf, nread)) != nread) {
             serverLog(LL_WARNING,
                       "Write error or short write writing to the DB dump file "
-                      "needed for MASTER <-> REPLICA synchronization: %s",
+                      "needed for PRIMARY <-> REPLICA synchronization: %s",
                       (nwritten == -1) ? strerror(errno) : "short write");
             goto error;
         }
@@ -1996,9 +1996,9 @@ void readSyncBulkPayload(connection *conn) {
 
         moduleFireServerEvent(VALKEYMODULE_EVENT_REPL_ASYNC_LOAD, VALKEYMODULE_SUBEVENT_REPL_ASYNC_LOAD_STARTED, NULL);
     } else {
-        replicationAttachToNewMaster();
+        replicationAttachToNewPrimary();
 
-        serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Flushing old data");
+        serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: Flushing old data");
         emptyData(-1, empty_db_flags, replicationEmptyDbCallback);
     }
 
@@ -2008,7 +2008,7 @@ void readSyncBulkPayload(connection *conn) {
      * time for non blocking loading. */
     connSetReadHandler(conn, NULL);
 
-    serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Loading DB in memory");
+    serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: Loading DB in memory");
     rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
     if (use_diskless_load) {
         rio rdb;
@@ -2045,7 +2045,7 @@ void readSyncBulkPayload(connection *conn) {
         rdbLoadingCtx loadingCtx = {.dbarray = dbarray, .functions_lib_ctx = functions_lib_ctx};
         if (rdbLoadRioWithLoadingCtx(&rdb, RDBFLAGS_REPLICATION, &rsi, &loadingCtx) != C_OK) {
             /* RDB loading failed. */
-            serverLog(LL_WARNING, "Failed trying to load the MASTER synchronization DB "
+            serverLog(LL_WARNING, "Failed trying to load the PRIMARY synchronization DB "
                                   "from socket, check server logs.");
             loadingFailed = 1;
         } else if (usemark) {
@@ -2068,7 +2068,7 @@ void readSyncBulkPayload(connection *conn) {
 
                 disklessLoadDiscardTempDb(diskless_load_tempDb);
                 functionsLibCtxFree(temp_functions_lib_ctx);
-                serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Discarding temporary DB in background");
+                serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: Discarding temporary DB in background");
             } else {
                 /* Remove the half-loaded data in case we started with an empty replica. */
                 emptyData(-1, empty_db_flags, replicationEmptyDbCallback);
@@ -2085,9 +2085,9 @@ void readSyncBulkPayload(connection *conn) {
             /* We will soon swap main db with tempDb and replicas will start
              * to apply data from new primary, we must discard the cached
              * primary structure and force resync of sub-replicas. */
-            replicationAttachToNewMaster();
+            replicationAttachToNewPrimary();
 
-            serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Swapping active DB with loaded DB");
+            serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: Swapping active DB with loaded DB");
             swapMainDbWithTempDb(diskless_load_tempDb);
 
             /* swap existing functions ctx with the temporary one */
@@ -2098,7 +2098,7 @@ void readSyncBulkPayload(connection *conn) {
 
             /* Delete the old db as it's useless now. */
             disklessLoadDiscardTempDb(diskless_load_tempDb);
-            serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Discarding old DB in background");
+            serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: Discarding old DB in background");
         }
 
         /* Inform about db change, as replication was diskless and didn't cause a save. */
@@ -2117,7 +2117,7 @@ void readSyncBulkPayload(connection *conn) {
         if (fsync(server.repl_transfer_fd) == -1) {
             serverLog(LL_WARNING,
                       "Failed trying to sync the temp DB to disk in "
-                      "MASTER <-> REPLICA synchronization: %s",
+                      "PRIMARY <-> REPLICA synchronization: %s",
                       strerror(errno));
             cancelReplicationHandshake(1);
             return;
@@ -2128,7 +2128,7 @@ void readSyncBulkPayload(connection *conn) {
         if (rename(server.repl_transfer_tmpfile, server.rdb_filename) == -1) {
             serverLog(LL_WARNING,
                       "Failed trying to rename the temp DB into %s in "
-                      "MASTER <-> REPLICA synchronization: %s",
+                      "PRIMARY <-> REPLICA synchronization: %s",
                       server.rdb_filename, strerror(errno));
             cancelReplicationHandshake(1);
             if (old_rdb_fd != -1) close(old_rdb_fd);
@@ -2141,14 +2141,14 @@ void readSyncBulkPayload(connection *conn) {
         if (fsyncFileDir(server.rdb_filename) == -1) {
             serverLog(LL_WARNING,
                       "Failed trying to sync DB directory %s in "
-                      "MASTER <-> REPLICA synchronization: %s",
+                      "PRIMARY <-> REPLICA synchronization: %s",
                       server.rdb_filename, strerror(errno));
             cancelReplicationHandshake(1);
             return;
         }
 
         if (rdbLoad(server.rdb_filename, &rsi, RDBFLAGS_REPLICATION) != RDB_OK) {
-            serverLog(LL_WARNING, "Failed trying to load the MASTER synchronization "
+            serverLog(LL_WARNING, "Failed trying to load the PRIMARY synchronization "
                                   "DB from disk, check server logs.");
             cancelReplicationHandshake(1);
             if (server.rdb_del_sync_files && allPersistenceDisabled()) {
@@ -2181,7 +2181,7 @@ void readSyncBulkPayload(connection *conn) {
     }
 
     /* Final setup of the connected replica <- primary link */
-    replicationCreateMasterClient(server.repl_transfer_s, rsi.repl_stream_db);
+    replicationCreatePrimaryClient(server.repl_transfer_s, rsi.repl_stream_db);
     server.repl_state = REPL_STATE_CONNECTED;
     server.repl_down_since = 0;
 
@@ -2200,11 +2200,11 @@ void readSyncBulkPayload(connection *conn) {
      * or not, in order to behave correctly if they are promoted to
      * primaries after a failover. */
     if (server.repl_backlog == NULL) createReplicationBacklog();
-    serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Finished with success");
+    serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: Finished with success");
 
     if (server.supervised_mode == SUPERVISED_SYSTEMD) {
-        serverCommunicateSystemd(
-            "STATUS=MASTER <-> REPLICA sync: Finished with success. Ready to accept connections in read-write mode.\n");
+        serverCommunicateSystemd("STATUS=PRIMARY <-> REPLICA sync: Finished with success. Ready to accept connections "
+                                 "in read-write mode.\n");
     }
 
     /* Send the initial ACK immediately to put this replica in online state. */
@@ -2314,7 +2314,7 @@ char *sendCommandArgv(connection *conn, int argc, char **argv, size_t *argv_lens
  * command in order to obtain the primary replid and the primary replication
  * global offset.
  *
- * This function is designed to be called from syncWithMaster(), so the
+ * This function is designed to be called from syncWithPrimary(), so the
  * following assumptions are made:
  *
  * 1) We pass the function an already connected socket "fd".
@@ -2345,7 +2345,7 @@ char *sendCommandArgv(connection *conn, int argc, char **argv, size_t *argv_lens
  *                      the caller should fall back to SYNC.
  * PSYNC_WRITE_ERROR: There was an error writing the command to the socket.
  * PSYNC_WAIT_REPLY: Call again the function with read_reply set to 1.
- * PSYNC_TRY_LATER: Master is currently in a transient error condition.
+ * PSYNC_TRY_LATER: Primary is currently in a transient error condition.
  *
  * Notable side effects:
  *
@@ -2406,10 +2406,10 @@ int replicaTryPartialResynchronization(connection *conn, int read_reply) {
 
     /* Reading half */
     reply = receiveSynchronousResponse(conn);
-    /* Master did not reply to PSYNC */
+    /* Primary did not reply to PSYNC */
     if (reply == NULL) {
         connSetReadHandler(conn, NULL);
-        serverLog(LL_WARNING, "Master did not reply to PSYNC, will try later");
+        serverLog(LL_WARNING, "Primary did not reply to PSYNC, will try later");
         return PSYNC_TRY_LATER;
     }
 
@@ -2434,7 +2434,7 @@ int replicaTryPartialResynchronization(connection *conn, int read_reply) {
             if (offset) offset++;
         }
         if (!replid || !offset || (offset - replid - 1) != CONFIG_RUN_ID_SIZE) {
-            serverLog(LL_WARNING, "Master replied with wrong +FULLRESYNC syntax.");
+            serverLog(LL_WARNING, "Primary replied with wrong +FULLRESYNC syntax.");
             /* This is an unexpected condition, actually the +FULLRESYNC
              * reply means that the primary supports PSYNC, but the reply
              * format seems wrong. To stay safe we blank the primary
@@ -2469,8 +2469,8 @@ int replicaTryPartialResynchronization(connection *conn, int read_reply) {
             new[CONFIG_RUN_ID_SIZE] = '\0';
 
             if (strcmp(new, server.cached_primary->replid)) {
-                /* Master ID changed. */
-                serverLog(LL_NOTICE, "Master replication ID changed to %s", new);
+                /* Primary ID changed. */
+                serverLog(LL_NOTICE, "Primary replication ID changed to %s", new);
 
                 /* Set the old ID as our ID2, up to the current offset+1. */
                 memcpy(server.replid2, server.cached_primary->replid, sizeof(server.replid2));
@@ -2488,7 +2488,7 @@ int replicaTryPartialResynchronization(connection *conn, int read_reply) {
 
         /* Setup the replication to continue. */
         sdsfree(reply);
-        replicationResurrectCachedMaster(conn);
+        replicationResurrectCachedPrimary(conn);
 
         /* If this instance was restarted and we read the metadata to
          * PSYNC from the persistence file, our replication backlog could
@@ -2506,7 +2506,7 @@ int replicaTryPartialResynchronization(connection *conn, int read_reply) {
 
     if (!strncmp(reply, "-NOMASTERLINK", 13) || !strncmp(reply, "-LOADING", 8)) {
         serverLog(LL_NOTICE,
-                  "Master is currently unable to PSYNC "
+                  "Primary is currently unable to PSYNC "
                   "but should be in the future: %s",
                   reply);
         sdsfree(reply);
@@ -2518,7 +2518,7 @@ int replicaTryPartialResynchronization(connection *conn, int read_reply) {
         serverLog(LL_WARNING, "Unexpected reply to PSYNC from primary: %s", reply);
     } else {
         serverLog(LL_NOTICE,
-                  "Master does not support PSYNC or is in "
+                  "Primary does not support PSYNC or is in "
                   "error state (reply: %s)",
                   reply);
     }
@@ -2528,7 +2528,7 @@ int replicaTryPartialResynchronization(connection *conn, int read_reply) {
 
 /* This handler fires when the non blocking connect was able to
  * establish a connection with the primary. */
-void syncWithMaster(connection *conn) {
+void syncWithPrimary(connection *conn) {
     char tmpfile[256], *err = NULL;
     int dfd = -1, maxtries = 5;
     int psync_result;
@@ -2552,7 +2552,7 @@ void syncWithMaster(connection *conn) {
         serverLog(LL_NOTICE, "Non blocking connect for SYNC fired the event.");
         /* Delete the writable event so that the readable event remains
          * registered and we can wait for the PONG reply. */
-        connSetReadHandler(conn, syncWithMaster);
+        connSetReadHandler(conn, syncWithPrimary);
         connSetWriteHandler(conn, NULL);
         server.repl_state = REPL_STATE_RECEIVE_PING_REPLY;
         /* Send the PING, don't check for errors at all, we have the timeout
@@ -2580,7 +2580,7 @@ void syncWithMaster(connection *conn) {
             sdsfree(err);
             goto error;
         } else {
-            serverLog(LL_NOTICE, "Master replied to PING, replication can continue...");
+            serverLog(LL_NOTICE, "Primary replied to PING, replication can continue...");
         }
         sdsfree(err);
         err = NULL;
@@ -2605,7 +2605,7 @@ void syncWithMaster(connection *conn) {
             if (err) goto write_error;
         }
 
-        /* Set the replica port, so that Master's INFO command can list the
+        /* Set the replica port, so that primary's INFO command can list the
          * replica listening port correctly. */
         {
             int port;
@@ -2621,7 +2621,7 @@ void syncWithMaster(connection *conn) {
             if (err) goto write_error;
         }
 
-        /* Set the replica ip, so that Master's INFO command can list the
+        /* Set the replica ip, so that primary's INFO command can list the
          * replica IP address port correctly in case of port forwarding or NAT.
          * Skip REPLCONF ip-address if there is no replica-announce-ip option set. */
         if (server.replica_announce_ip) {
@@ -2654,7 +2654,7 @@ void syncWithMaster(connection *conn) {
         err = receiveSynchronousResponse(conn);
         if (err == NULL) goto no_response_error;
         if (err[0] == '-') {
-            serverLog(LL_WARNING, "Unable to AUTH to MASTER: %s", err);
+            serverLog(LL_WARNING, "Unable to AUTH to PRIMARY: %s", err);
             sdsfree(err);
             goto error;
         }
@@ -2672,7 +2672,7 @@ void syncWithMaster(connection *conn) {
          * REPLCONF listening-port. */
         if (err[0] == '-') {
             serverLog(LL_NOTICE,
-                      "(Non critical) Master does not understand "
+                      "(Non critical) Primary does not understand "
                       "REPLCONF listening-port: %s",
                       err);
         }
@@ -2692,7 +2692,7 @@ void syncWithMaster(connection *conn) {
          * REPLCONF ip-address. */
         if (err[0] == '-') {
             serverLog(LL_NOTICE,
-                      "(Non critical) Master does not understand "
+                      "(Non critical) Primary does not understand "
                       "REPLCONF ip-address: %s",
                       err);
         }
@@ -2709,7 +2709,7 @@ void syncWithMaster(connection *conn) {
          * REPLCONF capa. */
         if (err[0] == '-') {
             serverLog(LL_NOTICE,
-                      "(Non critical) Master does not understand "
+                      "(Non critical) Primary does not understand "
                       "REPLCONF capa: %s",
                       err);
         }
@@ -2752,7 +2752,7 @@ void syncWithMaster(connection *conn) {
     /* If reached this point, we should be in REPL_STATE_RECEIVE_PSYNC_REPLY. */
     if (server.repl_state != REPL_STATE_RECEIVE_PSYNC_REPLY) {
         serverLog(LL_WARNING,
-                  "syncWithMaster(): state machine error, "
+                  "syncWithPrimary(): state machine error, "
                   "state should be RECEIVE_PSYNC but is %d",
                   server.repl_state);
         goto error;
@@ -2783,9 +2783,9 @@ void syncWithMaster(connection *conn) {
      * uninstalling the read handler from the file descriptor. */
 
     if (psync_result == PSYNC_CONTINUE) {
-        serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Master accepted a Partial Resynchronization.");
+        serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: Primary accepted a Partial Resynchronization.");
         if (server.supervised_mode == SUPERVISED_SYSTEMD) {
-            serverCommunicateSystemd("STATUS=MASTER <-> REPLICA sync: Partial Resynchronization accepted. Ready to "
+            serverCommunicateSystemd("STATUS=PRIMARY <-> REPLICA sync: Partial Resynchronization accepted. Ready to "
                                      "accept connections in read-write mode.\n");
         }
         return;
@@ -2797,7 +2797,7 @@ void syncWithMaster(connection *conn) {
     if (psync_result == PSYNC_NOT_SUPPORTED) {
         serverLog(LL_NOTICE, "Retrying with SYNC...");
         if (connSyncWrite(conn, "SYNC\r\n", 6, server.repl_syncio_timeout * 1000) == -1) {
-            serverLog(LL_WARNING, "I/O error writing to MASTER: %s", connGetLastError(conn));
+            serverLog(LL_WARNING, "I/O error writing to PRIMARY: %s", connGetLastError(conn));
             goto error;
         }
     }
@@ -2811,7 +2811,7 @@ void syncWithMaster(connection *conn) {
             sleep(1);
         }
         if (dfd == -1) {
-            serverLog(LL_WARNING, "Opening the temp file needed for MASTER <-> REPLICA synchronization: %s",
+            serverLog(LL_WARNING, "Opening the temp file needed for PRIMARY <-> REPLICA synchronization: %s",
                       strerror(errno));
             goto error;
         }
@@ -2835,7 +2835,7 @@ void syncWithMaster(connection *conn) {
     return;
 
 no_response_error: /* Handle receiveSynchronousResponse() error when primary has no reply */
-    serverLog(LL_WARNING, "Master did not respond to command during SYNC handshake");
+    serverLog(LL_WARNING, "Primary did not respond to command during SYNC handshake");
     /* Fall through to regular error handling */
 
 error:
@@ -2855,11 +2855,11 @@ void syncWithMaster(connection *conn) {
     goto error;
 }
 
-int connectWithMaster(void) {
+int connectWithPrimary(void) {
     server.repl_transfer_s = connCreate(connTypeOfReplication());
     if (connConnect(server.repl_transfer_s, server.primary_host, server.primary_port, server.bind_source_addr,
-                    syncWithMaster) == C_ERR) {
-        serverLog(LL_WARNING, "Unable to connect to MASTER: %s", connGetLastError(server.repl_transfer_s));
+                    syncWithPrimary) == C_ERR) {
+        serverLog(LL_WARNING, "Unable to connect to PRIMARY: %s", connGetLastError(server.repl_transfer_s));
         connClose(server.repl_transfer_s);
         server.repl_transfer_s = NULL;
         return C_ERR;
@@ -2868,7 +2868,7 @@ int connectWithMaster(void) {
 
     server.repl_transfer_lastio = server.unixtime;
     server.repl_state = REPL_STATE_CONNECTING;
-    serverLog(LL_NOTICE, "MASTER <-> REPLICA sync started");
+    serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync started");
     return C_OK;
 }
 
@@ -2876,7 +2876,7 @@ int connectWithMaster(void) {
  * in progress to undo it.
  * Never call this function directly, use cancelReplicationHandshake() instead.
  */
-void undoConnectWithMaster(void) {
+void undoConnectWithPrimary(void) {
     connClose(server.repl_transfer_s);
     server.repl_transfer_s = NULL;
 }
@@ -2886,7 +2886,7 @@ void undoConnectWithMaster(void) {
  */
 void replicationAbortSyncTransfer(void) {
     serverAssert(server.repl_state == REPL_STATE_TRANSFER);
-    undoConnectWithMaster();
+    undoConnectWithPrimary();
     if (server.repl_transfer_fd != -1) {
         close(server.repl_transfer_fd);
         bg_unlink(server.repl_transfer_tmpfile);
@@ -2909,7 +2909,7 @@ int cancelReplicationHandshake(int reconnect) {
         replicationAbortSyncTransfer();
         server.repl_state = REPL_STATE_CONNECT;
     } else if (server.repl_state == REPL_STATE_CONNECTING || replicaIsInHandshakeState()) {
-        undoConnectWithMaster();
+        undoConnectWithPrimary();
         server.repl_state = REPL_STATE_CONNECT;
     } else {
         return 0;
@@ -2919,8 +2919,8 @@ int cancelReplicationHandshake(int reconnect) {
 
     /* try to re-connect without waiting for replicationCron, this is needed
      * for the "diskless loading short read" test. */
-    serverLog(LL_NOTICE, "Reconnecting to MASTER %s:%d after failure", server.primary_host, server.primary_port);
-    connectWithMaster();
+    serverLog(LL_NOTICE, "Reconnecting to PRIMARY %s:%d after failure", server.primary_host, server.primary_port);
+    connectWithPrimary();
 
     return 1;
 }
@@ -2937,7 +2937,7 @@ void replicationSetPrimary(char *ip, int port) {
     disconnectAllBlockedClients(); /* Clients blocked in primary, now replica. */
 
     /* Setting primary_host only after the call to freeClient since it calls
-     * replicationHandleMasterDisconnection which can trigger a re-connect
+     * replicationHandlePrimaryDisconnection which can trigger a re-connect
      * directly from within that call. */
     server.primary_host = sdsnew(ip);
     server.primary_port = port;
@@ -2955,7 +2955,7 @@ void replicationSetPrimary(char *ip, int port) {
     /* Before destroying our primary state, create a cached primary using
      * our own parameters, to later PSYNC with the new primary. */
     if (was_primary) {
-        replicationDiscardCachedMaster();
+        replicationDiscardCachedPrimary();
         replicationCachePrimaryUsingMyself();
     }
 
@@ -2968,8 +2968,8 @@ void replicationSetPrimary(char *ip, int port) {
         moduleFireServerEvent(VALKEYMODULE_EVENT_PRIMARY_LINK_CHANGE, VALKEYMODULE_SUBEVENT_PRIMARY_LINK_DOWN, NULL);
 
     server.repl_state = REPL_STATE_CONNECT;
-    serverLog(LL_NOTICE, "Connecting to MASTER %s:%d", server.primary_host, server.primary_port);
-    connectWithMaster();
+    serverLog(LL_NOTICE, "Connecting to PRIMARY %s:%d", server.primary_host, server.primary_port);
+    connectWithPrimary();
 }
 
 /* Cancel replication, setting the instance as a primary itself. */
@@ -2981,11 +2981,11 @@ void replicationUnsetPrimary(void) {
         moduleFireServerEvent(VALKEYMODULE_EVENT_PRIMARY_LINK_CHANGE, VALKEYMODULE_SUBEVENT_PRIMARY_LINK_DOWN, NULL);
 
     /* Clear primary_host first, since the freeClient calls
-     * replicationHandleMasterDisconnection which can attempt to re-connect. */
+     * replicationHandlePrimaryDisconnection which can attempt to re-connect. */
     sdsfree(server.primary_host);
     server.primary_host = NULL;
     if (server.primary) freeClient(server.primary);
-    replicationDiscardCachedMaster();
+    replicationDiscardCachedPrimary();
     cancelReplicationHandshake(0);
     /* When a replica is turned into a primary, the current replication ID
      * (that was inherited from the primary at synchronization time) is
@@ -3043,8 +3043,8 @@ void replicationHandlePrimaryDisconnection(void) {
     /* Try to re-connect immediately rather than wait for replicationCron
      * waiting 1 second may risk backlog being recycled. */
     if (server.primary_host) {
-        serverLog(LL_NOTICE, "Reconnecting to MASTER %s:%d", server.primary_host, server.primary_port);
-        connectWithMaster();
+        serverLog(LL_NOTICE, "Reconnecting to PRIMARY %s:%d", server.primary_host, server.primary_port);
+        connectWithPrimary();
     }
 }
 
@@ -3067,7 +3067,7 @@ void replicaofCommand(client *c) {
         if (server.primary_host) {
             replicationUnsetPrimary();
             sds client = catClientInfoString(sdsempty(), c);
-            serverLog(LL_NOTICE, "MASTER MODE enabled (user request from '%s')", client);
+            serverLog(LL_NOTICE, "PRIMARY MODE enabled (user request from '%s')", client);
             sdsfree(client);
         }
     } else {
@@ -3184,7 +3184,7 @@ void replicationSendAck(void) {
     }
 }
 
-/* ---------------------- MASTER CACHING FOR PSYNC -------------------------- */
+/* ---------------------- PRIMARY CACHING FOR PSYNC -------------------------- */
 
 /* In order to implement partial synchronization we need to be able to cache
  * our primary's client structure after a transient disconnection.
@@ -3198,10 +3198,10 @@ void replicationSendAck(void) {
  *
  * The other functions that will deal with the cached primary are:
  *
- * replicationDiscardCachedMaster() that will make sure to kill the client
+ * replicationDiscardCachedPrimary() that will make sure to kill the client
  * as for some reason we don't want to use it in the future.
  *
- * replicationResurrectCachedMaster() that is used after a successful PSYNC
+ * replicationResurrectCachedPrimary() that is used after a successful PSYNC
  * handshake in order to reactivate the cached primary.
  */
 void replicationCachePrimary(client *c) {
@@ -3227,7 +3227,7 @@ void replicationCachePrimary(client *c) {
     resetClient(c);
 
     /* Save the primary. Server.primary will be set to null later by
-     * replicationHandleMasterDisconnection(). */
+     * replicationHandlePrimaryDisconnection(). */
     server.cached_primary = server.primary;
 
     /* Invalidate the Peer ID cache. */
@@ -3262,14 +3262,14 @@ void replicationCachePrimaryUsingMyself(void) {
                          "the new primary with just a partial transfer.");
 
     /* This will be used to populate the field server.primary->reploff
-     * by replicationCreateMasterClient(). We'll later set the created
+     * by replicationCreatePrimaryClient(). We'll later set the created
      * primary as server.cached_primary, so the replica will use such
      * offset for PSYNC. */
     server.primary_initial_offset = server.primary_repl_offset;
 
     /* The primary client we create can be set to any DBID, because
      * the new primary will start its replication stream with SELECT. */
-    replicationCreateMasterClient(NULL, -1);
+    replicationCreatePrimaryClient(NULL, -1);
 
     /* Use our own ID / offset. */
     memcpy(server.primary->replid, server.replid, sizeof(server.replid));
@@ -3282,7 +3282,7 @@ void replicationCachePrimaryUsingMyself(void) {
 
 /* Free a cached primary, called when there are no longer the conditions for
  * a partial resync on reconnection. */
-void replicationDiscardCachedMaster(void) {
+void replicationDiscardCachedPrimary(void) {
     if (server.cached_primary == NULL) return;
 
     serverLog(LL_NOTICE, "Discarding previously cached primary state.");
@@ -3297,7 +3297,7 @@ void replicationDiscardCachedMaster(void) {
  * This function is called when successfully setup a partial resynchronization
  * so the stream of data that we'll receive will start from where this
  * primary left. */
-void replicationResurrectCachedMaster(connection *conn) {
+void replicationResurrectCachedPrimary(connection *conn) {
     server.primary = server.cached_primary;
     server.cached_primary = NULL;
     server.primary->conn = conn;
@@ -3363,8 +3363,8 @@ int checkGoodReplicasStatus(void) {
 /* ----------------------- SYNCHRONOUS REPLICATION --------------------------
  * Synchronous replication design can be summarized in points:
  *
- * - Masters have a global replication offset, used by PSYNC.
- * - Master increment the offset every time new commands are sent to replicas.
+ * - Primaries have a global replication offset, used by PSYNC.
+ * - Primaries increment the offset every time new commands are sent to replicas.
  * - Replicas ping back primary with the offset processed so far.
  *
  * So synchronous replication adds a new WAIT command in the form:
@@ -3620,14 +3620,14 @@ void replicationCron(void) {
     /* Non blocking connection timeout? */
     if (server.primary_host && (server.repl_state == REPL_STATE_CONNECTING || replicaIsInHandshakeState()) &&
         (time(NULL) - server.repl_transfer_lastio) > server.repl_timeout) {
-        serverLog(LL_WARNING, "Timeout connecting to the MASTER...");
+        serverLog(LL_WARNING, "Timeout connecting to the PRIMARY...");
         cancelReplicationHandshake(1);
     }
 
     /* Bulk transfer I/O timeout? */
     if (server.primary_host && server.repl_state == REPL_STATE_TRANSFER &&
         (time(NULL) - server.repl_transfer_lastio) > server.repl_timeout) {
-        serverLog(LL_WARNING, "Timeout receiving bulk data from MASTER... If the problem persists try to set the "
+        serverLog(LL_WARNING, "Timeout receiving bulk data from PRIMARY... If the problem persists try to set the "
                               "'repl-timeout' parameter in redis.conf to a larger value.");
         cancelReplicationHandshake(1);
     }
@@ -3635,14 +3635,14 @@ void replicationCron(void) {
     /* Timed out primary when we are an already connected replica? */
     if (server.primary_host && server.repl_state == REPL_STATE_CONNECTED &&
         (time(NULL) - server.primary->last_interaction) > server.repl_timeout) {
-        serverLog(LL_WARNING, "MASTER timeout: no data nor PING received...");
+        serverLog(LL_WARNING, "PRIMARY timeout: no data nor PING received...");
         freeClient(server.primary);
     }
 
-    /* Check if we should connect to a MASTER */
+    /* Check if we should connect to a PRIMARY */
     if (server.repl_state == REPL_STATE_CONNECT) {
-        serverLog(LL_NOTICE, "Connecting to MASTER %s:%d", server.primary_host, server.primary_port);
-        connectWithMaster();
+        serverLog(LL_NOTICE, "Connecting to PRIMARY %s:%d", server.primary_host, server.primary_port);
+        connectWithPrimary();
     }
 
     /* Send ACK to primary from time to time.
@@ -4039,7 +4039,7 @@ void failoverCommand(client *c) {
 
 /* Failover cron function, checks coordinated failover state.
  *
- * Implementation note: The current implementation calls replicationSetMaster()
+ * Implementation note: The current implementation calls replicationSetPrimary()
  * to start the failover request, this has some unintended side effects if the
  * failover doesn't work like blocked clients will be unblocked and replicas will
  * be disconnected. This could be optimized further.
diff --git a/src/rio.c b/src/rio.c
index 569d5ddcf0..408a931d17 100644
--- a/src/rio.c
+++ b/src/rio.c
@@ -309,7 +309,7 @@ void rioFreeConn(rio *r, sds *remaining) {
 }
 
 /* ------------------- File descriptor implementation ------------------
- * This target is used to write the RDB file to pipe, when the master just
+ * This target is used to write the RDB file to pipe, when the primary just
  * streams the data to the replicas without creating an RDB on-disk image
  * (diskless replication option).
  * It only implements writes. */
diff --git a/src/valkey-benchmark.c b/src/valkey-benchmark.c
index 802b4c5735..5fe707510f 100644
--- a/src/valkey-benchmark.c
+++ b/src/valkey-benchmark.c
@@ -837,7 +837,7 @@ static void showLatencyReport(void) {
         printf("  %d bytes payload\n", config.datasize);
         printf("  keep alive: %d\n", config.keepalive);
         if (config.cluster_mode) {
-            printf("  cluster mode: yes (%d masters)\n", config.cluster_node_count);
+            printf("  cluster mode: yes (%d primaries)\n", config.cluster_node_count);
             int m;
             for (m = 0; m < config.cluster_node_count; m++) {
                 clusterNode *node = config.cluster_nodes[m];
@@ -1202,7 +1202,7 @@ static int fetchClusterConfiguration(void) {
             }
         }
         if (node->slots_count == 0) {
-            fprintf(stderr, "WARNING: Master node %s:%d has no slots, skipping...\n", node->ip, node->port);
+            fprintf(stderr, "WARNING: Primary node %s:%d has no slots, skipping...\n", node->ip, node->port);
             continue;
         }
         if (!addClusterNode(node)) {
@@ -1747,7 +1747,7 @@ int main(int argc, char **argv) {
             fprintf(stderr, "Invalid cluster: %d node(s).\n", config.cluster_node_count);
             exit(1);
         }
-        printf("Cluster has %d master nodes:\n\n", config.cluster_node_count);
+        printf("Cluster has %d primary nodes:\n\n", config.cluster_node_count);
         int i = 0;
         for (; i < config.cluster_node_count; i++) {
             clusterNode *node = config.cluster_nodes[i];
@@ -1755,7 +1755,7 @@ int main(int argc, char **argv) {
                 fprintf(stderr, "Invalid cluster node #%d\n", i);
                 exit(1);
             }
-            printf("Master %d: ", i);
+            printf("Primary %d: ", i);
             if (node->name) printf("%s ", node->name);
             printf("%s:%d\n", node->ip, node->port);
             node->redis_config = getServerConfig(node->ip, node->port, NULL);
diff --git a/src/valkey-cli.c b/src/valkey-cli.c
index 0f92a5fee7..5d506383c0 100644
--- a/src/valkey-cli.c
+++ b/src/valkey-cli.c
@@ -2603,9 +2603,7 @@ static int parseOptions(int argc, char **argv) {
         } else if (!strcmp(argv[i], "--lru-test") && !lastarg) {
             config.lru_test_mode = 1;
             config.lru_test_sample_size = strtoll(argv[++i], NULL, 10);
-        } else if (!strcmp(argv[i], "--slave")) {
-            config.replica_mode = 1;
-        } else if (!strcmp(argv[i], "--replica")) {
+        } else if (!strcmp(argv[i], "--slave") || !strcmp(argv[i], "--replica")) {
             config.replica_mode = 1;
         } else if (!strcmp(argv[i], "--stat")) {
             config.stat_mode = 1;
@@ -3148,11 +3146,11 @@ void cliLoadPreferences(void) {
  * history file. Currently these commands are include:
  * - AUTH
  * - ACL DELUSER, ACL SETUSER, ACL GETUSER
- * - CONFIG SET masterauth/masteruser/tls-key-file-pass/tls-client-key-file-pass/requirepass
+ * - CONFIG SET primaryauth/primaryuser/tls-key-file-pass/tls-client-key-file-pass/requirepass
  * - HELLO with [AUTH username password]
  * - MIGRATE with [AUTH password] or [AUTH2 username password]
  * - SENTINEL CONFIG SET sentinel-pass password, SENTINEL CONFIG SET sentinel-user username
- * - SENTINEL SET <mastername> auth-pass password, SENTINEL SET <mastername> auth-user username */
+ * - SENTINEL SET <primaryname> auth-pass password, SENTINEL SET <primaryname> auth-user username */
 static int isSensitiveCommand(int argc, char **argv) {
     if (!strcasecmp(argv[0], "auth")) {
         return 1;
@@ -3202,8 +3200,8 @@ static int isSensitiveCommand(int argc, char **argv) {
             (!strcasecmp(argv[3], "sentinel-pass") || !strcasecmp(argv[3], "sentinel-user"))) {
             return 1;
         }
-        /* SENTINEL SET <mastername> auth-pass password
-         * SENTINEL SET <mastername> auth-user username */
+        /* SENTINEL SET <primaryname> auth-pass password
+         * SENTINEL SET <primaryname> auth-user username */
         if (!strcasecmp(argv[1], "set") && (!strcasecmp(argv[3], "auth-pass") || !strcasecmp(argv[3], "auth-user"))) {
             return 1;
         }
@@ -4165,7 +4163,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes,
     clusterManagerNode **offenders = NULL;
     int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, NULL, NULL);
     if (score == 0) goto cleanup;
-    clusterManagerLogInfo(">>> Trying to optimize slaves allocation "
+    clusterManagerLogInfo(">>> Trying to optimize replicas allocation "
                           "for anti-affinity\n");
     int node_len = cluster_manager.nodes->len;
     int maxiter = 500 * node_len; // Effort is proportional to cluster size...
@@ -4219,9 +4217,9 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes,
     if (perfect)
         msg = "[OK] Perfect anti-affinity obtained!";
     else if (score >= 10000)
-        msg = ("[WARNING] Some slaves are in the same host as their master");
+        msg = ("[WARNING] Some replicas are in the same host as their primary");
     else
-        msg = ("[WARNING] Some slaves of the same master are in the same host");
+        msg = ("[WARNING] Some replicas of the same primary are in the same host");
     clusterManagerLog(log_level, "%s\n", msg);
 cleanup:
     zfree(offenders);
@@ -4447,13 +4445,13 @@ static void clusterManagerShowClusterInfo(void) {
                 return;
             };
             if (reply != NULL) freeReplyObject(reply);
-            printf("%s:%d (%s...) -> %lld keys | %d slots | %d slaves.\n", node->ip, node->port, name, dbsize,
+            printf("%s:%d (%s...) -> %lld keys | %d slots | %d replicas.\n", node->ip, node->port, name, dbsize,
                    node->slots_count, replicas);
             primaries++;
             keys += dbsize;
         }
     }
-    clusterManagerLogOk("[OK] %lld keys in %d masters.\n", keys, primaries);
+    clusterManagerLogOk("[OK] %lld keys in %d primaries.\n", keys, primaries);
     float keys_per_slot = keys / (float)CLUSTER_MANAGER_SLOTS;
     printf("%.2f keys per slot on average.\n", keys_per_slot);
 }
@@ -4993,7 +4991,8 @@ clusterManagerMoveSlot(clusterManagerNode *source, clusterManagerNode *target, i
          * unblocked with the role change error. */
         success = clusterManagerSetSlot(source, target, slot, "node", err);
         if (!success && err) {
-            const char *acceptable[] = {"ERR Please use SETSLOT only with masters.", "UNBLOCKED"};
+            const char *acceptable[] = {"ERR Please use SETSLOT only with masters.",
+                                        "ERR Please use SETSLOT only with primaries.", "UNBLOCKED"};
             for (size_t i = 0; i < sizeof(acceptable) / sizeof(acceptable[0]); i++) {
                 if (!strncmp(*err, acceptable[i], strlen(acceptable[i]))) {
                     zfree(*err);
@@ -5264,7 +5263,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char *
                 currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT;
             else if (strcmp(flag, "fail") == 0)
                 currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL;
-            else if (strcmp(flag, "slave") == 0) {
+            else if ((strcmp(flag, "slave") == 0) || (strcmp(flag, "replica") == 0)) {
                 currentNode->flags |= CLUSTER_MANAGER_FLAG_REPLICA;
                 if (primary_id != NULL) {
                     if (currentNode->replicate) sdsfree(currentNode->replicate);
@@ -5352,7 +5351,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node) {
             clusterManagerNode *primary = clusterManagerNodeByName(n->replicate);
             if (primary == NULL) {
                 clusterManagerLogWarn("*** WARNING: %s:%d claims to be "
-                                      "slave of unknown node ID %s.\n",
+                                      "replica of unknown node ID %s.\n",
                                       n->ip, n->port, n->replicate);
             } else
                 primary->replicas_count++;
@@ -5712,10 +5711,10 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) {
 
     if (cluster_manager.unreachable_primaries > 0 && !force_fix) {
         clusterManagerLogWarn(
-            "*** Fixing slots coverage with %d unreachable masters is dangerous: valkey-cli will assume that slots "
-            "about masters that are not reachable are not covered, and will try to reassign them to the reachable "
+            "*** Fixing slots coverage with %d unreachable primaries is dangerous: valkey-cli will assume that slots "
+            "about primaries that are not reachable are not covered, and will try to reassign them to the reachable "
             "nodes. This can cause data loss and is rarely what you want to do. If you really want to proceed use the "
-            "--cluster-fix-with-unreachable-masters option.\n",
+            "--cluster-fix-with-unreachable-primaries option.\n",
             cluster_manager.unreachable_primaries);
         exit(1);
     }
@@ -5906,10 +5905,10 @@ static int clusterManagerFixOpenSlot(int slot) {
 
     if (cluster_manager.unreachable_primaries > 0 && !force_fix) {
         clusterManagerLogWarn(
-            "*** Fixing open slots with %d unreachable masters is dangerous: valkey-cli will assume that slots about "
-            "masters that are not reachable are not covered, and will try to reassign them to the reachable nodes. "
+            "*** Fixing open slots with %d unreachable primaries is dangerous: valkey-cli will assume that slots about "
+            "primaries that are not reachable are not covered, and will try to reassign them to the reachable nodes. "
             "This can cause data loss and is rarely what you want to do. If you really want to proceed use the "
-            "--cluster-fix-with-unreachable-masters option.\n",
+            "--cluster-fix-with-unreachable-primaries option.\n",
             cluster_manager.unreachable_primaries);
         exit(1);
     }
@@ -6420,7 +6419,7 @@ static int clusterManagerCheckCluster(int quiet) {
 static clusterManagerNode *clusterNodeForResharding(char *id, clusterManagerNode *target, int *raise_err) {
     clusterManagerNode *node = NULL;
     const char *invalid_node_msg = "*** The specified node (%s) is not known "
-                                   "or not a master, please retry.\n";
+                                   "or not a primary, please retry.\n";
     node = clusterManagerNodeByName(id);
     *raise_err = 0;
     if (!node || node->flags & CLUSTER_MANAGER_FLAG_REPLICA) {
@@ -6642,7 +6641,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) {
     int primaries_count = CLUSTER_MANAGER_PRIMARIES_COUNT(node_len, replicas);
     if (primaries_count < 3) {
         clusterManagerLogErr("*** ERROR: Invalid configuration for cluster creation.\n"
-                             "*** Valkey Cluster requires at least 3 master nodes.\n"
+                             "*** Valkey Cluster requires at least 3 primary nodes.\n"
                              "*** This is not possible with %d nodes and %d replicas per node.",
                              node_len, replicas);
         clusterManagerLogErr("\n*** At least %d nodes are required.\n", 3 * (replicas + 1));
@@ -6696,7 +6695,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) {
         long last = lround(cursor + slots_per_node - 1);
         if (last > CLUSTER_MANAGER_SLOTS || i == (primaries_count - 1)) last = CLUSTER_MANAGER_SLOTS - 1;
         if (last < first) last = first;
-        printf("Master[%d] -> Slots %ld - %ld\n", i, first, last);
+        printf("Primary[%d] -> Slots %ld - %ld\n", i, first, last);
         primary->slots_count = 0;
         for (j = first; j <= last; j++) {
             primary->slots[j] = 1;
@@ -6907,13 +6906,13 @@ static int clusterManagerCommandAddNode(int argc, char **argv) {
         if (primary_id != NULL) {
             primary_node = clusterManagerNodeByName(primary_id);
             if (primary_node == NULL) {
-                clusterManagerLogErr("[ERR] No such master ID %s\n", primary_id);
+                clusterManagerLogErr("[ERR] No such primary ID %s\n", primary_id);
                 return 0;
             }
         } else {
             primary_node = clusterManagerNodeWithLeastReplicas();
             assert(primary_node != NULL);
-            printf("Automatically selected master %s:%d\n", primary_node->ip, primary_node->port);
+            printf("Automatically selected primary %s:%d\n", primary_node->ip, primary_node->port);
         }
     }
 
@@ -7336,7 +7335,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) {
             float w = atof(++p);
             clusterManagerNode *n = clusterManagerNodeByAbbreviatedName(name);
             if (n == NULL) {
-                clusterManagerLogErr("*** No such master node %s\n", name);
+                clusterManagerLogErr("*** No such primary node %s\n", name);
                 result = 0;
                 goto cleanup;
             }
@@ -8130,13 +8129,13 @@ unsigned long long sendSync(redisContext *c, int send_sync, char *out_eof, int *
     if (send_sync) {
         /* Send the SYNC command. */
         if (cliWriteConn(c, "SYNC\r\n", 6) != 6) {
-            fprintf(stderr, "Error writing to master\n");
+            fprintf(stderr, "Error writing to primary\n");
             exit(1);
         }
     } else {
         /* We have written the command into c->obuf before. */
         if (cliWriteConn(c, "", 0) != 0) {
-            fprintf(stderr, "Error writing to master\n");
+            fprintf(stderr, "Error writing to primary\n");
             exit(1);
         }
     }
@@ -8155,7 +8154,7 @@ unsigned long long sendSync(redisContext *c, int send_sync, char *out_eof, int *
     }
     *p = '\0';
     if (buf[0] == '-') {
-        fprintf(stderr, "SYNC with master failed: %s\n", buf);
+        fprintf(stderr, "SYNC with primary failed: %s\n", buf);
         exit(1);
     }
 
@@ -8207,18 +8206,18 @@ static void replicaMode(int send_sync) {
         memset(lastbytes, 0, RDB_EOF_MARK_SIZE);
         usemark = 1;
         fprintf(stderr,
-                "%s with master, discarding "
+                "%s with primary, discarding "
                 "bytes of bulk transfer until EOF marker...\n",
                 info);
     } else if (out_full_mode == 1 && payload != 0) {
         /* SYNC without EOF marker or PSYNC +FULLRESYNC. */
         fprintf(stderr,
-                "%s with master, discarding %llu "
+                "%s with primary, discarding %llu "
                 "bytes of bulk transfer...\n",
                 info, payload);
     } else if (out_full_mode == 0 && payload == 0) {
         /* PSYNC +CONTINUE (no RDB payload). */
-        fprintf(stderr, "%s with master...\n", info);
+        fprintf(stderr, "%s with primary...\n", info);
     }
 
     /* Discard the payload. */
@@ -8247,12 +8246,12 @@ static void replicaMode(int send_sync) {
 
     if (usemark) {
         unsigned long long offset = ULLONG_MAX - payload;
-        fprintf(stderr, "%s done after %llu bytes. Logging commands from master.\n", info, offset);
+        fprintf(stderr, "%s done after %llu bytes. Logging commands from primary.\n", info, offset);
         /* put the replica online */
         sleep(1);
         sendReplconf("ACK", "0");
     } else
-        fprintf(stderr, "%s done. Logging commands from master.\n", info);
+        fprintf(stderr, "%s done. Logging commands from primary.\n", info);
 
     /* Now we can use hiredis to read the incoming protocol. */
     config.output = OUTPUT_CSV;
@@ -8289,11 +8288,11 @@ static void getRDB(clusterManagerNode *node) {
         memset(lastbytes, 0, RDB_EOF_MARK_SIZE);
         usemark = 1;
         fprintf(stderr,
-                "SYNC sent to master, writing bytes of bulk transfer "
+                "SYNC sent to primary, writing bytes of bulk transfer "
                 "until EOF marker to '%s'\n",
                 filename);
     } else {
-        fprintf(stderr, "SYNC sent to master, writing %llu bytes to '%s'\n", payload, filename);
+        fprintf(stderr, "SYNC sent to primary, writing %llu bytes to '%s'\n", payload, filename);
     }
 
     int write_to_stdout = !strcmp(filename, "-");
diff --git a/src/valkeymodule.h b/src/valkeymodule.h
index a49f83b766..16f7929081 100644
--- a/src/valkeymodule.h
+++ b/src/valkeymodule.h
@@ -1469,7 +1469,7 @@ VALKEYMODULE_API int (*ValkeyModule_SendClusterMessage)(ValkeyModuleCtx *ctx,
 VALKEYMODULE_API int (*ValkeyModule_GetClusterNodeInfo)(ValkeyModuleCtx *ctx,
                                                         const char *id,
                                                         char *ip,
-                                                        char *master_id,
+                                                        char *primary_id,
                                                         int *port,
                                                         int *flags) VALKEYMODULE_ATTR;
 VALKEYMODULE_API char **(*ValkeyModule_GetClusterNodesList)(ValkeyModuleCtx *ctx, size_t *numnodes)VALKEYMODULE_ATTR;
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl
index e64394ad1b..f56fe0a1dc 100644
--- a/tests/integration/replication.tcl
+++ b/tests/integration/replication.tcl
@@ -51,7 +51,7 @@ start_server {tags {"repl network external:skip"}} {
 
         test {Slave is able to detect timeout during handshake} {
             wait_for_condition 50 1000 {
-                [log_file_matches $slave_log "*Timeout connecting to the MASTER*"]
+                [log_file_matches $slave_log "*Timeout connecting to the PRIMARY*"]
             } else {
                 fail "Replica is not able to detect timeout"
             }
@@ -1390,7 +1390,7 @@ start_server {tags {"repl" "external:skip"}} {
 
         # Check we got the warning logs about the GET command.
         verify_log_message 0 "*Replica generated a reply to command 'get', disconnecting it: *" $lines
-        verify_log_message 0 "*== CRITICAL == This master is sending an error to its replica: *" $lines
+        verify_log_message 0 "*== CRITICAL == This primary is sending an error to its replica: *" $lines
         verify_log_message 0 "*Replica can't interact with the keyspace*" $lines
 
         $rd close
diff --git a/tests/unit/auth.tcl b/tests/unit/auth.tcl
index ee5d2db0fc..5c2071c176 100644
--- a/tests/unit/auth.tcl
+++ b/tests/unit/auth.tcl
@@ -74,7 +74,7 @@ start_server {tags {"auth_binary_password external:skip"}} {
             $slave slaveof $master_host $master_port
 
             # Verify replica is not able to sync with master
-            wait_for_log_messages 0 {"*Unable to AUTH to MASTER*"} $loglines 1000 10
+            wait_for_log_messages 0 {"*Unable to AUTH to PRIMARY*"} $loglines 1000 10
             assert_equal {down} [s 0 master_link_status]
             
             # Test replica with the correct primaryauth
diff --git a/tests/unit/introspection.tcl b/tests/unit/introspection.tcl
index 8ceb03b502..a12a3ba23d 100644
--- a/tests/unit/introspection.tcl
+++ b/tests/unit/introspection.tcl
@@ -827,9 +827,9 @@ start_server {tags {"introspection"}} {
         # Something like `valkey-server --some-config --config-value1 --config-value2 --loglevel debug` would break,
         # because if you want to pass a value to a config starting with `--`, it can only be a single value.
         catch {exec src/valkey-server --replicaof 127.0.0.1 abc} err
-        assert_match {*'replicaof "127.0.0.1" "abc"'*Invalid master port*} $err
+        assert_match {*'replicaof "127.0.0.1" "abc"'*Invalid primary port*} $err
         catch {exec src/valkey-server --replicaof --127.0.0.1 abc} err
-        assert_match {*'replicaof "--127.0.0.1" "abc"'*Invalid master port*} $err
+        assert_match {*'replicaof "--127.0.0.1" "abc"'*Invalid primary port*} $err
         catch {exec src/valkey-server --replicaof --127.0.0.1 --abc} err
         assert_match {*'replicaof "--127.0.0.1"'*wrong number of arguments*} $err
     } {} {external:skip}

From b33f932c5670749bea67933c10578336c67f16e6 Mon Sep 17 00:00:00 2001
From: Madelyn Olson <madelyneolson@gmail.com>
Date: Mon, 17 Jun 2024 21:08:08 -0700
Subject: [PATCH 06/53] Add missing commas from debug command (#662)

The missing commas caused the `DEBUG HELP` to be compressed onto a
single line.

Signed-off-by: Madelyn Olson <madelyneolson@gmail.com>
---
 src/debug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/debug.c b/src/debug.c
index d9fe93c7d4..c625ab5150 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -430,8 +430,8 @@ void debugCommand(client *c) {
             "DROP-CLUSTER-PACKET-FILTER <packet-type>",
             "    Drop all packets that match the filtered type. Set to -1 allow all packets.",
             "CLOSE-CLUSTER-LINK-ON-PACKET-DROP <0|1>",
-            "    This is valid only when DROP-CLUSTER-PACKET-FILTER is set to a valid packet type."
-            "    When set to 1, the cluster link is closed after dropping a packet based on the filter."
+            "    This is valid only when DROP-CLUSTER-PACKET-FILTER is set to a valid packet type.",
+            "    When set to 1, the cluster link is closed after dropping a packet based on the filter.",
             "OOM",
             "    Crash the server simulating an out-of-memory error.",
             "PANIC",

From a2cc2fe26ddf5fac46476ec1f958dea56e35a513 Mon Sep 17 00:00:00 2001
From: Binbin <binloveplay1314@qq.com>
Date: Tue, 18 Jun 2024 12:18:57 +0800
Subject: [PATCH 07/53] Fix memory leak when loading slot migrations states
 fails (#658)

When we goto eoferr, we need to release auxkey and auxval. This is a
cleanup. Also explicitly check that the decoder return value is
C_ERR.

Introduced in #586.

Signed-off-by: Binbin <binloveplay1314@qq.com>
---
 src/rdb.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/rdb.c b/src/rdb.c
index ad7da17ea1..6ce7871031 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -3149,7 +3149,11 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin
                     if (de != NULL) {
                         handled = 1;
                         rdbAuxFieldCodec *codec = (rdbAuxFieldCodec *)dictGetVal(de);
-                        if (codec->decoder(rdbflags, auxval->ptr) < 0) goto eoferr;
+                        if (codec->decoder(rdbflags, auxval->ptr) == C_ERR) {
+                            decrRefCount(auxkey);
+                            decrRefCount(auxval);
+                            goto eoferr;
+                        }
                     }
                 }
 

From be2c3216824207613cf00b1e5579ee510b7fadc2 Mon Sep 17 00:00:00 2001
From: ranshid <88133677+ranshid@users.noreply.github.com>
Date: Tue, 18 Jun 2024 22:04:06 +0300
Subject: [PATCH 08/53] Support RDB compatibility with Redis 7.2.4 RDB format
 (#665)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR makes our current RDB format compatible with the Redis 7.2.4 RDB
format. There are 2 changes introduced in this PR:
1. Move back the RDB version to 11
2. Make the slot info section persist as AUX data instead of a dedicated
section.

We have introduced slot-info as part of the work to replace cluster
metadata with slot specific dictionaries. This caused us to bump the RDB
version and thus we prevent downgrade (which is conceptually O.K. but
better be prevented). We do not require the slot-info section to exist,
so making it an AUX section will help support version downgrade from
Valkey 8.

fixes: [#645](https://github.com/valkey-io/valkey/issues/645)

NOTE: tested manually by:
1. connecting Redis 7.2.4 replica to a Valkey 8(RC)
2. upgrade/downgrade Redis 7.2.4 cluster and Valkey 8(RC) cluster

---------

Signed-off-by: ranshid <ranshid@amazon.com>
Co-authored-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
---
 src/rdb.c              | 47 ++++++++++++++++++++++--------------------
 src/rdb.h              |  3 +--
 src/valkey-check-rdb.c |  6 ------
 3 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/src/rdb.c b/src/rdb.c
index 6ce7871031..07fc70c16d 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -1349,15 +1349,14 @@ ssize_t rdbSaveDb(rio *rdb, int dbid, int rdbflags, long *key_counter) {
         int curr_slot = kvstoreIteratorGetCurrentDictIndex(kvs_it);
         /* Save slot info. */
         if (server.cluster_enabled && curr_slot != last_slot) {
-            if ((res = rdbSaveType(rdb, RDB_OPCODE_SLOT_INFO)) < 0) goto werr;
-            written += res;
-            if ((res = rdbSaveLen(rdb, curr_slot)) < 0) goto werr;
-            written += res;
-            if ((res = rdbSaveLen(rdb, kvstoreDictSize(db->keys, curr_slot))) < 0) goto werr;
-            written += res;
-            if ((res = rdbSaveLen(rdb, kvstoreDictSize(db->expires, curr_slot))) < 0) goto werr;
-            written += res;
+            sds slot_info = sdscatprintf(sdsempty(), "%i,%lu,%lu", curr_slot, kvstoreDictSize(db->keys, curr_slot),
+                                         kvstoreDictSize(db->expires, curr_slot));
+            if ((res = rdbSaveAuxFieldStrStr(rdb, "slot-info", slot_info)) < 0) {
+                sdsfree(slot_info);
+                goto werr;
+            }
             last_slot = curr_slot;
+            sdsfree(slot_info);
         }
         sds keystr = dictGetKey(de);
         robj key, *o = dictGetVal(de);
@@ -3078,20 +3077,6 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin
             if ((expires_size = rdbLoadLen(rdb, NULL)) == RDB_LENERR) goto eoferr;
             should_expand_db = 1;
             continue; /* Read next opcode. */
-        } else if (type == RDB_OPCODE_SLOT_INFO) {
-            uint64_t slot_id, slot_size, expires_slot_size;
-            if ((slot_id = rdbLoadLen(rdb, NULL)) == RDB_LENERR) goto eoferr;
-            if ((slot_size = rdbLoadLen(rdb, NULL)) == RDB_LENERR) goto eoferr;
-            if ((expires_slot_size = rdbLoadLen(rdb, NULL)) == RDB_LENERR) goto eoferr;
-            if (!server.cluster_enabled) {
-                continue; /* Ignore gracefully. */
-            }
-            /* In cluster mode we resize individual slot specific dictionaries based on the number of keys that slot
-             * holds. */
-            kvstoreDictExpand(db->keys, slot_id, slot_size);
-            kvstoreDictExpand(db->expires, slot_id, expires_slot_size);
-            should_expand_db = 0;
-            continue; /* Read next opcode. */
         } else if (type == RDB_OPCODE_AUX) {
             /* AUX: generic string-string fields. Use to add state to RDB
              * which is backward compatible. Implementations of RDB loading
@@ -3141,6 +3126,24 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin
                 if (isbase) serverLog(LL_NOTICE, "RDB is base AOF");
             } else if (!strcasecmp(auxkey->ptr, "redis-bits")) {
                 /* Just ignored. */
+            } else if (!strcasecmp(auxkey->ptr, "slot-info")) {
+                int slot_id;
+                unsigned long slot_size, expires_slot_size;
+                /* Try to parse the slot information. In case the number of parsed arguments is smaller than expected
+                 * we'll fail the RDB load. */
+                if (sscanf(auxval->ptr, "%i,%lu,%lu", &slot_id, &slot_size, &expires_slot_size) < 3) {
+                    decrRefCount(auxkey);
+                    decrRefCount(auxval);
+                    goto eoferr;
+                }
+
+                if (server.cluster_enabled) {
+                    /* In cluster mode we resize individual slot specific dictionaries based on the number of keys that
+                     * slot holds. */
+                    kvstoreDictExpand(db->keys, slot_id, slot_size);
+                    kvstoreDictExpand(db->expires, slot_id, expires_slot_size);
+                    should_expand_db = 0;
+                }
             } else {
                 /* Check if this is a dynamic aux field */
                 int handled = 0;
diff --git a/src/rdb.h b/src/rdb.h
index 393d2f658a..3b17cbe9de 100644
--- a/src/rdb.h
+++ b/src/rdb.h
@@ -38,7 +38,7 @@
 
 /* The current RDB version. When the format changes in a way that is no longer
  * backward compatible this number gets incremented. */
-#define RDB_VERSION 12
+#define RDB_VERSION 11
 
 /* Defines related to the dump file format. To store 32 bits lengths for short
  * keys requires a lot of space, so we check the most significant 2 bits of
@@ -101,7 +101,6 @@
 #define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 21))
 
 /* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */
-#define RDB_OPCODE_SLOT_INFO 244       /* Individual slot info, such as slot id and size (cluster mode only). */
 #define RDB_OPCODE_FUNCTION2 245       /* function library data */
 #define RDB_OPCODE_FUNCTION_PRE_GA 246 /* old function library data for 7.0 rc1 and rc2 */
 #define RDB_OPCODE_MODULE_AUX 247      /* Module auxiliary data. */
diff --git a/src/valkey-check-rdb.c b/src/valkey-check-rdb.c
index 7e93f70360..0b2fdbb666 100644
--- a/src/valkey-check-rdb.c
+++ b/src/valkey-check-rdb.c
@@ -256,12 +256,6 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) {
             if ((db_size = rdbLoadLen(&rdb, NULL)) == RDB_LENERR) goto eoferr;
             if ((expires_size = rdbLoadLen(&rdb, NULL)) == RDB_LENERR) goto eoferr;
             continue; /* Read type again. */
-        } else if (type == RDB_OPCODE_SLOT_INFO) {
-            uint64_t slot_id, slot_size, expires_slot_size;
-            if ((slot_id = rdbLoadLen(&rdb, NULL)) == RDB_LENERR) goto eoferr;
-            if ((slot_size = rdbLoadLen(&rdb, NULL)) == RDB_LENERR) goto eoferr;
-            if ((expires_slot_size = rdbLoadLen(&rdb, NULL)) == RDB_LENERR) goto eoferr;
-            continue; /* Read type again. */
         } else if (type == RDB_OPCODE_AUX) {
             /* AUX: generic string-string fields. Use to add state to RDB
              * which is backward compatible. Implementations of RDB loading

From ae2d4217e147996bd6c546f559aa564f873f9203 Mon Sep 17 00:00:00 2001
From: kukey <wei.kukey@gmail.com>
Date: Wed, 19 Jun 2024 08:48:58 +0800
Subject: [PATCH 09/53] Add new SCRIPT SHOW subcommand to dump script via sha1
 (#617)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In some scenarios, users may not be able to find a previously used
Lua script and only have its SHA1 signature. Or there are multiple
EVALSHA calls with identical args in monitor/slowlog, and the admin
is not able to determine the script body.

Add a new SCRIPT subcommand to show the contents of a script
given the script's sha1. Returns a NOSCRIPT error if the script
is not present in the cache.

Usage: `SCRIPT SHOW sha1`
Complexity: `O(1)`

Closes #604.
Doc PR: https://github.com/valkey-io/valkey-doc/pull/143

---------

Signed-off-by: wei.kukey <wei.kukey@gmail.com>
Signed-off-by: Madelyn Olson <madelyneolson@gmail.com>
Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
Co-authored-by: Binbin <binloveplay1314@qq.com>
Co-authored-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
---
 src/commands.def              | 23 +++++++++++++++++++++++
 src/commands/script-show.json | 27 +++++++++++++++++++++++++++
 src/eval.c                    | 12 ++++++++++++
 src/server.c                  |  2 +-
 tests/unit/scripting.tcl      | 15 +++++++++++++++
 5 files changed, 78 insertions(+), 1 deletion(-)
 create mode 100644 src/commands/script-show.json

diff --git a/src/commands.def b/src/commands.def
index cb7fd73cc5..989dd1864d 100644
--- a/src/commands.def
+++ b/src/commands.def
@@ -5333,6 +5333,28 @@ struct COMMAND_ARG SCRIPT_LOAD_Args[] = {
 {MAKE_ARG("script",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
 };
 
+/********** SCRIPT SHOW ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* SCRIPT SHOW history */
+#define SCRIPT_SHOW_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* SCRIPT SHOW tips */
+#define SCRIPT_SHOW_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* SCRIPT SHOW key specs */
+#define SCRIPT_SHOW_Keyspecs NULL
+#endif
+
+/* SCRIPT SHOW argument table */
+struct COMMAND_ARG SCRIPT_SHOW_Args[] = {
+{MAKE_ARG("sha1",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
 /* SCRIPT command table */
 struct COMMAND_STRUCT SCRIPT_Subcommands[] = {
 {MAKE_CMD("debug","Sets the debug mode of server-side Lua scripts.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_DEBUG_History,0,SCRIPT_DEBUG_Tips,0,scriptCommand,3,CMD_NOSCRIPT,ACL_CATEGORY_SCRIPTING,SCRIPT_DEBUG_Keyspecs,0,NULL,1),.args=SCRIPT_DEBUG_Args},
@@ -5341,6 +5363,7 @@ struct COMMAND_STRUCT SCRIPT_Subcommands[] = {
 {MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_HELP_History,0,SCRIPT_HELP_Tips,0,scriptCommand,2,CMD_LOADING|CMD_STALE,ACL_CATEGORY_SCRIPTING,SCRIPT_HELP_Keyspecs,0,NULL,0)},
 {MAKE_CMD("kill","Terminates a server-side Lua script during execution.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_KILL_History,0,SCRIPT_KILL_Tips,2,scriptCommand,2,CMD_NOSCRIPT|CMD_ALLOW_BUSY,ACL_CATEGORY_SCRIPTING,SCRIPT_KILL_Keyspecs,0,NULL,0)},
 {MAKE_CMD("load","Loads a server-side Lua script to the script cache.","O(N) with N being the length in bytes of the script body.","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_LOAD_History,0,SCRIPT_LOAD_Tips,2,scriptCommand,3,CMD_NOSCRIPT|CMD_STALE,ACL_CATEGORY_SCRIPTING,SCRIPT_LOAD_Keyspecs,0,NULL,1),.args=SCRIPT_LOAD_Args},
+{MAKE_CMD("show","Show server-side Lua script in the script cache.","O(1).","8.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_SHOW_History,0,SCRIPT_SHOW_Tips,0,scriptCommand,3,CMD_NOSCRIPT,ACL_CATEGORY_SCRIPTING,SCRIPT_SHOW_Keyspecs,0,NULL,1),.args=SCRIPT_SHOW_Args},
 {0}
 };
 
diff --git a/src/commands/script-show.json b/src/commands/script-show.json
new file mode 100644
index 0000000000..f22fa29675
--- /dev/null
+++ b/src/commands/script-show.json
@@ -0,0 +1,27 @@
+{
+    "SHOW": {
+        "summary": "Show server-side Lua script in the script cache.",
+        "complexity": "O(1).",
+        "group": "scripting",
+        "since": "8.0.0",
+        "arity": 3,
+        "container": "SCRIPT",
+        "function": "scriptCommand",
+        "command_flags": [
+            "NOSCRIPT"
+        ],
+        "acl_categories": [
+            "SCRIPTING"
+        ],
+        "arguments": [
+            {
+                "name": "sha1",
+                "type": "string"
+            }
+        ],
+        "reply_schema": {
+            "description": "Lua script if sha1 hash exists in script cache.",
+            "type": "string"
+        }
+    }
+}
diff --git a/src/eval.c b/src/eval.c
index e747c233e8..f4d09a5aa6 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -682,6 +682,8 @@ void scriptCommand(client *c) {
 "    Kill the currently executing Lua script.",
 "LOAD <script>",
 "    Load a script into the scripts cache without executing it.",
+"SHOW <sha1>",
+"    Show a script from the scripts cache.",
 NULL
         };
         /* clang-format on */
@@ -735,6 +737,16 @@ NULL
             addReplyError(c, "Use SCRIPT DEBUG YES/SYNC/NO");
             return;
         }
+    } else if (c->argc == 3 && !strcasecmp(c->argv[1]->ptr, "show")) {
+        dictEntry *de;
+        luaScript *ls;
+
+        if (sdslen(c->argv[2]->ptr) == 40 && (de = dictFind(lctx.lua_scripts, c->argv[2]->ptr))) {
+            ls = dictGetVal(de);
+            addReplyBulk(c, ls->body);
+        } else {
+            addReplyErrorObject(c, shared.noscripterr);
+        }
     } else {
         addReplySubcommandSyntaxError(c);
     }
diff --git a/src/server.c b/src/server.c
index e5df60be65..fe522b3e5d 100644
--- a/src/server.c
+++ b/src/server.c
@@ -1814,7 +1814,7 @@ void createSharedObjects(void) {
     shared.syntaxerr = createObject(OBJ_STRING, sdsnew("-ERR syntax error\r\n"));
     shared.sameobjecterr = createObject(OBJ_STRING, sdsnew("-ERR source and destination objects are the same\r\n"));
     shared.outofrangeerr = createObject(OBJ_STRING, sdsnew("-ERR index out of range\r\n"));
-    shared.noscripterr = createObject(OBJ_STRING, sdsnew("-NOSCRIPT No matching script. Please use EVAL.\r\n"));
+    shared.noscripterr = createObject(OBJ_STRING, sdsnew("-NOSCRIPT No matching script.\r\n"));
     createSharedObjectsWithCompat();
     shared.primarydownerr = createObject(
         OBJ_STRING, sdsnew("-MASTERDOWN Link with MASTER is down and replica-serve-stale-data is set to 'no'.\r\n"));
diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl
index 3f8c761bd3..e19ccdf721 100644
--- a/tests/unit/scripting.tcl
+++ b/tests/unit/scripting.tcl
@@ -638,6 +638,21 @@ start_server {tags {"scripting"}} {
             [r evalsha b534286061d4b9e4026607613b95c06c06015ae8 0]
     } {b534286061d4b9e4026607613b95c06c06015ae8 loaded}
 
+    test {SCRIPT SHOW - is able to dump scripts from the scripting cache} {
+        r script load "return 'dump'"
+        r script show 4f5a49d7b18244a3b100d159b78b51474e23e081
+    } {return 'dump'}
+
+    test {SCRIPT SHOW - wrong sha1 length or invalid sha1 char return noscript error} {
+        assert_error {NOSCRIPT*} {r script show b534286061d4b06c06015ae8}
+        assert_error {NOSCRIPT*} {r script show AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA}
+    }
+
+    test {SCRIPT SHOW - script not exist return noscript error} {
+        r script flush
+        assert_error {NOSCRIPT*} {r script show 4f5a49d7b18244a3b100d159b78b51474e23e081}
+    }
+
     test "SORT is normally not alpha re-ordered for the scripting engine" {
         r del myset
         r sadd myset 1 2 3 4 10

From e84eda90925d4e9c276a57ddb0c9dfc2ec497367 Mon Sep 17 00:00:00 2001
From: Wen Hui <wen.hui.ware@gmail.com>
Date: Wed, 19 Jun 2024 17:16:35 -0400
Subject: [PATCH 10/53] Remove useless code in sentinel source code (#676)

Just remove them.

Signed-off-by: hwware <wen.hui.ware@gmail.com>
---
 src/sentinel.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/sentinel.c b/src/sentinel.c
index 49adcc05c9..f5795f53df 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -5109,11 +5109,6 @@ void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
     /* Every kind of instance */
     sentinelCheckSubjectivelyDown(ri);
 
-    /* Primaries and replicas */
-    if (ri->flags & (SRI_PRIMARY | SRI_REPLICA)) {
-        /* Nothing so far. */
-    }
-
     /* Only primaries */
     if (ri->flags & SRI_PRIMARY) {
         sentinelCheckObjectivelyDown(ri);

From 5d2348cee288d3e3f94e51bac51f6597315af043 Mon Sep 17 00:00:00 2001
From: Wen Hui <wen.hui.ware@gmail.com>
Date: Wed, 19 Jun 2024 17:16:49 -0400
Subject: [PATCH 11/53] Update json file and sentinelCommand function for
 Valkey Sentinel  (#675)

In this PR, we update the master keyword to the primary keyword in
several sentinel command json files and in the sentinelCommand function.
There is no update for configurable parameters in the sentinel.conf file.

Signed-off-by: hwware <wen.hui.ware@gmail.com>
---
 src/commands.def                              | 34 +++++++-------
 src/commands/sentinel-ckquorum.json           |  4 +-
 src/commands/sentinel-failover.json           |  4 +-
 .../sentinel-get-master-addr-by-name.json     |  4 +-
 src/commands/sentinel-info-cache.json         |  6 +--
 .../sentinel-is-master-down-by-addr.json      |  6 +--
 src/commands/sentinel-master.json             |  6 +--
 src/commands/sentinel-masters.json            |  6 +--
 src/commands/sentinel-remove.json             |  2 +-
 src/commands/sentinel-replicas.json           |  4 +-
 src/commands/sentinel-reset.json              |  6 +--
 src/commands/sentinel-sentinels.json          |  2 +-
 src/commands/sentinel-set.json                |  4 +-
 src/commands/sentinel-slaves.json             |  2 +-
 src/commands/slaveof.json                     |  2 +-
 src/commands/waitaof.json                     |  2 +-
 src/sentinel.c                                | 44 +++++++++----------
 17 files changed, 69 insertions(+), 69 deletions(-)

diff --git a/src/commands.def b/src/commands.def
index 989dd1864d..e4484529a2 100644
--- a/src/commands.def
+++ b/src/commands.def
@@ -5403,7 +5403,7 @@ struct COMMAND_STRUCT SCRIPT_Subcommands[] = {
 
 /* SENTINEL CKQUORUM argument table */
 struct COMMAND_ARG SENTINEL_CKQUORUM_Args[] = {
-{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
 };
 
 /********** SENTINEL CONFIG ********************/
@@ -5489,7 +5489,7 @@ struct COMMAND_ARG SENTINEL_DEBUG_Args[] = {
 
 /* SENTINEL FAILOVER argument table */
 struct COMMAND_ARG SENTINEL_FAILOVER_Args[] = {
-{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
 };
 
 /********** SENTINEL FLUSHCONFIG ********************/
@@ -5528,7 +5528,7 @@ struct COMMAND_ARG SENTINEL_FAILOVER_Args[] = {
 
 /* SENTINEL GET_MASTER_ADDR_BY_NAME argument table */
 struct COMMAND_ARG SENTINEL_GET_MASTER_ADDR_BY_NAME_Args[] = {
-{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
 };
 
 /********** SENTINEL HELP ********************/
@@ -5614,7 +5614,7 @@ struct COMMAND_ARG SENTINEL_IS_MASTER_DOWN_BY_ADDR_Args[] = {
 
 /* SENTINEL MASTER argument table */
 struct COMMAND_ARG SENTINEL_MASTER_Args[] = {
-{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
 };
 
 /********** SENTINEL MASTERS ********************/
@@ -5712,7 +5712,7 @@ struct COMMAND_ARG SENTINEL_MONITOR_Args[] = {
 
 /* SENTINEL REMOVE argument table */
 struct COMMAND_ARG SENTINEL_REMOVE_Args[] = {
-{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
 };
 
 /********** SENTINEL REPLICAS ********************/
@@ -5734,7 +5734,7 @@ struct COMMAND_ARG SENTINEL_REMOVE_Args[] = {
 
 /* SENTINEL REPLICAS argument table */
 struct COMMAND_ARG SENTINEL_REPLICAS_Args[] = {
-{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
 };
 
 /********** SENTINEL RESET ********************/
@@ -5778,7 +5778,7 @@ struct COMMAND_ARG SENTINEL_RESET_Args[] = {
 
 /* SENTINEL SENTINELS argument table */
 struct COMMAND_ARG SENTINEL_SENTINELS_Args[] = {
-{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
 };
 
 /********** SENTINEL SET ********************/
@@ -5806,7 +5806,7 @@ struct COMMAND_ARG SENTINEL_SET_data_Subargs[] = {
 
 /* SENTINEL SET argument table */
 struct COMMAND_ARG SENTINEL_SET_Args[] = {
-{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
 {MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=SENTINEL_SET_data_Subargs},
 };
 
@@ -5858,7 +5858,7 @@ struct COMMAND_ARG SENTINEL_SIMULATE_FAILURE_Args[] = {
 
 /* SENTINEL SLAVES argument table */
 struct COMMAND_ARG SENTINEL_SLAVES_Args[] = {
-{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
 };
 
 /* SENTINEL command table */
@@ -5868,20 +5868,20 @@ struct COMMAND_STRUCT SENTINEL_Subcommands[] = {
 {MAKE_CMD("debug","Lists or updates the current configurable parameters of Sentinel.","O(N) where N is the number of configurable parameters","7.0.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_DEBUG_History,0,SENTINEL_DEBUG_Tips,0,sentinelCommand,-2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_DEBUG_Keyspecs,0,NULL,1),.args=SENTINEL_DEBUG_Args},
 {MAKE_CMD("failover","Forces a Sentinel failover.",NULL,"2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_FAILOVER_History,0,SENTINEL_FAILOVER_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_FAILOVER_Keyspecs,0,NULL,1),.args=SENTINEL_FAILOVER_Args},
 {MAKE_CMD("flushconfig","Rewrites the Sentinel configuration file.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_FLUSHCONFIG_History,0,SENTINEL_FLUSHCONFIG_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_FLUSHCONFIG_Keyspecs,0,NULL,0)},
-{MAKE_CMD("get-master-addr-by-name","Returns the port and address of a master instance.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_GET_MASTER_ADDR_BY_NAME_History,0,SENTINEL_GET_MASTER_ADDR_BY_NAME_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_GET_MASTER_ADDR_BY_NAME_Keyspecs,0,NULL,1),.args=SENTINEL_GET_MASTER_ADDR_BY_NAME_Args},
+{MAKE_CMD("get-master-addr-by-name","Returns the port and address of a primary instance.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_GET_MASTER_ADDR_BY_NAME_History,0,SENTINEL_GET_MASTER_ADDR_BY_NAME_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_GET_MASTER_ADDR_BY_NAME_Keyspecs,0,NULL,1),.args=SENTINEL_GET_MASTER_ADDR_BY_NAME_Args},
 {MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_HELP_History,0,SENTINEL_HELP_Tips,0,sentinelCommand,2,CMD_LOADING|CMD_STALE|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_HELP_Keyspecs,0,NULL,0)},
 {MAKE_CMD("info-cache","Returns the cached `INFO` replies from the deployment's instances.","O(N) where N is the number of instances","3.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_INFO_CACHE_History,0,SENTINEL_INFO_CACHE_Tips,0,sentinelCommand,-3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_INFO_CACHE_Keyspecs,0,NULL,1),.args=SENTINEL_INFO_CACHE_Args},
-{MAKE_CMD("is-master-down-by-addr","Determines whether a master instance is down.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_IS_MASTER_DOWN_BY_ADDR_History,0,SENTINEL_IS_MASTER_DOWN_BY_ADDR_Tips,0,sentinelCommand,6,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_IS_MASTER_DOWN_BY_ADDR_Keyspecs,0,NULL,4),.args=SENTINEL_IS_MASTER_DOWN_BY_ADDR_Args},
-{MAKE_CMD("master","Returns the state of a master instance.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MASTER_History,0,SENTINEL_MASTER_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MASTER_Keyspecs,0,NULL,1),.args=SENTINEL_MASTER_Args},
-{MAKE_CMD("masters","Returns a list of monitored masters.","O(N) where N is the number of masters","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MASTERS_History,0,SENTINEL_MASTERS_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MASTERS_Keyspecs,0,NULL,0)},
+{MAKE_CMD("is-master-down-by-addr","Determines whether a primary instance is down.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_IS_MASTER_DOWN_BY_ADDR_History,0,SENTINEL_IS_MASTER_DOWN_BY_ADDR_Tips,0,sentinelCommand,6,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_IS_MASTER_DOWN_BY_ADDR_Keyspecs,0,NULL,4),.args=SENTINEL_IS_MASTER_DOWN_BY_ADDR_Args},
+{MAKE_CMD("master","Returns the state of a primary instance.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MASTER_History,0,SENTINEL_MASTER_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MASTER_Keyspecs,0,NULL,1),.args=SENTINEL_MASTER_Args},
+{MAKE_CMD("masters","Returns a list of monitored primaries.","O(N) where N is the number of primaries","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MASTERS_History,0,SENTINEL_MASTERS_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MASTERS_Keyspecs,0,NULL,0)},
 {MAKE_CMD("monitor","Starts monitoring.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MONITOR_History,0,SENTINEL_MONITOR_Tips,0,sentinelCommand,6,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MONITOR_Keyspecs,0,NULL,4),.args=SENTINEL_MONITOR_Args},
 {MAKE_CMD("myid","Returns the Sentinel instance ID.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MYID_History,0,SENTINEL_MYID_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MYID_Keyspecs,0,NULL,0)},
 {MAKE_CMD("pending-scripts","Returns information about pending scripts for Sentinel.",NULL,"2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_PENDING_SCRIPTS_History,0,SENTINEL_PENDING_SCRIPTS_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_PENDING_SCRIPTS_Keyspecs,0,NULL,0)},
 {MAKE_CMD("remove","Stops monitoring.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_REMOVE_History,0,SENTINEL_REMOVE_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_REMOVE_Keyspecs,0,NULL,1),.args=SENTINEL_REMOVE_Args},
 {MAKE_CMD("replicas","Returns a list of the monitored replicas.","O(N) where N is the number of replicas","5.0.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_REPLICAS_History,0,SENTINEL_REPLICAS_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_REPLICAS_Keyspecs,0,NULL,1),.args=SENTINEL_REPLICAS_Args},
-{MAKE_CMD("reset","Resets masters by name matching a pattern.","O(N) where N is the number of monitored masters","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_RESET_History,0,SENTINEL_RESET_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_RESET_Keyspecs,0,NULL,1),.args=SENTINEL_RESET_Args},
+{MAKE_CMD("reset","Resets primaries by name matching a pattern.","O(N) where N is the number of monitored primaries","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_RESET_History,0,SENTINEL_RESET_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_RESET_Keyspecs,0,NULL,1),.args=SENTINEL_RESET_Args},
 {MAKE_CMD("sentinels","Returns a list of Sentinel instances.","O(N) where N is the number of Sentinels","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SENTINELS_History,0,SENTINEL_SENTINELS_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SENTINELS_Keyspecs,0,NULL,1),.args=SENTINEL_SENTINELS_Args},
-{MAKE_CMD("set","Changes the configuration of a monitored master.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SET_History,0,SENTINEL_SET_Tips,0,sentinelCommand,-5,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SET_Keyspecs,0,NULL,2),.args=SENTINEL_SET_Args},
+{MAKE_CMD("set","Changes the configuration of a monitored primary.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SET_History,0,SENTINEL_SET_Tips,0,sentinelCommand,-5,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SET_Keyspecs,0,NULL,2),.args=SENTINEL_SET_Args},
 {MAKE_CMD("simulate-failure","Simulates failover scenarios.",NULL,"3.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SIMULATE_FAILURE_History,0,SENTINEL_SIMULATE_FAILURE_Tips,0,sentinelCommand,-3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SIMULATE_FAILURE_Keyspecs,0,NULL,1),.args=SENTINEL_SIMULATE_FAILURE_Args},
 {MAKE_CMD("slaves","Returns a list of the monitored replicas.","O(N) where N is the number of replicas.","2.8.0",CMD_DOC_DEPRECATED,"`SENTINEL REPLICAS`","5.0.0","sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SLAVES_History,0,SENTINEL_SLAVES_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SLAVES_Keyspecs,0,NULL,1),.args=SENTINEL_SLAVES_Args},
 {0}
@@ -10743,7 +10743,7 @@ struct COMMAND_STRUCT serverCommandTable[] = {
 {MAKE_CMD("type","Determines the type of value stored at a key.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,TYPE_History,0,TYPE_Tips,0,typeCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,TYPE_Keyspecs,1,NULL,1),.args=TYPE_Args},
 {MAKE_CMD("unlink","Asynchronously deletes one or more keys.","O(1) for each key removed regardless of its size. Then the command does O(N) work in a different thread in order to reclaim memory, where N is the number of allocations the deleted objects where composed of.","4.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,UNLINK_History,0,UNLINK_Tips,2,unlinkCommand,-2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,UNLINK_Keyspecs,1,NULL,1),.args=UNLINK_Args},
 {MAKE_CMD("wait","Blocks until the asynchronous replication of all preceding write commands sent by the connection is completed.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,WAIT_History,0,WAIT_Tips,2,waitCommand,3,CMD_BLOCKING,ACL_CATEGORY_CONNECTION,WAIT_Keyspecs,0,NULL,2),.args=WAIT_Args},
-{MAKE_CMD("waitaof","Blocks until all of the preceding write commands sent by the connection are written to the append-only file of the master and/or replicas.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,WAITAOF_History,0,WAITAOF_Tips,2,waitaofCommand,4,CMD_BLOCKING,ACL_CATEGORY_CONNECTION,WAITAOF_Keyspecs,0,NULL,3),.args=WAITAOF_Args},
+{MAKE_CMD("waitaof","Blocks until all of the preceding write commands sent by the connection are written to the append-only file of the primary and/or replicas.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,WAITAOF_History,0,WAITAOF_Tips,2,waitaofCommand,4,CMD_BLOCKING,ACL_CATEGORY_CONNECTION,WAITAOF_Keyspecs,0,NULL,3),.args=WAITAOF_Args},
 /* geo */
 {MAKE_CMD("geoadd","Adds one or more members to a geospatial index. The key is created if it doesn't exist.","O(log(N)) for each item added, where N is the number of elements in the sorted set.","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOADD_History,1,GEOADD_Tips,0,geoaddCommand,-5,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_GEO,GEOADD_Keyspecs,1,NULL,4),.args=GEOADD_Args},
 {MAKE_CMD("geodist","Returns the distance between two members of a geospatial index.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEODIST_History,0,GEODIST_Tips,0,geodistCommand,-4,CMD_READONLY,ACL_CATEGORY_GEO,GEODIST_Keyspecs,1,NULL,4),.args=GEODIST_Args},
@@ -10847,7 +10847,7 @@ struct COMMAND_STRUCT serverCommandTable[] = {
 {MAKE_CMD("role","Returns the replication role.","O(1)","2.8.12",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ROLE_History,0,ROLE_Tips,0,roleCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_SENTINEL,ACL_CATEGORY_ADMIN|ACL_CATEGORY_DANGEROUS,ROLE_Keyspecs,0,NULL,0)},
 {MAKE_CMD("save","Synchronously saves the database(s) to disk.","O(N) where N is the total number of keys in all databases","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SAVE_History,0,SAVE_Tips,0,saveCommand,1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_NO_MULTI,0,SAVE_Keyspecs,0,NULL,0)},
 {MAKE_CMD("shutdown","Synchronously saves the database(s) to disk and shuts down the server.","O(N) when saving, where N is the total number of keys in all databases when saving data, otherwise O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SHUTDOWN_History,1,SHUTDOWN_Tips,0,shutdownCommand,-1,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_NO_MULTI|CMD_SENTINEL|CMD_ALLOW_BUSY,0,SHUTDOWN_Keyspecs,0,NULL,1),.args=SHUTDOWN_Args},
-{MAKE_CMD("slaveof","Sets a server as a replica of another, or promotes it to being a master.","O(1)","1.0.0",CMD_DOC_DEPRECATED,"`REPLICAOF`","5.0.0","server",COMMAND_GROUP_SERVER,SLAVEOF_History,0,SLAVEOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,SLAVEOF_Keyspecs,0,NULL,1),.args=SLAVEOF_Args},
+{MAKE_CMD("slaveof","Sets a server as a replica of another, or promotes it to being a primary.","O(1)","1.0.0",CMD_DOC_DEPRECATED,"`REPLICAOF`","5.0.0","server",COMMAND_GROUP_SERVER,SLAVEOF_History,0,SLAVEOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,SLAVEOF_Keyspecs,0,NULL,1),.args=SLAVEOF_Args},
 {MAKE_CMD("slowlog","A container for slow log commands.","Depends on subcommand.","2.2.12",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SLOWLOG_History,0,SLOWLOG_Tips,0,NULL,-2,0,0,SLOWLOG_Keyspecs,0,NULL,0),.subcommands=SLOWLOG_Subcommands},
 {MAKE_CMD("swapdb","Swaps two databases.","O(N) where N is the count of clients watching or blocking on keys from both databases.","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SWAPDB_History,0,SWAPDB_Tips,0,swapdbCommand,3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,SWAPDB_Keyspecs,0,NULL,2),.args=SWAPDB_Args},
 {MAKE_CMD("sync","An internal command used in replication.",NULL,"1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SYNC_History,0,SYNC_Tips,0,syncCommand,1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NO_MULTI|CMD_NOSCRIPT,0,SYNC_Keyspecs,0,NULL,0)},
diff --git a/src/commands/sentinel-ckquorum.json b/src/commands/sentinel-ckquorum.json
index e79132303f..7f4428c3fe 100644
--- a/src/commands/sentinel-ckquorum.json
+++ b/src/commands/sentinel-ckquorum.json
@@ -13,12 +13,12 @@
         ],
         "reply_schema": {
             "type": "string",
-            "description": "Returns OK if the current Sentinel configuration is able to reach the quorum needed to failover a master, and the majority needed to authorize the failover.",
+            "description": "Returns OK if the current Sentinel configuration is able to reach the quorum needed to failover a primary, and the majority needed to authorize the failover.",
             "pattern": "OK"
         },
         "arguments": [
             {
-                "name": "master-name",
+                "name": "primary-name",
                 "type": "string"
             }
         ]
diff --git a/src/commands/sentinel-failover.json b/src/commands/sentinel-failover.json
index 8a211990f2..8e7c3ea3e7 100644
--- a/src/commands/sentinel-failover.json
+++ b/src/commands/sentinel-failover.json
@@ -13,11 +13,11 @@
         ],
         "reply_schema": {
             "const": "OK",
-            "description": "Force a fail over as if the master was not reachable, and without asking for agreement to other Sentinels."
+            "description": "Force a fail over as if the primary was not reachable, and without asking for agreement to other Sentinels."
         },
         "arguments": [
             {
-                "name": "master-name",
+                "name": "primary-name",
                 "type": "string"
             }
         ]
diff --git a/src/commands/sentinel-get-master-addr-by-name.json b/src/commands/sentinel-get-master-addr-by-name.json
index 1bcbec5341..2d7fc50eda 100644
--- a/src/commands/sentinel-get-master-addr-by-name.json
+++ b/src/commands/sentinel-get-master-addr-by-name.json
@@ -1,6 +1,6 @@
 {
     "GET-MASTER-ADDR-BY-NAME": {
-        "summary": "Returns the port and address of a master instance.",
+        "summary": "Returns the port and address of a primary instance.",
         "complexity": "O(1)",
         "group": "sentinel",
         "since": "2.8.4",
@@ -30,7 +30,7 @@
         },
         "arguments": [
             {
-                "name": "master-name",
+                "name": "primary-name",
                 "type": "string"
             }
         ]
diff --git a/src/commands/sentinel-info-cache.json b/src/commands/sentinel-info-cache.json
index af89f182ea..44edcf35e3 100644
--- a/src/commands/sentinel-info-cache.json
+++ b/src/commands/sentinel-info-cache.json
@@ -14,7 +14,7 @@
         ],
         "reply_schema": {
             "type": "array",
-            "description": "This is actually a map, the odd entries are a master name, and the even entries are the last cached INFO output from that master and all its replicas.",
+            "description": "This is actually a map, the odd entries are a primary name, and the even entries are the last cached INFO output from that primary and all its replicas.",
             "minItems": 0,
             "maxItems": 4294967295,
             "items": [
@@ -22,11 +22,11 @@
                     "oneOf": [
                         {
                             "type": "string",
-                            "description": "The master name."
+                            "description": "The primary name."
                         },
                         {
                             "type": "array",
-                            "description": "This is an array of pairs, the odd entries are the INFO age, and the even entries are the cached INFO string. The first pair belong to the master and the rest are its replicas.",
+                            "description": "This is an array of pairs, the odd entries are the INFO age, and the even entries are the cached INFO string. The first pair belong to the primary and the rest are its replicas.",
                             "minItems": 2,
                             "maxItems": 2,
                             "items": [
diff --git a/src/commands/sentinel-is-master-down-by-addr.json b/src/commands/sentinel-is-master-down-by-addr.json
index fd7698014c..3ecf8723fb 100644
--- a/src/commands/sentinel-is-master-down-by-addr.json
+++ b/src/commands/sentinel-is-master-down-by-addr.json
@@ -1,6 +1,6 @@
 {
     "IS-MASTER-DOWN-BY-ADDR": {
-        "summary": "Determines whether a master instance is down.",
+        "summary": "Determines whether a primary instance is down.",
         "complexity": "O(1)",
         "group": "sentinel",
         "since": "2.8.4",
@@ -21,11 +21,11 @@
                     "oneOf": [
                         {
                             "const": 0,
-                            "description": "Master is up."
+                            "description": "Primary is up."
                         },
                         {
                             "const": 1,
-                            "description": "Master is down."
+                            "description": "Primary is down."
                         }
                     ]
                 },
diff --git a/src/commands/sentinel-master.json b/src/commands/sentinel-master.json
index ff94617aeb..3af3227394 100644
--- a/src/commands/sentinel-master.json
+++ b/src/commands/sentinel-master.json
@@ -1,6 +1,6 @@
 {
     "MASTER": {
-        "summary": "Returns the state of a master instance.",
+        "summary": "Returns the state of a primary instance.",
         "complexity": "O(1)",
         "group": "sentinel",
         "since": "2.8.4",
@@ -14,14 +14,14 @@
         ],
         "reply_schema": {
             "type": "object",
-            "description": "The state and info of the specified master.",
+            "description": "The state and info of the specified primary.",
             "additionalProperties": {
                 "type": "string"
             }
         },
         "arguments": [
             {
-                "name": "master-name",
+                "name": "primary-name",
                 "type": "string"
             }
         ]
diff --git a/src/commands/sentinel-masters.json b/src/commands/sentinel-masters.json
index 26992585a1..b6aa86d02a 100644
--- a/src/commands/sentinel-masters.json
+++ b/src/commands/sentinel-masters.json
@@ -1,7 +1,7 @@
 {
     "MASTERS": {
-        "summary": "Returns a list of monitored masters.",
-        "complexity": "O(N) where N is the number of masters",
+        "summary": "Returns a list of monitored primaries.",
+        "complexity": "O(N) where N is the number of primaries",
         "group": "sentinel",
         "since": "2.8.4",
         "arity": 2,
@@ -14,7 +14,7 @@
         ],
         "reply_schema": {
             "type": "array",
-            "description": "List of monitored masters, and their state.",
+            "description": "List of monitored primaries, and their state.",
             "items": {
                 "type": "object",
                 "additionalProperties": {
diff --git a/src/commands/sentinel-remove.json b/src/commands/sentinel-remove.json
index 1fe084f42c..7d545c3715 100644
--- a/src/commands/sentinel-remove.json
+++ b/src/commands/sentinel-remove.json
@@ -17,7 +17,7 @@
         },
         "arguments": [
             {
-                "name": "master-name",
+                "name": "primary-name",
                 "type": "string"
             }
         ]
diff --git a/src/commands/sentinel-replicas.json b/src/commands/sentinel-replicas.json
index 32b04e994a..a81ed0ef00 100644
--- a/src/commands/sentinel-replicas.json
+++ b/src/commands/sentinel-replicas.json
@@ -14,7 +14,7 @@
         ],
         "reply_schema": {
             "type": "array",
-            "description": "List of replicas for this master, and their state.",
+            "description": "List of replicas for this primary, and their state.",
             "items": {
                 "type": "object",
                 "additionalProperties": {
@@ -24,7 +24,7 @@
         },
         "arguments": [
             {
-                "name": "master-name",
+                "name": "primary-name",
                 "type": "string"
             }
         ]
diff --git a/src/commands/sentinel-reset.json b/src/commands/sentinel-reset.json
index 5d2a63f3d5..35153609cb 100644
--- a/src/commands/sentinel-reset.json
+++ b/src/commands/sentinel-reset.json
@@ -1,7 +1,7 @@
 {
     "RESET": {
-        "summary": "Resets masters by name matching a pattern.",
-        "complexity": "O(N) where N is the number of monitored masters",
+        "summary": "Resets primaries by name matching a pattern.",
+        "complexity": "O(N) where N is the number of monitored primaries",
         "group": "sentinel",
         "since": "2.8.4",
         "arity": 3,
@@ -14,7 +14,7 @@
         ],
         "reply_schema": {
             "type": "integer",
-            "description": "The number of masters that were reset."
+            "description": "The number of primaries that were reset."
         },
         "arguments": [
             {
diff --git a/src/commands/sentinel-sentinels.json b/src/commands/sentinel-sentinels.json
index fdaa5cb992..dae12c5a9b 100644
--- a/src/commands/sentinel-sentinels.json
+++ b/src/commands/sentinel-sentinels.json
@@ -24,7 +24,7 @@
         },
         "arguments": [
             {
-                "name": "master-name",
+                "name": "primary-name",
                 "type": "string"
             }
         ]
diff --git a/src/commands/sentinel-set.json b/src/commands/sentinel-set.json
index abca33b89a..43523e6d6b 100644
--- a/src/commands/sentinel-set.json
+++ b/src/commands/sentinel-set.json
@@ -1,6 +1,6 @@
 {
     "SET": {
-        "summary": "Changes the configuration of a monitored master.",
+        "summary": "Changes the configuration of a monitored primary.",
         "complexity": "O(1)",
         "group": "sentinel",
         "since": "2.8.4",
@@ -17,7 +17,7 @@
         },
         "arguments": [
             {
-                "name": "master-name",
+                "name": "primary-name",
                 "type": "string"
             },
             {
diff --git a/src/commands/sentinel-slaves.json b/src/commands/sentinel-slaves.json
index c1fec41bb2..9792270982 100644
--- a/src/commands/sentinel-slaves.json
+++ b/src/commands/sentinel-slaves.json
@@ -29,7 +29,7 @@
         },
         "arguments": [
             {
-                "name": "master-name",
+                "name": "primary-name",
                 "type": "string"
             }
         ]
diff --git a/src/commands/slaveof.json b/src/commands/slaveof.json
index ca30982887..509bdfbee3 100644
--- a/src/commands/slaveof.json
+++ b/src/commands/slaveof.json
@@ -1,6 +1,6 @@
 {
     "SLAVEOF": {
-        "summary": "Sets a server as a replica of another, or promotes it to being a master.",
+        "summary": "Sets a server as a replica of another, or promotes it to being a primary.",
         "complexity": "O(1)",
         "group": "server",
         "since": "1.0.0",
diff --git a/src/commands/waitaof.json b/src/commands/waitaof.json
index 19b514c274..d664000b5f 100644
--- a/src/commands/waitaof.json
+++ b/src/commands/waitaof.json
@@ -1,6 +1,6 @@
 {
     "WAITAOF": {
-        "summary": "Blocks until all of the preceding write commands sent by the connection are written to the append-only file of the master and/or replicas.",
+        "summary": "Blocks until all of the preceding write commands sent by the connection are written to the append-only file of the primary and/or replicas.",
         "complexity": "O(1)",
         "group": "generic",
         "since": "7.2.0",
diff --git a/src/sentinel.c b/src/sentinel.c
index f5795f53df..71b548debc 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -3675,9 +3675,9 @@ void sentinelCommand(client *c) {
     if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr, "help")) {
         /* clang-format off */
         const char *help[] = {
-"CKQUORUM <master-name>",
+"CKQUORUM <primary-name>",
 "    Check if the current Sentinel configuration is able to reach the quorum",
-"    needed to failover a master and the majority needed to authorize the",
+"    needed to failover a primary and the majority needed to authorize the",
 "    failover.",
 "CONFIG SET param value [param value ...]",
 "    Set a global Sentinel configuration parameter.",
@@ -3686,39 +3686,39 @@ void sentinelCommand(client *c) {
 "DEBUG [<param> <value> ...]",
 "    Show a list of configurable time parameters and their values (milliseconds).",
 "    Or update current configurable parameters values (one or more).",
-"GET-MASTER-ADDR-BY-NAME <master-name>",
-"    Return the ip and port number of the master with that name.",
-"FAILOVER <master-name>",
-"    Manually failover a master node without asking for agreement from other",
+"GET-MASTER-ADDR-BY-NAME <primary-name>",
+"    Return the ip and port number of the primary with that name.",
+"FAILOVER <primary-name>",
+"    Manually failover a primary node without asking for agreement from other",
 "    Sentinels",
 "FLUSHCONFIG",
 "    Force Sentinel to rewrite its configuration on disk, including the current",
 "    Sentinel state.",
-"INFO-CACHE <master-name>",
-"    Return last cached INFO output from masters and all its replicas.",
+"INFO-CACHE <primary-name>",
+"    Return last cached INFO output from primaries and all its replicas.",
 "IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>",
-"    Check if the master specified by ip:port is down from current Sentinel's",
+"    Check if the primary specified by ip:port is down from current Sentinel's",
 "    point of view.",
-"MASTER <master-name>",
-"    Show the state and info of the specified master.",
+"MASTER <primary-name>",
+"    Show the state and info of the specified primary.",
 "MASTERS",
-"    Show a list of monitored masters and their state.",
+"    Show a list of monitored primaries and their state.",
 "MONITOR <name> <ip> <port> <quorum>",
-"    Start monitoring a new master with the specified name, ip, port and quorum.",
+"    Start monitoring a new primary with the specified name, ip, port and quorum.",
 "MYID",
 "    Return the ID of the Sentinel instance.",
 "PENDING-SCRIPTS",
 "    Get pending scripts information.",
-"REMOVE <master-name>",
-"    Remove master from Sentinel's monitor list.",
-"REPLICAS <master-name>",
-"    Show a list of replicas for this master and their state.",
+"REMOVE <primary-name>",
+"    Remove primary from Sentinel's monitor list.",
+"REPLICAS <primary-name>",
+"    Show a list of replicas for this primary and their states.",
 "RESET <pattern>",
-"    Reset masters for specific master name matching this pattern.",
-"SENTINELS <master-name>",
-"    Show a list of Sentinel instances for this master and their state.",
-"SET <master-name> <option> <value> [<option> <value> ...]",
-"    Set configuration parameters for certain masters.",
+"    Reset primaries for specific primary name matching this pattern.",
+"SENTINELS <primary-name>",
+"    Show a list of Sentinel instances for this primary and their state.",
+"SET <primary-name> <option> <value> [<option> <value> ...]",
+"    Set configuration parameters for certain primaries.",
 "SIMULATE-FAILURE [CRASH-AFTER-ELECTION] [CRASH-AFTER-PROMOTION] [HELP]",
 "    Simulate a Sentinel crash.",
 NULL

From 0143b7c9dd7ec38fec5469a1055d96e8f4f0984c Mon Sep 17 00:00:00 2001
From: poiuj <1099644+poiuj@users.noreply.github.com>
Date: Thu, 20 Jun 2024 02:13:55 +0300
Subject: [PATCH 12/53] Add zfree_with_size to optimize sdsfree since we can
 get zmalloc_size from the header (#453)

### Description ###
zfree updates memory statistics. It gets the size of the buffer from
jemalloc by calling zmalloc_size. This operation is costly. We can avoid
it if we know the buffer size. For example, we can calculate the size of
an sds from the data we have in its header.

This commit introduces zfree_with_size function that accepts both
pointer to a buffer, and its size. zfree is refactored to call
zfree_with_size.

sdsfree uses the new interface for all but SDS_TYPE_5.

### Benchmark ###

Dataset is 3 million strings. Each benchmark run uses its own value size
(8192, 512, and 120). The benchmark is 100% write load for 5 minutes.

```
value size       new tps      old tps      %       new us/call    old us/call    %
8k               272088.53    269971.75    0.78    1.83           1.92           -4.69
512              356881.91    352856.72    1.14    1.27           1.35           -5.93
120              377523.81    368774.78    2.37    1.14           1.19           -4.20
```

---------

Signed-off-by: Vadym Khoptynets <vadymkh@amazon.com>
Signed-off-by: Madelyn Olson <madelyneolson@gmail.com>
Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
---
 src/sds.c      | 13 +++++++++----
 src/sdsalloc.h |  1 +
 src/zmalloc.c  | 49 +++++++++++++++++++++++++++++++++++++------------
 src/zmalloc.h  |  1 +
 4 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/src/sds.c b/src/sds.c
index c47901d73a..c52c14759b 100644
--- a/src/sds.c
+++ b/src/sds.c
@@ -195,7 +195,7 @@ sds sdsdup(const sds s) {
 /* Free an sds string. No operation is performed if 's' is NULL. */
 void sdsfree(sds s) {
     if (s == NULL) return;
-    s_free((char *)s - sdsHdrSize(s[-1]));
+    s_free_with_size(sdsAllocPtr(s), sdsAllocSize(s));
 }
 
 /* Set the sds string length to the length as obtained with strlen(), so
@@ -369,7 +369,7 @@ sds sdsResize(sds s, size_t size, int would_regrow) {
          * We aim to avoid calling realloc() when using Jemalloc if there is no
          * change in the allocation size, as it incurs a cost even if the
          * allocation size stays the same. */
-        bufsize = zmalloc_size(sh);
+        bufsize = sdsAllocSize(s);
         alloc_already_optimal = (je_nallocx(newlen, 0) == bufsize);
 #endif
         if (!alloc_already_optimal) {
@@ -412,8 +412,13 @@ sds sdsResize(sds s, size_t size, int would_regrow) {
  * 4) The implicit null term.
  */
 size_t sdsAllocSize(sds s) {
-    size_t alloc = sdsalloc(s);
-    return sdsHdrSize(s[-1]) + alloc + 1;
+    char type = s[-1] & SDS_TYPE_MASK;
+    /* SDS_TYPE_5 header doesn't contain the size of the allocation */
+    if (type == SDS_TYPE_5) {
+        return s_malloc_size(sdsAllocPtr(s));
+    } else {
+        return sdsHdrSize(type) + sdsalloc(s) + 1;
+    }
 }
 
 /* Return the pointer of the actual SDS allocation (normally SDS strings
diff --git a/src/sdsalloc.h b/src/sdsalloc.h
index 6644eb3c83..6fd076d9f0 100644
--- a/src/sdsalloc.h
+++ b/src/sdsalloc.h
@@ -45,6 +45,7 @@
 #define s_trymalloc ztrymalloc
 #define s_tryrealloc ztryrealloc
 #define s_free zfree
+#define s_free_with_size zfree_with_size
 #define s_malloc_usable zmalloc_usable
 #define s_realloc_usable zrealloc_usable
 #define s_trymalloc_usable ztrymalloc_usable
diff --git a/src/zmalloc.c b/src/zmalloc.c
index 0117d8d91a..3ab646dd71 100644
--- a/src/zmalloc.c
+++ b/src/zmalloc.c
@@ -31,6 +31,7 @@
 #include "fmacros.h"
 #include "config.h"
 #include "solarisfixes.h"
+#include "serverassert.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -361,22 +362,48 @@ size_t zmalloc_usable_size(void *ptr) {
 }
 #endif
 
-void zfree(void *ptr) {
-#ifndef HAVE_MALLOC_SIZE
-    void *realptr;
-    size_t oldsize;
+/* Frees the memory buffer pointed to by ptr and updates statistics. When using
+ * jemalloc it uses the fast track by specifying the buffer size.
+ *
+ * ptr must have been returned by a previous call to the system allocator which
+ * returned the usable size, such as zmalloc_usable. ptr must not be NULL. The
+ * caller is responsible to provide the actual allocation size, which may be
+ * different from the requested size. */
+static inline void zfree_internal(void *ptr, size_t size) {
+    assert(ptr != NULL);
+    update_zmalloc_stat_free(size);
+
+#ifdef USE_JEMALLOC
+    je_sdallocx(ptr, size, 0);
+#else
+    free(ptr);
 #endif
+}
 
+void zfree(void *ptr) {
     if (ptr == NULL) return;
+
 #ifdef HAVE_MALLOC_SIZE
-    update_zmalloc_stat_free(zmalloc_size(ptr));
-    free(ptr);
+    size_t size = zmalloc_size(ptr);
 #else
-    realptr = (char *)ptr - PREFIX_SIZE;
-    oldsize = *((size_t *)realptr);
-    update_zmalloc_stat_free(oldsize + PREFIX_SIZE);
-    free(realptr);
+    ptr = (char *)ptr - PREFIX_SIZE;
+    size_t data_size = *((size_t *)ptr);
+    size_t size = data_size + PREFIX_SIZE;
+#endif
+
+    zfree_internal(ptr, size);
+}
+
+/* Like zfree(), but doesn't call zmalloc_size(). */
+void zfree_with_size(void *ptr, size_t size) {
+    if (ptr == NULL) return;
+
+#ifndef HAVE_MALLOC_SIZE
+    ptr = (char *)ptr - PREFIX_SIZE;
+    size += PREFIX_SIZE;
 #endif
+
+    zfree_internal(ptr, size);
 }
 
 char *zstrdup(const char *s) {
@@ -604,8 +631,6 @@ size_t zmalloc_get_rss(void) {
 
 #if defined(USE_JEMALLOC)
 
-#include "serverassert.h"
-
 #define STRINGIFY_(x) #x
 #define STRINGIFY(x) STRINGIFY_(x)
 
diff --git a/src/zmalloc.h b/src/zmalloc.h
index a909366c13..421cb9bcaa 100644
--- a/src/zmalloc.h
+++ b/src/zmalloc.h
@@ -118,6 +118,7 @@ __attribute__((malloc, alloc_size(1), noinline)) void *ztrymalloc(size_t size);
 __attribute__((malloc, alloc_size(1), noinline)) void *ztrycalloc(size_t size);
 __attribute__((alloc_size(2), noinline)) void *ztryrealloc(void *ptr, size_t size);
 void zfree(void *ptr);
+void zfree_with_size(void *ptr, size_t size);
 void *zmalloc_usable(size_t size, size_t *usable);
 void *zcalloc_usable(size_t size, size_t *usable);
 void *zrealloc_usable(void *ptr, size_t size, size_t *usable);

From bf1fb1fd3658f57b3040f4e869c0c906c3fe3d9d Mon Sep 17 00:00:00 2001
From: Binbin <binloveplay1314@qq.com>
Date: Thu, 20 Jun 2024 10:28:47 +0800
Subject: [PATCH 13/53] Fix copy-paste error in scripts eviction test (#671)

The test needs to test "return 2" but mistakenly uses "return 1".
Also remove an extra debug print.

Signed-off-by: Binbin <binloveplay1314@qq.com>
---
 tests/unit/scripting.tcl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl
index e19ccdf721..9e174c18d1 100644
--- a/tests/unit/scripting.tcl
+++ b/tests/unit/scripting.tcl
@@ -1570,7 +1570,6 @@ start_server {tags {"scripting needs:debug external:skip"}} {
         r write $cmd
         r flush
         set ret [r read]
-        puts $ret
         assert_match {*PONG*} $ret
         reconnect 
         assert_equal [r ping] {PONG}
@@ -1634,7 +1633,7 @@ start_server {tags {"scripting external:skip"}} {
         # "return 1" is ok since it was moved to tail.
         assert_equal 1 [r evalsha e0e1f9fabfc9d4800c877a703b823ac0578ff8db 0]
         # "return 2" is ok since it was moved to tail.
-        assert_equal 1 [r evalsha e0e1f9fabfc9d4800c877a703b823ac0578ff8db 0]
+        assert_equal 2 [r evalsha 7f923f79fe76194c868d7e1d0820de36700eb649 0]
         # "return 3" was evicted.
         assert_error {NOSCRIPT*} {r evalsha 09d3822de862f46d784e6a36848b4f0736dda47a 0}
         # Others are ok.

From ce79539047ccecf39951168e5f30e2cd9a3135ec Mon Sep 17 00:00:00 2001
From: Madelyn Olson <madelyneolson@gmail.com>
Date: Fri, 21 Jun 2024 00:29:05 -0700
Subject: [PATCH 14/53] Fail tests immediately if the server is no longer
 running (#678)

Fix a minor inconvenience I have when writing tests. If I have a typo or
forget to generate the TLS certificates, the start_server handle will
just loop for 2 minutes before printing the error. This change fails the
test and prints the error as soon as it is seen.

Signed-off-by: Madelyn Olson <madelyneolson@gmail.com>
---
 tests/support/server.tcl | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index ddcdbcddbc..b0750fcb8a 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -304,7 +304,7 @@ proc spawn_server {config_file stdout stderr args} {
 }
 
 # Wait for actual startup, return 1 if port is busy, 0 otherwise
-proc wait_server_started {config_file stdout pid} {
+proc wait_server_started {config_file stdout stderr pid} {
     set checkperiod 100; # Milliseconds
     set maxiter [expr {120*1000/$checkperiod}] ; # Wait up to 2 minutes.
     set port_busy 0
@@ -328,6 +328,13 @@ proc wait_server_started {config_file stdout pid} {
             set port_busy 1
             break
         }
+
+        # Configuration errors are unexpected, but it's helpful to fail fast
+        # to give the feedback to the test runner.
+        if {[regexp {FATAL CONFIG FILE ERROR} [exec cat $stderr]]} {
+            start_server_error $config_file "Configuration issue prevented Valkey startup"
+            break
+        }
     }
     return $port_busy
 }
@@ -568,7 +575,7 @@ proc start_server {options {code undefined}} {
         set pid [spawn_server $config_file $stdout $stderr $args]
 
         # check that the server actually started
-        set port_busy [wait_server_started $config_file $stdout $pid]
+        set port_busy [wait_server_started $config_file $stdout $stderr $pid]
 
         # Sometimes we have to try a different port, even if we checked
         # for availability. Other test clients may grab the port before we
@@ -778,7 +785,7 @@ proc restart_server {level wait_ready rotate_logs {reconnect 1} {shutdown sigter
     set pid [spawn_server $config_file $stdout $stderr {}]
 
     # check that the server actually started
-    wait_server_started $config_file $stdout $pid
+    wait_server_started $config_file $stdout $stderr $pid
 
     # update the pid in the servers list
     dict set srv "pid" $pid

From 32ca6e5b38bfc4a15878c2b6ff3d2c71da1026e3 Mon Sep 17 00:00:00 2001
From: Ping Xie <pingxie@google.com>
Date: Sun, 23 Jun 2024 22:08:52 -0700
Subject: [PATCH 15/53]  Improve `CLUSTER SETSLOT` replication handling to
 support older replica versions. (#686)

---
 src/cluster_legacy.c | 49 ++++++++++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index e9816d52a1..def572c249 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -6022,23 +6022,42 @@ void clusterCommandSetSlot(client *c) {
      * a reliable slot ownership transfer even if the primary node went down during
      * the process. */
     if (nodeIsPrimary(myself) && myself->num_replicas != 0 && (c->flags & CLIENT_REPLICATION_DONE) == 0) {
-        forceCommandPropagation(c, PROPAGATE_REPL);
-        /* We are a primary and this is the first time we see this `SETSLOT`
-         * command. Force-replicate the command to all of our replicas
-         * first and only on success will we handle the command.
-         * Note that
-         * 1. All replicas are expected to ack the replication within the given timeout
-         * 2. The repl offset target is set to the primary's current repl offset + 1.
-         *    There is no concern of partial replication because replicas always
-         *    ack the repl offset at the command boundary. */
-        blockClientForReplicaAck(c, timeout_ms, server.primary_repl_offset + 1, myself->num_replicas, 0);
-        /* Mark client as pending command for execution after replication to replicas. */
-        c->flags |= CLIENT_PENDING_COMMAND;
-        replicationRequestAckFromReplicas();
-        return;
+        /* Iterate through the list of replicas to check if there are any running
+         * a version older than 8.0.0. Replicas with versions older than 8.0.0 do
+         * not support the CLUSTER SETSLOT command on replicas. If such a replica
+         * is found, we should skip the replication and fall back to the old
+         * non-replicated behavior.*/
+        listIter li;
+        listNode *ln;
+        int legacy_replica_found = 0;
+        listRewind(server.replicas, &li);
+        while ((ln = listNext(&li))) {
+            client *r = ln->value;
+            if (r->replica_version < 0x80000 /* 8.0.0 */) {
+                legacy_replica_found++;
+                break;
+            }
+        }
+
+        if (!legacy_replica_found) {
+            forceCommandPropagation(c, PROPAGATE_REPL);
+            /* We are a primary and this is the first time we see this `SETSLOT`
+             * command. Force-replicate the command to all of our replicas
+             * first and only on success will we handle the command.
+             * Note that
+             * 1. All replicas are expected to ack the replication within the given timeout
+             * 2. The repl offset target is set to the primary's current repl offset + 1.
+             *    There is no concern of partial replication because replicas always
+             *    ack the repl offset at the command boundary. */
+            blockClientForReplicaAck(c, timeout_ms, server.primary_repl_offset + 1, myself->num_replicas, 0);
+            /* Mark client as pending command for execution after replication to replicas. */
+            c->flags |= CLIENT_PENDING_COMMAND;
+            replicationRequestAckFromReplicas();
+            return;
+        }
     }
 
-    /* Slot states have been updated on the replicas (if any).
+    /* Slot states have been updated on the compatible replicas (if any).
      * Now exuecte the command on the primary. */
     if (!strcasecmp(c->argv[3]->ptr, "migrating")) {
         serverLog(LL_NOTICE, "Migrating slot %d to node %.40s (%s)", slot, n->name, n->human_nodename);

From 4d3d6c06a15fc5a1a2f9e17b6341cc46850a30f4 Mon Sep 17 00:00:00 2001
From: Lipeng Zhu <lipeng.zhu@intel.com>
Date: Tue, 25 Jun 2024 09:33:30 +0800
Subject: [PATCH 16/53] Reduce redundant call of prepareClientToWrite when call
 addReply* continuously. (#670)

## Description

While profiling some benchmark workloads to find hotspots, we noticed
that `prepareClientToWrite` consumes a high share of cycles, taking about
9% of the CPU for the `smembers` and `lrange` commands. After a deep dive
into the code logic, we concluded that we can improve performance by
reducing the redundant calls to `prepareClientToWrite` when addReply*
functions are called consecutively.

For example: In
https://github.com/valkey-io/valkey/blob/unstable/src/networking.c#L1080-L1082,
`prepareClientToWrite` is called three times in a row.

---------

Signed-off-by: Lipeng Zhu <lipeng.zhu@intel.com>
Co-authored-by: Wangyang Guo <wangyang.guo@intel.com>
---
 src/networking.c | 43 ++++++++++++++++++++++++++-----------------
 src/server.h     |  1 -
 2 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/src/networking.c b/src/networking.c
index d6d3d4fece..dff4226c54 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -963,7 +963,7 @@ void addReplyHumanLongDouble(client *c, long double d) {
 
 /* Add a long long as integer reply or bulk len / multi bulk count.
  * Basically this is used to output <prefix><long long><crlf>. */
-void addReplyLongLongWithPrefix(client *c, long long ll, char prefix) {
+static void _addReplyLongLongWithPrefix(client *c, long long ll, char prefix) {
     char buf[128];
     int len;
 
@@ -973,16 +973,16 @@ void addReplyLongLongWithPrefix(client *c, long long ll, char prefix) {
     const int opt_hdr = ll < OBJ_SHARED_BULKHDR_LEN && ll >= 0;
     const size_t hdr_len = OBJ_SHARED_HDR_STRLEN(ll);
     if (prefix == '*' && opt_hdr) {
-        addReplyProto(c, shared.mbulkhdr[ll]->ptr, hdr_len);
+        _addReplyToBufferOrList(c, shared.mbulkhdr[ll]->ptr, hdr_len);
         return;
     } else if (prefix == '$' && opt_hdr) {
-        addReplyProto(c, shared.bulkhdr[ll]->ptr, hdr_len);
+        _addReplyToBufferOrList(c, shared.bulkhdr[ll]->ptr, hdr_len);
         return;
     } else if (prefix == '%' && opt_hdr) {
-        addReplyProto(c, shared.maphdr[ll]->ptr, hdr_len);
+        _addReplyToBufferOrList(c, shared.maphdr[ll]->ptr, hdr_len);
         return;
     } else if (prefix == '~' && opt_hdr) {
-        addReplyProto(c, shared.sethdr[ll]->ptr, hdr_len);
+        _addReplyToBufferOrList(c, shared.sethdr[ll]->ptr, hdr_len);
         return;
     }
 
@@ -990,7 +990,7 @@ void addReplyLongLongWithPrefix(client *c, long long ll, char prefix) {
     len = ll2string(buf + 1, sizeof(buf) - 1, ll);
     buf[len + 1] = '\r';
     buf[len + 2] = '\n';
-    addReplyProto(c, buf, len + 3);
+    _addReplyToBufferOrList(c, buf, len + 3);
 }
 
 void addReplyLongLong(client *c, long long ll) {
@@ -998,13 +998,16 @@ void addReplyLongLong(client *c, long long ll) {
         addReply(c, shared.czero);
     else if (ll == 1)
         addReply(c, shared.cone);
-    else
-        addReplyLongLongWithPrefix(c, ll, ':');
+    else {
+        if (prepareClientToWrite(c) != C_OK) return;
+        _addReplyLongLongWithPrefix(c, ll, ':');
+    }
 }
 
 void addReplyAggregateLen(client *c, long length, int prefix) {
     serverAssert(length >= 0);
-    addReplyLongLongWithPrefix(c, length, prefix);
+    if (prepareClientToWrite(c) != C_OK) return;
+    _addReplyLongLongWithPrefix(c, length, prefix);
 }
 
 void addReplyArrayLen(client *c, long length) {
@@ -1064,8 +1067,8 @@ void addReplyNullArray(client *c) {
 /* Create the length prefix of a bulk reply, example: $2234 */
 void addReplyBulkLen(client *c, robj *obj) {
     size_t len = stringObjectLen(obj);
-
-    addReplyLongLongWithPrefix(c, len, '$');
+    if (prepareClientToWrite(c) != C_OK) return;
+    _addReplyLongLongWithPrefix(c, len, '$');
 }
 
 /* Add an Object as a bulk reply */
@@ -1077,16 +1080,22 @@ void addReplyBulk(client *c, robj *obj) {
 
 /* Add a C buffer as bulk reply */
 void addReplyBulkCBuffer(client *c, const void *p, size_t len) {
-    addReplyLongLongWithPrefix(c, len, '$');
-    addReplyProto(c, p, len);
-    addReplyProto(c, "\r\n", 2);
+    if (prepareClientToWrite(c) != C_OK) return;
+    _addReplyLongLongWithPrefix(c, len, '$');
+    _addReplyToBufferOrList(c, p, len);
+    _addReplyToBufferOrList(c, "\r\n", 2);
 }
 
 /* Add sds to reply (takes ownership of sds and frees it) */
 void addReplyBulkSds(client *c, sds s) {
-    addReplyLongLongWithPrefix(c, sdslen(s), '$');
-    addReplySds(c, s);
-    addReplyProto(c, "\r\n", 2);
+    if (prepareClientToWrite(c) != C_OK) {
+        sdsfree(s);
+        return;
+    }
+    _addReplyLongLongWithPrefix(c, sdslen(s), '$');
+    _addReplyToBufferOrList(c, s, sdslen(s));
+    sdsfree(s);
+    _addReplyToBufferOrList(c, "\r\n", 2);
 }
 
 /* Set sds to a deferred reply (for symmetry with addReplyBulkSds it also frees the sds) */
diff --git a/src/server.h b/src/server.h
index c4ce6f655e..a12f091ba9 100644
--- a/src/server.h
+++ b/src/server.h
@@ -2667,7 +2667,6 @@ void addReplyErrorArity(client *c);
 void addReplyErrorExpireTime(client *c);
 void addReplyStatus(client *c, const char *status);
 void addReplyDouble(client *c, double d);
-void addReplyLongLongWithPrefix(client *c, long long ll, char prefix);
 void addReplyBigNum(client *c, const char *num, size_t len);
 void addReplyHumanLongDouble(client *c, long double d);
 void addReplyLongLong(client *c, long long ll);

From 3df9d4279414bf46a9d85d599d2e60055ddaea1d Mon Sep 17 00:00:00 2001
From: ranshid <88133677+ranshid@users.noreply.github.com>
Date: Tue, 25 Jun 2024 18:18:07 +0300
Subject: [PATCH 17/53] Fix bad memory accounting for sds when no malloc_size
 available (#694)

Issue introduced by #453.
When we check the SDS_TYPE_5 allocation size we mistakenly used
zmalloc_size, which DOES take the PREFIX size into account when there
is no malloc_size support.
Later, when we free, we add the PREFIX_SIZE again, which leads to
negative memory accounting on some tests.
Example test failure:
https://github.com/valkey-io/valkey/actions/runs/9654170962/job/26627901497

Signed-off-by: ranshid <ranshid@amazon.com>
---
 src/sds.c      | 2 +-
 src/sdsalloc.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/sds.c b/src/sds.c
index c52c14759b..1c0ddd559d 100644
--- a/src/sds.c
+++ b/src/sds.c
@@ -415,7 +415,7 @@ size_t sdsAllocSize(sds s) {
     char type = s[-1] & SDS_TYPE_MASK;
     /* SDS_TYPE_5 header doesn't contain the size of the allocation */
     if (type == SDS_TYPE_5) {
-        return s_malloc_size(sdsAllocPtr(s));
+        return s_malloc_usable_size(sdsAllocPtr(s));
     } else {
         return sdsHdrSize(type) + sdsalloc(s) + 1;
     }
diff --git a/src/sdsalloc.h b/src/sdsalloc.h
index 6fd076d9f0..dfa8257ebd 100644
--- a/src/sdsalloc.h
+++ b/src/sdsalloc.h
@@ -51,5 +51,6 @@
 #define s_trymalloc_usable ztrymalloc_usable
 #define s_tryrealloc_usable ztryrealloc_usable
 #define s_malloc_size zmalloc_size
+#define s_malloc_usable_size zmalloc_usable_size
 
 #endif

From b49eaad367c334d5226a785123bc9cf4a1f89a25 Mon Sep 17 00:00:00 2001
From: "Kyle Kim (kimkyle@)" <105247741+kyle-yh-kim@users.noreply.github.com>
Date: Tue, 25 Jun 2024 13:24:53 -0400
Subject: [PATCH 18/53] Introduce a minimal debugger for .tcl integration test
 suite. (#683)

Introduce a break-point function called `bp`, based on the tcl wiki's
minimal debugger.

```tcl
 proc bp {{s {}}} {
    if ![info exists ::bp_skip] {
        set ::bp_skip [list]
    } elseif {[lsearch -exact $::bp_skip $s]>=0} return
    if [catch {info level -1} who] {set who ::}
    while 1 {
        puts -nonewline "$who/$s> "; flush stdout
        gets stdin line
        if {$line=="c"} {puts "continuing.."; break}
        if {$line=="i"} {set line "info locals"}
        catch {uplevel 1 $line} res
        puts $res
    }
 }
```

```
... your test code before break-point
bp 1
... your test code after break-point
```

The `bp 1` will give back the tcl interpreter to the developer, and
allow you to interactively print local variables (through `puts`), run
functions and so forth.

Source: https://wiki.tcl-lang.org/page/A+minimal+debugger

---------

Signed-off-by: Kyle Kim <kimkyle@amazon.com>
Signed-off-by: Madelyn Olson <madelyneolson@gmail.com>
Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
---
 tests/README.md        | 32 ++++++++++++++++++++++++++++++++
 tests/support/util.tcl | 22 ++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/tests/README.md b/tests/README.md
index 9d9c657760..efe936aa5b 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -1,6 +1,18 @@
 Valkey Test Suite
 =================
 
+Overview
+--------
+
+Integration tests are written in Tcl, a high-level, general-purpose, interpreted, dynamic programming language [[source](https://wiki.tcl-lang.org/page/What+is+Tcl)].
+`runtest` is the main entrance point for running integration tests.
+For example, to run a single test:
+
+```shell
+./runtest --single unit/your_test_name
+# For additional arguments, you may refer to the `runtest` script itself.
+```
+
 The normal execution mode of the test suite involves starting and manipulating
 local `valkey-server` instances, inspecting process state, log files, etc.
 
@@ -19,6 +31,26 @@ match different external server configurations:
 | `--cluster-mode`     | Run in strict Valkey Cluster compatibility mode. |
 | `--large-memory`     | Enables tests that consume more than 100mb |
 
+Debugging
+---------
+
+You can set a breakpoint and invoke a minimal debugger using the `bp` function.
+
+```
+... your test code before break-point
+bp 1
+... your test code after break-point
+```
+
+`bp 1` hands the Tcl interpreter back to the developer, allowing you to interactively print local variables (through `puts`), run functions and so forth [[source](https://wiki.tcl-lang.org/page/A+minimal+debugger)].
+`bp` takes a single argument, which is `1` for the case above, and is used to label a breakpoint with a string.
+Labels are printed out when breakpoints are hit, so you can identify which breakpoint was triggered.
+Breakpoints can be skipped by setting the global variable `::bp_skip`, and by providing the labels you want to skip.
+
+The minimal debugger comes with the following predefined commands.
+* Type `c` and press Enter to continue past the breakpoint.
+* Type `i` and press Enter to run `info locals`, which lists the local variables.
+
 Tags
 ----
 
diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index 9d69e44232..c6c405b191 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -1162,3 +1162,25 @@ proc generate_largevalue_test_array {} {
     set largevalue(quicklist) [string repeat "x" 8192]
     return [array get largevalue]
 }
+
+# Breakpoint function, which invokes a minimal debugger.
+# This function can be placed within the desired Tcl tests for debugging purposes.
+# 
+# Arguments:
+# * 's': breakpoint label, which is printed when breakpoints are hit for unique identification.
+# 
+# Source: https://wiki.tcl-lang.org/page/A+minimal+debugger
+proc bp {{s {}}} {
+    if ![info exists ::bp_skip] {
+        set ::bp_skip [list]
+    } elseif {[lsearch -exact $::bp_skip $s]>=0} return
+    if [catch {info level -1} who] {set who ::}
+    while 1 {
+        puts -nonewline "$who/$s> "; flush stdout
+        gets stdin line
+        if {$line=="c"} {puts "continuing.."; break}
+        if {$line=="i"} {set line "info locals"}
+        catch {uplevel 1 $line} res
+        puts $res
+    }
+}

From 495c35d918ffc92b5fe5b7180ea722c282826460 Mon Sep 17 00:00:00 2001
From: Pierre <105686771+pieturin@users.noreply.github.com>
Date: Tue, 25 Jun 2024 15:18:30 -0700
Subject: [PATCH 19/53] Add check in CLUSTERLINK KILL cmd to avoid freeing
 links to myself (#689)

Add check in CLUSTERLINK KILL cmd to avoid freeing cluster bus links to
myself. Also add an assert in `freeClusterLink()`.

Testing:
```
127.0.0.1:6379> debug clusterlink kill all c0404ee68574c6aa1048aaebfe90283afe51d2fc
(error) ERR Cannot free cluster link(s) to myself
```

Signed-off-by: Pierre Turin <pieturin@amazon.com>
---
 src/cluster_legacy.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index def572c249..b913cd5671 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -1185,6 +1185,7 @@ clusterLink *createClusterLink(clusterNode *node) {
  * This function will just make sure that the original node associated
  * with this link will have the 'link' field set to NULL. */
 void freeClusterLink(clusterLink *link) {
+    serverAssert(link != NULL);
     if (link->conn) {
         connClose(link->conn);
         link->conn = NULL;
@@ -5815,6 +5816,10 @@ int handleDebugClusterCommand(client *c) {
         addReplyErrorFormat(c, "Unknown node %s", (char *)c->argv[4]->ptr);
         return 1;
     }
+    if (n == server.cluster->myself) {
+        addReplyErrorFormat(c, "Cannot free cluster link(s) to myself");
+        return 1;
+    }
 
     /* Terminate the link based on the direction or all. */
     if (!strcasecmp(c->argv[3]->ptr, "from")) {

From ab3873011a6c208fab4005f7ace87df03850cf03 Mon Sep 17 00:00:00 2001
From: Ouri Half <112880714+ouriamzn@users.noreply.github.com>
Date: Wed, 26 Jun 2024 19:47:59 +0300
Subject: [PATCH 20/53] Replacing REDIS_STATIC with static (#691)

As discussed, we want to remove the old `REDIS_STATIC` flag which is no
longer relevant.

When moving from Redis to Valkey we renamed all REDIS flags in Makefile.
The REDIS_STATIC flag was renamed to SERVER_STATIC, but this change was
not updated in some of the files.

After discussing it with @madolson and @ranshid, we decided that since
this was introduced 10 years ago, and in many places in the code base we
simply use `static`, we should simplify and remove the flag entirely.

---------

Signed-off-by: Ouri Half <ourih@amazon.com>
---
 deps/Makefile   |  2 +-
 src/Makefile    |  2 +-
 src/quicklist.c | 42 ++++++++++++++++++------------------------
 3 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/deps/Makefile b/deps/Makefile
index 67b7d41026..f1e4bd6ce2 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -79,7 +79,7 @@ ifeq ($(uname_S),SunOS)
 	LUA_CFLAGS= -D__C99FEATURES__=1
 endif
 
-LUA_CFLAGS+= -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL -DREDIS_STATIC='' -DLUA_USE_MKSTEMP $(CFLAGS)
+LUA_CFLAGS+= -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL -DLUA_USE_MKSTEMP $(CFLAGS)
 LUA_LDFLAGS+= $(LDFLAGS)
 ifeq ($(LUA_DEBUG),yes)
 	LUA_CFLAGS+= -O0 -g -DLUA_USE_APICHECK
diff --git a/src/Makefile b/src/Makefile
index 302ad06b84..2217597d1f 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -35,7 +35,7 @@ DEPENDENCY_TARGETS=hiredis linenoise lua hdr_histogram fpconv
 NODEPS:=clean distclean
 
 # Default settings
-STD=-pedantic -DSERVER_STATIC=''
+STD=-pedantic
 
 # Use -Wno-c11-extensions on clang, either where explicitly used or on
 # platforms we can assume it's being used.
diff --git a/src/quicklist.c b/src/quicklist.c
index b56763c0fe..f381afb00c 100644
--- a/src/quicklist.c
+++ b/src/quicklist.c
@@ -39,10 +39,6 @@
 #include "lzf.h"
 #include "serverassert.h"
 
-#ifndef REDIS_STATIC
-#define REDIS_STATIC static
-#endif
-
 /* Optimization levels for size-based filling.
  * Note that the largest possible limit is 64k, so even if each record takes
  * just one byte, it still won't overflow the 16 bit count field. */
@@ -100,8 +96,8 @@ quicklistBookmark *_quicklistBookmarkFindByName(quicklist *ql, const char *name)
 quicklistBookmark *_quicklistBookmarkFindByNode(quicklist *ql, quicklistNode *node);
 void _quicklistBookmarkDelete(quicklist *ql, quicklistBookmark *bm);
 
-REDIS_STATIC quicklistNode *_quicklistSplitNode(quicklistNode *node, int offset, int after);
-REDIS_STATIC quicklistNode *_quicklistMergeNodes(quicklist *quicklist, quicklistNode *center);
+static quicklistNode *_quicklistSplitNode(quicklistNode *node, int offset, int after);
+static quicklistNode *_quicklistMergeNodes(quicklist *quicklist, quicklistNode *center);
 
 /* Simple way to give quicklistEntry structs default values with one call. */
 #define initEntry(e)                                                                                                   \
@@ -169,7 +165,7 @@ quicklist *quicklistNew(int fill, int compress) {
     return quicklist;
 }
 
-REDIS_STATIC quicklistNode *quicklistCreateNode(void) {
+static quicklistNode *quicklistCreateNode(void) {
     quicklistNode *node;
     node = zmalloc(sizeof(*node));
     node->entry = NULL;
@@ -213,7 +209,7 @@ void quicklistRelease(quicklist *quicklist) {
 /* Compress the listpack in 'node' and update encoding details.
  * Returns 1 if listpack compressed successfully.
  * Returns 0 if compression failed or if listpack too small to compress. */
-REDIS_STATIC int __quicklistCompressNode(quicklistNode *node) {
+static int __quicklistCompressNode(quicklistNode *node) {
 #ifdef SERVER_TEST
     node->attempted_compress = 1;
 #endif
@@ -253,7 +249,7 @@ REDIS_STATIC int __quicklistCompressNode(quicklistNode *node) {
 
 /* Uncompress the listpack in 'node' and update encoding details.
  * Returns 1 on successful decode, 0 on failure to decode. */
-REDIS_STATIC int __quicklistDecompressNode(quicklistNode *node) {
+static int __quicklistDecompressNode(quicklistNode *node) {
 #ifdef SERVER_TEST
     node->attempted_compress = 0;
 #endif
@@ -304,7 +300,7 @@ size_t quicklistGetLzf(const quicklistNode *node, void **data) {
  * The only way to guarantee interior nodes get compressed is to iterate
  * to our "interior" compress depth then compress the next node we find.
  * If compress depth is larger than the entire list, we return immediately. */
-REDIS_STATIC void __quicklistCompress(const quicklist *quicklist, quicklistNode *node) {
+static void __quicklistCompress(const quicklist *quicklist, quicklistNode *node) {
     if (quicklist->len == 0) return;
 
     /* The head and tail should never be compressed (we should not attempt to recompress them) */
@@ -398,8 +394,7 @@ REDIS_STATIC void __quicklistCompress(const quicklist *quicklist, quicklistNode
  * Insert 'new_node' before 'old_node' if 'after' is 0.
  * Note: 'new_node' is *always* uncompressed, so if we assign it to
  *       head or tail, we do not need to uncompress it. */
-REDIS_STATIC void
-__quicklistInsertNode(quicklist *quicklist, quicklistNode *old_node, quicklistNode *new_node, int after) {
+static void __quicklistInsertNode(quicklist *quicklist, quicklistNode *old_node, quicklistNode *new_node, int after) {
     if (after) {
         new_node->prev = old_node;
         if (old_node) {
@@ -431,11 +426,11 @@ __quicklistInsertNode(quicklist *quicklist, quicklistNode *old_node, quicklistNo
 }
 
 /* Wrappers for node inserting around existing node. */
-REDIS_STATIC void _quicklistInsertNodeBefore(quicklist *quicklist, quicklistNode *old_node, quicklistNode *new_node) {
+static void _quicklistInsertNodeBefore(quicklist *quicklist, quicklistNode *old_node, quicklistNode *new_node) {
     __quicklistInsertNode(quicklist, old_node, new_node, 0);
 }
 
-REDIS_STATIC void _quicklistInsertNodeAfter(quicklist *quicklist, quicklistNode *old_node, quicklistNode *new_node) {
+static void _quicklistInsertNodeAfter(quicklist *quicklist, quicklistNode *old_node, quicklistNode *new_node) {
     __quicklistInsertNode(quicklist, old_node, new_node, 1);
 }
 
@@ -496,7 +491,7 @@ static int isLargeElement(size_t sz, int fill) {
         return sz > quicklistNodeNegFillLimit(fill);
 }
 
-REDIS_STATIC int _quicklistNodeAllowInsert(const quicklistNode *node, const int fill, const size_t sz) {
+static int _quicklistNodeAllowInsert(const quicklistNode *node, const int fill, const size_t sz) {
     if (unlikely(!node)) return 0;
 
     if (unlikely(QL_NODE_IS_PLAIN(node) || isLargeElement(sz, fill))) return 0;
@@ -511,7 +506,7 @@ REDIS_STATIC int _quicklistNodeAllowInsert(const quicklistNode *node, const int
     return 1;
 }
 
-REDIS_STATIC int _quicklistNodeAllowMerge(const quicklistNode *a, const quicklistNode *b, const int fill) {
+static int _quicklistNodeAllowMerge(const quicklistNode *a, const quicklistNode *b, const int fill) {
     if (!a || !b) return 0;
 
     if (unlikely(QL_NODE_IS_PLAIN(a) || QL_NODE_IS_PLAIN(b))) return 0;
@@ -640,7 +635,7 @@ void quicklistAppendPlainNode(quicklist *quicklist, unsigned char *data, size_t
         }                                                                                                              \
     } while (0)
 
-REDIS_STATIC void __quicklistDelNode(quicklist *quicklist, quicklistNode *node) {
+static void __quicklistDelNode(quicklist *quicklist, quicklistNode *node) {
     /* Update the bookmark if any */
     quicklistBookmark *bm = _quicklistBookmarkFindByNode(quicklist, node);
     if (bm) {
@@ -680,7 +675,7 @@ REDIS_STATIC void __quicklistDelNode(quicklist *quicklist, quicklistNode *node)
  *
  * Returns 1 if the entire node was deleted, 0 if node still exists.
  * Also updates in/out param 'p' with the next offset in the listpack. */
-REDIS_STATIC int quicklistDelIndex(quicklist *quicklist, quicklistNode *node, unsigned char **p) {
+static int quicklistDelIndex(quicklist *quicklist, quicklistNode *node, unsigned char **p) {
     int gone = 0;
 
     if (unlikely(QL_NODE_IS_PLAIN(node))) {
@@ -824,7 +819,7 @@ int quicklistReplaceAtIndex(quicklist *quicklist, long index, void *data, size_t
  *
  * Returns the input node picked to merge against or NULL if
  * merging was not possible. */
-REDIS_STATIC quicklistNode *_quicklistListpackMerge(quicklist *quicklist, quicklistNode *a, quicklistNode *b) {
+static quicklistNode *_quicklistListpackMerge(quicklist *quicklist, quicklistNode *a, quicklistNode *b) {
     D("Requested merge (a,b) (%u, %u)", a->count, b->count);
 
     quicklistDecompressNode(a);
@@ -864,7 +859,7 @@ REDIS_STATIC quicklistNode *_quicklistListpackMerge(quicklist *quicklist, quickl
  *
  * Returns the new 'center' after merging.
  */
-REDIS_STATIC quicklistNode *_quicklistMergeNodes(quicklist *quicklist, quicklistNode *center) {
+static quicklistNode *_quicklistMergeNodes(quicklist *quicklist, quicklistNode *center) {
     int fill = quicklist->fill;
     quicklistNode *prev, *prev_prev, *next, *next_next, *target;
     prev = prev_prev = next = next_next = target = NULL;
@@ -926,7 +921,7 @@ REDIS_STATIC quicklistNode *_quicklistMergeNodes(quicklist *quicklist, quicklist
  * The input node keeps all elements not taken by the returned node.
  *
  * Returns newly created node or NULL if split not possible. */
-REDIS_STATIC quicklistNode *_quicklistSplitNode(quicklistNode *node, int offset, int after) {
+static quicklistNode *_quicklistSplitNode(quicklistNode *node, int offset, int after) {
     size_t zl_sz = node->sz;
 
     quicklistNode *new_node = quicklistCreateNode();
@@ -962,8 +957,7 @@ REDIS_STATIC quicklistNode *_quicklistSplitNode(quicklistNode *node, int offset,
  *
  * If after==1, the new value is inserted after 'entry', otherwise
  * the new value is inserted before 'entry'. */
-REDIS_STATIC void
-_quicklistInsert(quicklistIter *iter, quicklistEntry *entry, void *value, const size_t sz, int after) {
+static void _quicklistInsert(quicklistIter *iter, quicklistEntry *entry, void *value, const size_t sz, int after) {
     quicklist *quicklist = iter->quicklist;
     int full = 0, at_tail = 0, at_head = 0, avail_next = 0, avail_prev = 0;
     int fill = quicklist->fill;
@@ -1548,7 +1542,7 @@ int quicklistPopCustom(quicklist *quicklist,
 }
 
 /* Return a malloc'd copy of data passed in */
-REDIS_STATIC void *_quicklistSaver(unsigned char *data, size_t sz) {
+static void *_quicklistSaver(unsigned char *data, size_t sz) {
     unsigned char *vstr;
     if (data) {
         vstr = zmalloc(sz);

From 28c5a17edfff1c4607b16d3416bb1e021986d138 Mon Sep 17 00:00:00 2001
From: "zhaozhao.zz" <zhaozhao.zz@alibaba-inc.com>
Date: Thu, 27 Jun 2024 19:00:45 +0800
Subject: [PATCH 21/53] replica redirect read&write to primary in standalone
 mode (#325)

To implement #319

1. replica is able to redirect read and write commands to its primary
in standalone mode
    * reply with "-REDIRECT primary-ip:port"
2. add a subcommand `CLIENT CAPA redirect`, so a client can announce the
capability to handle redirection
    * if a client can handle redirection, the data access commands (read and
write) will be redirected
3. allow the `readonly` and `readwrite` commands in standalone mode; this may
be a breaking change
    * a client with redirect capability cannot process read commands on a
replica by default
    * using the READONLY command allows read commands on a replica

---------

Signed-off-by: zhaozhao.zz <zhaozhao.zz@alibaba-inc.com>
---
 src/cluster.c                          |  8 ------
 src/commands.def                       | 23 ++++++++++++++++
 src/commands/client-capa.json          | 29 +++++++++++++++++++++
 src/networking.c                       |  8 ++++++
 src/server.c                           |  6 +++++
 src/server.h                           |  4 +++
 tests/integration/replica-redirect.tcl | 36 ++++++++++++++++++++++++++
 7 files changed, 106 insertions(+), 8 deletions(-)
 create mode 100644 src/commands/client-capa.json
 create mode 100644 tests/integration/replica-redirect.tcl

diff --git a/src/cluster.c b/src/cluster.c
index 8aa6793ba8..d9da706c7b 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -1449,20 +1449,12 @@ void askingCommand(client *c) {
  * In this mode replica will not redirect clients as long as clients access
  * with read-only commands to keys that are served by the replica's primary. */
 void readonlyCommand(client *c) {
-    if (server.cluster_enabled == 0) {
-        addReplyError(c, "This instance has cluster support disabled");
-        return;
-    }
     c->flags |= CLIENT_READONLY;
     addReply(c, shared.ok);
 }
 
 /* The READWRITE command just clears the READONLY command state. */
 void readwriteCommand(client *c) {
-    if (server.cluster_enabled == 0) {
-        addReplyError(c, "This instance has cluster support disabled");
-        return;
-    }
     c->flags &= ~CLIENT_READONLY;
     addReply(c, shared.ok);
 }
diff --git a/src/commands.def b/src/commands.def
index e4484529a2..709eca91a2 100644
--- a/src/commands.def
+++ b/src/commands.def
@@ -1089,6 +1089,28 @@ struct COMMAND_ARG CLIENT_CACHING_Args[] = {
 {MAKE_ARG("mode",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLIENT_CACHING_mode_Subargs},
 };
 
+/********** CLIENT CAPA ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLIENT CAPA history */
+#define CLIENT_CAPA_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLIENT CAPA tips */
+#define CLIENT_CAPA_Tips NULL
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLIENT CAPA key specs */
+#define CLIENT_CAPA_Keyspecs NULL
+#endif
+
+/* CLIENT CAPA argument table */
+struct COMMAND_ARG CLIENT_CAPA_Args[] = {
+{MAKE_ARG("capability",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)},
+};
+
 /********** CLIENT GETNAME ********************/
 
 #ifndef SKIP_CMD_HISTORY_TABLE
@@ -1552,6 +1574,7 @@ struct COMMAND_ARG CLIENT_UNBLOCK_Args[] = {
 /* CLIENT command table */
 struct COMMAND_STRUCT CLIENT_Subcommands[] = {
 {MAKE_CMD("caching","Instructs the server whether to track the keys in the next request.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_CACHING_History,0,CLIENT_CACHING_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_CACHING_Keyspecs,0,NULL,1),.args=CLIENT_CACHING_Args},
+{MAKE_CMD("capa","A client claims its capability.","O(1)","8.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_CAPA_History,0,CLIENT_CAPA_Tips,0,clientCommand,-3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,CLIENT_CAPA_Keyspecs,0,NULL,1),.args=CLIENT_CAPA_Args},
 {MAKE_CMD("getname","Returns the name of the connection.","O(1)","2.6.9",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_GETNAME_History,0,CLIENT_GETNAME_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_GETNAME_Keyspecs,0,NULL,0)},
 {MAKE_CMD("getredir","Returns the client ID to which the connection's tracking notifications are redirected.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_GETREDIR_History,0,CLIENT_GETREDIR_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_GETREDIR_Keyspecs,0,NULL,0)},
 {MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_HELP_History,0,CLIENT_HELP_Tips,0,clientCommand,2,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_HELP_Keyspecs,0,NULL,0)},
diff --git a/src/commands/client-capa.json b/src/commands/client-capa.json
new file mode 100644
index 0000000000..3c16cd44f9
--- /dev/null
+++ b/src/commands/client-capa.json
@@ -0,0 +1,29 @@
+{
+    "CAPA": {
+        "summary": "A client claims its capability.",
+        "complexity": "O(1)",
+        "group": "connection",
+        "since": "8.0.0",
+        "arity": -3,
+        "container": "CLIENT",
+        "function": "clientCommand",
+        "command_flags": [
+            "NOSCRIPT",
+            "LOADING",
+            "STALE"
+        ],
+        "acl_categories": [
+            "CONNECTION"
+        ],
+        "reply_schema": {
+            "const": "OK"
+        },
+        "arguments": [
+            {
+                "multiple": "true",
+                "name": "capability",
+                "type": "string"
+            }
+        ]
+    }
+}
diff --git a/src/networking.c b/src/networking.c
index dff4226c54..ba40db6c61 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -168,6 +168,7 @@ client *createClient(connection *conn) {
     c->bulklen = -1;
     c->sentlen = 0;
     c->flags = 0;
+    c->capa = 0;
     c->slot = -1;
     c->ctime = c->last_interaction = server.unixtime;
     c->duration = 0;
@@ -3589,6 +3590,13 @@ NULL
         } else {
             addReplyErrorObject(c, shared.syntaxerr);
         }
+    } else if (!strcasecmp(c->argv[1]->ptr, "capa") && c->argc >= 3) {
+        for (int i = 2; i < c->argc; i++) {
+            if (!strcasecmp(c->argv[i]->ptr, "redirect")) {
+                c->capa |= CLIENT_CAPA_REDIRECT;
+            }
+        }
+        addReply(c, shared.ok);
     } else {
         addReplySubcommandSyntaxError(c);
     }
diff --git a/src/server.c b/src/server.c
index fe522b3e5d..a8d44a080c 100644
--- a/src/server.c
+++ b/src/server.c
@@ -3867,6 +3867,12 @@ int processCommand(client *c) {
         }
     }
 
+    if (!server.cluster_enabled && c->capa & CLIENT_CAPA_REDIRECT && server.primary_host && !mustObeyClient(c) &&
+        (is_write_command || (is_read_command && !(c->flags & CLIENT_READONLY)))) {
+        addReplyErrorSds(c, sdscatprintf(sdsempty(), "-REDIRECT %s:%d", server.primary_host, server.primary_port));
+        return C_OK;
+    }
+
     /* Disconnect some clients if total clients memory is too high. We do this
      * before key eviction, after the last command was executed and consumed
      * some client output buffer memory. */
diff --git a/src/server.h b/src/server.h
index a12f091ba9..bb432c8968 100644
--- a/src/server.h
+++ b/src/server.h
@@ -429,6 +429,9 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
 #define CLIENT_REPLICATION_DONE (1ULL << 51)         /* Indicate that replication has been done on the client */
 #define CLIENT_AUTHENTICATED (1ULL << 52)            /* Indicate a client has successfully authenticated */
 
+/* Client capabilities */
+#define CLIENT_CAPA_REDIRECT (1 << 0) /* Indicate that the client can handle redirection */
+
 /* Client block type (btype field in client structure)
  * if CLIENT_BLOCKED flag is set. */
 typedef enum blocking_type {
@@ -1205,6 +1208,7 @@ typedef struct client {
     uint64_t flags; /* Client flags: CLIENT_* macros. */
     connection *conn;
     int resp;                            /* RESP protocol version. Can be 2 or 3. */
+    uint32_t capa;                       /* Client capabilities: CLIENT_CAPA* macros. */
     serverDb *db;                        /* Pointer to currently SELECTed DB. */
     robj *name;                          /* As set by CLIENT SETNAME. */
     robj *lib_name;                      /* The client library name as set by CLIENT SETINFO. */
diff --git a/tests/integration/replica-redirect.tcl b/tests/integration/replica-redirect.tcl
new file mode 100644
index 0000000000..0db51dd3ff
--- /dev/null
+++ b/tests/integration/replica-redirect.tcl
@@ -0,0 +1,36 @@
+start_server {tags {needs:repl external:skip}} {
+    start_server {} {
+        set primary_host [srv -1 host]
+        set primary_port [srv -1 port]
+
+        r replicaof $primary_host $primary_port
+        wait_for_condition 50 100 {
+            [s 0 master_link_status] eq {up}
+        } else {
+            fail "Replicas not replicating from primary"
+        }
+
+        test {replica allow read command by default} {
+            r get foo
+        } {}
+
+        test {replica reply READONLY error for write command by default} {
+            assert_error {READONLY*} {r set foo bar}
+        }
+
+        test {replica redirect read and write command after CLIENT CAPA REDIRECT} {
+            r client capa redirect
+            assert_error "REDIRECT $primary_host:$primary_port" {r set foo bar}
+            assert_error "REDIRECT $primary_host:$primary_port" {r get foo}
+        }
+
+        test {non-data access commands are not redirected} {
+            r ping
+        } {PONG}
+
+        test {replica allow read command in READONLY mode} {
+            r readonly
+            r get foo
+        } {}
+    }
+}

From 2b0723957e264f042951145a8c64eba65eb5a037 Mon Sep 17 00:00:00 2001
From: Binbin <binloveplay1314@qq.com>
Date: Fri, 28 Jun 2024 03:27:09 +0800
Subject: [PATCH 22/53] Enable protected-configs, debug and module commands in
 create-cluster script (#701)

The create-cluster script in utils is mainly used to create a test
cluster, so turning on these options is useful for testing purposes.

Signed-off-by: Binbin <binloveplay1314@qq.com>
---
 utils/create-cluster/create-cluster | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster
index f4f44871fc..cfad7fbf9d 100755
--- a/utils/create-cluster/create-cluster
+++ b/utils/create-cluster/create-cluster
@@ -28,7 +28,7 @@ then
     while [ $((PORT < ENDPORT)) != "0" ]; do
         PORT=$((PORT+1))
         echo "Starting $PORT"
-        $BIN_PATH/valkey-server --port $PORT --protected-mode $PROTECTED_MODE --cluster-enabled yes --cluster-config-file nodes-${PORT}.conf --cluster-node-timeout $TIMEOUT --appendonly yes --appendfilename appendonly-${PORT}.aof --appenddirname appendonlydir-${PORT} --dbfilename dump-${PORT}.rdb --logfile ${PORT}.log --daemonize yes ${ADDITIONAL_OPTIONS}
+        $BIN_PATH/valkey-server --port $PORT --protected-mode $PROTECTED_MODE --cluster-enabled yes --cluster-config-file nodes-${PORT}.conf --cluster-node-timeout $TIMEOUT --appendonly yes --appendfilename appendonly-${PORT}.aof --appenddirname appendonlydir-${PORT} --dbfilename dump-${PORT}.rdb --logfile ${PORT}.log --daemonize yes --enable-protected-configs yes --enable-debug-command yes --enable-module-command yes ${ADDITIONAL_OPTIONS}
     done
     exit 0
 fi
@@ -70,7 +70,7 @@ then
     while [ $((PORT < ENDPORT)) != "0" ]; do
         PORT=$((PORT+1))
         echo "Starting $PORT"
-        $BIN_PATH/valkey-server --port $PORT --protected-mode $PROTECTED_MODE --cluster-enabled yes --cluster-config-file nodes-${PORT}.conf --cluster-node-timeout $TIMEOUT --appendonly yes --appendfilename appendonly-${PORT}.aof --appenddirname appendonlydir-${PORT} --dbfilename dump-${PORT}.rdb --logfile ${PORT}.log --daemonize yes ${ADDITIONAL_OPTIONS}
+        $BIN_PATH/valkey-server --port $PORT --protected-mode $PROTECTED_MODE --cluster-enabled yes --cluster-config-file nodes-${PORT}.conf --cluster-node-timeout $TIMEOUT --appendonly yes --appendfilename appendonly-${PORT}.aof --appenddirname appendonlydir-${PORT} --dbfilename dump-${PORT}.rdb --logfile ${PORT}.log --daemonize yes --enable-protected-configs yes --enable-debug-command yes --enable-module-command yes ${ADDITIONAL_OPTIONS}
     done
     exit 0
 fi

From ad5704f80368dcd994c03e57a455c95dc48396b2 Mon Sep 17 00:00:00 2001
From: John Sully <john@csquare.ca>
Date: Thu, 27 Jun 2024 15:30:26 -0400
Subject: [PATCH 23/53] Upstream the availability zone info string from KeyDB
 (#700)

When Redis/Valkey/KeyDB is run in a cloud environment across multiple
AZs it is preferable to keep traffic local to an AZ both for cost
reasons and for latency. This is typically done when you are enabling
reads on replicas with the READONLY command.

For this change we are creating a setting that is echoed back in the
info command. We do not want to add the cloud SDKs as dependencies and
this is the easiest way around that. It is fairly trivial to grab the AZ
from the cloud and push that into your settings file.

Currently at Snapchat we have a custom client that after connecting
reads this from the server and will preferentially use that server if
the AZ string matches its internally configured AZ.

In the future it would be ideal if we used this information when
performing failover or even exposed it in cluster nodes.

Signed-off-by: John Sully <john@csquare.ca>
---
 src/config.c | 1 +
 src/server.c | 3 ++-
 src/server.h | 1 +
 valkey.conf  | 6 ++++++
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/config.c b/src/config.c
index 2a692ac8fa..1088613255 100644
--- a/src/config.c
+++ b/src/config.c
@@ -3100,6 +3100,7 @@ standardConfig static_configs[] = {
     /* SDS Configs */
     createSDSConfig("primaryauth", "masterauth", MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.primary_auth, NULL, NULL, NULL),
     createSDSConfig("requirepass", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.requirepass, NULL, NULL, updateRequirePass),
+    createSDSConfig("availability-zone", NULL, MODIFIABLE_CONFIG, 0, server.availability_zone, "", NULL, NULL),
 
     /* Enum Configs */
     createEnumConfig("supervised", NULL, IMMUTABLE_CONFIG, supervised_mode_enum, server.supervised_mode, SUPERVISED_NONE, NULL, NULL),
diff --git a/src/server.c b/src/server.c
index a8d44a080c..ee1bcd088f 100644
--- a/src/server.c
+++ b/src/server.c
@@ -5387,7 +5387,8 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
             "lru_clock:%u\r\n", server.lruclock,
             "executable:%s\r\n", server.executable ? server.executable : "",
             "config_file:%s\r\n", server.configfile ? server.configfile : "",
-            "io_threads_active:%i\r\n", server.io_threads_active));
+            "io_threads_active:%i\r\n", server.io_threads_active,
+            "availability_zone:%s\r\n", server.availability_zone));
         /* clang-format on */
 
         /* Conditional properties */
diff --git a/src/server.h b/src/server.h
index bb432c8968..90efc6aa9c 100644
--- a/src/server.h
+++ b/src/server.h
@@ -2125,6 +2125,7 @@ struct valkeyServer {
                                                 is down, doesn't affect pubsub global. */
     long reply_buffer_peak_reset_time; /* The amount of time (in milliseconds) to wait between reply buffer peak resets */
     int reply_buffer_resizing_enabled; /* Is reply buffer resizing enabled (1 by default) */
+    sds availability_zone; /* When run in a cloud environment we can configure the availability zone it is running in */
     /* Local environment */
     char *locale_collate;
 };
diff --git a/valkey.conf b/valkey.conf
index 05301d1bee..e4ffd0f8ad 100644
--- a/valkey.conf
+++ b/valkey.conf
@@ -2319,3 +2319,9 @@ jemalloc-bg-thread yes
 # to suppress
 #
 # ignore-warnings ARM64-COW-BUG
+
+# Inform Valkey of the availability zone if running in a cloud environment.  Currently
+# this is only exposed via the info command for clients to use, but in the future we
+# may also use this when making decisions for replication.
+#
+# availability-zone "zone-name"
\ No newline at end of file

From 7719dbb84bbe6861ac60277915ab22d208159680 Mon Sep 17 00:00:00 2001
From: Wen Hui <wen.hui.ware@gmail.com>
Date: Thu, 27 Jun 2024 16:23:53 -0400
Subject: [PATCH 24/53] Update readonly and readwrite json (#704)

Update and align with the latest readonly.md and readwrite.md doc under
https://github.com/valkey-io/valkey-doc/tree/main/commands

Signed-off-by: hwware <wen.hui.ware@gmail.com>
---
 src/commands.def            | 4 ++--
 src/commands/readonly.json  | 2 +-
 src/commands/readwrite.json | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/commands.def b/src/commands.def
index 709eca91a2..ef84aa0ccb 100644
--- a/src/commands.def
+++ b/src/commands.def
@@ -10726,8 +10726,8 @@ struct COMMAND_STRUCT serverCommandTable[] = {
 /* cluster */
 {MAKE_CMD("asking","Signals that a cluster client is following an -ASK redirect.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,ASKING_History,0,ASKING_Tips,0,askingCommand,1,CMD_FAST,ACL_CATEGORY_CONNECTION,ASKING_Keyspecs,0,NULL,0)},
 {MAKE_CMD("cluster","A container for Cluster commands.","Depends on subcommand.","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_History,0,CLUSTER_Tips,0,NULL,-2,0,0,CLUSTER_Keyspecs,0,NULL,0),.subcommands=CLUSTER_Subcommands},
-{MAKE_CMD("readonly","Enables read-only queries for a connection to a Cluster replica node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,READONLY_History,0,READONLY_Tips,0,readonlyCommand,1,CMD_FAST|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,READONLY_Keyspecs,0,NULL,0)},
-{MAKE_CMD("readwrite","Enables read-write queries for a connection to a Reids Cluster replica node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,READWRITE_History,0,READWRITE_Tips,0,readwriteCommand,1,CMD_FAST|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,READWRITE_Keyspecs,0,NULL,0)},
+{MAKE_CMD("readonly","Enables read-only queries for a connection to a Valkey replica node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,READONLY_History,0,READONLY_Tips,0,readonlyCommand,1,CMD_FAST|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,READONLY_Keyspecs,0,NULL,0)},
+{MAKE_CMD("readwrite","Enables read-write queries for a connection to a Valkey replica node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,READWRITE_History,0,READWRITE_Tips,0,readwriteCommand,1,CMD_FAST|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,READWRITE_Keyspecs,0,NULL,0)},
 /* connection */
 {MAKE_CMD("auth","Authenticates the connection.","O(N) where N is the number of passwords defined for the user","1.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,AUTH_History,1,AUTH_Tips,0,authCommand,-2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_NO_AUTH|CMD_SENTINEL|CMD_ALLOW_BUSY,ACL_CATEGORY_CONNECTION,AUTH_Keyspecs,0,NULL,2),.args=AUTH_Args},
 {MAKE_CMD("client","A container for client connection commands.","Depends on subcommand.","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_History,0,CLIENT_Tips,0,NULL,-2,CMD_SENTINEL,0,CLIENT_Keyspecs,0,NULL,0),.subcommands=CLIENT_Subcommands},
diff --git a/src/commands/readonly.json b/src/commands/readonly.json
index 4478cfb797..8fe27c6d99 100644
--- a/src/commands/readonly.json
+++ b/src/commands/readonly.json
@@ -1,6 +1,6 @@
 {
     "READONLY": {
-        "summary": "Enables read-only queries for a connection to a Cluster replica node.",
+        "summary": "Enables read-only queries for a connection to a Valkey replica node.",
         "complexity": "O(1)",
         "group": "cluster",
         "since": "3.0.0",
diff --git a/src/commands/readwrite.json b/src/commands/readwrite.json
index 440dd596b9..dd3762ff8c 100644
--- a/src/commands/readwrite.json
+++ b/src/commands/readwrite.json
@@ -1,6 +1,6 @@
 {
     "READWRITE": {
-        "summary": "Enables read-write queries for a connection to a Reids Cluster replica node.",
+        "summary": "Enables read-write queries for a connection to a Valkey replica node.",
         "complexity": "O(1)",
         "group": "cluster",
         "since": "3.0.0",

From 1269532fbd2c9fce170651159476ab49f1c5dff9 Mon Sep 17 00:00:00 2001
From: "Kyle Kim (kimkyle@)" <105247741+kyle-yh-kim@users.noreply.github.com>
Date: Thu, 27 Jun 2024 19:58:27 -0400
Subject: [PATCH 25/53] Introduce CLUSTER SLOT-STATS command (#20). (#351)

The command provides detailed slot usage statistics upon invocation,
with initial support for key-count metric. cpu-usec (approved) and
memory-bytes (pending-approval) metrics will soon follow after the
merger of this PR.

---------

Signed-off-by: Kyle Kim <kimkyle@amazon.com>
Signed-off-by: Madelyn Olson <madelyneolson@gmail.com>
Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
---
 src/Makefile                         |   2 +-
 src/cluster.h                        |   2 +
 src/cluster_slot_stats.c             | 182 +++++++++++++++
 src/commands.def                     |  51 +++++
 src/commands/cluster-slot-stats.json | 102 +++++++++
 src/server.h                         |   1 +
 tests/unit/cluster/slot-stats.tcl    | 323 +++++++++++++++++++++++++++
 7 files changed, 662 insertions(+), 1 deletion(-)
 create mode 100644 src/cluster_slot_stats.c
 create mode 100644 src/commands/cluster-slot-stats.json
 create mode 100644 tests/unit/cluster/slot-stats.tcl

diff --git a/src/Makefile b/src/Makefile
index 2217597d1f..18e5527eff 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -401,7 +401,7 @@ endif
 ENGINE_NAME=valkey
 SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX)
 ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX)
-ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o
+ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o
 ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX)
 ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o
 ENGINE_BENCHMARK_NAME=$(ENGINE_NAME)-benchmark$(PROG_SUFFIX)
diff --git a/src/cluster.h b/src/cluster.h
index f163e7f688..ec22d1f7fc 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -107,6 +107,8 @@ int detectAndUpdateCachedNodeHealth(void);
 client *createCachedResponseClient(void);
 void deleteCachedResponseClient(client *recording_client);
 void clearCachedClusterSlotsResponse(void);
+unsigned int countKeysInSlot(unsigned int hashslot);
+int getSlotOrReply(client *c, robj *o);
 
 /* functions with shared implementations */
 int clusterNodeIsMyself(clusterNode *n);
diff --git a/src/cluster_slot_stats.c b/src/cluster_slot_stats.c
new file mode 100644
index 0000000000..515be588f7
--- /dev/null
+++ b/src/cluster_slot_stats.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright Valkey Contributors.
+ * All rights reserved.
+ * SPDX-License-Identifier: BSD 3-Clause
+ */
+
+#include "server.h"
+#include "cluster.h"
+
+#define UNASSIGNED_SLOT 0
+
+typedef enum {
+    KEY_COUNT,
+    INVALID,
+} slotStatTypes;
+
+/* -----------------------------------------------------------------------------
+ * CLUSTER SLOT-STATS command
+ * -------------------------------------------------------------------------- */
+
+/* Struct used to temporarily hold slot statistics for sorting. */
+typedef struct {
+    int slot;
+    uint64_t stat;
+} slotStatForSort;
+
+static int doesSlotBelongToMyShard(int slot) {
+    clusterNode *myself = getMyClusterNode();
+    clusterNode *primary = clusterNodeGetPrimary(myself);
+
+    return clusterNodeCoversSlot(primary, slot);
+}
+
+static int markSlotsAssignedToMyShard(unsigned char *assigned_slots, int start_slot, int end_slot) {
+    int assigned_slots_count = 0;
+    for (int slot = start_slot; slot <= end_slot; slot++) {
+        if (doesSlotBelongToMyShard(slot)) {
+            assigned_slots[slot]++;
+            assigned_slots_count++;
+        }
+    }
+    return assigned_slots_count;
+}
+
+static uint64_t getSlotStat(int slot, int stat_type) {
+    serverAssert(stat_type != INVALID);
+    uint64_t slot_stat = 0;
+    if (stat_type == KEY_COUNT) {
+        slot_stat = countKeysInSlot(slot);
+    }
+    return slot_stat;
+}
+
+static int slotStatForSortAscCmp(const void *a, const void *b) {
+    const slotStatForSort *entry_a = a; /* qsort() comparator: ascending by stat. */
+    const slotStatForSort *entry_b = b;
+    return (entry_a->stat > entry_b->stat) - (entry_a->stat < entry_b->stat); /* uint64_t diff would truncate. */
+}
+
+static int slotStatForSortDescCmp(const void *a, const void *b) {
+    const slotStatForSort *entry_a = a; /* qsort() comparator: descending by stat. */
+    const slotStatForSort *entry_b = b;
+    return (entry_b->stat > entry_a->stat) - (entry_b->stat < entry_a->stat); /* uint64_t diff would truncate. */
+}
+
+static void collectAndSortSlotStats(slotStatForSort slot_stats[], int order_by, int desc) {
+    int i = 0;
+
+    for (int slot = 0; slot < CLUSTER_SLOTS; slot++) {
+        if (doesSlotBelongToMyShard(slot)) {
+            slot_stats[i].slot = slot;
+            slot_stats[i].stat = getSlotStat(slot, order_by);
+            i++;
+        }
+    }
+    qsort(slot_stats, i, sizeof(slotStatForSort), (desc) ? slotStatForSortDescCmp : slotStatForSortAscCmp);
+}
+
+static void addReplySlotStat(client *c, int slot) {
+    addReplyArrayLen(c, 2); /* Array of size 2, where 0th index represents (int) slot,
+                             * and 1st index represents (map) usage statistics. */
+    addReplyLongLong(c, slot);
+    addReplyMapLen(c, 1); /* Nested map representing slot usage statistics. */
+    addReplyBulkCString(c, "key-count");
+    addReplyLongLong(c, countKeysInSlot(slot));
+}
+
+/* Adds reply for the SLOTSRANGE variant.
+ * Response is ordered in ascending slot number. */
+static void addReplySlotsRange(client *c, unsigned char *assigned_slots, int startslot, int endslot, int len) {
+    addReplyArrayLen(c, len); /* Top level RESP reply format is defined as an array, due to ordering invariance. */
+
+    for (int slot = startslot; slot <= endslot; slot++) {
+        if (assigned_slots[slot]) addReplySlotStat(c, slot);
+    }
+}
+
+static void addReplySortedSlotStats(client *c, slotStatForSort slot_stats[], long limit) {
+    int num_slots_assigned = getMyShardSlotCount();
+    int len = min(limit, num_slots_assigned);
+    addReplyArrayLen(c, len); /* Top level RESP reply format is defined as an array, due to ordering invariance. */
+
+    for (int i = 0; i < len; i++) {
+        addReplySlotStat(c, slot_stats[i].slot);
+    }
+}
+
+/* Adds reply for the ORDERBY variant: collects `order_by` for each slot of my shard, sorts (descending if `desc`),
+ * and replies with at most `limit` entries in sorted order. */
+static void addReplyOrderBy(client *c, int order_by, long limit, int desc) {
+    slotStatForSort slot_stats[CLUSTER_SLOTS]; /* NOTE(review): one entry per cluster slot, held on the stack. */
+    collectAndSortSlotStats(slot_stats, order_by, desc);
+    addReplySortedSlotStats(c, slot_stats, limit);
+}
+
+void clusterSlotStatsCommand(client *c) { /* Handler for CLUSTER SLOT-STATS (SLOTSRANGE and ORDERBY variants). */
+    if (server.cluster_enabled == 0) {
+        addReplyError(c, "This instance has cluster support disabled");
+        return;
+    }
+
+    /* Parse additional arguments. */
+    if (c->argc == 5 && !strcasecmp(c->argv[2]->ptr, "slotsrange")) {
+        /* CLUSTER SLOT-STATS SLOTSRANGE start-slot end-slot */
+        int startslot, endslot;
+        if ((startslot = getSlotOrReply(c, c->argv[3])) == C_ERR ||
+            (endslot = getSlotOrReply(c, c->argv[4])) == C_ERR) {
+            return;
+        }
+        if (startslot > endslot) {
+            addReplyErrorFormat(c, "Start slot number %d is greater than end slot number %d", startslot, endslot);
+            return;
+        }
+        /* Initialize slot assignment array. */
+        unsigned char assigned_slots[CLUSTER_SLOTS] = {UNASSIGNED_SLOT};
+        int assigned_slots_count = markSlotsAssignedToMyShard(assigned_slots, startslot, endslot);
+        addReplySlotsRange(c, assigned_slots, startslot, endslot, assigned_slots_count);
+
+    } else if (c->argc >= 4 && !strcasecmp(c->argv[2]->ptr, "orderby")) {
+        /* CLUSTER SLOT-STATS ORDERBY metric [LIMIT limit] [ASC | DESC] */
+        int desc = 1, order_by = INVALID;
+        if (!strcasecmp(c->argv[3]->ptr, "key-count")) {
+            order_by = KEY_COUNT;
+        } else {
+            addReplyError(c, "Unrecognized sort metric for ORDER BY. The supported metrics are: key-count.");
+            return;
+        }
+        int i = 4; /* Next argument index, following ORDERBY */
+        int limit_counter = 0, asc_desc_counter = 0;
+        long limit = CLUSTER_SLOTS; /* Default when LIMIT is omitted: no cap, every slot of the shard is returned. */
+        while (i < c->argc) {
+            int moreargs = c->argc > i + 1;
+            if (!strcasecmp(c->argv[i]->ptr, "limit") && moreargs) {
+                if (getRangeLongFromObjectOrReply(
+                        c, c->argv[i + 1], 1, CLUSTER_SLOTS, &limit,
+                        "Limit has to lie in between 1 and 16384 (maximum number of slots).") != C_OK) {
+                    return;
+                }
+                i++; /* Skip the LIMIT value; the shared i++ below skips the token itself. */
+                limit_counter++;
+            } else if (!strcasecmp(c->argv[i]->ptr, "asc")) {
+                desc = 0;
+                asc_desc_counter++;
+            } else if (!strcasecmp(c->argv[i]->ptr, "desc")) {
+                desc = 1;
+                asc_desc_counter++;
+            } else {
+                addReplyErrorObject(c, shared.syntaxerr);
+                return;
+            }
+            if (limit_counter > 1 || asc_desc_counter > 1) {
+                addReplyError(c, "Multiple filters of the same type are disallowed.");
+                return;
+            }
+            i++;
+        }
+        addReplyOrderBy(c, order_by, limit, desc);
+
+    } else {
+        addReplySubcommandSyntaxError(c);
+    }
+}
diff --git a/src/commands.def b/src/commands.def
index ef84aa0ccb..99f4872f0e 100644
--- a/src/commands.def
+++ b/src/commands.def
@@ -930,6 +930,56 @@ struct COMMAND_ARG CLUSTER_SLAVES_Args[] = {
 {MAKE_ARG("node-id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
 };
 
+/********** CLUSTER SLOT_STATS ********************/
+
+#ifndef SKIP_CMD_HISTORY_TABLE
+/* CLUSTER SLOT_STATS history */
+#define CLUSTER_SLOT_STATS_History NULL
+#endif
+
+#ifndef SKIP_CMD_TIPS_TABLE
+/* CLUSTER SLOT_STATS tips */
+const char *CLUSTER_SLOT_STATS_Tips[] = {
+"nondeterministic_output",
+"request_policy:all_shards",
+};
+#endif
+
+#ifndef SKIP_CMD_KEY_SPECS_TABLE
+/* CLUSTER SLOT_STATS key specs */
+#define CLUSTER_SLOT_STATS_Keyspecs NULL
+#endif
+
+/* CLUSTER SLOT_STATS filter slotsrange argument table */
+struct COMMAND_ARG CLUSTER_SLOT_STATS_filter_slotsrange_Subargs[] = {
+{MAKE_ARG("start-slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("end-slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLUSTER SLOT_STATS filter orderby order argument table */
+struct COMMAND_ARG CLUSTER_SLOT_STATS_filter_orderby_order_Subargs[] = {
+{MAKE_ARG("asc",ARG_TYPE_PURE_TOKEN,-1,"ASC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("desc",ARG_TYPE_PURE_TOKEN,-1,"DESC",NULL,NULL,CMD_ARG_NONE,0,NULL)},
+};
+
+/* CLUSTER SLOT_STATS filter orderby argument table */
+struct COMMAND_ARG CLUSTER_SLOT_STATS_filter_orderby_Subargs[] = {
+{MAKE_ARG("metric",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("limit",ARG_TYPE_INTEGER,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("order",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=CLUSTER_SLOT_STATS_filter_orderby_order_Subargs},
+};
+
+/* CLUSTER SLOT_STATS filter argument table */
+struct COMMAND_ARG CLUSTER_SLOT_STATS_filter_Subargs[] = {
+{MAKE_ARG("slotsrange",ARG_TYPE_BLOCK,-1,"SLOTSRANGE",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLUSTER_SLOT_STATS_filter_slotsrange_Subargs},
+{MAKE_ARG("orderby",ARG_TYPE_BLOCK,-1,"ORDERBY",NULL,NULL,CMD_ARG_NONE,3,NULL),.subargs=CLUSTER_SLOT_STATS_filter_orderby_Subargs},
+};
+
+/* CLUSTER SLOT_STATS argument table */
+struct COMMAND_ARG CLUSTER_SLOT_STATS_Args[] = {
+{MAKE_ARG("filter",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLUSTER_SLOT_STATS_filter_Subargs},
+};
+
 /********** CLUSTER SLOTS ********************/
 
 #ifndef SKIP_CMD_HISTORY_TABLE
@@ -981,6 +1031,7 @@ struct COMMAND_STRUCT CLUSTER_Subcommands[] = {
 {MAKE_CMD("setslot","Binds a hash slot to a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SETSLOT_History,1,CLUSTER_SETSLOT_Tips,0,clusterCommand,-4,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE|CMD_MAY_REPLICATE,0,CLUSTER_SETSLOT_Keyspecs,0,NULL,3),.args=CLUSTER_SETSLOT_Args},
 {MAKE_CMD("shards","Returns the mapping of cluster slots to shards.","O(N) where N is the total number of cluster nodes","7.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SHARDS_History,0,CLUSTER_SHARDS_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_SHARDS_Keyspecs,0,NULL,0)},
 {MAKE_CMD("slaves","Lists the replica nodes of a primary node.","O(N) where N is the number of replicas.","3.0.0",CMD_DOC_DEPRECATED,"`CLUSTER REPLICAS`","5.0.0","cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLAVES_History,0,CLUSTER_SLAVES_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_SLAVES_Keyspecs,0,NULL,1),.args=CLUSTER_SLAVES_Args},
+{MAKE_CMD("slot-stats","Return an array of slot usage statistics for slots assigned to the current node.","O(N) where N is the total number of slots based on arguments. O(N*log(N)) with ORDERBY subcommand.","8.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLOT_STATS_History,0,CLUSTER_SLOT_STATS_Tips,2,clusterSlotStatsCommand,-4,CMD_STALE|CMD_LOADING,0,CLUSTER_SLOT_STATS_Keyspecs,0,NULL,1),.args=CLUSTER_SLOT_STATS_Args},
 {MAKE_CMD("slots","Returns the mapping of cluster slots to nodes.","O(N) where N is the total number of Cluster nodes","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLOTS_History,2,CLUSTER_SLOTS_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_SLOTS_Keyspecs,0,NULL,0)},
 {0}
 };
diff --git a/src/commands/cluster-slot-stats.json b/src/commands/cluster-slot-stats.json
new file mode 100644
index 0000000000..7dfcd415ec
--- /dev/null
+++ b/src/commands/cluster-slot-stats.json
@@ -0,0 +1,102 @@
+{
+    "SLOT-STATS": {
+        "summary": "Return an array of slot usage statistics for slots assigned to the current node.",
+        "complexity": "O(N) where N is the total number of slots based on arguments. O(N*log(N)) with ORDERBY subcommand.",
+        "group": "cluster",
+        "since": "8.0.0",
+        "arity": -4,
+        "container": "CLUSTER",
+        "function": "clusterSlotStatsCommand",
+        "command_flags": [
+            "STALE",
+            "LOADING"
+        ],
+        "command_tips": [
+            "NONDETERMINISTIC_OUTPUT",
+            "REQUEST_POLICY:ALL_SHARDS"
+        ],
+        "reply_schema": {
+            "type": "array",
+            "description": "Array of nested arrays, where the inner array element represents a slot and its respective usage statistics.",
+            "items": {
+                "type": "array",
+                "description": "Array of size 2, where 0th index represents (int) slot and 1st index represents (map) usage statistics.",
+                "minItems": 2,
+                "maxItems": 2,
+                "items": [
+                    {
+                        "description": "Slot Number.",
+                        "type": "integer"
+                    },
+                    {
+                        "type": "object",
+                        "description": "Map of slot usage statistics.",
+                        "additionalProperties": false,
+                        "properties": {
+                            "key-count": {
+                                "type": "integer"
+                            }
+                        }
+                    }
+                ]
+            }
+        },
+        "arguments": [
+            {
+                "name": "filter",
+                "type": "oneof",
+                "arguments": [
+                    {
+                        "token": "SLOTSRANGE",
+                        "name": "slotsrange",
+                        "type": "block",
+                        "arguments": [
+                            {
+                                "name": "start-slot",
+                                "type": "integer"
+                            },
+                            {
+                                "name": "end-slot",
+                                "type": "integer"
+                            }
+                        ]
+                    },
+                    {
+                        "token": "ORDERBY",
+                        "name": "orderby",
+                        "type": "block",
+                        "arguments": [
+                            {
+                                "name": "metric",
+                                "type": "string"
+                            },
+                            {
+                                "token": "LIMIT",
+                                "name": "limit",
+                                "type": "integer",
+                                "optional": true
+                            },
+                            {
+                                "name": "order",
+                                "type": "oneof",
+                                "optional": true,
+                                "arguments": [
+                                    {
+                                        "name": "asc",
+                                        "type": "pure-token",
+                                        "token": "ASC"
+                                    },
+                                    {
+                                        "name": "desc",
+                                        "type": "pure-token",
+                                        "token": "DESC"
+                                    }
+                                ]
+                            }
+                        ]
+                    }
+                ]
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/src/server.h b/src/server.h
index 90efc6aa9c..f6be39acca 100644
--- a/src/server.h
+++ b/src/server.h
@@ -3750,6 +3750,7 @@ void sunsubscribeCommand(client *c);
 void watchCommand(client *c);
 void unwatchCommand(client *c);
 void clusterCommand(client *c);
+void clusterSlotStatsCommand(client *c);
 void restoreCommand(client *c);
 void migrateCommand(client *c);
 void askingCommand(client *c);
diff --git a/tests/unit/cluster/slot-stats.tcl b/tests/unit/cluster/slot-stats.tcl
new file mode 100644
index 0000000000..c2923dc8bb
--- /dev/null
+++ b/tests/unit/cluster/slot-stats.tcl
@@ -0,0 +1,323 @@
+# Integration tests for CLUSTER SLOT-STATS command.
+
+# -----------------------------------------------------------------------------
+# Helper functions for CLUSTER SLOT-STATS test cases.
+# -----------------------------------------------------------------------------
+
+# Converts array RESP response into a dict.
+# This is useful for many test cases, where unnecessary nesting is removed.
+proc convert_array_into_dict {slot_stats} {
+    set res [dict create]
+    foreach slot_stat $slot_stats {
+        # slot_stat is an array of size 2, where 0th index represents (int) slot, 
+        # and 1st index represents (map) usage statistics.
+        dict set res [lindex $slot_stat 0] [lindex $slot_stat 1]
+    }
+    return $res
+}
+
+proc initialize_expected_slots_dict {} {
+    set expected_slots [dict create]
+    for {set i 0} {$i < 16384} {incr i 1} {
+        dict set expected_slots $i 0
+    }
+    return $expected_slots
+}
+
+proc initialize_expected_slots_dict_with_range {start_slot end_slot} {
+    assert {$start_slot <= $end_slot}
+    set expected_slots [dict create]
+    for {set i $start_slot} {$i <= $end_slot} {incr i 1} {
+        dict set expected_slots $i 0
+    }
+    return $expected_slots
+}
+
+proc assert_empty_slot_stats {slot_stats} {
+    set slot_stats [convert_array_into_dict $slot_stats]
+    dict for {slot stats} $slot_stats {
+        assert {[dict get $stats key-count] == 0}
+    }
+}
+
+proc assert_empty_slot_stats_with_exception {slot_stats exception_slots} {
+    set slot_stats [convert_array_into_dict $slot_stats]
+    dict for {slot stats} $slot_stats {
+        if {[dict exists $exception_slots $slot]} {
+            set expected_key_count [dict get $exception_slots $slot]
+            assert {[dict get $stats key-count] == $expected_key_count}
+        } else {
+            assert {[dict get $stats key-count] == 0}
+        }
+    }
+}
+
+proc assert_all_slots_have_been_seen {expected_slots} {
+    dict for {k v} $expected_slots {
+        assert {$v == 1}
+    }
+}
+
+proc assert_slot_visibility {slot_stats expected_slots} {
+    set slot_stats [convert_array_into_dict $slot_stats]
+    dict for {slot _} $slot_stats {
+        assert {[dict exists $expected_slots $slot]}
+        dict set expected_slots $slot 1
+    }
+
+    assert_all_slots_have_been_seen $expected_slots
+}
+
+proc assert_slot_stats_key_count {slot_stats expected_slots_key_count} {
+    set slot_stats [convert_array_into_dict $slot_stats]
+    dict for {slot stats} $slot_stats {
+        if {[dict exists $expected_slots_key_count $slot]} {
+            set key_count [dict get $stats key-count]
+            set key_count_expected [dict get $expected_slots_key_count $slot]
+            assert {$key_count == $key_count_expected}
+        }
+    }
+}
+
+proc assert_slot_stats_monotonic_order {slot_stats orderby is_desc} {
+    # For Tcl dict, the order of iteration is the order in which the keys were inserted into the dictionary
+    # Thus, the response ordering is preserved upon calling 'convert_array_into_dict()'.
+    # Source: https://www.tcl.tk/man/tcl8.6.11/TclCmd/dict.htm
+    set slot_stats [convert_array_into_dict $slot_stats]
+    set prev_metric -1
+    dict for {_ stats} $slot_stats {
+        set curr_metric [dict get $stats $orderby]
+        if {$prev_metric != -1} {
+            if {$is_desc == 1} {
+                assert {$prev_metric >= $curr_metric}
+            } else {
+                assert {$prev_metric <= $curr_metric}
+            }
+        }
+        set prev_metric $curr_metric
+    }
+}
+
+proc assert_slot_stats_monotonic_descent {slot_stats orderby} {
+    assert_slot_stats_monotonic_order $slot_stats $orderby 1
+}
+
+proc assert_slot_stats_monotonic_ascent {slot_stats orderby} {
+    assert_slot_stats_monotonic_order $slot_stats $orderby 0
+}
+
+proc wait_for_replica_key_exists {key key_count} {
+    wait_for_condition 1000 50 {
+        [R 1 exists $key] eq "$key_count"
+    } else {
+        fail "Test key was not replicated"
+    }
+}
+
+# -----------------------------------------------------------------------------
+# Test cases for CLUSTER SLOT-STATS correctness, without additional arguments.
+# -----------------------------------------------------------------------------
+
+start_cluster 1 0 {tags {external:skip cluster}} {
+
+    # Define shared variables.
+    set key "FOO"
+    set key_slot [R 0 cluster keyslot $key]
+    set expected_slots_to_key_count [dict create $key_slot 1]
+
+    test "CLUSTER SLOT-STATS contains default value upon valkey-server startup" {
+        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
+        assert_empty_slot_stats $slot_stats
+    }
+
+    test "CLUSTER SLOT-STATS contains correct metrics upon key introduction" {
+        R 0 SET $key TEST
+        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
+        assert_empty_slot_stats_with_exception $slot_stats $expected_slots_to_key_count
+    }
+
+    test "CLUSTER SLOT-STATS contains correct metrics upon key mutation" {
+        R 0 SET $key NEW_VALUE
+        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
+        assert_empty_slot_stats_with_exception $slot_stats $expected_slots_to_key_count
+    }
+
+    test "CLUSTER SLOT-STATS contains correct metrics upon key deletion" {
+        R 0 DEL $key
+        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
+        assert_empty_slot_stats $slot_stats
+    }
+
+    test "CLUSTER SLOT-STATS slot visibility based on slot ownership changes" {
+        R 0 CONFIG SET cluster-require-full-coverage no
+        
+        R 0 CLUSTER DELSLOTS $key_slot
+        set expected_slots [initialize_expected_slots_dict]
+        dict unset expected_slots $key_slot
+        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
+        assert {[dict size $expected_slots] == 16383}
+        assert_slot_visibility $slot_stats $expected_slots
+
+        R 0 CLUSTER ADDSLOTS $key_slot
+        set expected_slots [initialize_expected_slots_dict]
+        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
+        assert {[dict size $expected_slots] == 16384}
+        assert_slot_visibility $slot_stats $expected_slots
+    }
+}
+
+# -----------------------------------------------------------------------------
+# Test cases for CLUSTER SLOT-STATS SLOTSRANGE sub-argument.
+# -----------------------------------------------------------------------------
+
+start_cluster 1 0 {tags {external:skip cluster}} {
+
+    test "CLUSTER SLOT-STATS SLOTSRANGE all slots present" {
+        set start_slot 100
+        set end_slot 102
+        set expected_slots [initialize_expected_slots_dict_with_range $start_slot $end_slot]
+
+        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE $start_slot $end_slot]
+        assert_slot_visibility $slot_stats $expected_slots
+    }
+
+    test "CLUSTER SLOT-STATS SLOTSRANGE some slots missing" {
+        set start_slot 100
+        set end_slot 102
+        set expected_slots [initialize_expected_slots_dict_with_range $start_slot $end_slot]
+
+        R 0 CLUSTER DELSLOTS $start_slot
+        dict unset expected_slots $start_slot
+
+        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE $start_slot $end_slot]
+        assert_slot_visibility $slot_stats $expected_slots
+    }
+}
+
+# -----------------------------------------------------------------------------
+# Test cases for CLUSTER SLOT-STATS ORDERBY sub-argument.
+# -----------------------------------------------------------------------------
+
+start_cluster 1 0 {tags {external:skip cluster}} {
+    
+    # SET keys for target hashslots, to encourage ordering.
+    set hash_tags [list 0 1 2 3 4]
+    set num_keys 1
+    foreach hash_tag $hash_tags {
+        for {set i 0} {$i < $num_keys} {incr i 1} {
+            R 0 SET "$i{$hash_tag}" VALUE
+        }
+        incr num_keys 1
+    }
+
+    # SET keys for random hashslots, for random noise.
+    set num_keys 0
+    while {$num_keys < 1000} {
+        set random_key [randomInt 16384]
+        R 0 SET $random_key VALUE
+        incr num_keys 1
+    }
+
+    test "CLUSTER SLOT-STATS ORDERBY DESC correct ordering" {
+        set orderby "key-count"
+        set slot_stats [R 0 CLUSTER SLOT-STATS ORDERBY $orderby DESC]
+        assert_slot_stats_monotonic_descent $slot_stats $orderby
+    }
+
+    test "CLUSTER SLOT-STATS ORDERBY ASC correct ordering" {
+        set orderby "key-count"
+        set slot_stats [R 0 CLUSTER SLOT-STATS ORDERBY $orderby ASC]
+        assert_slot_stats_monotonic_ascent $slot_stats $orderby
+    }
+
+    test "CLUSTER SLOT-STATS ORDERBY LIMIT correct response pagination, where limit is less than number of assigned slots" {
+        R 0 FLUSHALL SYNC
+
+        set limit 5
+        set slot_stats_desc [R 0 CLUSTER SLOT-STATS ORDERBY key-count LIMIT $limit DESC]
+        set slot_stats_asc [R 0 CLUSTER SLOT-STATS ORDERBY key-count LIMIT $limit ASC]
+        set slot_stats_desc_length [llength $slot_stats_desc]
+        set slot_stats_asc_length [llength $slot_stats_asc]
+        assert {$limit == $slot_stats_desc_length && $limit == $slot_stats_asc_length}
+        
+        set expected_slots [dict create 0 0 1 0 2 0 3 0 4 0]
+        assert_slot_visibility $slot_stats_desc $expected_slots
+        assert_slot_visibility $slot_stats_asc $expected_slots
+    }
+
+    test "CLUSTER SLOT-STATS ORDERBY LIMIT correct response pagination, where limit is greater than number of assigned slots" {
+        R 0 CONFIG SET cluster-require-full-coverage no
+        R 0 FLUSHALL SYNC
+        R 0 CLUSTER FLUSHSLOTS
+        R 0 CLUSTER ADDSLOTS 100 101
+
+        set num_assigned_slots 2
+        set limit 5
+        set slot_stats_desc [R 0 CLUSTER SLOT-STATS ORDERBY key-count LIMIT $limit DESC]
+        set slot_stats_asc [R 0 CLUSTER SLOT-STATS ORDERBY key-count LIMIT $limit ASC]
+        set slot_stats_desc_length [llength $slot_stats_desc]
+        set slot_stats_asc_length [llength $slot_stats_asc]
+        set expected_response_length [expr min($num_assigned_slots, $limit)]
+        assert {$expected_response_length == $slot_stats_desc_length && $expected_response_length == $slot_stats_asc_length}
+
+        set expected_slots [dict create 100 0 101 0]
+        assert_slot_visibility $slot_stats_desc $expected_slots
+        assert_slot_visibility $slot_stats_asc $expected_slots
+    }
+}
+
+# -----------------------------------------------------------------------------
+# Test cases for CLUSTER SLOT-STATS replication.
+# -----------------------------------------------------------------------------
+
+start_cluster 1 1 {tags {external:skip cluster}} {
+    
+    # Define shared variables.
+    set key "FOO"
+    set key_slot [R 0 CLUSTER KEYSLOT $key]
+
+    # Setup replication.
+    assert {[s -1 role] eq {slave}}
+    wait_for_condition 1000 50 {
+        [s -1 master_link_status] eq {up}
+    } else {
+        fail "Instance #1 master link status is not up"
+    }
+    R 1 readonly
+
+    test "CLUSTER SLOT-STATS key-count replication for new keys" {
+        R 0 SET $key VALUE
+        set slot_stats_master [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
+
+        set expected_slots_key_count [dict create $key_slot 1]
+        assert_slot_stats_key_count $slot_stats_master $expected_slots_key_count
+        wait_for_replica_key_exists $key 1
+
+        set slot_stats_replica [R 1 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
+        assert {$slot_stats_master eq $slot_stats_replica}
+    }
+
+    test "CLUSTER SLOT-STATS key-count replication for existing keys" {
+        R 0 SET $key VALUE_UPDATED
+        set slot_stats_master [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
+
+        set expected_slots_key_count [dict create $key_slot 1]
+        assert_slot_stats_key_count $slot_stats_master $expected_slots_key_count
+        wait_for_replica_key_exists $key 1
+
+        set slot_stats_replica [R 1 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
+        assert {$slot_stats_master eq $slot_stats_replica}
+    }
+
+    test "CLUSTER SLOT-STATS key-count replication for deleting keys" {
+        R 0 DEL $key
+        set slot_stats_master [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
+
+        set expected_slots_key_count [dict create $key_slot 0]
+        assert_slot_stats_key_count $slot_stats_master $expected_slots_key_count
+        wait_for_replica_key_exists $key 0
+
+        set slot_stats_replica [R 1 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
+        assert {$slot_stats_master eq $slot_stats_replica}
+    }
+}
\ No newline at end of file

From 4fbe31ab87ba2a5fd1360328f5e75993986fa0d0 Mon Sep 17 00:00:00 2001
From: "zhaozhao.zz" <zhaozhao.zz@alibaba-inc.com>
Date: Fri, 28 Jun 2024 14:56:13 +0800
Subject: [PATCH 26/53] Fix the TLS and RESP issues about CLUSTER SLOTS cache
 (#581)

PR #53 introduced the cache of CLUSTER SLOTS response, but the cache has
some problems for different types of clients:

1. the RESP version is wrongly ignored:

    ```
    $./valkey-cli
    127.0.0.1:6379> cluster slots
    1) 1) (integer) 0
       2) (integer) 16383
       3) 1) ""
          2) (integer) 6379
          3) "f1aeceb352401ce57acd432c68c60b359c00ef85"
          4) (empty array)
    127.0.0.1:6379> hello 3
    1# "server" => "valkey"
    2# "version" => "255.255.255"
    3# "proto" => (integer) 3
    4# "id" => (integer) 3
    5# "mode" => "cluster"
    6# "role" => "master"
    7# "modules" => (empty array)
    127.0.0.1:6379> cluster slots
    1) 1) (integer) 0
       2) (integer) 16383
       3) 1) ""
          2) (integer) 6379
          3) "f1aeceb352401ce57acd432c68c60b359c00ef85"
          4) (empty array)
    ```

    RESP3 should get an "empty hash" but gets RESP2's "empty array".

2. we should use the original client's connection type, otherwise Lua
scripts/functions and modules would get the wrong port:

    ```
    $./valkey-cli --tls --insecure -p 6789
    127.0.0.1:6789> config get port tls-port
    1) "tls-port"
    2) "6789"
    3) "port"
    4) "6379"
    127.0.0.1:6789> cluster slots
    1) 1) (integer) 0
       2) (integer) 16383
       3) 1) ""
          2) (integer) 6789
          3) "f1aeceb352401ce57acd432c68c60b359c00ef85"
          4) (empty array)
    127.0.0.1:6789> eval "return redis.call('cluster','slots')" 0
    1) 1) (integer) 0
       2) (integer) 16383
       3) 1) ""
          2) (integer) 6379
          3) "f1aeceb352401ce57acd432c68c60b359c00ef85"
          4) (empty array)
    ```

---------

Signed-off-by: zhaozhao.zz <zhaozhao.zz@alibaba-inc.com>
---
 src/cluster.c        | 26 ++++++++++++++------------
 src/cluster.h        |  2 +-
 src/cluster_legacy.c |  4 +++-
 src/networking.c     |  3 ++-
 src/server.h         |  4 ++--
 5 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/src/cluster.c b/src/cluster.c
index d9da706c7b..c4949c08ee 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -1354,15 +1354,17 @@ void addNodeReplyForClusterSlot(client *c, clusterNode *node, int start_slot, in
 
 void clearCachedClusterSlotsResponse(void) {
     for (connTypeForCaching conn_type = CACHE_CONN_TCP; conn_type < CACHE_CONN_TYPE_MAX; conn_type++) {
-        if (server.cached_cluster_slot_info[conn_type]) {
-            sdsfree(server.cached_cluster_slot_info[conn_type]);
-            server.cached_cluster_slot_info[conn_type] = NULL;
+        for (int resp = 0; resp <= 3; resp++) {
+            if (server.cached_cluster_slot_info[conn_type][resp]) {
+                sdsfree(server.cached_cluster_slot_info[conn_type][resp]);
+                server.cached_cluster_slot_info[conn_type][resp] = NULL;
+            }
         }
     }
 }
 
-sds generateClusterSlotResponse(void) {
-    client *recording_client = createCachedResponseClient();
+sds generateClusterSlotResponse(int resp) {
+    client *recording_client = createCachedResponseClient(resp);
     clusterNode *n = NULL;
     int num_primaries = 0, start = -1;
     void *slot_replylen = addReplyDeferredLen(recording_client);
@@ -1392,8 +1394,8 @@ sds generateClusterSlotResponse(void) {
     return cluster_slot_response;
 }
 
-int verifyCachedClusterSlotsResponse(sds cached_response) {
-    sds generated_response = generateClusterSlotResponse();
+int verifyCachedClusterSlotsResponse(sds cached_response, int resp) {
+    sds generated_response = generateClusterSlotResponse(resp);
     int is_equal = !sdscmp(generated_response, cached_response);
     /* Here, we use LL_WARNING so this gets printed when debug assertions are enabled and the system is about to crash. */
     if (!is_equal)
@@ -1413,16 +1415,16 @@ void clusterCommandSlots(client *c) {
      *               3) node ID
      *           ... continued until done
      */
-    connTypeForCaching conn_type = connIsTLS(c->conn);
+    connTypeForCaching conn_type = shouldReturnTlsInfo();
 
     if (detectAndUpdateCachedNodeHealth()) clearCachedClusterSlotsResponse();
 
-    sds cached_reply = server.cached_cluster_slot_info[conn_type];
+    sds cached_reply = server.cached_cluster_slot_info[conn_type][c->resp];
     if (!cached_reply) {
-        cached_reply = generateClusterSlotResponse();
-        server.cached_cluster_slot_info[conn_type] = cached_reply;
+        cached_reply = generateClusterSlotResponse(c->resp);
+        server.cached_cluster_slot_info[conn_type][c->resp] = cached_reply;
     } else {
-        debugServerAssertWithInfo(c, NULL, verifyCachedClusterSlotsResponse(cached_reply) == 1);
+        debugServerAssertWithInfo(c, NULL, verifyCachedClusterSlotsResponse(cached_reply, c->resp) == 1);
     }
 
     addReplyProto(c, cached_reply, sdslen(cached_reply));
diff --git a/src/cluster.h b/src/cluster.h
index ec22d1f7fc..a83b4ac282 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -104,7 +104,7 @@ const char *clusterNodePreferredEndpoint(clusterNode *n);
 long long clusterNodeReplOffset(clusterNode *node);
 clusterNode *clusterLookupNode(const char *name, int length);
 int detectAndUpdateCachedNodeHealth(void);
-client *createCachedResponseClient(void);
+client *createCachedResponseClient(int resp);
 void deleteCachedResponseClient(client *recording_client);
 void clearCachedClusterSlotsResponse(void);
 unsigned int countKeysInSlot(unsigned int hashslot);
diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index b913cd5671..17e7d235d4 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -1033,7 +1033,9 @@ void clusterInit(void) {
     server.cluster->mf_end = 0;
     server.cluster->mf_replica = NULL;
     for (connTypeForCaching conn_type = CACHE_CONN_TCP; conn_type < CACHE_CONN_TYPE_MAX; conn_type++) {
-        server.cached_cluster_slot_info[conn_type] = NULL;
+        for (int resp = 0; resp <= 3; resp++) {
+            server.cached_cluster_slot_info[conn_type][resp] = NULL;
+        }
     }
     resetManualFailover();
     clusterUpdateMyselfFlags();
diff --git a/src/networking.c b/src/networking.c
index ba40db6c61..629e3aac7e 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -339,8 +339,9 @@ sds aggregateClientOutputBuffer(client *c) {
  * to initiate caching of any command response.
  *
  * It needs be paired with `deleteCachedResponseClient` function to stop caching. */
-client *createCachedResponseClient(void) {
+client *createCachedResponseClient(int resp) {
     struct client *recording_client = createClient(NULL);
+    recording_client->resp = resp;
     /* Allocating the `conn` allows to prepare the caching client before adding
      * data to the clients output buffer by `prepareClientToWrite`. */
     recording_client->conn = zcalloc(sizeof(connection));
diff --git a/src/server.h b/src/server.h
index f6be39acca..57ccd557e2 100644
--- a/src/server.h
+++ b/src/server.h
@@ -2075,7 +2075,7 @@ struct valkeyServer {
                                                             * dropping packets of a specific type */
     /* Debug config that goes along with cluster_drop_packet_filter. When set, the link is closed on packet drop. */
     uint32_t debug_cluster_close_link_on_packet_drop : 1;
-    sds cached_cluster_slot_info[CACHE_CONN_TYPE_MAX];
+    sds cached_cluster_slot_info[CACHE_CONN_TYPE_MAX][4]; /* Align to RESP3 */
     /* Scripting */
     mstime_t busy_reply_threshold;  /* Script / module timeout in milliseconds */
     int pre_command_oom_state;      /* OOM before command (script?) was started */
@@ -2738,7 +2738,7 @@ void initSharedQueryBuf(void);
 client *lookupClientByID(uint64_t id);
 int authRequired(client *c);
 void putClientInPendingWriteQueue(client *c);
-client *createCachedResponseClient(void);
+client *createCachedResponseClient(int resp);
 void deleteCachedResponseClient(client *recording_client);
 
 /* logreqres.c - logging of requests and responses */

From 7f7ef9a3fac516751ecdf20542753c321a12e0ba Mon Sep 17 00:00:00 2001
From: Binbin <binloveplay1314@qq.com>
Date: Fri, 28 Jun 2024 23:00:07 +0800
Subject: [PATCH 27/53] Update availability-zone to use the flag instead of the
 number 0 (#711)

Minor cleanup.

Signed-off-by: Binbin <binloveplay1314@qq.com>
---
 src/config.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/config.c b/src/config.c
index 1088613255..f8784413f9 100644
--- a/src/config.c
+++ b/src/config.c
@@ -3100,7 +3100,7 @@ standardConfig static_configs[] = {
     /* SDS Configs */
     createSDSConfig("primaryauth", "masterauth", MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.primary_auth, NULL, NULL, NULL),
     createSDSConfig("requirepass", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.requirepass, NULL, NULL, updateRequirePass),
-    createSDSConfig("availability-zone", NULL, MODIFIABLE_CONFIG, 0, server.availability_zone, "", NULL, NULL),
+    createSDSConfig("availability-zone", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.availability_zone, "", NULL, NULL),
 
     /* Enum Configs */
     createEnumConfig("supervised", NULL, IMMUTABLE_CONFIG, supervised_mode_enum, server.supervised_mode, SUPERVISED_NONE, NULL, NULL),

From 518f0bf79bae984e5b93e03b5b951369419d4ca3 Mon Sep 17 00:00:00 2001
From: Binbin <binloveplay1314@qq.com>
Date: Fri, 28 Jun 2024 23:02:52 +0800
Subject: [PATCH 28/53] Fix limit undefined behavior crash in CLUSTER
 SLOT-STATS (#709)

We did not set a default value for limit, but it is used later in
addReplyOrderBy. Reading the uninitialized value is undefined behavior
and may crash the server: the value could be negative, in which case
the crash happens in addReplyArrayLen.

An interesting reproducible example (limit reuses the value of -1):
```
> cluster slot-stats orderby key-count desc limit -1
(error) ERR Limit has to lie in between 1 and 16384 (maximum number of slots).
> cluster slot-stats orderby key-count desc
Error: Server closed the connection
```

Set the default value of limit to 16384.

---------

Signed-off-by: Binbin <binloveplay1314@qq.com>
---
 src/cluster_slot_stats.c          | 2 +-
 tests/unit/cluster/slot-stats.tcl | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/cluster_slot_stats.c b/src/cluster_slot_stats.c
index 515be588f7..597cfa009f 100644
--- a/src/cluster_slot_stats.c
+++ b/src/cluster_slot_stats.c
@@ -147,7 +147,7 @@ void clusterSlotStatsCommand(client *c) {
         }
         int i = 4; /* Next argument index, following ORDERBY */
         int limit_counter = 0, asc_desc_counter = 0;
-        long limit;
+        long limit = CLUSTER_SLOTS;
         while (i < c->argc) {
             int moreargs = c->argc > i + 1;
             if (!strcasecmp(c->argv[i]->ptr, "limit") && moreargs) {
diff --git a/tests/unit/cluster/slot-stats.tcl b/tests/unit/cluster/slot-stats.tcl
index c2923dc8bb..2ee950f7bb 100644
--- a/tests/unit/cluster/slot-stats.tcl
+++ b/tests/unit/cluster/slot-stats.tcl
@@ -199,7 +199,7 @@ start_cluster 1 0 {tags {external:skip cluster}} {
 # -----------------------------------------------------------------------------
 
 start_cluster 1 0 {tags {external:skip cluster}} {
-    
+
     # SET keys for target hashslots, to encourage ordering.
     set hash_tags [list 0 1 2 3 4]
     set num_keys 1
@@ -220,6 +220,7 @@ start_cluster 1 0 {tags {external:skip cluster}} {
 
     test "CLUSTER SLOT-STATS ORDERBY DESC correct ordering" {
         set orderby "key-count"
+        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY $orderby DESC LIMIT -1}
         set slot_stats [R 0 CLUSTER SLOT-STATS ORDERBY $orderby DESC]
         assert_slot_stats_monotonic_descent $slot_stats $orderby
     }

From 2979fe606057437b344b08b0758722c64dfe0fd0 Mon Sep 17 00:00:00 2001
From: Binbin <binloveplay1314@qq.com>
Date: Fri, 28 Jun 2024 23:03:03 +0800
Subject: [PATCH 29/53] CLUSTER SLOT-STATS ORDERBY when stats are the same,
 compare by slot in ascending order (#710)

The test failed in my local environment:
```
*** [err]: CLUSTER SLOT-STATS ORDERBY LIMIT correct response pagination, where limit is less than number of assigned slots in tests/unit/cluster/slot-stats.tcl
Expected [dict exists 0 0 1 0 2 0 3 0 4 0 16383] (context: type source line 64 file /xxx/tests/unit/cluster/slot-stats.tcl cmd {assert {[dict exists $expected_slots $slot]}} proc ::assert_slot_visibility level 1)
```

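It seems that when the stat is equal, that is, when the key-count is
equal, qsort does not guarantee the relative order of the entries, so
the result may differ from what the test expects. When the stat is
equal, we now compare by slot (in ascending order) to make the order
deterministic.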

Signed-off-by: Binbin <binloveplay1314@qq.com>
---
 src/cluster_slot_stats.c          | 8 ++++++++
 tests/unit/cluster/slot-stats.tcl | 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/cluster_slot_stats.c b/src/cluster_slot_stats.c
index 597cfa009f..a2a6bfdd01 100644
--- a/src/cluster_slot_stats.c
+++ b/src/cluster_slot_stats.c
@@ -51,15 +51,23 @@ static uint64_t getSlotStat(int slot, int stat_type) {
     return slot_stat;
 }
 
+/* Compare by stat in ascending order. If stat is the same, compare by slot in ascending order. */
 static int slotStatForSortAscCmp(const void *a, const void *b) {
     slotStatForSort entry_a = *((slotStatForSort *)a);
     slotStatForSort entry_b = *((slotStatForSort *)b);
+    if (entry_a.stat == entry_b.stat) {
+        return entry_a.slot - entry_b.slot;
+    }
     return entry_a.stat - entry_b.stat;
 }
 
+/* Compare by stat in descending order. If stat is the same, compare by slot in ascending order. */
 static int slotStatForSortDescCmp(const void *a, const void *b) {
     slotStatForSort entry_a = *((slotStatForSort *)a);
     slotStatForSort entry_b = *((slotStatForSort *)b);
+    if (entry_b.stat == entry_a.stat) {
+        return entry_a.slot - entry_b.slot;
+    }
     return entry_b.stat - entry_a.stat;
 }
 
diff --git a/tests/unit/cluster/slot-stats.tcl b/tests/unit/cluster/slot-stats.tcl
index 2ee950f7bb..76bf60fbff 100644
--- a/tests/unit/cluster/slot-stats.tcl
+++ b/tests/unit/cluster/slot-stats.tcl
@@ -240,7 +240,8 @@ start_cluster 1 0 {tags {external:skip cluster}} {
         set slot_stats_desc_length [llength $slot_stats_desc]
         set slot_stats_asc_length [llength $slot_stats_asc]
         assert {$limit == $slot_stats_desc_length && $limit == $slot_stats_asc_length}
-        
+
+        # The key count of all slots is 0, so we will order by slot in ascending order.
         set expected_slots [dict create 0 0 1 0 2 0 3 0 4 0]
         assert_slot_visibility $slot_stats_desc $expected_slots
         assert_slot_visibility $slot_stats_asc $expected_slots

From b59762f734897cc511d3c68a219bed5e99752123 Mon Sep 17 00:00:00 2001
From: "w. ian douglas" <ian.douglas@iandouglas.com>
Date: Fri, 28 Jun 2024 15:56:30 -0600
Subject: [PATCH 30/53] Very minor misspelling in some tests (#705)

Fix misspelling "faiover" instead of "failover" in two test cases.

Signed-off-by: w. ian douglas <ian.douglas@iandouglas.com>
---
 tests/unit/cluster/slot-migration.tcl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/unit/cluster/slot-migration.tcl b/tests/unit/cluster/slot-migration.tcl
index 3ff669db90..abc50ca035 100644
--- a/tests/unit/cluster/slot-migration.tcl
+++ b/tests/unit/cluster/slot-migration.tcl
@@ -250,7 +250,7 @@ start_cluster 3 5 {tags {external:skip cluster} overrides {cluster-allow-replica
         wait_for_slot_state 7 "\[609-<-$R0_id\]"
     }
 
-    test "Empty-shard migration target is auto-updated after faiover in target shard" {
+    test "Empty-shard migration target is auto-updated after failover in target shard" {
         wait_for_role 6 master
         # Trigger an auto-failover from R6 to R7
         fail_server 6
@@ -271,7 +271,7 @@ start_cluster 3 5 {tags {external:skip cluster} overrides {cluster-allow-replica
         wait_for_slot_state 7 "\[609-<-$R0_id\]"
     }
 
-    test "Empty-shard migration source is auto-updated after faiover in source shard" {
+    test "Empty-shard migration source is auto-updated after failover in source shard" {
         wait_for_role 0 master
         # Trigger an auto-failover from R0 to R3
         fail_server 0

From 7415a576a8c49fdb946ac45aa66f4a462872d277 Mon Sep 17 00:00:00 2001
From: Wen Hui <wen.hui.ware@gmail.com>
Date: Fri, 28 Jun 2024 22:05:40 -0400
Subject: [PATCH 31/53] Add prompt when Ctrl-C pressed (#702)

When I played with the pubsub commands in the console, I was confused by
the following scenario:


![image](https://github.com/valkey-io/valkey/assets/51993843/c56e3976-1e8f-4053-9abb-16fa05ef6ec4)

The reason is that when I press Ctrl-C, the client closes the current
connection and reconnects to the server.
Thus I added a prompt message to make it clear what is happening.


![image](https://github.com/valkey-io/valkey/assets/51993843/cc620f27-4522-4f34-a7b3-86bcdeedfaba)

---------

Signed-off-by: hwware <wen.hui.ware@gmail.com>
---
 src/valkey-cli.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/valkey-cli.c b/src/valkey-cli.c
index 5d506383c0..7e6898643e 100644
--- a/src/valkey-cli.c
+++ b/src/valkey-cli.c
@@ -2273,6 +2273,8 @@ static void cliWaitForMessagesOrStdin(void) {
             /* Ctrl-C pressed */
             config.blocking_state_aborted = 0;
             config.pubsub_mode = 0;
+            printf("Closing current connection. Ready to reconnect to Valkey server... \n");
+            fflush(stdout);
             if (cliConnect(CC_FORCE) != REDIS_OK) {
                 cliPrintContextError();
                 exit(1);

From e4c1f6d45af9c9a51177483c4be7537d832903c0 Mon Sep 17 00:00:00 2001
From: skyfirelee <739609084@qq.com>
Date: Mon, 1 Jul 2024 02:33:10 +0800
Subject: [PATCH 32/53] Replace client flags to bitfield (#614)

---
 src/acl.c            |  10 +-
 src/aof.c            |   9 +-
 src/blocked.c        |  48 +++----
 src/cluster.c        |  20 +--
 src/cluster_legacy.c |   6 +-
 src/db.c             |  13 +-
 src/debug.c          |   8 +-
 src/eval.c           |  19 +--
 src/functions.c      |   3 +-
 src/logreqres.c      |   2 +-
 src/module.c         |  70 ++++-----
 src/multi.c          |  40 +++---
 src/networking.c     | 333 ++++++++++++++++++++++---------------------
 src/object.c         |   2 +-
 src/pubsub.c         |  50 +++----
 src/rdb.c            |   2 +-
 src/replication.c    |  66 ++++-----
 src/script.c         |  14 +-
 src/server.c         |  99 ++++++-------
 src/server.h         | 155 +++++++++-----------
 src/socket.c         |   3 +-
 src/sort.c           |   2 +-
 src/t_hash.c         |   4 +-
 src/t_list.c         |   4 +-
 src/t_set.c          |   4 +-
 src/t_stream.c       |   2 +-
 src/t_zset.c         |   6 +-
 src/timeout.c        |  20 +--
 src/tls.c            |   3 +-
 src/tracking.c       |  70 +++++----
 src/unix.c           |   4 +-
 31 files changed, 549 insertions(+), 542 deletions(-)

diff --git a/src/acl.c b/src/acl.c
index bda449e8d2..51aa567165 100644
--- a/src/acl.c
+++ b/src/acl.c
@@ -506,11 +506,11 @@ void ACLFreeUserAndKillClients(user *u) {
              * more defensive to set the default user and put
              * it in non authenticated mode. */
             c->user = DefaultUser;
-            c->flags &= ~CLIENT_AUTHENTICATED;
+            c->flag.authenticated = 0;
             /* We will write replies to this client later, so we can't
              * close it directly even if async. */
             if (c == server.current_client) {
-                c->flags |= CLIENT_CLOSE_AFTER_COMMAND;
+                c->flag.close_after_command = 1;
             } else {
                 freeClientAsync(c);
             }
@@ -1494,13 +1494,13 @@ void addAuthErrReply(client *c, robj *err) {
  * The return value is AUTH_OK on success (valid username / password pair) & AUTH_ERR otherwise. */
 int checkPasswordBasedAuth(client *c, robj *username, robj *password) {
     if (ACLCheckUserCredentials(username, password) == C_OK) {
-        c->flags |= CLIENT_AUTHENTICATED;
+        c->flag.authenticated = 1;
         c->user = ACLGetUserByName(username->ptr, sdslen(username->ptr));
         moduleNotifyUserChanged(c);
         return AUTH_OK;
     } else {
-        addACLLogEntry(c, ACL_DENIED_AUTH, (c->flags & CLIENT_MULTI) ? ACL_LOG_CTX_MULTI : ACL_LOG_CTX_TOPLEVEL, 0,
-                       username->ptr, NULL);
+        addACLLogEntry(c, ACL_DENIED_AUTH, (c->flag.multi) ? ACL_LOG_CTX_MULTI : ACL_LOG_CTX_TOPLEVEL, 0, username->ptr,
+                       NULL);
         return AUTH_ERR;
     }
 }
diff --git a/src/aof.c b/src/aof.c
index ac9ffd5fcb..1a47d9c688 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -1364,7 +1364,8 @@ struct client *createAOFClient(void) {
      * background processing there is a chance that the
      * command execution order will be violated.
      */
-    c->flags = CLIENT_DENY_BLOCKING;
+    c->raw_flag = 0;
+    c->flag.deny_blocking = 1;
 
     /* We set the fake client as a replica waiting for the synchronization
      * so that the server will not try to send replies to this client. */
@@ -1536,7 +1537,7 @@ int loadSingleAppendOnlyFile(char *filename) {
 
         /* Run the command in the context of a fake client */
         fakeClient->cmd = fakeClient->lastcmd = cmd;
-        if (fakeClient->flags & CLIENT_MULTI && fakeClient->cmd->proc != execCommand) {
+        if (fakeClient->flag.multi && fakeClient->cmd->proc != execCommand) {
             /* Note: we don't have to attempt calling evalGetCommandFlags,
              * since this is AOF, the checks in processCommand are not made
              * anyway.*/
@@ -1549,7 +1550,7 @@ int loadSingleAppendOnlyFile(char *filename) {
         serverAssert(fakeClient->bufpos == 0 && listLength(fakeClient->reply) == 0);
 
         /* The fake client should never get blocked */
-        serverAssert((fakeClient->flags & CLIENT_BLOCKED) == 0);
+        serverAssert(fakeClient->flag.blocked == 0);
 
         /* Clean up. Command code may have changed argv/argc so we use the
          * argv/argc of the client instead of the local variables. */
@@ -1562,7 +1563,7 @@ int loadSingleAppendOnlyFile(char *filename) {
      * If the client is in the middle of a MULTI/EXEC, handle it as it was
      * a short read, even if technically the protocol is correct: we want
      * to remove the unprocessed tail and continue. */
-    if (fakeClient->flags & CLIENT_MULTI) {
+    if (fakeClient->flag.multi) {
         serverLog(LL_WARNING, "Revert incomplete MULTI/EXEC transaction in AOF file %s", filename);
         valid_up_to = valid_before_multi;
         goto uxeof;
diff --git a/src/blocked.c b/src/blocked.c
index 08abac15e3..15ef39af3b 100644
--- a/src/blocked.c
+++ b/src/blocked.c
@@ -87,11 +87,11 @@ void initClientBlockingState(client *c) {
  * and will be processed when the client is unblocked. */
 void blockClient(client *c, int btype) {
     /* Primary client should never be blocked unless pause or module */
-    serverAssert(!(c->flags & CLIENT_PRIMARY && btype != BLOCKED_MODULE && btype != BLOCKED_POSTPONE));
+    serverAssert(!(c->flag.primary && btype != BLOCKED_MODULE && btype != BLOCKED_POSTPONE));
 
-    c->flags |= CLIENT_BLOCKED;
+    c->flag.blocked = 1;
     c->bstate.btype = btype;
-    if (!(c->flags & CLIENT_MODULE))
+    if (!c->flag.module)
         server.blocked_clients++; /* We count blocked client stats on regular clients and not on module clients */
     server.blocked_clients_by_type[btype]++;
     addClientToTimeoutTable(c);
@@ -130,10 +130,10 @@ void processUnblockedClients(void) {
         serverAssert(ln != NULL);
         c = ln->value;
         listDelNode(server.unblocked_clients, ln);
-        c->flags &= ~CLIENT_UNBLOCKED;
+        c->flag.unblocked = 0;
 
-        if (c->flags & CLIENT_MODULE) {
-            if (!(c->flags & CLIENT_BLOCKED)) {
+        if (c->flag.module) {
+            if (!c->flag.blocked) {
                 moduleCallCommandUnblockedHandler(c);
             }
             continue;
@@ -143,7 +143,7 @@ void processUnblockedClients(void) {
          * is blocked again. Actually processInputBuffer() checks that the
          * client is not blocked before to proceed, but things may change and
          * the code is conceptually more correct this way. */
-        if (!(c->flags & CLIENT_BLOCKED)) {
+        if (!c->flag.blocked) {
             /* If we have a queued command, execute it now. */
             if (processPendingCommandAndInputBuffer(c) == C_ERR) {
                 c = NULL;
@@ -172,8 +172,8 @@ void processUnblockedClients(void) {
 void queueClientForReprocessing(client *c) {
     /* The client may already be into the unblocked list because of a previous
      * blocking operation, don't add back it into the list multiple times. */
-    if (!(c->flags & CLIENT_UNBLOCKED)) {
-        c->flags |= CLIENT_UNBLOCKED;
+    if (!c->flag.unblocked) {
+        c->flag.unblocked = 1;
         listAddNodeTail(server.unblocked_clients, c);
     }
 }
@@ -199,7 +199,7 @@ void unblockClient(client *c, int queue_for_reprocessing) {
 
     /* Reset the client for a new query, unless the client has pending command to process
      * or in case a shutdown operation was canceled and we are still in the processCommand sequence  */
-    if (!(c->flags & CLIENT_PENDING_COMMAND) && c->bstate.btype != BLOCKED_SHUTDOWN) {
+    if (!c->flag.pending_command && c->bstate.btype != BLOCKED_SHUTDOWN) {
         freeClientOriginalArgv(c);
         /* Clients that are not blocked on keys are not reprocessed so we must
          * call reqresAppendResponse here (for clients blocked on key,
@@ -210,11 +210,11 @@ void unblockClient(client *c, int queue_for_reprocessing) {
     }
 
     /* We count blocked client stats on regular clients and not on module clients */
-    if (!(c->flags & CLIENT_MODULE)) server.blocked_clients--;
+    if (!c->flag.module) server.blocked_clients--;
     server.blocked_clients_by_type[c->bstate.btype]--;
     /* Clear the flags, and put the client in the unblocked list so that
      * we'll process new commands in its query buffer ASAP. */
-    c->flags &= ~CLIENT_BLOCKED;
+    c->flag.blocked = 0;
     c->bstate.btype = BLOCKED_NONE;
     c->bstate.unblock_on_nokey = 0;
     removeClientFromTimeoutTable(c);
@@ -256,7 +256,7 @@ void replyToClientsBlockedOnShutdown(void) {
     listRewind(server.clients, &li);
     while ((ln = listNext(&li))) {
         client *c = listNodeValue(ln);
-        if (c->flags & CLIENT_BLOCKED && c->bstate.btype == BLOCKED_SHUTDOWN) {
+        if (c->flag.blocked && c->bstate.btype == BLOCKED_SHUTDOWN) {
             addReplyError(c, "Errors trying to SHUTDOWN. Check logs.");
             unblockClient(c, 1);
         }
@@ -278,7 +278,7 @@ void disconnectAllBlockedClients(void) {
     while ((ln = listNext(&li))) {
         client *c = listNodeValue(ln);
 
-        if (c->flags & CLIENT_BLOCKED) {
+        if (c->flag.blocked) {
             /* POSTPONEd clients are an exception, when they'll be unblocked, the
              * command processing will start from scratch, and the command will
              * be either executed or rejected. (unlike LIST blocked clients for
@@ -287,7 +287,7 @@ void disconnectAllBlockedClients(void) {
 
             unblockClientOnError(c, "-UNBLOCKED force unblock from blocking operation, "
                                     "instance state changed (master -> replica?)");
-            c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+            c->flag.close_after_reply = 1;
         }
     }
 }
@@ -368,7 +368,7 @@ void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeo
     list *l;
     int j;
 
-    if (!(c->flags & CLIENT_REPROCESSING_COMMAND)) {
+    if (!c->flag.reprocessing_command) {
         /* If the client is re-processing the command, we do not set the timeout
          * because we need to retain the client's original timeout. */
         c->bstate.timeout = timeout;
@@ -411,7 +411,7 @@ void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeo
     /* Currently we assume key blocking will require reprocessing the command.
      * However in case of modules, they have a different way to handle the reprocessing
      * which does not require setting the pending command flag */
-    if (btype != BLOCKED_MODULE) c->flags |= CLIENT_PENDING_COMMAND;
+    if (btype != BLOCKED_MODULE) c->flag.pending_command = 1;
     blockClient(c, btype);
 }
 
@@ -605,7 +605,7 @@ void blockPostponeClient(client *c) {
     listAddNodeTail(server.postponed_clients, c);
     c->postponed_list_node = listLast(server.postponed_clients);
     /* Mark this client to execute its command */
-    c->flags |= CLIENT_PENDING_COMMAND;
+    c->flag.pending_command = 1;
 }
 
 /* Block client due to shutdown command */
@@ -633,8 +633,8 @@ static void unblockClientOnKey(client *c, robj *key) {
     unblockClient(c, 0);
     /* In case this client was blocked on keys during command
      * we need to re process the command again */
-    if (c->flags & CLIENT_PENDING_COMMAND) {
-        c->flags &= ~CLIENT_PENDING_COMMAND;
+    if (c->flag.pending_command) {
+        c->flag.pending_command = 0;
         /* We want the command processing and the unblock handler (see RM_Call 'K' option)
          * to run atomically, this is why we must enter the execution unit here before
          * running the command, and exit the execution unit after calling the unblock handler (if exists).
@@ -644,8 +644,8 @@ static void unblockClientOnKey(client *c, robj *key) {
         server.current_client = c;
         enterExecutionUnit(1, 0);
         processCommandAndResetClient(c);
-        if (!(c->flags & CLIENT_BLOCKED)) {
-            if (c->flags & CLIENT_MODULE) {
+        if (!c->flag.blocked) {
+            if (c->flag.module) {
                 moduleCallCommandUnblockedHandler(c);
             } else {
                 queueClientForReprocessing(c);
@@ -690,7 +690,7 @@ void unblockClientOnTimeout(client *c) {
     if (c->bstate.btype == BLOCKED_MODULE && isModuleClientUnblocked(c)) return;
 
     replyToBlockedClientTimedOut(c);
-    if (c->flags & CLIENT_PENDING_COMMAND) c->flags &= ~CLIENT_PENDING_COMMAND;
+    if (c->flag.pending_command) c->flag.pending_command = 0;
     unblockClient(c, 1);
 }
 
@@ -699,7 +699,7 @@ void unblockClientOnTimeout(client *c) {
 void unblockClientOnError(client *c, const char *err_str) {
     if (err_str) addReplyError(c, err_str);
     updateStatsOnUnblock(c, 0, 0, 1);
-    if (c->flags & CLIENT_PENDING_COMMAND) c->flags &= ~CLIENT_PENDING_COMMAND;
+    if (c->flag.pending_command) c->flag.pending_command = 0;
     unblockClient(c, 1);
 }
 
diff --git a/src/cluster.c b/src/cluster.c
index c4949c08ee..c77c4d1ff1 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -985,7 +985,7 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int
     if (cmd->proc == execCommand) {
         /* If CLIENT_MULTI flag is not set EXEC is just going to return an
          * error. */
-        if (!(c->flags & CLIENT_MULTI)) return myself;
+        if (!c->flag.multi) return myself;
         ms = &c->mstate;
     } else {
         /* In order to have a single codepath create a fake Multi State
@@ -1048,7 +1048,7 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int
                  * can safely serve the request, otherwise we return a TRYAGAIN
                  * error). To do so we set the importing/migrating state and
                  * increment a counter for every missing key. */
-                if (clusterNodeIsPrimary(myself) || c->flags & CLIENT_READONLY) {
+                if (clusterNodeIsPrimary(myself) || c->flag.readonly) {
                     if (n == clusterNodeGetPrimary(myself) && getMigratingSlotDest(slot) != NULL) {
                         migrating_slot = 1;
                     } else if (getImportingSlotSource(slot) != NULL) {
@@ -1143,7 +1143,7 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int
      * request as "ASKING", we can serve the request. However if the request
      * involves multiple keys and we don't have them all, the only option is
      * to send a TRYAGAIN error. */
-    if (importing_slot && (c->flags & CLIENT_ASKING || cmd_flags & CMD_ASKING)) {
+    if (importing_slot && (c->flag.asking || cmd_flags & CMD_ASKING)) {
         if (multiple_keys && missing_keys) {
             if (error_code) *error_code = CLUSTER_REDIR_UNSTABLE;
             return NULL;
@@ -1157,7 +1157,7 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int
      * is serving, we can reply without redirection. */
     int is_write_command =
         (cmd_flags & CMD_WRITE) || (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE));
-    if (((c->flags & CLIENT_READONLY) || pubsubshard_included) && !is_write_command && clusterNodeIsReplica(myself) &&
+    if ((c->flag.readonly || pubsubshard_included) && !is_write_command && clusterNodeIsReplica(myself) &&
         clusterNodeGetPrimary(myself) == n) {
         return myself;
     }
@@ -1213,8 +1213,8 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co
  * returns 1. Otherwise 0 is returned and no operation is performed. */
 int clusterRedirectBlockedClientIfNeeded(client *c) {
     clusterNode *myself = getMyClusterNode();
-    if (c->flags & CLIENT_BLOCKED && (c->bstate.btype == BLOCKED_LIST || c->bstate.btype == BLOCKED_ZSET ||
-                                      c->bstate.btype == BLOCKED_STREAM || c->bstate.btype == BLOCKED_MODULE)) {
+    if (c->flag.blocked && (c->bstate.btype == BLOCKED_LIST || c->bstate.btype == BLOCKED_ZSET ||
+                            c->bstate.btype == BLOCKED_STREAM || c->bstate.btype == BLOCKED_MODULE)) {
         dictEntry *de;
         dictIterator *di;
 
@@ -1240,7 +1240,7 @@ int clusterRedirectBlockedClientIfNeeded(client *c) {
 
             /* if the client is read-only and attempting to access key that our
              * replica can handle, allow it. */
-            if ((c->flags & CLIENT_READONLY) && !(c->lastcmd->flags & CMD_WRITE) && clusterNodeIsReplica(myself) &&
+            if (c->flag.readonly && !(c->lastcmd->flags & CMD_WRITE) && clusterNodeIsReplica(myself) &&
                 clusterNodeGetPrimary(myself) == node) {
                 node = myself;
             }
@@ -1443,7 +1443,7 @@ void askingCommand(client *c) {
         addReplyError(c, "This instance has cluster support disabled");
         return;
     }
-    c->flags |= CLIENT_ASKING;
+    c->flag.asking = 1;
     addReply(c, shared.ok);
 }
 
@@ -1451,12 +1451,12 @@ void askingCommand(client *c) {
  * In this mode replica will not redirect clients as long as clients access
  * with read-only commands to keys that are served by the replica's primary. */
 void readonlyCommand(client *c) {
-    c->flags |= CLIENT_READONLY;
+    c->flag.readonly = 1;
     addReply(c, shared.ok);
 }
 
 /* The READWRITE command just clears the READONLY command state. */
 void readwriteCommand(client *c) {
-    c->flags &= ~CLIENT_READONLY;
+    c->flag.readonly = 0;
     addReply(c, shared.ok);
 }
diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index 17e7d235d4..dd95cc6bb7 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -5904,7 +5904,7 @@ int clusterParseSetSlotCommand(client *c, int *slot_out, clusterNode **node_out,
     int optarg_pos = 0;
 
     /* Allow primaries to replicate "CLUSTER SETSLOT" */
-    if (!(c->flags & CLIENT_PRIMARY) && nodeIsReplica(myself)) {
+    if (!c->flag.primary && nodeIsReplica(myself)) {
         addReplyError(c, "Please use SETSLOT only with masters.");
         return 0;
     }
@@ -6028,7 +6028,7 @@ void clusterCommandSetSlot(client *c) {
      * This ensures that all replicas have the latest topology information, enabling
      * a reliable slot ownership transfer even if the primary node went down during
      * the process. */
-    if (nodeIsPrimary(myself) && myself->num_replicas != 0 && (c->flags & CLIENT_REPLICATION_DONE) == 0) {
+    if (nodeIsPrimary(myself) && myself->num_replicas != 0 && !c->flag.replication_done) {
         /* Iterate through the list of replicas to check if there are any running
          * a version older than 8.0.0. Replicas with versions older than 8.0.0 do
          * not support the CLUSTER SETSLOT command on replicas. If such a replica
@@ -6058,7 +6058,7 @@ void clusterCommandSetSlot(client *c) {
              *    ack the repl offset at the command boundary. */
             blockClientForReplicaAck(c, timeout_ms, server.primary_repl_offset + 1, myself->num_replicas, 0);
             /* Mark client as pending command for execution after replication to replicas. */
-            c->flags |= CLIENT_PENDING_COMMAND;
+            c->flag.pending_command = 1;
             replicationRequestAckFromReplicas();
             return;
         }
diff --git a/src/db.c b/src/db.c
index c879b2ffb5..c33f10f90e 100644
--- a/src/db.c
+++ b/src/db.c
@@ -118,7 +118,7 @@ robj *lookupKey(serverDb *db, robj *key, int flags) {
         /* Update the access time for the ageing algorithm.
          * Don't do it if we have a saving child, as this will trigger
          * a copy on write madness. */
-        if (server.current_client && server.current_client->flags & CLIENT_NO_TOUCH &&
+        if (server.current_client && server.current_client->flag.no_touch &&
             server.current_client->cmd->proc != touchCommand)
             flags |= LOOKUP_NOTOUCH;
         if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH)) {
@@ -231,8 +231,7 @@ int getKeySlot(sds key) {
      * It only gets set during the execution of command under `call` method. Other flows requesting
      * the key slot would fallback to calculateKeySlot.
      */
-    if (server.current_client && server.current_client->slot >= 0 &&
-        server.current_client->flags & CLIENT_EXECUTING_COMMAND) {
+    if (server.current_client && server.current_client->slot >= 0 && server.current_client->flag.executing_command) {
         debugServerAssertWithInfo(server.current_client, NULL, calculateKeySlot(key) == server.current_client->slot);
         return server.current_client->slot;
     }
@@ -822,7 +821,7 @@ void keysCommand(client *c) {
                 numkeys++;
             }
         }
-        if (c->flags & CLIENT_CLOSE_ASAP) break;
+        if (c->flag.close_asap) break;
     }
     if (kvs_di) kvstoreReleaseDictIterator(kvs_di);
     if (kvs_it) kvstoreIteratorRelease(kvs_it);
@@ -1238,7 +1237,7 @@ void shutdownCommand(client *c) {
         return;
     }
 
-    if (!(flags & SHUTDOWN_NOW) && c->flags & CLIENT_DENY_BLOCKING) {
+    if (!(flags & SHUTDOWN_NOW) && c->flag.deny_blocking) {
         addReplyError(c, "SHUTDOWN without NOW or ABORT isn't allowed for DENY BLOCKING client");
         return;
     }
@@ -1667,7 +1666,7 @@ void setExpire(client *c, serverDb *db, robj *key, long long when) {
     }
 
     int writable_replica = server.primary_host && server.repl_replica_ro == 0;
-    if (c && writable_replica && !(c->flags & CLIENT_PRIMARY)) rememberReplicaKeyWithExpire(db, key);
+    if (c && writable_replica && !c->flag.primary) rememberReplicaKeyWithExpire(db, key);
 }
 
 /* Return the expire time of the specified key, or -1 if no expire
@@ -1796,7 +1795,7 @@ keyStatus expireIfNeeded(serverDb *db, robj *key, int flags) {
      * When replicating commands from the primary, keys are never considered
      * expired. */
     if (server.primary_host != NULL) {
-        if (server.current_client && (server.current_client->flags & CLIENT_PRIMARY)) return KEY_VALID;
+        if (server.current_client && (server.current_client->flag.primary)) return KEY_VALID;
         if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return KEY_EXPIRED;
     }
 
diff --git a/src/debug.c b/src/debug.c
index c625ab5150..6eaabe40f4 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -804,12 +804,12 @@ void debugCommand(client *c) {
                 addReplyError(c, "RESP2 is not supported by this command");
                 return;
             }
-            uint64_t old_flags = c->flags;
-            c->flags |= CLIENT_PUSHING;
+            struct ClientFlags old_flags = c->flag;
+            c->flag.pushing = 1;
             addReplyPushLen(c, 2);
             addReplyBulkCString(c, "server-cpu-usage");
             addReplyLongLong(c, 42);
-            if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+            if (!old_flags.pushing) c->flag.pushing = 0;
             /* Push replies are not synchronous replies, so we emit also a
              * normal reply in order for blocking clients just discarding the
              * push reply, to actually consume the reply and continue. */
@@ -1026,7 +1026,7 @@ void _serverAssertPrintClientInfo(const client *c) {
 
     bugReportStart();
     serverLog(LL_WARNING, "=== ASSERTION FAILED CLIENT CONTEXT ===");
-    serverLog(LL_WARNING, "client->flags = %llu", (unsigned long long)c->flags);
+    serverLog(LL_WARNING, "client->flags = %llu", (unsigned long long)c->raw_flag);
     serverLog(LL_WARNING, "client->conn = %s", connGetInfo(c->conn, conninfo, sizeof(conninfo)));
     serverLog(LL_WARNING, "client->argc = %d", c->argc);
     for (j = 0; j < c->argc; j++) {
diff --git a/src/eval.c b/src/eval.c
index f4d09a5aa6..e4e51f7da5 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -258,10 +258,10 @@ void scriptingInit(int setup) {
      * by scriptingReset(). */
     if (lctx.lua_client == NULL) {
         lctx.lua_client = createClient(NULL);
-        lctx.lua_client->flags |= CLIENT_SCRIPT;
+        lctx.lua_client->flag.script = 1;
 
         /* We do not want to allow blocking commands inside Lua */
-        lctx.lua_client->flags |= CLIENT_DENY_BLOCKING;
+        lctx.lua_client->flag.deny_blocking = 1;
     }
 
     /* Lock the global table from any changes */
@@ -630,7 +630,7 @@ void evalCommand(client *c) {
     /* Explicitly feed monitor here so that lua commands appear after their
      * script command. */
     replicationFeedMonitors(c, server.monitors, c->db->id, c->argv, c->argc);
-    if (!(c->flags & CLIENT_LUA_DEBUG))
+    if (!c->flag.lua_debug)
         evalGenericCommand(c, 0);
     else
         evalGenericCommandWithDebugging(c, 0);
@@ -652,7 +652,7 @@ void evalShaCommand(client *c) {
         addReplyErrorObject(c, shared.noscripterr);
         return;
     }
-    if (!(c->flags & CLIENT_LUA_DEBUG))
+    if (!c->flag.lua_debug)
         evalGenericCommand(c, 1);
     else {
         addReplyError(c, "Please use EVAL instead of EVALSHA for debugging");
@@ -732,7 +732,7 @@ NULL
         } else if (!strcasecmp(c->argv[2]->ptr, "sync")) {
             ldbEnable(c);
             addReply(c, shared.ok);
-            c->flags |= CLIENT_LUA_DEBUG_SYNC;
+            c->flag.lua_debug_sync = 1;
         } else {
             addReplyError(c, "Use SCRIPT DEBUG YES/SYNC/NO");
             return;
@@ -794,7 +794,7 @@ int ldbIsEnabled(void) {
 
 /* Enable debug mode of Lua scripts for this client. */
 void ldbEnable(client *c) {
-    c->flags |= CLIENT_LUA_DEBUG;
+    c->flag.lua_debug = 1;
     ldbFlushLog(ldb.logs);
     ldb.conn = c->conn;
     ldb.step = 1;
@@ -810,7 +810,8 @@ void ldbEnable(client *c) {
  * to properly shut down a client debugging session, see ldbEndSession()
  * for more information. */
 void ldbDisable(client *c) {
-    c->flags &= ~(CLIENT_LUA_DEBUG | CLIENT_LUA_DEBUG_SYNC);
+    c->flag.lua_debug = 0;
+    c->flag.lua_debug_sync = 0;
 }
 
 /* Append a log entry to the specified LDB log. */
@@ -871,7 +872,7 @@ void ldbSendLogs(void) {
  * The caller should call ldbEndSession() only if ldbStartSession()
  * returned 1. */
 int ldbStartSession(client *c) {
-    ldb.forked = (c->flags & CLIENT_LUA_DEBUG_SYNC) == 0;
+    ldb.forked = !c->flag.lua_debug_sync;
     if (ldb.forked) {
         pid_t cp = serverFork(CHILD_TYPE_LDB);
         if (cp == -1) {
@@ -940,7 +941,7 @@ void ldbEndSession(client *c) {
 
     /* Close the client connection after sending the final EVAL reply
      * in order to signal the end of the debugging session. */
-    c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+    c->flag.close_after_reply = 1;
 
     /* Cleanup. */
     sdsfreesplitres(ldb.src, ldb.lines);
diff --git a/src/functions.c b/src/functions.c
index 08d869f026..852aa45d3e 100644
--- a/src/functions.c
+++ b/src/functions.c
@@ -406,7 +406,8 @@ int functionsRegisterEngine(const char *engine_name, engine *engine) {
     }
 
     client *c = createClient(NULL);
-    c->flags |= (CLIENT_DENY_BLOCKING | CLIENT_SCRIPT);
+    c->flag.deny_blocking = 1;
+    c->flag.script = 1;
     engineInfo *ei = zmalloc(sizeof(*ei));
     *ei = (engineInfo){
         .name = engine_name_sds,
diff --git a/src/logreqres.c b/src/logreqres.c
index 70b4e55f6f..72b182bcb3 100644
--- a/src/logreqres.c
+++ b/src/logreqres.c
@@ -78,7 +78,7 @@ static int reqresShouldLog(client *c) {
     if (!server.req_res_logfile) return 0;
 
     /* Ignore client with streaming non-standard response */
-    if (c->flags & (CLIENT_PUBSUB | CLIENT_MONITOR | CLIENT_REPLICA)) return 0;
+    if (c->flag.pubsub || c->flag.monitor || c->flag.replica) return 0;
 
     /* We only work on primaries (didn't implement reqresAppendResponse to work on shared replica buffers) */
     if (getClientType(c) == CLIENT_TYPE_PRIMARY) return 0;
diff --git a/src/module.c b/src/module.c
index c98c837a5c..5844fcbdea 100644
--- a/src/module.c
+++ b/src/module.c
@@ -654,7 +654,7 @@ client *moduleAllocTempClient(void) {
         if (moduleTempClientCount < moduleTempClientMinCount) moduleTempClientMinCount = moduleTempClientCount;
     } else {
         c = createClient(NULL);
-        c->flags |= CLIENT_MODULE;
+        c->flag.module = 1;
         c->user = NULL; /* Root user */
     }
     return c;
@@ -681,7 +681,8 @@ void moduleReleaseTempClient(client *c) {
     c->duration = 0;
     resetClient(c);
     c->bufpos = 0;
-    c->flags = CLIENT_MODULE;
+    c->raw_flag = 0;
+    c->flag.module = 1;
     c->user = NULL; /* Root user */
     c->cmd = c->lastcmd = c->realcmd = NULL;
     if (c->bstate.async_rm_call_handle) {
@@ -3638,11 +3639,11 @@ int modulePopulateClientInfoStructure(void *ci, client *client, int structver) {
     ValkeyModuleClientInfoV1 *ci1 = ci;
     memset(ci1, 0, sizeof(*ci1));
     ci1->version = structver;
-    if (client->flags & CLIENT_MULTI) ci1->flags |= VALKEYMODULE_CLIENTINFO_FLAG_MULTI;
-    if (client->flags & CLIENT_PUBSUB) ci1->flags |= VALKEYMODULE_CLIENTINFO_FLAG_PUBSUB;
-    if (client->flags & CLIENT_UNIX_SOCKET) ci1->flags |= VALKEYMODULE_CLIENTINFO_FLAG_UNIXSOCKET;
-    if (client->flags & CLIENT_TRACKING) ci1->flags |= VALKEYMODULE_CLIENTINFO_FLAG_TRACKING;
-    if (client->flags & CLIENT_BLOCKED) ci1->flags |= VALKEYMODULE_CLIENTINFO_FLAG_BLOCKED;
+    if (client->flag.multi) ci1->flags |= VALKEYMODULE_CLIENTINFO_FLAG_MULTI;
+    if (client->flag.pubsub) ci1->flags |= VALKEYMODULE_CLIENTINFO_FLAG_PUBSUB;
+    if (client->flag.unix_socket) ci1->flags |= VALKEYMODULE_CLIENTINFO_FLAG_UNIXSOCKET;
+    if (client->flag.tracking) ci1->flags |= VALKEYMODULE_CLIENTINFO_FLAG_TRACKING;
+    if (client->flag.blocked) ci1->flags |= VALKEYMODULE_CLIENTINFO_FLAG_BLOCKED;
     if (client->conn->type == connectionTypeTls()) ci1->flags |= VALKEYMODULE_CLIENTINFO_FLAG_SSL;
 
     int port;
@@ -3853,9 +3854,9 @@ int VM_GetContextFlags(ValkeyModuleCtx *ctx) {
     /* Client specific flags */
     if (ctx) {
         if (ctx->client) {
-            if (ctx->client->flags & CLIENT_DENY_BLOCKING) flags |= VALKEYMODULE_CTX_FLAGS_DENY_BLOCKING;
+            if (ctx->client->flag.deny_blocking) flags |= VALKEYMODULE_CTX_FLAGS_DENY_BLOCKING;
             /* Module command received from PRIMARY, is replicated. */
-            if (ctx->client->flags & CLIENT_PRIMARY) flags |= VALKEYMODULE_CTX_FLAGS_REPLICATED;
+            if (ctx->client->flag.primary) flags |= VALKEYMODULE_CTX_FLAGS_REPLICATED;
             if (ctx->client->resp == 3) {
                 flags |= VALKEYMODULE_CTX_FLAGS_RESP3;
             }
@@ -3863,7 +3864,7 @@ int VM_GetContextFlags(ValkeyModuleCtx *ctx) {
 
         /* For DIRTY flags, we need the blocked client if used */
         client *c = ctx->blocked_client ? ctx->blocked_client->client : ctx->client;
-        if (c && (c->flags & (CLIENT_DIRTY_CAS | CLIENT_DIRTY_EXEC))) {
+        if (c && (c->flag.dirty_cas || c->flag.dirty_exec)) {
             flags |= VALKEYMODULE_CTX_FLAGS_MULTI_DIRTY;
         }
     }
@@ -5955,8 +5956,7 @@ int VM_CallReplyPromiseAbort(ValkeyModuleCallReply *reply, void **private_data)
     ValkeyModuleAsyncRMCallPromise *promise = callReplyGetPrivateData(reply);
     if (!promise->c)
         return VALKEYMODULE_ERR; /* Promise can not be aborted, either already aborted or already finished. */
-    if (!(promise->c->flags & CLIENT_BLOCKED))
-        return VALKEYMODULE_ERR; /* Client is not blocked anymore, can not abort it. */
+    if (!(promise->c->flag.blocked)) return VALKEYMODULE_ERR; /* Client is not blocked anymore, can not abort it. */
 
     /* Client is still blocked, remove it from any blocking state and release it. */
     if (private_data) *private_data = promise->private_data;
@@ -6227,7 +6227,7 @@ ValkeyModuleCallReply *VM_Call(ValkeyModuleCtx *ctx, const char *cmdname, const
 
     if (!(flags & VALKEYMODULE_ARGV_ALLOW_BLOCK)) {
         /* We do not want to allow block, the module do not expect it */
-        c->flags |= CLIENT_DENY_BLOCKING;
+        c->flag.deny_blocking = 1;
     }
     c->db = ctx->client->db;
     c->argv = argv;
@@ -6324,7 +6324,7 @@ ValkeyModuleCallReply *VM_Call(ValkeyModuleCtx *ctx, const char *cmdname, const
         }
     } else {
         /* if we aren't OOM checking in VM_Call, we want further executions from this client to also not fail on OOM */
-        c->flags |= CLIENT_ALLOW_OOM;
+        c->flag.allow_oom = 1;
     }
 
     if (flags & VALKEYMODULE_ARGV_NO_WRITES) {
@@ -6422,8 +6422,8 @@ ValkeyModuleCallReply *VM_Call(ValkeyModuleCtx *ctx, const char *cmdname, const
     if (server.cluster_enabled && !mustObeyClient(ctx->client)) {
         int error_code;
         /* Duplicate relevant flags in the module client. */
-        c->flags &= ~(CLIENT_READONLY | CLIENT_ASKING);
-        c->flags |= ctx->client->flags & (CLIENT_READONLY | CLIENT_ASKING);
+        c->flag.readonly = ctx->client->flag.readonly;
+        c->flag.asking = ctx->client->flag.asking;
         if (getNodeByQuery(c, c->cmd, c->argv, c->argc, NULL, &error_code) != getMyClusterNode()) {
             sds msg = NULL;
             if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) {
@@ -6474,7 +6474,7 @@ ValkeyModuleCallReply *VM_Call(ValkeyModuleCtx *ctx, const char *cmdname, const
     call(c, call_flags);
     server.replication_allowed = prev_replication_allowed;
 
-    if (c->flags & CLIENT_BLOCKED) {
+    if (c->flag.blocked) {
         serverAssert(flags & VALKEYMODULE_ARGV_ALLOW_BLOCK);
         serverAssert(ctx->module);
         ValkeyModuleAsyncRMCallPromise *promise = zmalloc(sizeof(ValkeyModuleAsyncRMCallPromise));
@@ -6492,11 +6492,11 @@ ValkeyModuleCallReply *VM_Call(ValkeyModuleCtx *ctx, const char *cmdname, const
         c->bstate.async_rm_call_handle = promise;
         if (!(call_flags & CMD_CALL_PROPAGATE_AOF)) {
             /* No need for AOF propagation, set the relevant flags of the client */
-            c->flags |= CLIENT_MODULE_PREVENT_AOF_PROP;
+            c->flag.module_prevent_aof_prop = 1;
         }
         if (!(call_flags & CMD_CALL_PROPAGATE_REPL)) {
             /* No need for replication propagation, set the relevant flags of the client */
-            c->flags |= CLIENT_MODULE_PREVENT_REPL_PROP;
+            c->flag.module_prevent_repl_prop = 1;
         }
         c = NULL; /* Make sure not to free the client */
     } else {
@@ -7847,7 +7847,7 @@ int attemptNextAuthCb(client *c, robj *username, robj *password, robj **err) {
             continue;
         }
         /* Remove the module auth complete flag before we attempt the next cb. */
-        c->flags &= ~CLIENT_MODULE_AUTH_HAS_RESULT;
+        c->flag.module_auth_has_result = 0;
         ValkeyModuleCtx ctx;
         moduleCreateContext(&ctx, cur_auth_ctx->module, VALKEYMODULE_CTX_NONE);
         ctx.client = c;
@@ -7905,19 +7905,20 @@ int checkModuleAuthentication(client *c, robj *username, robj *password, robj **
     if (result == VALKEYMODULE_AUTH_NOT_HANDLED) {
         result = attemptNextAuthCb(c, username, password, err);
     }
-    if (c->flags & CLIENT_BLOCKED) {
+    if (c->flag.blocked) {
         /* Modules are expected to return VALKEYMODULE_AUTH_HANDLED when blocking clients. */
         serverAssert(result == VALKEYMODULE_AUTH_HANDLED);
         return AUTH_BLOCKED;
     }
     c->module_auth_ctx = NULL;
     if (result == VALKEYMODULE_AUTH_NOT_HANDLED) {
-        c->flags &= ~CLIENT_MODULE_AUTH_HAS_RESULT;
+        c->flag.module_auth_has_result = 0;
         return AUTH_NOT_HANDLED;
     }
-    if (c->flags & CLIENT_MODULE_AUTH_HAS_RESULT) {
-        c->flags &= ~CLIENT_MODULE_AUTH_HAS_RESULT;
-        if (c->flags & CLIENT_AUTHENTICATED) return AUTH_OK;
+
+    if (c->flag.module_auth_has_result) {
+        c->flag.module_auth_has_result = 0;
+        if (c->flag.authenticated) return AUTH_OK;
     }
     return AUTH_ERR;
 }
@@ -8010,8 +8011,8 @@ ValkeyModuleBlockedClient *VM_BlockClientOnAuth(ValkeyModuleCtx *ctx,
     }
     ValkeyModuleBlockedClient *bc =
         moduleBlockClient(ctx, NULL, reply_callback, NULL, free_privdata, 0, NULL, 0, NULL, 0);
-    if (ctx->client->flags & CLIENT_BLOCKED) {
-        ctx->client->flags |= CLIENT_PENDING_COMMAND;
+    if (ctx->client->flag.blocked) {
+        ctx->client->flag.pending_command = 1;
     }
     return bc;
 }
@@ -8298,9 +8299,8 @@ void moduleHandleBlockedClients(void) {
             /* Put the client in the list of clients that need to write
              * if there are pending replies here. This is needed since
              * during a non blocking command the client may receive output. */
-            if (!clientHasModuleAuthInProgress(c) && clientHasPendingReplies(c) && !(c->flags & CLIENT_PENDING_WRITE) &&
-                c->conn) {
-                c->flags |= CLIENT_PENDING_WRITE;
+            if (!clientHasModuleAuthInProgress(c) && clientHasPendingReplies(c) && !c->flag.pending_write && c->conn) {
+                c->flag.pending_write = 1;
                 listLinkNodeHead(server.clients_pending_write, &c->clients_pending_write_node);
             }
         }
@@ -9465,11 +9465,11 @@ void revokeClientAuthentication(client *c) {
     moduleNotifyUserChanged(c);
 
     c->user = DefaultUser;
-    c->flags &= ~CLIENT_AUTHENTICATED;
+    c->flag.authenticated = 0;
     /* We will write replies to this client later, so we can't close it
      * directly even if async. */
     if (c == server.current_client) {
-        c->flags |= CLIENT_CLOSE_AFTER_COMMAND;
+        c->flag.close_after_command = 1;
     } else {
         freeClientAsync(c);
     }
@@ -9780,17 +9780,17 @@ static int authenticateClientWithUser(ValkeyModuleCtx *ctx,
     }
 
     /* Avoid settings which are meaningless and will be lost */
-    if (!ctx->client || (ctx->client->flags & CLIENT_MODULE)) {
+    if (!ctx->client || (ctx->client->flag.module)) {
         return VALKEYMODULE_ERR;
     }
 
     moduleNotifyUserChanged(ctx->client);
 
     ctx->client->user = user;
-    ctx->client->flags |= CLIENT_AUTHENTICATED;
+    ctx->client->flag.authenticated = 1;
 
     if (clientHasModuleAuthInProgress(ctx->client)) {
-        ctx->client->flags |= CLIENT_MODULE_AUTH_HAS_RESULT;
+        ctx->client->flag.module_auth_has_result = 1;
     }
 
     if (callback) {
diff --git a/src/multi.c b/src/multi.c
index 8e8f8e04cd..24311c9982 100644
--- a/src/multi.c
+++ b/src/multi.c
@@ -63,7 +63,7 @@ void queueMultiCommand(client *c, uint64_t cmd_flags) {
      * this is useful in case client sends these in a pipeline, or doesn't
      * bother to read previous responses and didn't notice the multi was already
      * aborted. */
-    if (c->flags & (CLIENT_DIRTY_CAS | CLIENT_DIRTY_EXEC)) return;
+    if (c->flag.dirty_cas || c->flag.dirty_exec) return;
     if (c->mstate.count == 0) {
         /* If a client is using multi/exec, assuming it is used to execute at least
          * two commands. Hence, creating by default size of 2. */
@@ -96,28 +96,30 @@ void queueMultiCommand(client *c, uint64_t cmd_flags) {
 void discardTransaction(client *c) {
     freeClientMultiState(c);
     initClientMultiState(c);
-    c->flags &= ~(CLIENT_MULTI | CLIENT_DIRTY_CAS | CLIENT_DIRTY_EXEC);
+    c->flag.multi = 0;
+    c->flag.dirty_cas = 0;
+    c->flag.dirty_exec = 0;
     unwatchAllKeys(c);
 }
 
 /* Flag the transaction as DIRTY_EXEC so that EXEC will fail.
  * Should be called every time there is an error while queueing a command. */
 void flagTransaction(client *c) {
-    if (c->flags & CLIENT_MULTI) c->flags |= CLIENT_DIRTY_EXEC;
+    if (c->flag.multi) c->flag.dirty_exec = 1;
 }
 
 void multiCommand(client *c) {
-    if (c->flags & CLIENT_MULTI) {
+    if (c->flag.multi) {
         addReplyError(c, "MULTI calls can not be nested");
         return;
     }
-    c->flags |= CLIENT_MULTI;
+    c->flag.multi = 1;
 
     addReply(c, shared.ok);
 }
 
 void discardCommand(client *c) {
-    if (!(c->flags & CLIENT_MULTI)) {
+    if (!c->flag.multi) {
         addReplyError(c, "DISCARD without MULTI");
         return;
     }
@@ -148,14 +150,14 @@ void execCommand(client *c) {
     int orig_argc, orig_argv_len;
     struct serverCommand *orig_cmd;
 
-    if (!(c->flags & CLIENT_MULTI)) {
+    if (!c->flag.multi) {
         addReplyError(c, "EXEC without MULTI");
         return;
     }
 
     /* EXEC with expired watched key is disallowed*/
     if (isWatchedKeyExpired(c)) {
-        c->flags |= (CLIENT_DIRTY_CAS);
+        c->flag.dirty_cas = 1;
     }
 
     /* Check if we need to abort the EXEC because:
@@ -164,8 +166,8 @@ void execCommand(client *c) {
      * A failed EXEC in the first case returns a multi bulk nil object
      * (technically it is not an error but a special behavior), while
      * in the second an EXECABORT error is returned. */
-    if (c->flags & (CLIENT_DIRTY_CAS | CLIENT_DIRTY_EXEC)) {
-        if (c->flags & CLIENT_DIRTY_EXEC) {
+    if (c->flag.dirty_cas || c->flag.dirty_exec) {
+        if (c->flag.dirty_exec) {
             addReplyErrorObject(c, shared.execaborterr);
         } else {
             addReply(c, shared.nullarray[c->resp]);
@@ -175,10 +177,10 @@ void execCommand(client *c) {
         return;
     }
 
-    uint64_t old_flags = c->flags;
+    struct ClientFlags old_flags = c->flag;
 
     /* we do not want to allow blocking commands inside multi */
-    c->flags |= CLIENT_DENY_BLOCKING;
+    c->flag.deny_blocking = 1;
 
     /* Exec all the queued commands */
     unwatchAllKeys(c); /* Unwatch ASAP otherwise we'll waste CPU cycles */
@@ -224,7 +226,7 @@ void execCommand(client *c) {
             else
                 call(c, CMD_CALL_FULL);
 
-            serverAssert((c->flags & CLIENT_BLOCKED) == 0);
+            serverAssert(c->flag.blocked == 0);
         }
 
         /* Commands may alter argc/argv, restore mstate. */
@@ -235,7 +237,7 @@ void execCommand(client *c) {
     }
 
     // restore old DENY_BLOCKING value
-    if (!(old_flags & CLIENT_DENY_BLOCKING)) c->flags &= ~CLIENT_DENY_BLOCKING;
+    if (!(old_flags.deny_blocking)) c->flag.deny_blocking = 0;
 
     c->argv = orig_argv;
     c->argv_len = orig_argv_len;
@@ -393,7 +395,7 @@ void touchWatchedKey(serverDb *db, robj *key) {
             break;
         }
 
-        c->flags |= CLIENT_DIRTY_CAS;
+        c->flag.dirty_cas = 1;
         /* As the client is marked as dirty, there is no point in getting here
          * again in case that key (or others) are modified again (or keep the
          * memory overhead till EXEC). */
@@ -444,7 +446,7 @@ void touchAllWatchedKeysInDb(serverDb *emptied, serverDb *replaced_with) {
                     continue;
                 }
                 client *c = wk->client;
-                c->flags |= CLIENT_DIRTY_CAS;
+                c->flag.dirty_cas = 1;
                 /* Note - we could potentially call unwatchAllKeys for this specific client in order to reduce
                  * the total number of iterations. BUT this could also free the current next entry pointer
                  * held by the iterator and can lead to use-after-free. */
@@ -457,12 +459,12 @@ void touchAllWatchedKeysInDb(serverDb *emptied, serverDb *replaced_with) {
 void watchCommand(client *c) {
     int j;
 
-    if (c->flags & CLIENT_MULTI) {
+    if (c->flag.multi) {
         addReplyError(c, "WATCH inside MULTI is not allowed");
         return;
     }
     /* No point in watching if the client is already dirty. */
-    if (c->flags & CLIENT_DIRTY_CAS) {
+    if (c->flag.dirty_cas) {
         addReply(c, shared.ok);
         return;
     }
@@ -472,7 +474,7 @@ void watchCommand(client *c) {
 
 void unwatchCommand(client *c) {
     unwatchAllKeys(c);
-    c->flags &= (~CLIENT_DIRTY_CAS);
+    c->flag.dirty_cas = 0;
     addReply(c, shared.ok);
 }
 
diff --git a/src/networking.c b/src/networking.c
index 629e3aac7e..bb7bab02c3 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -105,18 +105,14 @@ static void clientSetDefaultAuth(client *c) {
     /* If the default user does not require authentication, the user is
      * directly authenticated. */
     c->user = DefaultUser;
-    if ((c->user->flags & USER_FLAG_NOPASS) && !(c->user->flags & USER_FLAG_DISABLED)) {
-        c->flags |= CLIENT_AUTHENTICATED;
-    } else {
-        c->flags &= ~CLIENT_AUTHENTICATED;
-    }
+    c->flag.authenticated = (c->user->flags & USER_FLAG_NOPASS) && !(c->user->flags & USER_FLAG_DISABLED);
 }
 
 int authRequired(client *c) {
     /* Check if the user is authenticated. This check is skipped in case
      * the default user is flagged as "nopass" and is active. */
     int auth_required = (!(DefaultUser->flags & USER_FLAG_NOPASS) || (DefaultUser->flags & USER_FLAG_DISABLED)) &&
-                        !(c->flags & CLIENT_AUTHENTICATED);
+                        !c->flag.authenticated;
     return auth_required;
 }
 
@@ -167,7 +163,7 @@ client *createClient(connection *conn) {
     c->multibulklen = 0;
     c->bulklen = -1;
     c->sentlen = 0;
-    c->flags = 0;
+    c->raw_flag = 0;
     c->capa = 0;
     c->slot = -1;
     c->ctime = c->last_interaction = server.unixtime;
@@ -250,16 +246,15 @@ void putClientInPendingWriteQueue(client *c) {
     /* Schedule the client to write the output buffers to the socket only
      * if not already done and, for replicas, if the replica can actually receive
      * writes at this stage. */
-    if (!(c->flags & CLIENT_PENDING_WRITE) &&
-        (c->repl_state == REPL_STATE_NONE ||
-         (c->repl_state == REPLICA_STATE_ONLINE && !c->repl_start_cmd_stream_on_ack))) {
+    if (!c->flag.pending_write && (c->repl_state == REPL_STATE_NONE ||
+                                   (c->repl_state == REPLICA_STATE_ONLINE && !c->repl_start_cmd_stream_on_ack))) {
         /* Here instead of installing the write handler, we just flag the
          * client and put it into a list of clients that have something
          * to write to the socket. This way before re-entering the event
          * loop, we can try to directly write to the client sockets avoiding
          * a system call. We'll only really install the write handler if
          * we'll not be able to write the whole reply at once. */
-        c->flags |= CLIENT_PENDING_WRITE;
+        c->flag.pending_write = 1;
         listLinkNodeHead(server.clients_pending_write, &c->clients_pending_write_node);
     }
 }
@@ -289,18 +284,18 @@ void putClientInPendingWriteQueue(client *c) {
 int prepareClientToWrite(client *c) {
     /* If it's the Lua client we always return ok without installing any
      * handler since there is no socket at all. */
-    if (c->flags & (CLIENT_SCRIPT | CLIENT_MODULE)) return C_OK;
+    if (c->flag.script || c->flag.module) return C_OK;
 
     /* If CLIENT_CLOSE_ASAP flag is set, we need not write anything. */
-    if (c->flags & CLIENT_CLOSE_ASAP) return C_ERR;
+    if (c->flag.close_asap) return C_ERR;
 
     /* CLIENT REPLY OFF / SKIP handling: don't send replies.
      * CLIENT_PUSHING handling: disables the reply silencing flags. */
-    if ((c->flags & (CLIENT_REPLY_OFF | CLIENT_REPLY_SKIP)) && !(c->flags & CLIENT_PUSHING)) return C_ERR;
+    if ((c->flag.reply_off || c->flag.reply_skip) && !c->flag.pushing) return C_ERR;
 
     /* Primaries don't receive replies, unless CLIENT_PRIMARY_FORCE_REPLY flag
      * is set. */
-    if ((c->flags & CLIENT_PRIMARY) && !(c->flags & CLIENT_PRIMARY_FORCE_REPLY)) return C_ERR;
+    if (c->flag.primary && !c->flag.primary_force_reply) return C_ERR;
 
     if (!c->conn) return C_ERR; /* Fake client for AOF loading. */
 
@@ -430,7 +425,7 @@ int cmdHasPushAsReply(struct serverCommand *cmd) {
 }
 
 void _addReplyToBufferOrList(client *c, const char *s, size_t len) {
-    if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return;
+    if (c->flag.close_after_reply) return;
 
     /* Replicas should normally not cause any writes to the reply buffer. In case a rogue replica sent a command on the
      * replication link that caused a reply to be generated we'll simply disconnect it.
@@ -453,7 +448,7 @@ void _addReplyToBufferOrList(client *c, const char *s, size_t len) {
      * the SUBSCRIBE command family, which (currently) have a push message instead of a proper reply.
      * The check for executing_client also avoids affecting push messages that are part of eviction.
      * Check CLIENT_PUSHING first to avoid race conditions, as it's absent in module's fake client. */
-    if ((c->flags & CLIENT_PUSHING) && c == server.current_client && server.executing_client &&
+    if (c->flag.pushing && c == server.current_client && server.executing_client &&
         !cmdHasPushAsReply(server.executing_client->cmd)) {
         _addReplyProtoToList(c, server.pending_push_messages, s, len);
         return;
@@ -536,7 +531,7 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) {
      * Calls to RM_Call, in which case the error isn't being returned to a client, so should not be counted.
      * Module thread safe context calls to RM_ReplyWithError, which will be added to a real client by the main thread
      * later. */
-    if (c->flags & CLIENT_MODULE) {
+    if (c->flag.module) {
         if (!c->deferred_reply_errors) {
             c->deferred_reply_errors = listCreate();
             listSetFreeMethod(c->deferred_reply_errors, (void (*)(void *))sdsfree);
@@ -1034,7 +1029,7 @@ void addReplyAttributeLen(client *c, long length) {
 
 void addReplyPushLen(client *c, long length) {
     serverAssert(c->resp >= 3);
-    serverAssertWithInfo(c, NULL, c->flags & CLIENT_PUSHING);
+    serverAssertWithInfo(c, NULL, c->flag.pushing);
     addReplyAggregateLen(c, length, '>');
 }
 
@@ -1213,7 +1208,7 @@ void AddReplyFromClient(client *dst, client *src) {
      * reply. We don't wanna run the risk of copying partial response in case
      * for some reason the output limits don't reach the same decision (maybe
      * they changed) */
-    if (src->flags & CLIENT_CLOSE_ASAP) {
+    if (src->flag.close_asap) {
         sds client = catClientInfoString(sdsempty(), dst);
         freeClientAsync(dst);
         serverLog(LL_WARNING, "Client %s scheduled to be closed ASAP for overcoming of output buffer limits.", client);
@@ -1230,7 +1225,7 @@ void AddReplyFromClient(client *dst, client *src) {
 
     /* We're bypassing _addReplyProtoToList, so we need to add the pre/post
      * checks in it. */
-    if (dst->flags & CLIENT_CLOSE_AFTER_REPLY) return;
+    if (dst->flag.close_after_reply) return;
 
     /* Concatenate the reply list into the dest */
     if (listLength(src->reply)) listJoin(dst->reply, src->reply);
@@ -1340,7 +1335,7 @@ void clientAcceptHandler(connection *conn) {
     moduleFireServerEvent(VALKEYMODULE_EVENT_CLIENT_CHANGE, VALKEYMODULE_SUBEVENT_CLIENT_CHANGE_CONNECTED, c);
 }
 
-void acceptCommonHandler(connection *conn, int flags, char *ip) {
+void acceptCommonHandler(connection *conn, struct ClientFlags flags, char *ip) {
     client *c;
     UNUSED(ip);
 
@@ -1392,7 +1387,7 @@ void acceptCommonHandler(connection *conn, int flags, char *ip) {
     }
 
     /* Last chance to keep flags */
-    c->flags |= flags;
+    if (flags.unix_socket) c->flag.unix_socket = 1;
 
     /* Initiate accept.
      *
@@ -1484,7 +1479,7 @@ void unlinkClient(client *c) {
 
         /* Check if this is a replica waiting for diskless replication (rdb pipe),
          * in which case it needs to be cleaned from that list */
-        if (c->flags & CLIENT_REPLICA && c->repl_state == REPLICA_STATE_WAIT_BGSAVE_END && server.rdb_pipe_conns) {
+        if (c->flag.replica && c->repl_state == REPLICA_STATE_WAIT_BGSAVE_END && server.rdb_pipe_conns) {
             int i;
             for (i = 0; i < server.rdb_pipe_numconns; i++) {
                 if (server.rdb_pipe_conns[i] == c->conn) {
@@ -1501,10 +1496,10 @@ void unlinkClient(client *c) {
     }
 
     /* Remove from the list of pending writes if needed. */
-    if (c->flags & CLIENT_PENDING_WRITE) {
+    if (c->flag.pending_write) {
         serverAssert(&c->clients_pending_write_node.next != NULL || &c->clients_pending_write_node.prev != NULL);
         listUnlinkNode(server.clients_pending_write, &c->clients_pending_write_node);
-        c->flags &= ~CLIENT_PENDING_WRITE;
+        c->flag.pending_write = 0;
     }
 
     /* Remove from the list of pending reads if needed. */
@@ -1517,15 +1512,15 @@ void unlinkClient(client *c) {
 
     /* When client was just unblocked because of a blocking operation,
      * remove it from the list of unblocked clients. */
-    if (c->flags & CLIENT_UNBLOCKED) {
+    if (c->flag.unblocked) {
         ln = listSearchKey(server.unblocked_clients, c);
         serverAssert(ln != NULL);
         listDelNode(server.unblocked_clients, ln);
-        c->flags &= ~CLIENT_UNBLOCKED;
+        c->flag.unblocked = 0;
     }
 
     /* Clear the tracking status. */
-    if (c->flags & CLIENT_TRACKING) disableTracking(c);
+    if (c->flag.tracking) disableTracking(c);
 }
 
 /* Clear the client state to resemble a newly connected client. */
@@ -1535,17 +1530,18 @@ void clearClientConnectionState(client *c) {
     /* MONITOR clients are also marked with CLIENT_REPLICA, we need to
      * distinguish between the two.
      */
-    if (c->flags & CLIENT_MONITOR) {
+    if (c->flag.monitor) {
         ln = listSearchKey(server.monitors, c);
         serverAssert(ln != NULL);
         listDelNode(server.monitors, ln);
 
-        c->flags &= ~(CLIENT_MONITOR | CLIENT_REPLICA);
+        c->flag.monitor = 0;
+        c->flag.replica = 0;
     }
 
-    serverAssert(!(c->flags & (CLIENT_REPLICA | CLIENT_PRIMARY)));
+    serverAssert(!(c->flag.replica || c->flag.primary));
 
-    if (c->flags & CLIENT_TRACKING) disableTracking(c);
+    if (c->flag.tracking) disableTracking(c);
     selectDb(c, 0);
 #ifdef LOG_REQ_RES
     c->resp = server.client_default_resp;
@@ -1571,8 +1567,12 @@ void clearClientConnectionState(client *c) {
      * represent the client library behind the connection. */
 
     /* Selectively clear state flags not covered above */
-    c->flags &= ~(CLIENT_ASKING | CLIENT_READONLY | CLIENT_REPLY_OFF | CLIENT_REPLY_SKIP_NEXT | CLIENT_NO_TOUCH |
-                  CLIENT_NO_EVICT);
+    c->flag.asking = 0;
+    c->flag.readonly = 0;
+    c->flag.reply_off = 0;
+    c->flag.reply_skip_next = 0;
+    c->flag.no_touch = 0;
+    c->flag.no_evict = 0;
 }
 
 void freeClient(client *c) {
@@ -1580,7 +1580,7 @@ void freeClient(client *c) {
 
     /* If a client is protected, yet we need to free it right now, make sure
      * to at least use asynchronous freeing. */
-    if (c->flags & CLIENT_PROTECTED) {
+    if (c->flag.protected) {
         freeClientAsync(c);
         return;
     }
@@ -1600,7 +1600,7 @@ void freeClient(client *c) {
      * from the queue. Note that we need to do this here, because later
      * we may call replicationCachePrimary() and the client should already
      * be removed from the list of clients to free. */
-    if (c->flags & CLIENT_CLOSE_ASAP) {
+    if (c->flag.close_asap) {
         ln = listSearchKey(server.clients_to_close, c);
         serverAssert(ln != NULL);
         listDelNode(server.clients_to_close, ln);
@@ -1611,10 +1611,11 @@ void freeClient(client *c) {
      *
      * Note that before doing this we make sure that the client is not in
      * some unexpected state, by checking its flags. */
-    if (server.primary && c->flags & CLIENT_PRIMARY) {
+    if (server.primary && c->flag.primary) {
         serverLog(LL_NOTICE, "Connection with primary lost.");
-        if (!(c->flags & (CLIENT_PROTOCOL_ERROR | CLIENT_BLOCKED))) {
-            c->flags &= ~(CLIENT_CLOSE_ASAP | CLIENT_CLOSE_AFTER_REPLY);
+        if (!(c->flag.protocol_error || c->flag.blocked)) {
+            c->flag.close_asap = 0;
+            c->flag.close_after_reply = 0;
             replicationCachePrimary(c);
             return;
         }
@@ -1636,7 +1637,7 @@ void freeClient(client *c) {
     /* Deallocate structures used to block on blocking ops. */
     /* If there is any in-flight command, we don't record their duration. */
     c->duration = 0;
-    if (c->flags & CLIENT_BLOCKED) unblockClient(c, 1);
+    if (c->flag.blocked) unblockClient(c, 1);
     dictRelease(c->bstate.keys);
 
     /* UNWATCH all the keys */
@@ -1674,7 +1675,7 @@ void freeClient(client *c) {
 
     /* Primary/replica cleanup Case 1:
      * we lost the connection with a replica. */
-    if (c->flags & CLIENT_REPLICA) {
+    if (c->flag.replica) {
         /* If there is no any other replica waiting dumping RDB finished, the
          * current child process need not continue to dump RDB, then we kill it.
          * So child process won't use more memory, and we also can fork a new
@@ -1691,7 +1692,7 @@ void freeClient(client *c) {
             if (c->repldbfd != -1) close(c->repldbfd);
             if (c->replpreamble) sdsfree(c->replpreamble);
         }
-        list *l = (c->flags & CLIENT_MONITOR) ? server.monitors : server.replicas;
+        list *l = (c->flag.monitor) ? server.monitors : server.replicas;
         ln = listSearchKey(l, c);
         serverAssert(ln != NULL);
         listDelNode(l, ln);
@@ -1709,7 +1710,7 @@ void freeClient(client *c) {
 
     /* Primary/replica cleanup Case 2:
      * we lost the connection with the primary. */
-    if (c->flags & CLIENT_PRIMARY) replicationHandlePrimaryDisconnection();
+    if (c->flag.primary) replicationHandlePrimaryDisconnection();
 
     /* Remove client from memory usage buckets */
     if (c->mem_usage_bucket) {
@@ -1739,8 +1740,8 @@ void freeClientAsync(client *c) {
      * may access the list while the server uses I/O threads. All the other accesses
      * are in the context of the main thread while the other threads are
      * idle. */
-    if (c->flags & CLIENT_CLOSE_ASAP || c->flags & CLIENT_SCRIPT) return;
-    c->flags |= CLIENT_CLOSE_ASAP;
+    if (c->flag.close_asap || c->flag.script) return;
+    c->flag.close_asap = 1;
     if (server.io_threads_num == 1) {
         /* no need to bother with locking if there's just one thread (the main thread) */
         listAddNodeTail(server.clients_to_close, c);
@@ -1788,7 +1789,7 @@ int beforeNextClient(client *c) {
      * cases where we want an async free of a client other than myself. For example
      * in ACL modifications we disconnect clients authenticated to non-existent
      * users (see ACL LOAD). */
-    if (c && (c->flags & CLIENT_CLOSE_ASAP)) {
+    if (c && (c->flag.close_asap)) {
         freeClient(c);
         return C_ERR;
     }
@@ -1806,9 +1807,9 @@ int freeClientsInAsyncFreeQueue(void) {
     while ((ln = listNext(&li)) != NULL) {
         client *c = listNodeValue(ln);
 
-        if (c->flags & CLIENT_PROTECTED) continue;
+        if (c->flag.protected) continue;
 
-        c->flags &= ~CLIENT_CLOSE_ASAP;
+        c->flag.close_asap = 0;
         freeClient(c);
         listDelNode(server.clients_to_close, ln);
         freed++;
@@ -1986,7 +1987,7 @@ int writeToClient(client *c, int handler_installed) {
          * a replica or a monitor (otherwise, on high-speed traffic, the
          * replication/output buffer will grow indefinitely) */
         if (totwritten > NET_MAX_WRITES_PER_EVENT &&
-            (server.maxmemory == 0 || zmalloc_used_memory() < server.maxmemory) && !(c->flags & CLIENT_REPLICA))
+            (server.maxmemory == 0 || zmalloc_used_memory() < server.maxmemory) && !c->flag.replica)
             break;
     }
 
@@ -2009,7 +2010,7 @@ int writeToClient(client *c, int handler_installed) {
          * as an interaction, since we always send REPLCONF ACK commands
          * that take some time to just fill the socket output buffer.
          * We just rely on data / pings received for timeout detection. */
-        if (!(c->flags & CLIENT_PRIMARY)) c->last_interaction = server.unixtime;
+        if (!c->flag.primary) c->last_interaction = server.unixtime;
     }
     if (!clientHasPendingReplies(c)) {
         c->sentlen = 0;
@@ -2023,7 +2024,7 @@ int writeToClient(client *c, int handler_installed) {
         }
 
         /* Close connection after entire reply has been sent. */
-        if (c->flags & CLIENT_CLOSE_AFTER_REPLY) {
+        if (c->flag.close_after_reply) {
             freeClientAsync(c);
             return C_ERR;
         }
@@ -2053,15 +2054,15 @@ int handleClientsWithPendingWrites(void) {
     listRewind(server.clients_pending_write, &li);
     while ((ln = listNext(&li))) {
         client *c = listNodeValue(ln);
-        c->flags &= ~CLIENT_PENDING_WRITE;
+        c->flag.pending_write = 0;
         listUnlinkNode(server.clients_pending_write, ln);
 
         /* If a client is protected, don't do anything,
          * that may trigger write error or recreate handler. */
-        if (c->flags & CLIENT_PROTECTED) continue;
+        if (c->flag.protected) continue;
 
         /* Don't write to clients that are going to be closed anyway. */
-        if (c->flags & CLIENT_CLOSE_ASAP) continue;
+        if (c->flag.close_asap) continue;
 
         /* Try to write buffers to the client socket. */
         if (writeToClient(c, 0) == C_ERR) continue;
@@ -2085,7 +2086,8 @@ void resetClient(client *c) {
     c->multibulklen = 0;
     c->bulklen = -1;
     c->slot = -1;
-    c->flags &= ~(CLIENT_EXECUTING_COMMAND | CLIENT_REPLICATION_DONE);
+    c->flag.executing_command = 0;
+    c->flag.replication_done = 0;
 
     /* Make sure the duration has been recorded to some command. */
     serverAssert(c->duration == 0);
@@ -2098,20 +2100,20 @@ void resetClient(client *c) {
 
     /* We clear the ASKING flag as well if we are not inside a MULTI, and
      * if what we just executed is not the ASKING command itself. */
-    if (!(c->flags & CLIENT_MULTI) && prevcmd != askingCommand) c->flags &= ~CLIENT_ASKING;
+    if (!c->flag.multi && prevcmd != askingCommand) c->flag.asking = 0;
 
     /* We do the same for the CACHING command as well. It also affects
      * the next command or transaction executed, in a way very similar
      * to ASKING. */
-    if (!(c->flags & CLIENT_MULTI) && prevcmd != clientCommand) c->flags &= ~CLIENT_TRACKING_CACHING;
+    if (!c->flag.multi && prevcmd != clientCommand) c->flag.tracking_caching = 0;
 
     /* Remove the CLIENT_REPLY_SKIP flag if any so that the reply
      * to the next command will be sent, but set the flag if the command
      * we just processed was "CLIENT REPLY SKIP". */
-    c->flags &= ~CLIENT_REPLY_SKIP;
-    if (c->flags & CLIENT_REPLY_SKIP_NEXT) {
-        c->flags |= CLIENT_REPLY_SKIP;
-        c->flags &= ~CLIENT_REPLY_SKIP_NEXT;
+    c->flag.reply_skip = 0;
+    if (c->flag.reply_skip_next) {
+        c->flag.reply_skip = 1;
+        c->flag.reply_skip_next = 0;
     }
 }
 
@@ -2171,7 +2173,7 @@ void trimClientQueryBuffer(client *c) {
  * 2) Moreover it makes sure that if the client is freed in a different code
  *    path, it is not really released, but only marked for later release. */
 void protectClient(client *c) {
-    c->flags |= CLIENT_PROTECTED;
+    c->flag.protected = 1;
     if (c->conn) {
         connSetReadHandler(c->conn, NULL);
         connSetWriteHandler(c->conn, NULL);
@@ -2180,8 +2182,8 @@ void protectClient(client *c) {
 
 /* This will undo the client protection done by protectClient() */
 void unprotectClient(client *c) {
-    if (c->flags & CLIENT_PROTECTED) {
-        c->flags &= ~CLIENT_PROTECTED;
+    if (c->flag.protected) {
+        c->flag.protected = 0;
         if (c->conn) {
             connSetReadHandler(c->conn, readQueryFromClient);
             if (clientHasPendingReplies(c)) putClientInPendingWriteQueue(c);
@@ -2240,7 +2242,7 @@ int processInlineBuffer(client *c) {
      *
      * However there is an exception: primaries may send us just a newline
      * to keep the connection active. */
-    if (querylen != 0 && c->flags & CLIENT_PRIMARY) {
+    if (querylen != 0 && c->flag.primary) {
         sdsfreesplitres(argv, argc);
         serverLog(LL_WARNING, "WARNING: Receiving inline protocol from primary, primary stream corruption? Closing the "
                               "primary connection and discarding the cached primary.");
@@ -2274,7 +2276,7 @@ int processInlineBuffer(client *c) {
  * CLIENT_PROTOCOL_ERROR. */
 #define PROTO_DUMP_LEN 128
 static void setProtocolError(const char *errstr, client *c) {
-    if (server.verbosity <= LL_VERBOSE || c->flags & CLIENT_PRIMARY) {
+    if (server.verbosity <= LL_VERBOSE || c->flag.primary) {
         sds client = catClientInfoString(sdsempty(), c);
 
         /* Sample some protocol to given an idea about what was inside. */
@@ -2295,11 +2297,12 @@ static void setProtocolError(const char *errstr, client *c) {
         }
 
         /* Log all the client and protocol info. */
-        int loglevel = (c->flags & CLIENT_PRIMARY) ? LL_WARNING : LL_VERBOSE;
+        int loglevel = (c->flag.primary) ? LL_WARNING : LL_VERBOSE;
         serverLog(loglevel, "Protocol error (%s) from client: %s. %s", errstr, client, buf);
         sdsfree(client);
     }
-    c->flags |= (CLIENT_CLOSE_AFTER_REPLY | CLIENT_PROTOCOL_ERROR);
+    c->flag.close_after_reply = 1;
+    c->flag.protocol_error = 1;
 }
 
 /* Process the query buffer for client 'c', setting up the client argument
@@ -2386,7 +2389,7 @@ int processMultibulkBuffer(client *c) {
             }
 
             ok = string2ll(c->querybuf + c->qb_pos + 1, newline - (c->querybuf + c->qb_pos + 1), &ll);
-            if (!ok || ll < 0 || (!(c->flags & CLIENT_PRIMARY) && ll > server.proto_max_bulk_len)) {
+            if (!ok || ll < 0 || (!c->flag.primary && ll > server.proto_max_bulk_len)) {
                 addReplyError(c, "Protocol error: invalid bulk length");
                 setProtocolError("invalid bulk length", c);
                 return C_ERR;
@@ -2397,7 +2400,7 @@ int processMultibulkBuffer(client *c) {
             }
 
             c->qb_pos = newline - c->querybuf + 2;
-            if (!(c->flags & CLIENT_PRIMARY) && ll >= PROTO_MBULK_BIG_ARG) {
+            if (!c->flag.primary && ll >= PROTO_MBULK_BIG_ARG) {
                 /* When the client is not a primary client (because primary
                  * client's querybuf can only be trimmed after data applied
                  * and sent to replicas).
@@ -2443,7 +2446,7 @@ int processMultibulkBuffer(client *c) {
             /* Optimization: if a non-primary client's buffer contains JUST our bulk element
              * instead of creating a new object by *copying* the sds we
              * just use the current sds string. */
-            if (!(c->flags & CLIENT_PRIMARY) && c->qb_pos == 0 && c->bulklen >= PROTO_MBULK_BIG_ARG &&
+            if (!c->flag.primary && c->qb_pos == 0 && c->bulklen >= PROTO_MBULK_BIG_ARG &&
                 sdslen(c->querybuf) == (size_t)(c->bulklen + 2)) {
                 c->argv[c->argc++] = createObject(OBJ_STRING, c->querybuf);
                 c->argv_len_sum += c->bulklen;
@@ -2482,13 +2485,13 @@ void commandProcessed(client *c) {
      *    The client will be reset in unblockClient().
      * 2. Don't update replication offset or propagate commands to replicas,
      *    since we have not applied the command. */
-    if (c->flags & CLIENT_BLOCKED) return;
+    if (c->flag.blocked) return;
 
     reqresAppendResponse(c);
     resetClient(c);
 
     long long prev_offset = c->reploff;
-    if (c->flags & CLIENT_PRIMARY && !(c->flags & CLIENT_MULTI)) {
+    if (c->flag.primary && !c->flag.multi) {
         /* Update the applied replication offset of our primary. */
         c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos;
     }
@@ -2499,7 +2502,7 @@ void commandProcessed(client *c) {
      * applied to the primary state: this quantity, and its corresponding
      * part of the replication stream, will be propagated to the
      * sub-replicas and to the replication backlog. */
-    if (c->flags & CLIENT_PRIMARY) {
+    if (c->flag.primary) {
         long long applied = c->reploff - prev_offset;
         if (applied) {
             replicationFeedStreamFromPrimaryStream(c->querybuf + c->repl_applied, applied);
@@ -2551,8 +2554,8 @@ int processPendingCommandAndInputBuffer(client *c) {
      * But in case of a module blocked client (see RM_Call 'K' flag) we do not reach this code path.
      * So whenever we change the code here we need to consider if we need this change on module
      * blocked client as well */
-    if (c->flags & CLIENT_PENDING_COMMAND) {
-        c->flags &= ~CLIENT_PENDING_COMMAND;
+    if (c->flag.pending_command) {
+        c->flag.pending_command = 0;
         if (processCommandAndResetClient(c) == C_ERR) {
             return C_ERR;
         }
@@ -2578,24 +2581,24 @@ int processInputBuffer(client *c) {
     /* Keep processing while there is something in the input buffer */
     while (c->querybuf && c->qb_pos < sdslen(c->querybuf)) {
         /* Immediately abort if the client is in the middle of something. */
-        if (c->flags & CLIENT_BLOCKED) break;
+        if (c->flag.blocked) break;
 
         /* Don't process more buffers from clients that have already pending
          * commands to execute in c->argv. */
-        if (c->flags & CLIENT_PENDING_COMMAND) break;
+        if (c->flag.pending_command) break;
 
         /* Don't process input from the primary while there is a busy script
          * condition on the replica. We want just to accumulate the replication
          * stream (instead of replying -BUSY like we do with other clients) and
          * later resume the processing. */
-        if (isInsideYieldingLongCommand() && c->flags & CLIENT_PRIMARY) break;
+        if (isInsideYieldingLongCommand() && c->flag.primary) break;
 
         /* CLIENT_CLOSE_AFTER_REPLY closes the connection once the reply is
          * written to the client. Make sure to not let the reply grow after
          * this flag has been set (i.e. don't process more commands).
          *
          * The same applies for clients we want to terminate ASAP. */
-        if (c->flags & (CLIENT_CLOSE_AFTER_REPLY | CLIENT_CLOSE_ASAP)) break;
+        if (c->flag.close_after_reply || c->flag.close_asap) break;
 
         /* Determine request type when unknown. */
         if (!c->reqtype) {
@@ -2623,7 +2626,7 @@ int processInputBuffer(client *c) {
              * as one that needs to process the command. */
             if (io_threads_op != IO_THREADS_OP_IDLE) {
                 serverAssert(io_threads_op == IO_THREADS_OP_READ);
-                c->flags |= CLIENT_PENDING_COMMAND;
+                c->flag.pending_command = 1;
                 break;
             }
 
@@ -2644,7 +2647,7 @@ int processInputBuffer(client *c) {
         }
     }
 
-    if (c->flags & CLIENT_PRIMARY) {
+    if (c->flag.primary) {
         /* If the client is a primary, trim the querybuf to repl_applied,
          * since primary client is very special, its querybuf not only
          * used to parse command, but also proxy to sub-replicas.
@@ -2705,7 +2708,7 @@ void readQueryFromClient(connection *conn) {
 
         /* Primary client needs expand the readlen when meet BIG_ARG(see #9100),
          * but doesn't need align to the next arg, we can read more data. */
-        if (c->flags & CLIENT_PRIMARY && readlen < PROTO_IOBUF_LEN) readlen = PROTO_IOBUF_LEN;
+        if (c->flag.primary && readlen < PROTO_IOBUF_LEN) readlen = PROTO_IOBUF_LEN;
     }
 
     if (c->querybuf == NULL) {
@@ -2714,7 +2717,7 @@ void readQueryFromClient(connection *conn) {
         qblen = sdslen(c->querybuf);
     }
 
-    if (!(c->flags & CLIENT_PRIMARY) && // primary client's querybuf can grow greedy.
+    if (!c->flag.primary && // primary client's querybuf can grow greedy.
         (big_arg || sdsalloc(c->querybuf) < PROTO_IOBUF_LEN)) {
         /* When reading a BIG_ARG we won't be reading more than that one arg
          * into the query buffer, so we don't need to pre-allocate more than we
@@ -2755,7 +2758,7 @@ void readQueryFromClient(connection *conn) {
     if (c->querybuf_peak < qblen) c->querybuf_peak = qblen;
 
     c->last_interaction = server.unixtime;
-    if (c->flags & CLIENT_PRIMARY) {
+    if (c->flag.primary) {
         c->read_reploff += nread;
         atomic_fetch_add_explicit(&server.stat_net_repl_input_bytes, nread, memory_order_relaxed);
     } else {
@@ -2763,7 +2766,7 @@ void readQueryFromClient(connection *conn) {
     }
     c->net_input_bytes += nread;
 
-    if (!(c->flags & CLIENT_PRIMARY) &&
+    if (!c->flag.primary &&
         /* The commands cached in the MULTI/EXEC queue have not been executed yet,
          * so they are also considered a part of the query buffer in a broader sense.
          *
@@ -2806,7 +2809,7 @@ void readQueryFromClient(connection *conn) {
  * you want to relax error checking or need to display something anyway (see
  * anetFdToString implementation for more info). */
 void genClientAddrString(client *client, char *addr, size_t addr_len, int remote) {
-    if (client->flags & CLIENT_UNIX_SOCKET) {
+    if (client->flag.unix_socket) {
         /* Unix socket client. */
         snprintf(addr, addr_len, "%s:0", server.unixsocket);
     } else {
@@ -2849,27 +2852,29 @@ sds catClientInfoString(sds s, client *client) {
     char flags[17], events[3], conninfo[CONN_INFO_LEN], *p;
 
     p = flags;
-    if (client->flags & CLIENT_REPLICA) {
-        if (client->flags & CLIENT_MONITOR)
+    if (client->flag.replica) {
+        if (client->flag.monitor)
             *p++ = 'O';
         else
             *p++ = 'S';
     }
-    if (client->flags & CLIENT_PRIMARY) *p++ = 'M';
-    if (client->flags & CLIENT_PUBSUB) *p++ = 'P';
-    if (client->flags & CLIENT_MULTI) *p++ = 'x';
-    if (client->flags & CLIENT_BLOCKED) *p++ = 'b';
-    if (client->flags & CLIENT_TRACKING) *p++ = 't';
-    if (client->flags & CLIENT_TRACKING_BROKEN_REDIR) *p++ = 'R';
-    if (client->flags & CLIENT_TRACKING_BCAST) *p++ = 'B';
-    if (client->flags & CLIENT_DIRTY_CAS) *p++ = 'd';
-    if (client->flags & CLIENT_CLOSE_AFTER_REPLY) *p++ = 'c';
-    if (client->flags & CLIENT_UNBLOCKED) *p++ = 'u';
-    if (client->flags & CLIENT_CLOSE_ASAP) *p++ = 'A';
-    if (client->flags & CLIENT_UNIX_SOCKET) *p++ = 'U';
-    if (client->flags & CLIENT_READONLY) *p++ = 'r';
-    if (client->flags & CLIENT_NO_EVICT) *p++ = 'e';
-    if (client->flags & CLIENT_NO_TOUCH) *p++ = 'T';
+
+    /* clang-format off */
+    if (client->flag.primary) *p++ = 'M';
+    if (client->flag.pubsub) *p++ = 'P';
+    if (client->flag.multi) *p++ = 'x';
+    if (client->flag.blocked) *p++ = 'b';
+    if (client->flag.tracking) *p++ = 't';
+    if (client->flag.tracking_broken_redir) *p++ = 'R';
+    if (client->flag.tracking_bcast) *p++ = 'B';
+    if (client->flag.dirty_cas) *p++ = 'd';
+    if (client->flag.close_after_reply) *p++ = 'c';
+    if (client->flag.unblocked) *p++ = 'u';
+    if (client->flag.close_asap) *p++ = 'A';
+    if (client->flag.unix_socket) *p++ = 'U';
+    if (client->flag.readonly) *p++ = 'r';
+    if (client->flag.no_evict) *p++ = 'e';
+    if (client->flag.no_touch) *p++ = 'T';
     if (p == flags) *p++ = 'N';
     *p++ = '\0';
 
@@ -2904,7 +2909,7 @@ sds catClientInfoString(sds s, client *client) {
         " sub=%i", (int) dictSize(client->pubsub_channels),
         " psub=%i", (int) dictSize(client->pubsub_patterns),
         " ssub=%i", (int) dictSize(client->pubsubshard_channels),
-        " multi=%i", (client->flags & CLIENT_MULTI) ? client->mstate.count : -1,
+        " multi=%i", (client->flag.multi) ? client->mstate.count : -1,
         " watch=%i", (int) listLength(client->watched_keys),
         " qbuf=%U", client->querybuf ? (unsigned long long) sdslen(client->querybuf) : 0,
         " qbuf-free=%U", client->querybuf ? (unsigned long long) sdsavail(client->querybuf) : 0,
@@ -2919,7 +2924,7 @@ sds catClientInfoString(sds s, client *client) {
         " events=%s", events,
         " cmd=%s", client->lastcmd ? client->lastcmd->fullname : "NULL",
         " user=%s", client->user ? client->user->name : "(superuser)",
-        " redir=%I", (client->flags & CLIENT_TRACKING) ? (long long) client->client_tracking_redirection : -1,
+        " redir=%I", (client->flag.tracking) ? (long long) client->client_tracking_redirection : -1,
         " resp=%i", client->resp,
         " lib-name=%s", client->lib_name ? (char*)client->lib_name->ptr : "",
         " lib-ver=%s", client->lib_ver ? (char*)client->lib_ver->ptr : "",
@@ -3044,10 +3049,13 @@ void resetCommand(client *c) {
     /* MONITOR clients are also marked with CLIENT_REPLICA, we need to
      * distinguish between the two.
      */
-    uint64_t flags = c->flags;
-    if (flags & CLIENT_MONITOR) flags &= ~(CLIENT_MONITOR | CLIENT_REPLICA);
+    struct ClientFlags flags = c->flag;
+    if (flags.monitor) {
+        flags.monitor = 0;
+        flags.replica = 0;
+    }
 
-    if (flags & (CLIENT_REPLICA | CLIENT_PRIMARY | CLIENT_MODULE)) {
+    if (flags.replica || flags.primary || flags.module) {
         addReplyError(c, "can only reset normal client connections");
         return;
     }
@@ -3059,7 +3067,7 @@ void resetCommand(client *c) {
 /* Disconnect the current client */
 void quitCommand(client *c) {
     addReply(c, shared.ok);
-    c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+    c->flag.close_after_reply = 1;
 }
 
 void clientCommand(client *c) {
@@ -3173,12 +3181,13 @@ NULL
     } else if (!strcasecmp(c->argv[1]->ptr, "reply") && c->argc == 3) {
         /* CLIENT REPLY ON|OFF|SKIP */
         if (!strcasecmp(c->argv[2]->ptr, "on")) {
-            c->flags &= ~(CLIENT_REPLY_SKIP | CLIENT_REPLY_OFF);
+            c->flag.reply_skip = 0;
+            c->flag.reply_off = 0;
             addReply(c, shared.ok);
         } else if (!strcasecmp(c->argv[2]->ptr, "off")) {
-            c->flags |= CLIENT_REPLY_OFF;
+            c->flag.reply_off = 1;
         } else if (!strcasecmp(c->argv[2]->ptr, "skip")) {
-            if (!(c->flags & CLIENT_REPLY_OFF)) c->flags |= CLIENT_REPLY_SKIP_NEXT;
+            if (!c->flag.reply_off) c->flag.reply_skip_next = 1;
         } else {
             addReplyErrorObject(c, shared.syntaxerr);
             return;
@@ -3186,11 +3195,11 @@ NULL
     } else if (!strcasecmp(c->argv[1]->ptr, "no-evict") && c->argc == 3) {
         /* CLIENT NO-EVICT ON|OFF */
         if (!strcasecmp(c->argv[2]->ptr, "on")) {
-            c->flags |= CLIENT_NO_EVICT;
+            c->flag.no_evict = 1;
             removeClientFromMemUsageBucket(c, 0);
             addReply(c, shared.ok);
         } else if (!strcasecmp(c->argv[2]->ptr, "off")) {
-            c->flags &= ~CLIENT_NO_EVICT;
+            c->flag.no_evict = 0;
             updateClientMemUsageAndBucket(c);
             addReply(c, shared.ok);
         } else {
@@ -3308,7 +3317,7 @@ NULL
 
         /* If this client has to be closed, flag it as CLOSE_AFTER_REPLY
          * only after we queued the reply to its output buffers. */
-        if (close_this_client) c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+        if (close_this_client) c->flag.close_after_reply = 1;
     } else if (!strcasecmp(c->argv[1]->ptr, "unblock") && (c->argc == 3 || c->argc == 4)) {
         /* CLIENT UNBLOCK <id> [timeout|error] */
         long long id;
@@ -3330,7 +3339,7 @@ NULL
          * doesn't have a timeout callback (even in the case of UNBLOCK ERROR).
          * The reason is that we assume that if a command doesn't expect to be timedout,
          * it also doesn't expect to be unblocked by CLIENT UNBLOCK */
-        if (target && target->flags & CLIENT_BLOCKED && moduleBlockedClientMayTimeout(target)) {
+        if (target && target->flag.blocked && moduleBlockedClientMayTimeout(target)) {
             if (unblock_error)
                 unblockClientOnError(target, "-UNBLOCKED client unblocked via CLIENT UNBLOCK");
             else
@@ -3373,7 +3382,7 @@ NULL
         /* CLIENT TRACKING (on|off) [REDIRECT <id>] [BCAST] [PREFIX first]
          *                          [PREFIX second] [OPTIN] [OPTOUT] [NOLOOP]... */
         long long redir = 0;
-        uint64_t options = 0;
+        struct ClientFlags options = {0};
         robj **prefix = NULL;
         size_t numprefix = 0;
 
@@ -3404,13 +3413,13 @@ NULL
                     return;
                 }
             } else if (!strcasecmp(c->argv[j]->ptr, "bcast")) {
-                options |= CLIENT_TRACKING_BCAST;
+                options.tracking_bcast = 1;
             } else if (!strcasecmp(c->argv[j]->ptr, "optin")) {
-                options |= CLIENT_TRACKING_OPTIN;
+                options.tracking_optin = 1;
             } else if (!strcasecmp(c->argv[j]->ptr, "optout")) {
-                options |= CLIENT_TRACKING_OPTOUT;
+                options.tracking_optout = 1;
             } else if (!strcasecmp(c->argv[j]->ptr, "noloop")) {
-                options |= CLIENT_TRACKING_NOLOOP;
+                options.tracking_noloop = 1;
             } else if (!strcasecmp(c->argv[j]->ptr, "prefix") && moreargs) {
                 j++;
                 prefix = zrealloc(prefix, sizeof(robj *) * (numprefix + 1));
@@ -3426,15 +3435,15 @@ NULL
         if (!strcasecmp(c->argv[2]->ptr, "on")) {
             /* Before enabling tracking, make sure options are compatible
              * among each other and with the current state of the client. */
-            if (!(options & CLIENT_TRACKING_BCAST) && numprefix) {
+            if (!(options.tracking_bcast) && numprefix) {
                 addReplyError(c, "PREFIX option requires BCAST mode to be enabled");
                 zfree(prefix);
                 return;
             }
 
-            if (c->flags & CLIENT_TRACKING) {
-                int oldbcast = !!(c->flags & CLIENT_TRACKING_BCAST);
-                int newbcast = !!(options & CLIENT_TRACKING_BCAST);
+            if (c->flag.tracking) {
+                int oldbcast = !!c->flag.tracking_bcast;
+                int newbcast = !!(options.tracking_bcast);
                 if (oldbcast != newbcast) {
                     addReplyError(c, "You can't switch BCAST mode on/off before disabling "
                                      "tracking for this client, and then re-enabling it with "
@@ -3444,20 +3453,20 @@ NULL
                 }
             }
 
-            if (options & CLIENT_TRACKING_BCAST && options & (CLIENT_TRACKING_OPTIN | CLIENT_TRACKING_OPTOUT)) {
+            if (options.tracking_bcast && (options.tracking_optin || options.tracking_optout)) {
                 addReplyError(c, "OPTIN and OPTOUT are not compatible with BCAST");
                 zfree(prefix);
                 return;
             }
 
-            if (options & CLIENT_TRACKING_OPTIN && options & CLIENT_TRACKING_OPTOUT) {
+            if (options.tracking_optin && options.tracking_optout) {
                 addReplyError(c, "You can't specify both OPTIN mode and OPTOUT mode");
                 zfree(prefix);
                 return;
             }
 
-            if ((options & CLIENT_TRACKING_OPTIN && c->flags & CLIENT_TRACKING_OPTOUT) ||
-                (options & CLIENT_TRACKING_OPTOUT && c->flags & CLIENT_TRACKING_OPTIN)) {
+            if ((options.tracking_optin && c->flag.tracking_optout) ||
+                (options.tracking_optout && c->flag.tracking_optin)) {
                 addReplyError(c, "You can't switch OPTIN/OPTOUT mode before disabling "
                                  "tracking for this client, and then re-enabling it with "
                                  "a different mode.");
@@ -3465,7 +3474,7 @@ NULL
                 return;
             }
 
-            if (options & CLIENT_TRACKING_BCAST) {
+            if (options.tracking_bcast) {
                 if (!checkPrefixCollisionsOrReply(c, prefix, numprefix)) {
                     zfree(prefix);
                     return;
@@ -3483,7 +3492,7 @@ NULL
         zfree(prefix);
         addReply(c, shared.ok);
     } else if (!strcasecmp(c->argv[1]->ptr, "caching") && c->argc >= 3) {
-        if (!(c->flags & CLIENT_TRACKING)) {
+        if (!c->flag.tracking) {
             addReplyError(c, "CLIENT CACHING can be called only when the "
                              "client is in tracking mode with OPTIN or "
                              "OPTOUT mode enabled");
@@ -3492,15 +3501,15 @@ NULL
 
         char *opt = c->argv[2]->ptr;
         if (!strcasecmp(opt, "yes")) {
-            if (c->flags & CLIENT_TRACKING_OPTIN) {
-                c->flags |= CLIENT_TRACKING_CACHING;
+            if (c->flag.tracking_optin) {
+                c->flag.tracking_caching = 1;
             } else {
                 addReplyError(c, "CLIENT CACHING YES is only valid when tracking is enabled in OPTIN mode.");
                 return;
             }
         } else if (!strcasecmp(opt, "no")) {
-            if (c->flags & CLIENT_TRACKING_OPTOUT) {
-                c->flags |= CLIENT_TRACKING_CACHING;
+            if (c->flag.tracking_optout) {
+                c->flag.tracking_caching = 1;
             } else {
                 addReplyError(c, "CLIENT CACHING NO is only valid when tracking is enabled in OPTOUT mode.");
                 return;
@@ -3514,7 +3523,7 @@ NULL
         addReply(c, shared.ok);
     } else if (!strcasecmp(c->argv[1]->ptr, "getredir") && c->argc == 2) {
         /* CLIENT GETREDIR */
-        if (c->flags & CLIENT_TRACKING) {
+        if (c->flag.tracking) {
             addReplyLongLong(c, c->client_tracking_redirection);
         } else {
             addReplyLongLong(c, -1);
@@ -3526,33 +3535,33 @@ NULL
         addReplyBulkCString(c, "flags");
         void *arraylen_ptr = addReplyDeferredLen(c);
         int numflags = 0;
-        addReplyBulkCString(c, c->flags & CLIENT_TRACKING ? "on" : "off");
+        addReplyBulkCString(c, c->flag.tracking ? "on" : "off");
         numflags++;
-        if (c->flags & CLIENT_TRACKING_BCAST) {
+        if (c->flag.tracking_bcast) {
             addReplyBulkCString(c, "bcast");
             numflags++;
         }
-        if (c->flags & CLIENT_TRACKING_OPTIN) {
+        if (c->flag.tracking_optin) {
             addReplyBulkCString(c, "optin");
             numflags++;
-            if (c->flags & CLIENT_TRACKING_CACHING) {
+            if (c->flag.tracking_caching) {
                 addReplyBulkCString(c, "caching-yes");
                 numflags++;
             }
         }
-        if (c->flags & CLIENT_TRACKING_OPTOUT) {
+        if (c->flag.tracking_optout) {
             addReplyBulkCString(c, "optout");
             numflags++;
-            if (c->flags & CLIENT_TRACKING_CACHING) {
+            if (c->flag.tracking_caching) {
                 addReplyBulkCString(c, "caching-no");
                 numflags++;
             }
         }
-        if (c->flags & CLIENT_TRACKING_NOLOOP) {
+        if (c->flag.tracking_noloop) {
             addReplyBulkCString(c, "noloop");
             numflags++;
         }
-        if (c->flags & CLIENT_TRACKING_BROKEN_REDIR) {
+        if (c->flag.tracking_broken_redir) {
             addReplyBulkCString(c, "broken_redirect");
             numflags++;
         }
@@ -3560,7 +3569,7 @@ NULL
 
         /* Redirect */
         addReplyBulkCString(c, "redirect");
-        if (c->flags & CLIENT_TRACKING) {
+        if (c->flag.tracking) {
             addReplyLongLong(c, c->client_tracking_redirection);
         } else {
             addReplyLongLong(c, -1);
@@ -3583,10 +3592,10 @@ NULL
     } else if (!strcasecmp(c->argv[1]->ptr, "no-touch")) {
         /* CLIENT NO-TOUCH ON|OFF */
         if (!strcasecmp(c->argv[2]->ptr, "on")) {
-            c->flags |= CLIENT_NO_TOUCH;
+            c->flag.no_touch = 1;
             addReply(c, shared.ok);
         } else if (!strcasecmp(c->argv[2]->ptr, "off")) {
-            c->flags &= ~CLIENT_NO_TOUCH;
+            c->flag.no_touch = 0;
             addReply(c, shared.ok);
         } else {
             addReplyErrorObject(c, shared.syntaxerr);
@@ -3661,7 +3670,7 @@ void helloCommand(client *c) {
     }
 
     /* At this point we need to be authenticated to continue. */
-    if (!(c->flags & CLIENT_AUTHENTICATED)) {
+    if (!c->flag.authenticated) {
         addReplyError(c, "-NOAUTH HELLO must be called with the client already "
                          "authenticated, otherwise the HELLO <proto> AUTH <user> <pass> "
                          "option can be used to authenticate the client and "
@@ -3900,11 +3909,11 @@ size_t getClientMemoryUsage(client *c, size_t *output_buffer_mem_usage) {
  * CLIENT_TYPE_PRIMARY -> The client representing our replication primary.
  */
 int getClientType(client *c) {
-    if (c->flags & CLIENT_PRIMARY) return CLIENT_TYPE_PRIMARY;
+    if (c->flag.primary) return CLIENT_TYPE_PRIMARY;
     /* Even though MONITOR clients are marked as replicas, we
      * want the expose them as normal clients. */
-    if ((c->flags & CLIENT_REPLICA) && !(c->flags & CLIENT_MONITOR)) return CLIENT_TYPE_REPLICA;
-    if (c->flags & CLIENT_PUBSUB) return CLIENT_TYPE_PUBSUB;
+    if (c->flag.replica && !c->flag.monitor) return CLIENT_TYPE_REPLICA;
+    if (c->flag.pubsub) return CLIENT_TYPE_PUBSUB;
     return CLIENT_TYPE_NORMAL;
 }
 
@@ -3999,7 +4008,7 @@ int closeClientOnOutputBufferLimitReached(client *c, int async) {
     serverAssert(c->reply_bytes < SIZE_MAX - (1024 * 64));
     /* Note that c->reply_bytes is irrelevant for replica clients
      * (they use the global repl buffers). */
-    if ((c->reply_bytes == 0 && getClientType(c) != CLIENT_TYPE_REPLICA) || c->flags & CLIENT_CLOSE_ASAP) return 0;
+    if ((c->reply_bytes == 0 && getClientType(c) != CLIENT_TYPE_REPLICA) || c->flag.close_asap) return 0;
     if (checkClientOutputBufferLimits(c)) {
         sds client = catClientInfoString(sdsempty(), c);
 
@@ -4029,7 +4038,7 @@ void flushReplicasOutputBuffers(void) {
     listRewind(server.replicas, &li);
     while ((ln = listNext(&li))) {
         client *replica = listNodeValue(ln);
-        int can_receive_writes = connHasWriteHandler(replica->conn) || (replica->flags & CLIENT_PENDING_WRITE);
+        int can_receive_writes = connHasWriteHandler(replica->conn) || (replica->flag.pending_write);
 
         /* We don't want to send the pending data to the replica in a few
          * cases:
@@ -4045,8 +4054,8 @@ void flushReplicasOutputBuffers(void) {
          *
          * 3. Obviously if the replica is not ONLINE.
          */
-        if (replica->repl_state == REPLICA_STATE_ONLINE && !(replica->flags & CLIENT_CLOSE_ASAP) &&
-            can_receive_writes && !replica->repl_start_cmd_stream_on_ack && clientHasPendingReplies(replica)) {
+        if (replica->repl_state == REPLICA_STATE_ONLINE && !(replica->flag.close_asap) && can_receive_writes &&
+            !replica->repl_start_cmd_stream_on_ack && clientHasPendingReplies(replica)) {
             writeToClient(replica, 0);
         }
     }
@@ -4410,11 +4419,11 @@ int handleClientsWithPendingWritesUsingThreads(void) {
     int item_id = 0;
     while ((ln = listNext(&li))) {
         client *c = listNodeValue(ln);
-        c->flags &= ~CLIENT_PENDING_WRITE;
+        c->flag.pending_write = 0;
 
         /* Remove clients from the list of pending writes since
          * they are going to be closed ASAP. */
-        if (c->flags & CLIENT_CLOSE_ASAP) {
+        if (c->flag.close_asap) {
             listUnlinkNode(server.clients_pending_write, ln);
             continue;
         }
@@ -4489,7 +4498,7 @@ int handleClientsWithPendingWritesUsingThreads(void) {
  * pending read clients and flagged as such. */
 int postponeClientRead(client *c) {
     if (server.io_threads_active && server.io_threads_do_reads && !ProcessingEventsWhileBlocked &&
-        !(c->flags & (CLIENT_PRIMARY | CLIENT_REPLICA | CLIENT_BLOCKED)) && io_threads_op == IO_THREADS_OP_IDLE) {
+        !(c->flag.primary || c->flag.replica || c->flag.blocked) && io_threads_op == IO_THREADS_OP_IDLE) {
         listAddNodeHead(server.clients_pending_read, c);
         c->pending_read_list_node = listFirst(server.clients_pending_read);
         return 1;
@@ -4559,7 +4568,7 @@ int handleClientsWithPendingReadsUsingThreads(void) {
         listDelNode(server.clients_pending_read, ln);
         c->pending_read_list_node = NULL;
 
-        serverAssert(!(c->flags & CLIENT_BLOCKED));
+        serverAssert(!c->flag.blocked);
 
         if (beforeNextClient(c) == C_ERR) {
             /* If the client is no longer valid, we avoid
@@ -4581,7 +4590,7 @@ int handleClientsWithPendingReadsUsingThreads(void) {
         /* We may have pending replies if a thread readQueryFromClient() produced
          * replies and did not put the client in pending write queue (it can't).
          */
-        if (!(c->flags & CLIENT_PENDING_WRITE) && clientHasPendingReplies(c)) putClientInPendingWriteQueue(c);
+        if (!c->flag.pending_write && clientHasPendingReplies(c)) putClientInPendingWriteQueue(c);
     }
 
     /* Update processed count on server */
diff --git a/src/object.c b/src/object.c
index 73c3de55dd..ea56b38dd2 100644
--- a/src/object.c
+++ b/src/object.c
@@ -605,7 +605,7 @@ void trimStringObjectIfNeeded(robj *o, int trim_small_values) {
      * 3. When calling from RM_TrimStringAllocation (trim_small_values is true). */
     size_t len = sdslen(o->ptr);
     if (len >= PROTO_MBULK_BIG_ARG || trim_small_values ||
-        (server.executing_client && server.executing_client->flags & CLIENT_SCRIPT && len < LUA_CMD_OBJCACHE_MAX_LEN)) {
+        (server.executing_client && server.executing_client->flag.script && len < LUA_CMD_OBJCACHE_MAX_LEN)) {
         if (sdsavail(o->ptr) > len / 10) {
             o->ptr = sdsRemoveFreeSpace(o->ptr, 0);
         }
diff --git a/src/pubsub.c b/src/pubsub.c
index a7e3e283ed..b79b532bf8 100644
--- a/src/pubsub.c
+++ b/src/pubsub.c
@@ -105,8 +105,8 @@ pubsubtype pubSubShardType = {
  * to send a special message (for instance an Array type) by using the
  * addReply*() API family. */
 void addReplyPubsubMessage(client *c, robj *channel, robj *msg, robj *message_bulk) {
-    uint64_t old_flags = c->flags;
-    c->flags |= CLIENT_PUSHING;
+    struct ClientFlags old_flags = c->flag;
+    c->flag.pushing = 1;
     if (c->resp == 2)
         addReply(c, shared.mbulkhdr[3]);
     else
@@ -114,15 +114,15 @@ void addReplyPubsubMessage(client *c, robj *channel, robj *msg, robj *message_bu
     addReply(c, message_bulk);
     addReplyBulk(c, channel);
     if (msg) addReplyBulk(c, msg);
-    if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+    if (!old_flags.pushing) c->flag.pushing = 0;
 }
 
 /* Send a pubsub message of type "pmessage" to the client. The difference
  * with the "message" type delivered by addReplyPubsubMessage() is that
  * this message format also includes the pattern that matched the message. */
 void addReplyPubsubPatMessage(client *c, robj *pat, robj *channel, robj *msg) {
-    uint64_t old_flags = c->flags;
-    c->flags |= CLIENT_PUSHING;
+    struct ClientFlags old_flags = c->flag;
+    c->flag.pushing = 1;
     if (c->resp == 2)
         addReply(c, shared.mbulkhdr[4]);
     else
@@ -131,13 +131,13 @@ void addReplyPubsubPatMessage(client *c, robj *pat, robj *channel, robj *msg) {
     addReplyBulk(c, pat);
     addReplyBulk(c, channel);
     addReplyBulk(c, msg);
-    if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+    if (!old_flags.pushing) c->flag.pushing = 0;
 }
 
 /* Send the pubsub subscription notification to the client. */
 void addReplyPubsubSubscribed(client *c, robj *channel, pubsubtype type) {
-    uint64_t old_flags = c->flags;
-    c->flags |= CLIENT_PUSHING;
+    struct ClientFlags old_flags = c->flag;
+    c->flag.pushing = 1;
     if (c->resp == 2)
         addReply(c, shared.mbulkhdr[3]);
     else
@@ -145,7 +145,7 @@ void addReplyPubsubSubscribed(client *c, robj *channel, pubsubtype type) {
     addReply(c, *type.subscribeMsg);
     addReplyBulk(c, channel);
     addReplyLongLong(c, type.subscriptionCount(c));
-    if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+    if (!old_flags.pushing) c->flag.pushing = 0;
 }
 
 /* Send the pubsub unsubscription notification to the client.
@@ -153,8 +153,8 @@ void addReplyPubsubSubscribed(client *c, robj *channel, pubsubtype type) {
  * unsubscribe command but there are no channels to unsubscribe from: we
  * still send a notification. */
 void addReplyPubsubUnsubscribed(client *c, robj *channel, pubsubtype type) {
-    uint64_t old_flags = c->flags;
-    c->flags |= CLIENT_PUSHING;
+    struct ClientFlags old_flags = c->flag;
+    c->flag.pushing = 1;
     if (c->resp == 2)
         addReply(c, shared.mbulkhdr[3]);
     else
@@ -165,13 +165,13 @@ void addReplyPubsubUnsubscribed(client *c, robj *channel, pubsubtype type) {
     else
         addReplyNull(c);
     addReplyLongLong(c, type.subscriptionCount(c));
-    if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+    if (!old_flags.pushing) c->flag.pushing = 0;
 }
 
 /* Send the pubsub pattern subscription notification to the client. */
 void addReplyPubsubPatSubscribed(client *c, robj *pattern) {
-    uint64_t old_flags = c->flags;
-    c->flags |= CLIENT_PUSHING;
+    struct ClientFlags old_flags = c->flag;
+    c->flag.pushing = 1;
     if (c->resp == 2)
         addReply(c, shared.mbulkhdr[3]);
     else
@@ -179,7 +179,7 @@ void addReplyPubsubPatSubscribed(client *c, robj *pattern) {
     addReply(c, shared.psubscribebulk);
     addReplyBulk(c, pattern);
     addReplyLongLong(c, clientSubscriptionsCount(c));
-    if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+    if (!old_flags.pushing) c->flag.pushing = 0;
 }
 
 /* Send the pubsub pattern unsubscription notification to the client.
@@ -187,8 +187,8 @@ void addReplyPubsubPatSubscribed(client *c, robj *pattern) {
  * punsubscribe command but there are no pattern to unsubscribe from: we
  * still send a notification. */
 void addReplyPubsubPatUnsubscribed(client *c, robj *pattern) {
-    uint64_t old_flags = c->flags;
-    c->flags |= CLIENT_PUSHING;
+    struct ClientFlags old_flags = c->flag;
+    c->flag.pushing = 1;
     if (c->resp == 2)
         addReply(c, shared.mbulkhdr[3]);
     else
@@ -199,7 +199,7 @@ void addReplyPubsubPatUnsubscribed(client *c, robj *pattern) {
     else
         addReplyNull(c);
     addReplyLongLong(c, clientSubscriptionsCount(c));
-    if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+    if (!old_flags.pushing) c->flag.pushing = 0;
 }
 
 /*-----------------------------------------------------------------------------
@@ -241,15 +241,15 @@ int clientTotalPubSubSubscriptionCount(client *c) {
 }
 
 void markClientAsPubSub(client *c) {
-    if (!(c->flags & CLIENT_PUBSUB)) {
-        c->flags |= CLIENT_PUBSUB;
+    if (!c->flag.pubsub) {
+        c->flag.pubsub = 1;
         server.pubsub_clients++;
     }
 }
 
 void unmarkClientAsPubSub(client *c) {
-    if (c->flags & CLIENT_PUBSUB) {
-        c->flags &= ~CLIENT_PUBSUB;
+    if (c->flag.pubsub) {
+        c->flag.pubsub = 0;
         server.pubsub_clients--;
     }
 }
@@ -539,7 +539,7 @@ int pubsubPublishMessage(robj *channel, robj *message, int sharded) {
 /* SUBSCRIBE channel [channel ...] */
 void subscribeCommand(client *c) {
     int j;
-    if ((c->flags & CLIENT_DENY_BLOCKING) && !(c->flags & CLIENT_MULTI)) {
+    if (c->flag.deny_blocking && !c->flag.multi) {
         /**
          * A client that has CLIENT_DENY_BLOCKING flag on
          * expect a reply per command and so can not execute subscribe.
@@ -571,7 +571,7 @@ void unsubscribeCommand(client *c) {
 /* PSUBSCRIBE pattern [pattern ...] */
 void psubscribeCommand(client *c) {
     int j;
-    if ((c->flags & CLIENT_DENY_BLOCKING) && !(c->flags & CLIENT_MULTI)) {
+    if (c->flag.deny_blocking && !c->flag.multi) {
         /**
          * A client that has CLIENT_DENY_BLOCKING flag on
          * expect a reply per command and so can not execute subscribe.
@@ -709,7 +709,7 @@ void spublishCommand(client *c) {
 
 /* SSUBSCRIBE shardchannel [shardchannel ...] */
 void ssubscribeCommand(client *c) {
-    if (c->flags & CLIENT_DENY_BLOCKING) {
+    if (c->flag.deny_blocking) {
         /* A client that has CLIENT_DENY_BLOCKING flag on
          * expect a reply per command and so can not execute subscribe. */
         addReplyError(c, "SSUBSCRIBE isn't allowed for a DENY BLOCKING client");
diff --git a/src/rdb.c b/src/rdb.c
index 07fc70c16d..53623f84a8 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -1859,7 +1859,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
     if (server.sanitize_dump_payload == SANITIZE_DUMP_CLIENTS) {
         /* Skip sanitization when loading (an RDB), or getting a RESTORE command
          * from either the primary or a client using an ACL user with the skip-sanitize-payload flag. */
-        int skip = server.loading || (server.current_client && (server.current_client->flags & CLIENT_PRIMARY));
+        int skip = server.loading || (server.current_client && (server.current_client->flag.primary));
         if (!skip && server.current_client && server.current_client->user)
             skip = !!(server.current_client->user->flags & USER_FLAG_SANITIZE_PAYLOAD_SKIP);
         deep_integrity_validation = !skip;
diff --git a/src/replication.c b/src/replication.c
index e74f66a67c..6779b4f1b4 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -198,7 +198,7 @@ void resetReplicationBuffer(void) {
 
 int canFeedReplicaReplBuffer(client *replica) {
     /* Don't feed replicas that only want the RDB. */
-    if (replica->flags & CLIENT_REPL_RDBONLY) return 0;
+    if (replica->flag.repl_rdbonly) return 0;
 
     /* Don't feed replicas that are still waiting for BGSAVE to start. */
     if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) return 0;
@@ -568,9 +568,9 @@ void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv,
 
     gettimeofday(&tv, NULL);
     cmdrepr = sdscatprintf(cmdrepr, "%ld.%06ld ", (long)tv.tv_sec, (long)tv.tv_usec);
-    if (c->flags & CLIENT_SCRIPT) {
+    if (c->flag.script) {
         cmdrepr = sdscatprintf(cmdrepr, "[%d lua] ", dictid);
-    } else if (c->flags & CLIENT_UNIX_SOCKET) {
+    } else if (c->flag.unix_socket) {
         cmdrepr = sdscatprintf(cmdrepr, "[%d unix:%s] ", dictid, server.unixsocket);
     } else {
         cmdrepr = sdscatprintf(cmdrepr, "[%d %s] ", dictid, getClientPeerId(c));
@@ -699,7 +699,7 @@ int replicationSetupReplicaForFullResync(client *replica, long long offset) {
 
     /* Don't send this reply to replicas that approached us with
      * the old SYNC command. */
-    if (!(replica->flags & CLIENT_PRE_PSYNC)) {
+    if (!(replica->flag.pre_psync)) {
         buflen = snprintf(buf, sizeof(buf), "+FULLRESYNC %s %lld\r\n", server.replid, offset);
         if (connWrite(replica->conn, buf, buflen) != buflen) {
             freeClientAsync(replica);
@@ -768,7 +768,7 @@ int primaryTryPartialResynchronization(client *c, long long psync_offset) {
      * 1) Set client state to make it a replica.
      * 2) Inform the client we can continue with +CONTINUE
      * 3) Send the backlog data (from the offset to the end) to the replica. */
-    c->flags |= CLIENT_REPLICA;
+    c->flag.replica = 1;
     c->repl_state = REPLICA_STATE_ONLINE;
     c->repl_ack_time = server.unixtime;
     c->repl_start_cmd_stream_on_ack = 0;
@@ -877,10 +877,10 @@ int startBgsaveForReplication(int mincapa, int req) {
 
             if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) {
                 replica->repl_state = REPL_STATE_NONE;
-                replica->flags &= ~CLIENT_REPLICA;
+                replica->flag.replica = 0;
                 listDelNode(server.replicas, ln);
                 addReplyError(replica, "BGSAVE failed, replication can't continue");
-                replica->flags |= CLIENT_CLOSE_AFTER_REPLY;
+                replica->flag.close_after_reply = 1;
             }
         }
         return retval;
@@ -907,7 +907,7 @@ int startBgsaveForReplication(int mincapa, int req) {
 /* SYNC and PSYNC command implementation. */
 void syncCommand(client *c) {
     /* ignore SYNC if already replica or in monitor mode */
-    if (c->flags & CLIENT_REPLICA) return;
+    if (c->flag.replica) return;
 
     /* Check if this is a failover request to a replica with the same replid and
      * become a primary if so. */
@@ -998,7 +998,7 @@ void syncCommand(client *c) {
         /* If a replica uses SYNC, we are dealing with an old implementation
          * of the replication protocol (like valkey-cli --replica). Flag the client
          * so that we don't expect to receive REPLCONF ACK feedbacks. */
-        c->flags |= CLIENT_PRE_PSYNC;
+        c->flag.pre_psync = 1;
     }
 
     /* Full resynchronization. */
@@ -1009,7 +1009,7 @@ void syncCommand(client *c) {
     c->repl_state = REPLICA_STATE_WAIT_BGSAVE_START;
     if (server.repl_disable_tcp_nodelay) connDisableTcpNoDelay(c->conn); /* Non critical if it fails. */
     c->repldbfd = -1;
-    c->flags |= CLIENT_REPLICA;
+    c->flag.replica = 1;
     listAddNodeTail(server.replicas, c);
 
     /* Create the replication backlog if needed. */
@@ -1041,7 +1041,7 @@ void syncCommand(client *c) {
             /* If the client needs a buffer of commands, we can't use
              * a replica without replication buffer. */
             if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_END &&
-                (!(replica->flags & CLIENT_REPL_RDBONLY) || (c->flags & CLIENT_REPL_RDBONLY)))
+                (!(replica->flag.repl_rdbonly) || (c->flag.repl_rdbonly)))
                 break;
         }
         /* To attach this replica, we check that it has at least all the
@@ -1052,7 +1052,7 @@ void syncCommand(client *c) {
             /* Perfect, the server is already registering differences for
              * another replica. Set the right state, and copy the buffer.
              * We don't copy buffer if clients don't want. */
-            if (!(c->flags & CLIENT_REPL_RDBONLY)) copyReplicaOutputBuffer(c, replica);
+            if (!c->flag.repl_rdbonly) copyReplicaOutputBuffer(c, replica);
             replicationSetupReplicaForFullResync(c, replica->psync_initial_offset);
             serverLog(LL_NOTICE, "Waiting for end of BGSAVE for SYNC");
         } else {
@@ -1168,7 +1168,7 @@ void replconfCommand(client *c) {
              * internal only command that normal clients should never use. */
             long long offset;
 
-            if (!(c->flags & CLIENT_REPLICA)) return;
+            if (!c->flag.replica) return;
             if ((getLongLongFromObject(c->argv[j + 1], &offset) != C_OK)) return;
             if (offset > c->repl_ack_off) c->repl_ack_off = offset;
             if (c->argc > j + 3 && !strcasecmp(c->argv[j + 2]->ptr, "fack")) {
@@ -1200,9 +1200,9 @@ void replconfCommand(client *c) {
             long rdb_only = 0;
             if (getRangeLongFromObjectOrReply(c, c->argv[j + 1], 0, 1, &rdb_only, NULL) != C_OK) return;
             if (rdb_only == 1)
-                c->flags |= CLIENT_REPL_RDBONLY;
+                c->flag.repl_rdbonly = 1;
             else
-                c->flags &= ~CLIENT_REPL_RDBONLY;
+                c->flag.repl_rdbonly = 0;
         } else if (!strcasecmp(c->argv[j]->ptr, "rdb-filter-only")) {
             /* REPLCONFG RDB-FILTER-ONLY is used to define "include" filters
              * for the RDB snapshot. Currently we only support a single
@@ -1258,7 +1258,7 @@ void replconfCommand(client *c) {
  * the return value indicates that the replica should be disconnected.
  * */
 int replicaPutOnline(client *replica) {
-    if (replica->flags & CLIENT_REPL_RDBONLY) {
+    if (replica->flag.repl_rdbonly) {
         replica->repl_state = REPLICA_STATE_RDB_TRANSMITTED;
         /* The client asked for RDB only so we should close it ASAP */
         serverLog(LL_NOTICE, "RDB transfer completed, rdb only replica (%s) should be disconnected asap",
@@ -1288,7 +1288,7 @@ int replicaPutOnline(client *replica) {
  *    accumulate output buffer data without sending it to the replica so it
  *    won't get mixed with the RDB stream. */
 void replicaStartCommandStream(client *replica) {
-    serverAssert(!(replica->flags & CLIENT_REPL_RDBONLY));
+    serverAssert(!(replica->flag.repl_rdbonly));
     replica->repl_start_cmd_stream_on_ack = 0;
 
     putClientInPendingWriteQueue(replica);
@@ -1721,7 +1721,8 @@ void replicationCreatePrimaryClient(connection *conn, int dbid) {
      * to pass the execution to a background thread and unblock after the
      * execution is done. This is the reason why we allow blocking the replication
      * connection. */
-    server.primary->flags |= (CLIENT_PRIMARY | CLIENT_AUTHENTICATED);
+    server.primary->flag.primary = 1;
+    server.primary->flag.authenticated = 1;
 
     /* Allocate a private query buffer for the primary client instead of using the shared query buffer.
      * This is done because the primary's query buffer data needs to be preserved for my sub-replicas to use. */
@@ -1732,7 +1733,7 @@ void replicationCreatePrimaryClient(connection *conn, int dbid) {
     memcpy(server.primary->replid, server.primary_replid, sizeof(server.primary_replid));
     /* If primary offset is set to -1, this primary is old and is not
      * PSYNC capable, so we flag it accordingly. */
-    if (server.primary->reploff == -1) server.primary->flags |= CLIENT_PRE_PSYNC;
+    if (server.primary->reploff == -1) server.primary->flag.pre_psync = 1;
     if (dbid != -1) selectDb(server.primary, dbid);
 }
 
@@ -3073,7 +3074,7 @@ void replicaofCommand(client *c) {
     } else {
         long port;
 
-        if (c->flags & CLIENT_REPLICA) {
+        if (c->flag.replica) {
             /* If a client is already a replica they cannot run this command,
              * because it involves flushing all replicas (including this
              * client) */
@@ -3171,7 +3172,7 @@ void replicationSendAck(void) {
 
     if (c != NULL) {
         int send_fack = server.fsynced_reploff != -1;
-        c->flags |= CLIENT_PRIMARY_FORCE_REPLY;
+        c->flag.primary_force_reply = 1;
         addReplyArrayLen(c, send_fack ? 5 : 3);
         addReplyBulkCString(c, "REPLCONF");
         addReplyBulkCString(c, "ACK");
@@ -3180,7 +3181,7 @@ void replicationSendAck(void) {
             addReplyBulkCString(c, "FACK");
             addReplyBulkLongLong(c, server.fsynced_reploff);
         }
-        c->flags &= ~CLIENT_PRIMARY_FORCE_REPLY;
+        c->flag.primary_force_reply = 0;
     }
 }
 
@@ -3219,7 +3220,7 @@ void replicationCachePrimary(client *c) {
     server.primary->qb_pos = 0;
     server.primary->repl_applied = 0;
     server.primary->read_reploff = server.primary->reploff;
-    if (c->flags & CLIENT_MULTI) discardTransaction(c);
+    if (c->flag.multi) discardTransaction(c);
     listEmpty(c->reply);
     c->sentlen = 0;
     c->reply_bytes = 0;
@@ -3286,7 +3287,7 @@ void replicationDiscardCachedPrimary(void) {
     if (server.cached_primary == NULL) return;
 
     serverLog(LL_NOTICE, "Discarding previously cached primary state.");
-    server.cached_primary->flags &= ~CLIENT_PRIMARY;
+    server.cached_primary->flag.primary = 0;
     freeClient(server.cached_primary);
     server.cached_primary = NULL;
 }
@@ -3302,8 +3303,9 @@ void replicationResurrectCachedPrimary(connection *conn) {
     server.cached_primary = NULL;
     server.primary->conn = conn;
     connSetPrivateData(server.primary->conn, server.primary);
-    server.primary->flags &= ~(CLIENT_CLOSE_AFTER_REPLY | CLIENT_CLOSE_ASAP);
-    server.primary->flags |= CLIENT_AUTHENTICATED;
+    server.primary->flag.close_after_reply = 0;
+    server.primary->flag.close_asap = 0;
+    server.primary->flag.authenticated = 1;
     server.primary->last_interaction = server.unixtime;
     server.repl_state = REPL_STATE_CONNECTED;
     server.repl_down_since = 0;
@@ -3448,7 +3450,7 @@ void waitCommand(client *c) {
 
     /* First try without blocking at all. */
     ackreplicas = replicationCountAcksByOffset(c->woff);
-    if (ackreplicas >= numreplicas || c->flags & CLIENT_DENY_BLOCKING) {
+    if (ackreplicas >= numreplicas || c->flag.deny_blocking) {
         addReplyLongLong(c, ackreplicas);
         return;
     }
@@ -3486,7 +3488,7 @@ void waitaofCommand(client *c) {
     /* First try without blocking at all. */
     ackreplicas = replicationCountAOFAcksByOffset(c->woff);
     acklocal = server.fsynced_reploff >= c->woff;
-    if ((ackreplicas >= numreplicas && acklocal >= numlocal) || c->flags & CLIENT_DENY_BLOCKING) {
+    if ((ackreplicas >= numreplicas && acklocal >= numlocal) || c->flag.deny_blocking) {
         addReplyArrayLen(c, 2);
         addReplyLongLong(c, acklocal);
         addReplyLongLong(c, ackreplicas);
@@ -3577,8 +3579,8 @@ void processClientsWaitingReplicas(void) {
             addReplyArrayLen(c, 2);
             addReplyLongLong(c, numlocal);
             addReplyLongLong(c, numreplicas);
-        } else if (c->flags & CLIENT_PENDING_COMMAND) {
-            c->flags |= CLIENT_REPLICATION_DONE;
+        } else if (c->flag.pending_command) {
+            c->flag.replication_done = 1;
         } else {
             addReplyLongLong(c, numreplicas);
         }
@@ -3648,7 +3650,7 @@ void replicationCron(void) {
     /* Send ACK to primary from time to time.
      * Note that we do not send periodic acks to primary that don't
      * support PSYNC and replication offsets. */
-    if (server.primary_host && server.primary && !(server.primary->flags & CLIENT_PRE_PSYNC)) replicationSendAck();
+    if (server.primary_host && server.primary && !(server.primary->flag.pre_psync)) replicationSendAck();
 
     /* If we have attached replicas, PING them from time to time.
      * So replicas can implement an explicit timeout to primaries, and will
@@ -3711,7 +3713,7 @@ void replicationCron(void) {
             client *replica = ln->value;
 
             if (replica->repl_state == REPLICA_STATE_ONLINE) {
-                if (replica->flags & CLIENT_PRE_PSYNC) continue;
+                if (replica->flag.pre_psync) continue;
                 if ((server.unixtime - replica->repl_ack_time) > server.repl_timeout) {
                     serverLog(LL_WARNING, "Disconnecting timedout replica (streaming sync): %s",
                               replicationGetReplicaName(replica));
diff --git a/src/script.c b/src/script.c
index 29397f81bd..dcf90d8688 100644
--- a/src/script.c
+++ b/src/script.c
@@ -132,7 +132,7 @@ int scriptPrepareForRun(scriptRunCtx *run_ctx,
                         uint64_t script_flags,
                         int ro) {
     serverAssert(!curr_run_ctx);
-    int client_allow_oom = !!(caller->flags & CLIENT_ALLOW_OOM);
+    int client_allow_oom = !!(caller->flag.allow_oom);
 
     int running_stale =
         server.primary_host && server.repl_state != REPL_STATE_CONNECTED && server.repl_serve_stale_data == 0;
@@ -224,8 +224,8 @@ int scriptPrepareForRun(scriptRunCtx *run_ctx,
     script_client->resp = 2; /* Default is RESP2, scripts can change it. */
 
     /* If we are in MULTI context, flag Lua client as CLIENT_MULTI. */
-    if (curr_client->flags & CLIENT_MULTI) {
-        script_client->flags |= CLIENT_MULTI;
+    if (curr_client->flag.multi) {
+        script_client->flag.multi = 1;
     }
 
     run_ctx->start_time = getMonotonicUs();
@@ -260,7 +260,7 @@ void scriptResetRun(scriptRunCtx *run_ctx) {
     serverAssert(curr_run_ctx);
 
     /* After the script done, remove the MULTI state. */
-    run_ctx->c->flags &= ~CLIENT_MULTI;
+    run_ctx->c->flag.multi = 0;
 
     if (scriptIsTimedout()) {
         exitScriptTimedoutMode(run_ctx);
@@ -426,8 +426,8 @@ static int scriptVerifyClusterState(scriptRunCtx *run_ctx, client *c, client *or
      * received from our primary or when loading the AOF back in memory. */
     int error_code;
     /* Duplicate relevant flags in the script client. */
-    c->flags &= ~(CLIENT_READONLY | CLIENT_ASKING);
-    c->flags |= original_c->flags & (CLIENT_READONLY | CLIENT_ASKING);
+    c->flag.readonly = original_c->flag.readonly;
+    c->flag.asking = original_c->flag.asking;
     int hashslot = -1;
     if (getNodeByQuery(c, c->cmd, c->argv, c->argc, &hashslot, &error_code) != getMyClusterNode()) {
         if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) {
@@ -582,7 +582,7 @@ void scriptCall(scriptRunCtx *run_ctx, sds *err) {
         call_flags |= CMD_CALL_PROPAGATE_REPL;
     }
     call(c, call_flags);
-    serverAssert((c->flags & CLIENT_BLOCKED) == 0);
+    serverAssert(c->flag.blocked == 0);
     return;
 
 error:
diff --git a/src/server.c b/src/server.c
index ee1bcd088f..228307e3cc 100644
--- a/src/server.c
+++ b/src/server.c
@@ -707,7 +707,7 @@ int clientsCronResizeQueryBuffer(client *c) {
         if (idletime > 2) {
             /* 1) Query is idle for a long time. */
             size_t remaining = sdslen(c->querybuf) - c->qb_pos;
-            if (!(c->flags & CLIENT_PRIMARY) && !remaining) {
+            if (!c->flag.primary && !remaining) {
                 /* If the client is not a primary and no data is pending,
                  * The client can safely use the shared query buffer in the next read - free the client's querybuf. */
                 sdsfree(c->querybuf);
@@ -858,7 +858,7 @@ void updateClientMemoryUsage(client *c) {
 }
 
 int clientEvictionAllowed(client *c) {
-    if (server.maxmemory_clients == 0 || c->flags & CLIENT_NO_EVICT || !c->conn) {
+    if (server.maxmemory_clients == 0 || c->flag.no_evict || !c->conn) {
         return 0;
     }
     int type = getClientType(c);
@@ -3144,7 +3144,7 @@ struct serverCommand *lookupCommandOrOriginal(robj **argv, int argc) {
 
 /* Commands arriving from the primary client or AOF client, should never be rejected. */
 int mustObeyClient(client *c) {
-    return c->id == CLIENT_ID_AOF || c->flags & CLIENT_PRIMARY;
+    return c->id == CLIENT_ID_AOF || c->flag.primary;
 }
 
 static int shouldPropagate(int target) {
@@ -3216,25 +3216,25 @@ void alsoPropagate(int dbid, robj **argv, int argc, int target) {
  * specific command execution into AOF / Replication. */
 void forceCommandPropagation(client *c, int flags) {
     serverAssert(c->cmd->flags & (CMD_WRITE | CMD_MAY_REPLICATE));
-    if (flags & PROPAGATE_REPL) c->flags |= CLIENT_FORCE_REPL;
-    if (flags & PROPAGATE_AOF) c->flags |= CLIENT_FORCE_AOF;
+    if (flags & PROPAGATE_REPL) c->flag.force_repl = 1;
+    if (flags & PROPAGATE_AOF) c->flag.force_aof = 1;
 }
 
 /* Avoid that the executed command is propagated at all. This way we
  * are free to just propagate what we want using the alsoPropagate()
  * API. */
 void preventCommandPropagation(client *c) {
-    c->flags |= CLIENT_PREVENT_PROP;
+    c->flag.prevent_prop = 1;
 }
 
 /* AOF specific version of preventCommandPropagation(). */
 void preventCommandAOF(client *c) {
-    c->flags |= CLIENT_PREVENT_AOF_PROP;
+    c->flag.prevent_aof_prop = 1;
 }
 
 /* Replication specific version of preventCommandPropagation(). */
 void preventCommandReplication(client *c) {
-    c->flags |= CLIENT_PREVENT_REPL_PROP;
+    c->flag.prevent_repl_prop = 1;
 }
 
 /* Log the last command a client executed into the slowlog. */
@@ -3395,7 +3395,8 @@ int incrCommandStatsOnError(struct serverCommand *cmd, int flags) {
  */
 void call(client *c, int flags) {
     long long dirty;
-    uint64_t client_old_flags = c->flags;
+    struct ClientFlags client_old_flags = c->flag;
+
     struct serverCommand *real_cmd = c->realcmd;
     client *prev_client = server.executing_client;
     server.executing_client = c;
@@ -3412,7 +3413,9 @@ void call(client *c, int flags) {
 
     /* Initialization: clear the flags that must be set by the command on
      * demand, and initialize the array for additional commands propagation. */
-    c->flags &= ~(CLIENT_FORCE_AOF | CLIENT_FORCE_REPL | CLIENT_PREVENT_PROP);
+    c->flag.force_aof = 0;
+    c->flag.force_repl = 0;
+    c->flag.prevent_prop = 0;
 
     /* The server core is in charge of propagation when the first entry point
      * of call() is processCommand().
@@ -3433,12 +3436,12 @@ void call(client *c, int flags) {
      * sending client side caching message in the middle of a command reply.
      * In case of blocking commands, the flag will be un-set only after successfully
      * re-processing and unblock the client.*/
-    c->flags |= CLIENT_EXECUTING_COMMAND;
+    c->flag.executing_command = 1;
 
     /* Setting the CLIENT_REPROCESSING_COMMAND flag so that during the actual
      * processing of the command proc, the client is aware that it is being
      * re-processed. */
-    if (reprocessing_command) c->flags |= CLIENT_REPROCESSING_COMMAND;
+    if (reprocessing_command) c->flag.reprocessing_command = 1;
 
     monotime monotonic_start = 0;
     if (monotonicGetType() == MONOTONIC_CLOCK_HW) monotonic_start = getMonotonicUs();
@@ -3446,13 +3449,13 @@ void call(client *c, int flags) {
     c->cmd->proc(c);
 
     /* Clear the CLIENT_REPROCESSING_COMMAND flag after the proc is executed. */
-    if (reprocessing_command) c->flags &= ~CLIENT_REPROCESSING_COMMAND;
+    if (reprocessing_command) c->flag.reprocessing_command = 0;
 
     exitExecutionUnit();
 
     /* In case client is blocked after trying to execute the command,
      * it means the execution is not yet completed and we MIGHT reprocess the command in the future. */
-    if (!(c->flags & CLIENT_BLOCKED)) c->flags &= ~(CLIENT_EXECUTING_COMMAND);
+    if (!c->flag.blocked) c->flag.executing_command = 0;
 
     /* In order to avoid performance implication due to querying the clock using a system call 3 times,
      * we use a monotonic clock, when we are sure its cost is very low, and fall back to non-monotonic call otherwise. */
@@ -3478,9 +3481,9 @@ void call(client *c, int flags) {
 
     /* After executing command, we will close the client after writing entire
      * reply if it is set 'CLIENT_CLOSE_AFTER_COMMAND' flag. */
-    if (c->flags & CLIENT_CLOSE_AFTER_COMMAND) {
-        c->flags &= ~CLIENT_CLOSE_AFTER_COMMAND;
-        c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+    if (c->flag.close_after_command) {
+        c->flag.close_after_command = 0;
+        c->flag.close_after_reply = 1;
     }
 
     /* Note: the code below uses the real command that was executed
@@ -3498,7 +3501,7 @@ void call(client *c, int flags) {
 
     /* Log the command into the Slow log if needed.
      * If the client is blocked we will handle slowlog when it is unblocked. */
-    if (update_command_stats && !(c->flags & CLIENT_BLOCKED)) slowlogPushCurrentCommand(c, real_cmd, c->duration);
+    if (update_command_stats && !c->flag.blocked) slowlogPushCurrentCommand(c, real_cmd, c->duration);
 
     /* Send the command to clients in MONITOR mode if applicable,
      * since some administrative commands are considered too dangerous to be shown.
@@ -3512,20 +3515,20 @@ void call(client *c, int flags) {
 
     /* Clear the original argv.
      * If the client is blocked we will handle slowlog when it is unblocked. */
-    if (!(c->flags & CLIENT_BLOCKED)) freeClientOriginalArgv(c);
+    if (!c->flag.blocked) freeClientOriginalArgv(c);
 
     /* populate the per-command statistics that we show in INFO commandstats.
      * If the client is blocked we will handle latency stats and duration when it is unblocked. */
-    if (update_command_stats && !(c->flags & CLIENT_BLOCKED)) {
+    if (update_command_stats && !c->flag.blocked) {
         real_cmd->calls++;
         real_cmd->microseconds += c->duration;
-        if (server.latency_tracking_enabled && !(c->flags & CLIENT_BLOCKED))
+        if (server.latency_tracking_enabled && !c->flag.blocked)
             updateCommandLatencyHistogram(&(real_cmd->latency_histogram), c->duration * 1000);
     }
 
     /* The duration needs to be reset after each call except for a blocked command,
      * which is expected to record and reset the duration after unblocking. */
-    if (!(c->flags & CLIENT_BLOCKED)) {
+    if (!c->flag.blocked) {
         c->duration = 0;
     }
 
@@ -3533,8 +3536,8 @@ void call(client *c, int flags) {
      * We never propagate EXEC explicitly, it will be implicitly
      * propagated if needed (see propagatePendingCommands).
      * Also, module commands take care of themselves */
-    if (flags & CMD_CALL_PROPAGATE && (c->flags & CLIENT_PREVENT_PROP) != CLIENT_PREVENT_PROP &&
-        c->cmd->proc != execCommand && !(c->cmd->flags & CMD_MODULE)) {
+    if (flags & CMD_CALL_PROPAGATE && !c->flag.prevent_prop && c->cmd->proc != execCommand &&
+        !(c->cmd->flags & CMD_MODULE)) {
         int propagate_flags = PROPAGATE_NONE;
 
         /* Check if the command operated changes in the data set. If so
@@ -3543,17 +3546,15 @@ void call(client *c, int flags) {
 
         /* If the client forced AOF / replication of the command, set
          * the flags regardless of the command effects on the data set. */
-        if (c->flags & CLIENT_FORCE_REPL) propagate_flags |= PROPAGATE_REPL;
-        if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF;
+        if (c->flag.force_repl) propagate_flags |= PROPAGATE_REPL;
+        if (c->flag.force_aof) propagate_flags |= PROPAGATE_AOF;
 
         /* However prevent AOF / replication propagation if the command
          * implementation called preventCommandPropagation() or similar,
          * or if we don't have the call() flags to do so. */
-        if (c->flags & CLIENT_PREVENT_REPL_PROP || c->flags & CLIENT_MODULE_PREVENT_REPL_PROP ||
-            !(flags & CMD_CALL_PROPAGATE_REPL))
+        if (c->flag.prevent_repl_prop || c->flag.module_prevent_repl_prop || !(flags & CMD_CALL_PROPAGATE_REPL))
             propagate_flags &= ~PROPAGATE_REPL;
-        if (c->flags & CLIENT_PREVENT_AOF_PROP || c->flags & CLIENT_MODULE_PREVENT_AOF_PROP ||
-            !(flags & CMD_CALL_PROPAGATE_AOF))
+        if (c->flag.prevent_aof_prop || c->flag.module_prevent_aof_prop || !(flags & CMD_CALL_PROPAGATE_AOF))
             propagate_flags &= ~PROPAGATE_AOF;
 
         /* Call alsoPropagate() only if at least one of AOF / replication
@@ -3563,8 +3564,9 @@ void call(client *c, int flags) {
 
     /* Restore the old replication flags, since call() can be executed
      * recursively. */
-    c->flags &= ~(CLIENT_FORCE_AOF | CLIENT_FORCE_REPL | CLIENT_PREVENT_PROP);
-    c->flags |= client_old_flags & (CLIENT_FORCE_AOF | CLIENT_FORCE_REPL | CLIENT_PREVENT_PROP);
+    c->flag.force_aof = client_old_flags.force_aof;
+    c->flag.force_repl = client_old_flags.force_repl;
+    c->flag.prevent_prop = client_old_flags.prevent_prop;
 
     /* If the client has keys tracking enabled for client side caching,
      * make sure to remember the keys it fetched via this command. For read-only
@@ -3574,13 +3576,13 @@ void call(client *c, int flags) {
         /* We use the tracking flag of the original external client that
          * triggered the command, but we take the keys from the actual command
          * being executed. */
-        if (server.current_client && (server.current_client->flags & CLIENT_TRACKING) &&
-            !(server.current_client->flags & CLIENT_TRACKING_BCAST)) {
+        if (server.current_client && (server.current_client->flag.tracking) &&
+            !(server.current_client->flag.tracking_bcast)) {
             trackingRememberKeys(server.current_client, c);
         }
     }
 
-    if (!(c->flags & CLIENT_BLOCKED)) {
+    if (!c->flag.blocked) {
         /* Modules may call commands in cron, in which case server.current_client
          * is not set. */
         if (server.current_client) {
@@ -3828,7 +3830,7 @@ int processCommand(client *c) {
         }
     }
 
-    if (c->flags & CLIENT_MULTI && c->cmd->flags & CMD_NO_MULTI) {
+    if (c->flag.multi && c->cmd->flags & CMD_NO_MULTI) {
         rejectCommandFormat(c, "Command not allowed inside a transaction");
         return C_OK;
     }
@@ -3838,8 +3840,8 @@ int processCommand(client *c) {
     int acl_errpos;
     int acl_retval = ACLCheckAllPerm(c, &acl_errpos);
     if (acl_retval != ACL_OK) {
-        addACLLogEntry(c, acl_retval, (c->flags & CLIENT_MULTI) ? ACL_LOG_CTX_MULTI : ACL_LOG_CTX_TOPLEVEL, acl_errpos,
-                       NULL, NULL);
+        addACLLogEntry(c, acl_retval, (c->flag.multi) ? ACL_LOG_CTX_MULTI : ACL_LOG_CTX_TOPLEVEL, acl_errpos, NULL,
+                       NULL);
         sds msg = getAclErrorMessage(acl_retval, c->user, c->cmd, c->argv[acl_errpos]->ptr, 0);
         rejectCommandFormat(c, "-NOPERM %s", msg);
         sdsfree(msg);
@@ -3868,7 +3870,7 @@ int processCommand(client *c) {
     }
 
     if (!server.cluster_enabled && c->capa & CLIENT_CAPA_REDIRECT && server.primary_host && !mustObeyClient(c) &&
-        (is_write_command || (is_read_command && !(c->flags & CLIENT_READONLY)))) {
+        (is_write_command || (is_read_command && !c->flag.readonly))) {
         addReplyErrorSds(c, sdscatprintf(sdsempty(), "-REDIRECT %s:%d", server.primary_host, server.primary_port));
         return C_OK;
     }
@@ -3960,7 +3962,7 @@ int processCommand(client *c) {
 
     /* Only allow a subset of commands in the context of Pub/Sub if the
      * connection is in RESP2 mode. With RESP3 there are no limits. */
-    if ((c->flags & CLIENT_PUBSUB && c->resp == 2) && c->cmd->proc != pingCommand && c->cmd->proc != subscribeCommand &&
+    if ((c->flag.pubsub && c->resp == 2) && c->cmd->proc != pingCommand && c->cmd->proc != subscribeCommand &&
         c->cmd->proc != ssubscribeCommand && c->cmd->proc != unsubscribeCommand &&
         c->cmd->proc != sunsubscribeCommand && c->cmd->proc != psubscribeCommand &&
         c->cmd->proc != punsubscribeCommand && c->cmd->proc != quitCommand && c->cmd->proc != resetCommand) {
@@ -4016,21 +4018,21 @@ int processCommand(client *c) {
     /* Prevent a replica from sending commands that access the keyspace.
      * The main objective here is to prevent abuse of client pause check
      * from which replicas are exempt. */
-    if ((c->flags & CLIENT_REPLICA) && (is_may_replicate_command || is_write_command || is_read_command)) {
+    if (c->flag.replica && (is_may_replicate_command || is_write_command || is_read_command)) {
         rejectCommandFormat(c, "Replica can't interact with the keyspace");
         return C_OK;
     }
 
     /* If the server is paused, block the client until
      * the pause has ended. Replicas are never paused. */
-    if (!(c->flags & CLIENT_REPLICA) && ((isPausedActions(PAUSE_ACTION_CLIENT_ALL)) ||
-                                         ((isPausedActions(PAUSE_ACTION_CLIENT_WRITE)) && is_may_replicate_command))) {
+    if (!c->flag.replica && ((isPausedActions(PAUSE_ACTION_CLIENT_ALL)) ||
+                             ((isPausedActions(PAUSE_ACTION_CLIENT_WRITE)) && is_may_replicate_command))) {
         blockPostponeClient(c);
         return C_OK;
     }
 
     /* Exec the command */
-    if (c->flags & CLIENT_MULTI && c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
+    if (c->flag.multi && c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
         c->cmd->proc != multiCommand && c->cmd->proc != watchCommand && c->cmd->proc != quitCommand &&
         c->cmd->proc != resetCommand) {
         queueMultiCommand(c, cmd_flags);
@@ -4410,7 +4412,7 @@ void pingCommand(client *c) {
         return;
     }
 
-    if (c->flags & CLIENT_PUBSUB && c->resp == 2) {
+    if (c->flag.pubsub && c->resp == 2) {
         addReply(c, shared.mbulkhdr[2]);
         addReplyBulkCBuffer(c, "pong", 4);
         if (c->argc == 1)
@@ -5949,7 +5951,7 @@ void infoCommand(client *c) {
 }
 
 void monitorCommand(client *c) {
-    if (c->flags & CLIENT_DENY_BLOCKING) {
+    if (c->flag.deny_blocking) {
         /**
          * A client that has CLIENT_DENY_BLOCKING flag on
          * expects a reply per command and so can't execute MONITOR. */
@@ -5958,9 +5960,10 @@ void monitorCommand(client *c) {
     }
 
     /* ignore MONITOR if already replica or in monitor mode */
-    if (c->flags & CLIENT_REPLICA) return;
+    if (c->flag.replica) return;
 
-    c->flags |= (CLIENT_REPLICA | CLIENT_MONITOR);
+    c->flag.replica = 1;
+    c->flag.monitor = 1;
     listAddNodeTail(server.monitors, c);
     addReply(c, shared.ok);
 }
diff --git a/src/server.h b/src/server.h
index 57ccd557e2..d56dfdceee 100644
--- a/src/server.h
+++ b/src/server.h
@@ -343,92 +343,6 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
 #define CMD_DOC_DEPRECATED (1 << 0) /* Command is deprecated */
 #define CMD_DOC_SYSCMD (1 << 1)     /* System (internal) command */
 
-/* Client flags */
-#define CLIENT_REPLICA (1 << 0)           /* This client is a replica */
-#define CLIENT_PRIMARY (1 << 1)           /* This client is a primary */
-#define CLIENT_MONITOR (1 << 2)           /* This client is a replica monitor, see MONITOR */
-#define CLIENT_MULTI (1 << 3)             /* This client is in a MULTI context */
-#define CLIENT_BLOCKED (1 << 4)           /* The client is waiting in a blocking operation */
-#define CLIENT_DIRTY_CAS (1 << 5)         /* Watched keys modified. EXEC will fail. */
-#define CLIENT_CLOSE_AFTER_REPLY (1 << 6) /* Close after writing entire reply. */
-#define CLIENT_UNBLOCKED                                                                                               \
-    (1 << 7)                                 /* This client was unblocked and is stored in                             \
-                                               server.unblocked_clients */
-#define CLIENT_SCRIPT (1 << 8)               /* This is a non connected client used by Lua */
-#define CLIENT_ASKING (1 << 9)               /* Client issued the ASKING command */
-#define CLIENT_CLOSE_ASAP (1 << 10)          /* Close this client ASAP */
-#define CLIENT_UNIX_SOCKET (1 << 11)         /* Client connected via Unix domain socket */
-#define CLIENT_DIRTY_EXEC (1 << 12)          /* EXEC will fail for errors while queueing */
-#define CLIENT_PRIMARY_FORCE_REPLY (1 << 13) /* Queue replies even if is primary */
-#define CLIENT_FORCE_AOF (1 << 14)           /* Force AOF propagation of current cmd. */
-#define CLIENT_FORCE_REPL (1 << 15)          /* Force replication of current cmd. */
-#define CLIENT_PRE_PSYNC (1 << 16)           /* Instance don't understand PSYNC. */
-#define CLIENT_READONLY (1 << 17)            /* Cluster client is in read-only state. */
-#define CLIENT_PUBSUB (1 << 18)              /* Client is in Pub/Sub mode. */
-#define CLIENT_PREVENT_AOF_PROP (1 << 19)    /* Don't propagate to AOF. */
-#define CLIENT_PREVENT_REPL_PROP (1 << 20)   /* Don't propagate to replicas. */
-#define CLIENT_PREVENT_PROP (CLIENT_PREVENT_AOF_PROP | CLIENT_PREVENT_REPL_PROP)
-#define CLIENT_PENDING_WRITE                                                                                           \
-    (1 << 21)                            /* Client has output to send but a write                                      \
-                                            handler is yet not installed. */
-#define CLIENT_REPLY_OFF (1 << 22)       /* Don't send replies to client. */
-#define CLIENT_REPLY_SKIP_NEXT (1 << 23) /* Set CLIENT_REPLY_SKIP for next cmd */
-#define CLIENT_REPLY_SKIP (1 << 24)      /* Don't send just this reply. */
-#define CLIENT_LUA_DEBUG (1 << 25)       /* Run EVAL in debug mode. */
-#define CLIENT_LUA_DEBUG_SYNC (1 << 26)  /* EVAL debugging without fork() */
-#define CLIENT_MODULE (1 << 27)          /* Non connected client used by some module. */
-#define CLIENT_PROTECTED (1 << 28)       /* Client should not be freed for now. */
-#define CLIENT_EXECUTING_COMMAND                                                                                       \
-    (1 << 29) /* Indicates that the client is currently in the process of handling                                     \
-               a command. usually this will be marked only during call()                                               \
-               however, blocked clients might have this flag kept until they                                           \
-               will try to reprocess the command. */
-
-#define CLIENT_PENDING_COMMAND                                                                                         \
-    (1 << 30) /* Indicates the client has a fully                                                                      \
-               * parsed command ready for execution. */
-#define CLIENT_TRACKING                                                                                                \
-    (1ULL << 31)                                  /* Client enabled keys tracking in order to                          \
-                                                  perform client side caching. */
-#define CLIENT_TRACKING_BROKEN_REDIR (1ULL << 32) /* Target client is invalid. */
-#define CLIENT_TRACKING_BCAST (1ULL << 33)        /* Tracking in BCAST mode. */
-#define CLIENT_TRACKING_OPTIN (1ULL << 34)        /* Tracking in opt-in mode. */
-#define CLIENT_TRACKING_OPTOUT (1ULL << 35)       /* Tracking in opt-out mode. */
-#define CLIENT_TRACKING_CACHING                                                                                        \
-    (1ULL << 36) /* CACHING yes/no was given,                                                                          \
-                    depending on optin/optout mode. */
-#define CLIENT_TRACKING_NOLOOP                                                                                         \
-    (1ULL << 37)                           /* Don't send invalidation messages                                         \
-                                              about writes performed by myself.*/
-#define CLIENT_IN_TO_TABLE (1ULL << 38)    /* This client is in the timeout table. */
-#define CLIENT_PROTOCOL_ERROR (1ULL << 39) /* Protocol error chatting with it. */
-#define CLIENT_CLOSE_AFTER_COMMAND                                                                                     \
-    (1ULL << 40) /* Close after executing commands                                                                     \
-                  * and writing entire reply. */
-#define CLIENT_DENY_BLOCKING                                                                                           \
-    (1ULL << 41) /* Indicate that the client should not be blocked.                                                    \
-                    currently, turned on inside MULTI, Lua, RM_Call,                                                   \
-                    and AOF client */
-#define CLIENT_REPL_RDBONLY                                                                                            \
-    (1ULL << 42) /* This client is a replica that only wants                                                           \
-                    RDB without replication buffer. */
-#define CLIENT_NO_EVICT                                                                                                \
-    (1ULL << 43) /* This client is protected against client                                                            \
-                    memory eviction. */
-#define CLIENT_ALLOW_OOM                                                                                               \
-    (1ULL << 44)                     /* Client used by RM_Call is allowed to fully execute                             \
-                                        scripts even when in OOM */
-#define CLIENT_NO_TOUCH (1ULL << 45) /* This client will not touch LFU/LRU stats. */
-#define CLIENT_PUSHING (1ULL << 46)  /* This client is pushing notifications. */
-#define CLIENT_MODULE_AUTH_HAS_RESULT                                                                                  \
-    (1ULL << 47)                                     /* Indicates a client in the middle of module based               \
-                                                        auth had been authenticated from the Module. */
-#define CLIENT_MODULE_PREVENT_AOF_PROP (1ULL << 48)  /* Module client do not want to propagate to AOF */
-#define CLIENT_MODULE_PREVENT_REPL_PROP (1ULL << 49) /* Module client do not want to propagate to replica */
-#define CLIENT_REPROCESSING_COMMAND (1ULL << 50)     /* The client is re-processing the command. */
-#define CLIENT_REPLICATION_DONE (1ULL << 51)         /* Indicate that replication has been done on the client */
-#define CLIENT_AUTHENTICATED (1ULL << 52)            /* Indicate a client has successfully authenticated */
-
 /* Client capabilities */
 #define CLIENT_CAPA_REDIRECT (1 << 0) /* Indicate that the client can handle redirection */
 
@@ -1203,9 +1117,70 @@ typedef struct {
 } clientReqResInfo;
 #endif
 
+typedef struct ClientFlags { /* Per-client boolean state, one bit per flag. */
+    uint64_t primary : 1;                  /* This client is a primary */
+    uint64_t replica : 1;                  /* This client is a replica */
+    uint64_t monitor : 1;                  /* This client is a replica monitor, see MONITOR */
+    uint64_t multi : 1;                    /* This client is in a MULTI context */
+    uint64_t blocked : 1;                  /* The client is waiting in a blocking operation */
+    uint64_t dirty_cas : 1;                /* Watched keys modified. EXEC will fail. */
+    uint64_t close_after_reply : 1;        /* Close after writing entire reply. */
+    uint64_t unblocked : 1;                /* Client was unblocked and is stored in server.unblocked_clients */
+    uint64_t script : 1;                   /* This is a non connected client used by Lua */
+    uint64_t asking : 1;                   /* Client issued the ASKING command */
+    uint64_t close_asap : 1;               /* Close this client ASAP */
+    uint64_t unix_socket : 1;              /* Client connected via Unix domain socket */
+    uint64_t dirty_exec : 1;               /* EXEC will fail for errors while queueing */
+    uint64_t primary_force_reply : 1;      /* Queue replies even if is primary */
+    uint64_t force_aof : 1;                /* Force AOF propagation of current cmd. */
+    uint64_t force_repl : 1;               /* Force replication of current cmd. */
+    uint64_t pre_psync : 1;                /* Instance doesn't understand PSYNC. */
+    uint64_t readonly : 1;                 /* Cluster client is in read-only state. */
+    uint64_t pubsub : 1;                   /* Client is in Pub/Sub mode. */
+    uint64_t prevent_aof_prop : 1;         /* Don't propagate to AOF. */
+    uint64_t prevent_repl_prop : 1;        /* Don't propagate to replicas. */
+    uint64_t prevent_prop : 1;             /* NOTE(review): was a mask of the two bits above; confirm use. */
+    uint64_t pending_write : 1;            /* Has output to send but a write handler is yet not installed. */
+    uint64_t reply_off : 1;                /* Don't send replies to client. */
+    uint64_t reply_skip_next : 1;          /* Set reply_skip for next cmd */
+    uint64_t reply_skip : 1;               /* Don't send just this reply. */
+    uint64_t lua_debug : 1;                /* Run EVAL in debug mode. */
+    uint64_t lua_debug_sync : 1;           /* EVAL debugging without fork() */
+    uint64_t module : 1;                   /* Non connected client used by some module. */
+    uint64_t protected : 1;                /* Client should not be freed for now. */
+    uint64_t executing_command : 1;        /* Currently handling a command; may persist on blocked clients. */
+    uint64_t pending_command : 1;          /* Client has a fully parsed command ready for execution. */
+    uint64_t tracking : 1;                 /* Client enabled keys tracking to perform client side caching. */
+    uint64_t tracking_broken_redir : 1;    /* Target client is invalid. */
+    uint64_t tracking_bcast : 1;           /* Tracking in BCAST mode. */
+    uint64_t tracking_optin : 1;           /* Tracking in opt-in mode. */
+    uint64_t tracking_optout : 1;          /* Tracking in opt-out mode. */
+    uint64_t tracking_caching : 1;         /* CACHING yes/no was given, depending on optin/optout mode. */
+    uint64_t tracking_noloop : 1;          /* Don't send invalidation messages about writes by myself. */
+    uint64_t in_to_table : 1;              /* This client is in the timeout table. */
+    uint64_t protocol_error : 1;           /* Protocol error chatting with it. */
+    uint64_t close_after_command : 1;      /* Close after executing commands and writing entire reply. */
+    uint64_t deny_blocking : 1;            /* Must not be blocked: set inside MULTI, Lua, RM_Call, AOF client. */
+    uint64_t repl_rdbonly : 1;             /* Replica that only wants RDB without replication buffer. */
+    uint64_t no_evict : 1;                 /* This client is protected against client memory eviction. */
+    uint64_t allow_oom : 1;                /* RM_Call client allowed to fully execute scripts even when in OOM */
+    uint64_t no_touch : 1;                 /* This client will not touch LFU/LRU stats. */
+    uint64_t pushing : 1;                  /* This client is pushing notifications. */
+    uint64_t module_auth_has_result : 1;   /* Client in the middle of module auth was authenticated by Module. */
+    uint64_t module_prevent_aof_prop : 1;  /* Module client do not want to propagate to AOF */
+    uint64_t module_prevent_repl_prop : 1; /* Module client do not want to propagate to replica */
+    uint64_t reprocessing_command : 1;     /* The client is re-processing the command. */
+    uint64_t replication_done : 1;         /* Indicate that replication has been done on the client */
+    uint64_t authenticated : 1;            /* Indicate a client has successfully authenticated */
+    uint64_t reserved : 10;                /* Padding: 54 flags + 10 = 64 bits, one uint64_t unit. */
+} ClientFlags;
+
 typedef struct client {
-    uint64_t id;    /* Client incremental unique ID. */
-    uint64_t flags; /* Client flags: CLIENT_* macros. */
+    uint64_t id; /* Client incremental unique ID. */
+    union {
+        uint64_t raw_flag;
+        struct ClientFlags flag;
+    };
     connection *conn;
     int resp;                            /* RESP protocol version. Can be 2 or 3. */
     uint32_t capa;                       /* Client capabilities: CLIENT_CAPA* macros. */
@@ -2642,7 +2617,7 @@ void setDeferredSetLen(client *c, void *node, long length);
 void setDeferredAttributeLen(client *c, void *node, long length);
 void setDeferredPushLen(client *c, void *node, long length);
 int processInputBuffer(client *c);
-void acceptCommonHandler(connection *conn, int flags, char *ip);
+void acceptCommonHandler(connection *conn, struct ClientFlags flags, char *ip);
 void readQueryFromClient(connection *conn);
 int prepareClientToWrite(client *c);
 void addReplyNull(client *c);
@@ -2758,7 +2733,7 @@ void addReplyStatusFormat(client *c, const char *fmt, ...);
 #endif
 
 /* Client side caching (tracking mode) */
-void enableTracking(client *c, uint64_t redirect_to, uint64_t options, robj **prefix, size_t numprefix);
+void enableTracking(client *c, uint64_t redirect_to, struct ClientFlags options, robj **prefix, size_t numprefix);
 void disableTracking(client *c);
 void trackingRememberKeys(client *tracking, client *executing);
 void trackingInvalidateKey(client *c, robj *keyobj, int bcast);
diff --git a/src/socket.c b/src/socket.c
index 170b031ad7..5aa3606990 100644
--- a/src/socket.c
+++ b/src/socket.c
@@ -305,6 +305,7 @@ static void connSocketEventHandler(struct aeEventLoop *el, int fd, void *clientD
 static void connSocketAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
     int cport, cfd;
     int max = server.max_new_conns_per_cycle;
+    struct ClientFlags flags = {0};
     char cip[NET_IP_STR_LEN];
     UNUSED(el);
     UNUSED(mask);
@@ -317,7 +318,7 @@ static void connSocketAcceptHandler(aeEventLoop *el, int fd, void *privdata, int
             return;
         }
         serverLog(LL_VERBOSE, "Accepted %s:%d", cip, cport);
-        acceptCommonHandler(connCreateAcceptedSocket(cfd, NULL), 0, cip);
+        acceptCommonHandler(connCreateAcceptedSocket(cfd, NULL), flags, cip);
     }
 }
 
diff --git a/src/sort.c b/src/sort.c
index 6a04cfcc6a..bad86add3b 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -308,7 +308,7 @@ void sortCommandGeneric(client *c, int readonly) {
      * The other types (list, sorted set) will retain their native order
      * even if no sort order is requested, so they remain stable across
      * scripting and replication. */
-    if (dontsort && sortval->type == OBJ_SET && (storekey || c->flags & CLIENT_SCRIPT)) {
+    if (dontsort && sortval->type == OBJ_SET && (storekey || c->flag.script)) {
         /* Force ALPHA sorting */
         dontsort = 0;
         alpha = 1;
diff --git a/src/t_hash.c b/src/t_hash.c
index 737ba06650..ae4c499f9d 100644
--- a/src/t_hash.c
+++ b/src/t_hash.c
@@ -940,7 +940,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
                 if (withvalues && c->resp > 2) addReplyArrayLen(c, 2);
                 addReplyBulkCBuffer(c, key, sdslen(key));
                 if (withvalues) addReplyBulkCBuffer(c, value, sdslen(value));
-                if (c->flags & CLIENT_CLOSE_ASAP) break;
+                if (c->flag.close_asap) break;
             }
         } else if (hash->encoding == OBJ_ENCODING_LISTPACK) {
             listpackEntry *keys, *vals = NULL;
@@ -954,7 +954,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
                 count -= sample_count;
                 lpRandomPairs(hash->ptr, sample_count, keys, vals);
                 hrandfieldReplyWithListpack(c, sample_count, keys, vals);
-                if (c->flags & CLIENT_CLOSE_ASAP) break;
+                if (c->flag.close_asap) break;
             }
             zfree(keys);
             zfree(vals);
diff --git a/src/t_list.c b/src/t_list.c
index 28d5f08123..845666b13c 100644
--- a/src/t_list.c
+++ b/src/t_list.c
@@ -1213,7 +1213,7 @@ void blockingPopGenericCommand(client *c, robj **keys, int numkeys, int where, i
 
     /* If we are not allowed to block the client, the only thing
      * we can do is treating it as a timeout (even with timeout 0). */
-    if (c->flags & CLIENT_DENY_BLOCKING) {
+    if (c->flag.deny_blocking) {
         addReplyNullArray(c);
         return;
     }
@@ -1237,7 +1237,7 @@ void blmoveGenericCommand(client *c, int wherefrom, int whereto, mstime_t timeou
     if (checkType(c, key, OBJ_LIST)) return;
 
     if (key == NULL) {
-        if (c->flags & CLIENT_DENY_BLOCKING) {
+        if (c->flag.deny_blocking) {
             /* Blocking against an empty list when blocking is not allowed
              * returns immediately. */
             addReplyNull(c);
diff --git a/src/t_set.c b/src/t_set.c
index 52661a1547..53ab37be03 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -1045,7 +1045,7 @@ void srandmemberWithCountCommand(client *c) {
                     else
                         addReplyBulkLongLong(c, entries[i].lval);
                 }
-                if (c->flags & CLIENT_CLOSE_ASAP) break;
+                if (c->flag.close_asap) break;
             }
             zfree(entries);
             return;
@@ -1058,7 +1058,7 @@ void srandmemberWithCountCommand(client *c) {
             } else {
                 addReplyBulkCBuffer(c, str, len);
             }
-            if (c->flags & CLIENT_CLOSE_ASAP) break;
+            if (c->flag.close_asap) break;
         }
         return;
     }
diff --git a/src/t_stream.c b/src/t_stream.c
index 7c4f5788d5..d2a45dde06 100644
--- a/src/t_stream.c
+++ b/src/t_stream.c
@@ -2388,7 +2388,7 @@ void xreadCommand(client *c) {
     if (timeout != -1) {
         /* If we are not allowed to block the client, the only thing
          * we can do is treating it as a timeout (even with timeout 0). */
-        if (c->flags & CLIENT_DENY_BLOCKING) {
+        if (c->flag.deny_blocking) {
             addReplyNullArray(c);
             goto cleanup;
         }
diff --git a/src/t_zset.c b/src/t_zset.c
index 216ed165d2..034f711c58 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -4038,7 +4038,7 @@ void blockingGenericZpopCommand(client *c,
 
     /* If we are not allowed to block the client and the zset is empty the only thing
      * we can do is treating it as a timeout (even with timeout 0). */
-    if (c->flags & CLIENT_DENY_BLOCKING) {
+    if (c->flag.deny_blocking) {
         addReplyNullArray(c);
         return;
     }
@@ -4123,7 +4123,7 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
                 if (withscores && c->resp > 2) addReplyArrayLen(c, 2);
                 addReplyBulkCBuffer(c, key, sdslen(key));
                 if (withscores) addReplyDouble(c, *(double *)dictGetVal(de));
-                if (c->flags & CLIENT_CLOSE_ASAP) break;
+                if (c->flag.close_asap) break;
             }
         } else if (zsetobj->encoding == OBJ_ENCODING_LISTPACK) {
             listpackEntry *keys, *vals = NULL;
@@ -4136,7 +4136,7 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
                 count -= sample_count;
                 lpRandomPairs(zsetobj->ptr, sample_count, keys, vals);
                 zrandmemberReplyWithListpack(c, sample_count, keys, vals);
-                if (c->flags & CLIENT_CLOSE_ASAP) break;
+                if (c->flag.close_asap) break;
             }
             zfree(keys);
             zfree(vals);
diff --git a/src/timeout.c b/src/timeout.c
index 98eb778c21..3084edf7f3 100644
--- a/src/timeout.c
+++ b/src/timeout.c
@@ -37,7 +37,7 @@
  * not blocked right now). If so send a reply, unblock it, and return 1.
  * Otherwise 0 is returned and no operation is performed. */
 int checkBlockedClientTimeout(client *c, mstime_t now) {
-    if (c->flags & CLIENT_BLOCKED && c->bstate.timeout != 0 && c->bstate.timeout < now) {
+    if (c->flag.blocked && c->bstate.timeout != 0 && c->bstate.timeout < now) {
         /* Handle blocking operation specific timeout. */
         unblockClientOnTimeout(c);
         return 1;
@@ -55,15 +55,15 @@ int clientsCronHandleTimeout(client *c, mstime_t now_ms) {
 
     if (server.maxidletime &&
         /* This handles the idle clients connection timeout if set. */
-        !(c->flags & CLIENT_REPLICA) && /* No timeout for replicas and monitors */
-        !mustObeyClient(c) &&           /* No timeout for primaries and AOF */
-        !(c->flags & CLIENT_BLOCKED) && /* No timeout for BLPOP */
-        !(c->flags & CLIENT_PUBSUB) &&  /* No timeout for Pub/Sub clients */
+        !c->flag.replica &&   /* No timeout for replicas and monitors */
+        !mustObeyClient(c) && /* No timeout for primaries and AOF */
+        !c->flag.blocked &&   /* No timeout for BLPOP */
+        !c->flag.pubsub &&    /* No timeout for Pub/Sub clients */
         (now - c->last_interaction > server.maxidletime)) {
         serverLog(LL_VERBOSE, "Closing idle client");
         freeClient(c);
         return 1;
-    } else if (c->flags & CLIENT_BLOCKED) {
+    } else if (c->flag.blocked) {
         /* Cluster: handle unblock & redirect of clients blocked
          * into keys no longer served by this server. */
         if (server.cluster_enabled) {
@@ -112,14 +112,14 @@ void addClientToTimeoutTable(client *c) {
     uint64_t timeout = c->bstate.timeout;
     unsigned char buf[CLIENT_ST_KEYLEN];
     encodeTimeoutKey(buf, timeout, c);
-    if (raxTryInsert(server.clients_timeout_table, buf, sizeof(buf), NULL, NULL)) c->flags |= CLIENT_IN_TO_TABLE;
+    if (raxTryInsert(server.clients_timeout_table, buf, sizeof(buf), NULL, NULL)) c->flag.in_to_table = 1;
 }
 
 /* Remove the client from the table when it is unblocked for reasons
  * different than timing out. */
 void removeClientFromTimeoutTable(client *c) {
-    if (!(c->flags & CLIENT_IN_TO_TABLE)) return;
-    c->flags &= ~CLIENT_IN_TO_TABLE;
+    if (!c->flag.in_to_table) return;
+    c->flag.in_to_table = 0;
     uint64_t timeout = c->bstate.timeout;
     unsigned char buf[CLIENT_ST_KEYLEN];
     encodeTimeoutKey(buf, timeout, c);
@@ -140,7 +140,7 @@ void handleBlockedClientsTimeout(void) {
         client *c;
         decodeTimeoutKey(ri.key, &timeout, &c);
         if (timeout >= now) break; /* All the timeouts are in the future. */
-        c->flags &= ~CLIENT_IN_TO_TABLE;
+        c->flag.in_to_table = 0;
         checkBlockedClientTimeout(c, now);
         raxRemove(server.clients_timeout_table, ri.key, ri.key_len, NULL);
         raxSeek(&ri, "^", NULL, 0);
diff --git a/src/tls.c b/src/tls.c
index a26ceb7506..2d4d6cd0ae 100644
--- a/src/tls.c
+++ b/src/tls.c
@@ -745,6 +745,7 @@ static void tlsAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask)
     int cport, cfd;
     int max = server.max_new_tls_conns_per_cycle;
     char cip[NET_IP_STR_LEN];
+    struct ClientFlags flags = {0};
     UNUSED(el);
     UNUSED(mask);
     UNUSED(privdata);
@@ -756,7 +757,7 @@ static void tlsAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask)
             return;
         }
         serverLog(LL_VERBOSE, "Accepted %s:%d", cip, cport);
-        acceptCommonHandler(connCreateAcceptedTLS(cfd, &server.tls_auth_clients), 0, cip);
+        acceptCommonHandler(connCreateAcceptedTLS(cfd, &server.tls_auth_clients), flags, cip);
     }
 }
 
diff --git a/src/tracking.c b/src/tracking.c
index b95ca05b3e..62d6121cfd 100644
--- a/src/tracking.c
+++ b/src/tracking.c
@@ -67,7 +67,7 @@ typedef struct bcastState {
 void disableTracking(client *c) {
     /* If this client is in broadcasting mode, we need to unsubscribe it
      * from all the prefixes it is registered to. */
-    if (c->flags & CLIENT_TRACKING_BCAST) {
+    if (c->flag.tracking_bcast) {
         raxIterator ri;
         raxStart(&ri, c->client_tracking_prefixes);
         raxSeek(&ri, "^", NULL, 0);
@@ -92,10 +92,15 @@ void disableTracking(client *c) {
     }
 
     /* Clear flags and adjust the count. */
-    if (c->flags & CLIENT_TRACKING) {
+    if (c->flag.tracking) {
         server.tracking_clients--;
-        c->flags &= ~(CLIENT_TRACKING | CLIENT_TRACKING_BROKEN_REDIR | CLIENT_TRACKING_BCAST | CLIENT_TRACKING_OPTIN |
-                      CLIENT_TRACKING_OPTOUT | CLIENT_TRACKING_CACHING | CLIENT_TRACKING_NOLOOP);
+        c->flag.tracking = 0;
+        c->flag.tracking_broken_redir = 0;
+        c->flag.tracking_bcast = 0;
+        c->flag.tracking_optin = 0;
+        c->flag.tracking_optout = 0;
+        c->flag.tracking_caching = 0;
+        c->flag.tracking_noloop = 0;
     }
 }
 
@@ -173,11 +178,14 @@ void enableBcastTrackingForPrefix(client *c, char *prefix, size_t plen) {
  * eventually get freed, we'll send a message to the original client to
  * inform it of the condition. Multiple clients can redirect the invalidation
  * messages to the same client ID. */
-void enableTracking(client *c, uint64_t redirect_to, uint64_t options, robj **prefix, size_t numprefix) {
-    if (!(c->flags & CLIENT_TRACKING)) server.tracking_clients++;
-    c->flags |= CLIENT_TRACKING;
-    c->flags &= ~(CLIENT_TRACKING_BROKEN_REDIR | CLIENT_TRACKING_BCAST | CLIENT_TRACKING_OPTIN |
-                  CLIENT_TRACKING_OPTOUT | CLIENT_TRACKING_NOLOOP);
+void enableTracking(client *c, uint64_t redirect_to, struct ClientFlags options, robj **prefix, size_t numprefix) {
+    if (!c->flag.tracking) server.tracking_clients++;
+    c->flag.tracking = 1;
+    c->flag.tracking_broken_redir = 0;
+    c->flag.tracking_bcast = 0;
+    c->flag.tracking_optin = 0;
+    c->flag.tracking_optout = 0;
+    c->flag.tracking_noloop = 0;
     c->client_tracking_redirection = redirect_to;
 
     /* This may be the first client we ever enable. Create the tracking
@@ -189,8 +197,8 @@ void enableTracking(client *c, uint64_t redirect_to, uint64_t options, robj **pr
     }
 
     /* For broadcasting, set the list of prefixes in the client. */
-    if (options & CLIENT_TRACKING_BCAST) {
-        c->flags |= CLIENT_TRACKING_BCAST;
+    if (options.tracking_bcast) {
+        c->flag.tracking_bcast = 1;
         if (numprefix == 0) enableBcastTrackingForPrefix(c, "", 0);
         for (size_t j = 0; j < numprefix; j++) {
             sds sdsprefix = prefix[j]->ptr;
@@ -199,7 +207,9 @@ void enableTracking(client *c, uint64_t redirect_to, uint64_t options, robj **pr
     }
 
     /* Set the remaining flags that don't need any special handling. */
-    c->flags |= options & (CLIENT_TRACKING_OPTIN | CLIENT_TRACKING_OPTOUT | CLIENT_TRACKING_NOLOOP);
+    c->flag.tracking_optin = options.tracking_optin;
+    c->flag.tracking_optout = options.tracking_optout;
+    c->flag.tracking_noloop = options.tracking_noloop;
 }
 
 /* This function is called after the execution of a readonly command in the
@@ -211,9 +221,9 @@ void enableTracking(client *c, uint64_t redirect_to, uint64_t options, robj **pr
 void trackingRememberKeys(client *tracking, client *executing) {
     /* Return if we are in optin/out mode and the right CACHING command
      * was/wasn't given in order to modify the default behavior. */
-    uint64_t optin = tracking->flags & CLIENT_TRACKING_OPTIN;
-    uint64_t optout = tracking->flags & CLIENT_TRACKING_OPTOUT;
-    uint64_t caching_given = tracking->flags & CLIENT_TRACKING_CACHING;
+    uint64_t optin = tracking->flag.tracking_optin;
+    uint64_t optout = tracking->flag.tracking_optout;
+    uint64_t caching_given = tracking->flag.tracking_caching;
     if ((optin && !caching_given) || (optout && caching_given)) return;
 
     getKeysResult result;
@@ -263,14 +273,14 @@ void trackingRememberKeys(client *tracking, client *executing) {
  * - Following a flush command, to send a single RESP NULL to indicate
  *   that all keys are now invalid. */
 void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
-    uint64_t old_flags = c->flags;
-    c->flags |= CLIENT_PUSHING;
+    struct ClientFlags old_flags = c->flag;
+    c->flag.pushing = 1;
 
     int using_redirection = 0;
     if (c->client_tracking_redirection) {
         client *redir = lookupClientByID(c->client_tracking_redirection);
         if (!redir) {
-            c->flags |= CLIENT_TRACKING_BROKEN_REDIR;
+            c->flag.tracking_broken_redir = 1;
             /* We need to signal to the original connection that we
              * are unable to send invalidation messages to the redirected
              * connection, because the client no longer exist. */
@@ -279,14 +289,14 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
                 addReplyBulkCBuffer(c, "tracking-redir-broken", 21);
                 addReplyLongLong(c, c->client_tracking_redirection);
             }
-            if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+            if (!old_flags.pushing) c->flag.pushing = 0;
             return;
         }
-        if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+        if (!old_flags.pushing) c->flag.pushing = 0;
         c = redir;
         using_redirection = 1;
-        old_flags = c->flags;
-        c->flags |= CLIENT_PUSHING;
+        old_flags = c->flag;
+        c->flag.pushing = 1;
     }
 
     /* Only send such info for clients in RESP version 3 or more. However
@@ -296,7 +306,7 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
     if (c->resp > 2) {
         addReplyPushLen(c, 2);
         addReplyBulkCBuffer(c, "invalidate", 10);
-    } else if (using_redirection && c->flags & CLIENT_PUBSUB) {
+    } else if (using_redirection && c->flag.pubsub) {
         /* We use a static object to speedup things, however we assume
          * that addReplyPubsubMessage() will not take a reference. */
         addReplyPubsubMessage(c, TrackingChannelName, NULL, shared.messagebulk);
@@ -305,7 +315,7 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
          * redirecting to another client. We can't send anything to
          * it since RESP2 does not support push messages in the same
          * connection. */
-        if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+        if (!old_flags.pushing) c->flag.pushing = 0;
         return;
     }
 
@@ -317,7 +327,7 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
         addReplyBulkCBuffer(c, keyname, keylen);
     }
     updateClientMemUsageAndBucket(c);
-    if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+    if (!old_flags.pushing) c->flag.pushing = 0;
 }
 
 /* This function is called when a key is modified in the server and in the case
@@ -383,20 +393,20 @@ void trackingInvalidateKey(client *c, robj *keyobj, int bcast) {
          * previously the client was not in BCAST mode. This can happen if
          * TRACKING is enabled normally, and then the client switches to
          * BCAST mode. */
-        if (target == NULL || !(target->flags & CLIENT_TRACKING) || target->flags & CLIENT_TRACKING_BCAST) {
+        if (target == NULL || !(target->flag.tracking) || target->flag.tracking_bcast) {
             continue;
         }
 
         /* If the client enabled the NOLOOP mode, don't send notifications
          * about keys changed by the client itself. */
-        if (target->flags & CLIENT_TRACKING_NOLOOP && target == server.current_client) {
+        if (target->flag.tracking_noloop && target == server.current_client) {
             continue;
         }
 
         /* If target is current client and it's executing a command, we need schedule key invalidation.
          * As the invalidation messages may be interleaved with command
          * response and should after command response. */
-        if (target == server.current_client && (server.current_client->flags & CLIENT_EXECUTING_COMMAND)) {
+        if (target == server.current_client && (server.current_client->flag.executing_command)) {
             incrRefCount(keyobj);
             listAddNodeTail(server.tracking_pending_keys, keyobj);
         } else {
@@ -463,7 +473,7 @@ void trackingInvalidateKeysOnFlush(int async) {
         listRewind(server.clients, &li);
         while ((ln = listNext(&li)) != NULL) {
             client *c = listNodeValue(ln);
-            if (c->flags & CLIENT_TRACKING) {
+            if (c->flag.tracking) {
                 if (c == server.current_client) {
                     /* We use a special NULL to indicate that we should send null */
                     listAddNodeTail(server.tracking_pending_keys, NULL);
@@ -610,7 +620,7 @@ void trackingBroadcastInvalidationMessages(void) {
             while (raxNext(&ri2)) {
                 client *c;
                 memcpy(&c, ri2.key, sizeof(c));
-                if (c->flags & CLIENT_TRACKING_NOLOOP) {
+                if (c->flag.tracking_noloop) {
                     /* This client may have certain keys excluded. */
                     sds adhoc = trackingBuildBroadcastReply(c, bs->keys);
                     if (adhoc) {
diff --git a/src/unix.c b/src/unix.c
index d3c7ed20c0..ca38e83ed0 100644
--- a/src/unix.c
+++ b/src/unix.c
@@ -92,6 +92,8 @@ static connection *connCreateAcceptedUnix(int fd, void *priv) {
 static void connUnixAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
     int cfd;
     int max = server.max_new_conns_per_cycle;
+    struct ClientFlags flags = {0};
+    flags.unix_socket = 1;
     UNUSED(el);
     UNUSED(mask);
     UNUSED(privdata);
@@ -103,7 +105,7 @@ static void connUnixAcceptHandler(aeEventLoop *el, int fd, void *privdata, int m
             return;
         }
         serverLog(LL_VERBOSE, "Accepted connection to %s", server.unixsocket);
-        acceptCommonHandler(connCreateAcceptedUnix(cfd, NULL), CLIENT_UNIX_SOCKET, NULL);
+        acceptCommonHandler(connCreateAcceptedUnix(cfd, NULL), flags, NULL);
     }
 }
 

From 24208812a658c5dbb843dc20f9615e801be37d46 Mon Sep 17 00:00:00 2001
From: ranshid <88133677+ranshid@users.noreply.github.com>
Date: Mon, 1 Jul 2024 02:30:46 +0300
Subject: [PATCH 33/53] Increase ping and cluster timeout for cluster-slots
 test (#717)

The cluster-slots test exercises a very fragmented slot range on a
relatively large cluster. For this reason, when run under valgrind, some
of the nodes time out while the cluster is attempting to converge and
propagate.
This PR sets the test's cluster-node-timeout to 90000 and
cluster-ping-interval to 1000.

Signed-off-by: ranshid <ranshid@amazon.com>
---
 tests/unit/cluster/cluster-slots.tcl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/cluster/cluster-slots.tcl b/tests/unit/cluster/cluster-slots.tcl
index 441e5644e4..bfcdd93ecf 100644
--- a/tests/unit/cluster/cluster-slots.tcl
+++ b/tests/unit/cluster/cluster-slots.tcl
@@ -10,7 +10,7 @@ proc cluster_allocate_mixedSlots {masters replicas} {
     }
 }
 
-start_cluster 5 10 {tags {external:skip cluster}} {
+start_cluster 5 10 {overrides {cluster-ping-interval 1000 cluster-node-timeout 90000} tags {external:skip cluster}} {
 
 test "Cluster is up" {
     wait_for_cluster_state ok

From 752b6ee8ff75a72c364feca8b667f9e723df0c4c Mon Sep 17 00:00:00 2001
From: ranshid <88133677+ranshid@users.noreply.github.com>
Date: Mon, 1 Jul 2024 14:47:45 +0300
Subject: [PATCH 34/53] Avoid compilation error in valkey-cli (#721)

Signed-off-by: ranshid <ranshid@amazon.com>
---
 src/valkey-cli.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/valkey-cli.c b/src/valkey-cli.c
index 7e6898643e..22078fc423 100644
--- a/src/valkey-cli.c
+++ b/src/valkey-cli.c
@@ -5706,6 +5706,8 @@ static clusterManagerNode *clusterManagerNodePrimaryRandom(void) {
     }
     /* Can not be reached */
     assert(0);
+    /* Make compiler happy */
+    return 0;
 }
 
 static int clusterManagerFixSlotsCoverage(char *all_slots) {

From 9f4f6036b80410e849664236a0b3994645106262 Mon Sep 17 00:00:00 2001
From: Ping Xie <pingxie@google.com>
Date: Mon, 1 Jul 2024 13:45:14 -0700
Subject: [PATCH 35/53] Restore comments for client flags (#718)

---
 src/server.h | 111 ++++++++++++++++++++++++++-------------------------
 1 file changed, 56 insertions(+), 55 deletions(-)

diff --git a/src/server.h b/src/server.h
index d56dfdceee..1bdab048ca 100644
--- a/src/server.h
+++ b/src/server.h
@@ -1118,61 +1118,62 @@ typedef struct {
 #endif
 
 typedef struct ClientFlags {
-    uint64_t primary : 1;
-    uint64_t replica : 1;
-    uint64_t monitor : 1;
-    uint64_t multi : 1;
-    uint64_t blocked : 1;
-    uint64_t dirty_cas : 1;
-    uint64_t close_after_reply : 1;
-    uint64_t unblocked : 1;
-    uint64_t script : 1;
-    uint64_t asking : 1;
-    uint64_t close_asap : 1;
-    uint64_t unix_socket : 1;
-    uint64_t dirty_exec : 1;
-    uint64_t primary_force_reply : 1;
-    uint64_t force_aof : 1;
-    uint64_t force_repl : 1;
-    uint64_t pre_psync : 1;
-    uint64_t readonly : 1;
-    uint64_t pubsub : 1;
-    uint64_t prevent_aof_prop : 1;
-    uint64_t prevent_repl_prop : 1;
-    uint64_t prevent_prop : 1;
-    uint64_t pending_write : 1;
-    uint64_t reply_off : 1;
-    uint64_t reply_skip_next : 1;
-    uint64_t reply_skip : 1;
-    uint64_t lua_debug : 1;
-    uint64_t lua_debug_sync : 1;
-    uint64_t module : 1;
-    uint64_t protected : 1;
-    uint64_t executing_command : 1;
-    uint64_t pending_command : 1;
-    uint64_t tracking : 1;
-    uint64_t tracking_broken_redir : 1;
-    uint64_t tracking_bcast : 1;
-    uint64_t tracking_optin : 1;
-    uint64_t tracking_optout : 1;
-    uint64_t tracking_caching : 1;
-    uint64_t tracking_noloop : 1;
-    uint64_t in_to_table : 1;
-    uint64_t protocol_error : 1;
-    uint64_t close_after_command : 1;
-    uint64_t deny_blocking : 1;
-    uint64_t repl_rdbonly : 1;
-    uint64_t no_evict : 1;
-    uint64_t allow_oom : 1;
-    uint64_t no_touch : 1;
-    uint64_t pushing : 1;
-    uint64_t module_auth_has_result : 1;
-    uint64_t module_prevent_aof_prop : 1;
-    uint64_t module_prevent_repl_prop : 1;
-    uint64_t reprocessing_command : 1;
-    uint64_t replication_done : 1;
-    uint64_t authenticated : 1;
-    uint64_t reserved : 11;
+    uint64_t primary : 1;             /* This client is a primary */
+    uint64_t replica : 1;             /* This client is a replica */
+    uint64_t monitor : 1;             /* This client is a replica monitor, see MONITOR */
+    uint64_t multi : 1;               /* This client is in a MULTI context */
+    uint64_t blocked : 1;             /* The client is waiting in a blocking operation */
+    uint64_t dirty_cas : 1;           /* Watched keys modified. EXEC will fail. */
+    uint64_t close_after_reply : 1;   /* Close after writing entire reply. */
+    uint64_t unblocked : 1;           /* This client was unblocked and is stored in server.unblocked_clients */
+    uint64_t script : 1;              /* This is a non connected client used by Lua */
+    uint64_t asking : 1;              /* Client issued the ASKING command */
+    uint64_t close_asap : 1;          /* Close this client ASAP */
+    uint64_t unix_socket : 1;         /* Client connected via Unix domain socket */
+    uint64_t dirty_exec : 1;          /* EXEC will fail for errors while queueing */
+    uint64_t primary_force_reply : 1; /* Queue replies even if is primary */
+    uint64_t force_aof : 1;           /* Force AOF propagation of current cmd. */
+    uint64_t force_repl : 1;          /* Force replication of current cmd. */
+    uint64_t pre_psync : 1;           /* Instance don't understand PSYNC. */
+    uint64_t readonly : 1;            /* Cluster client is in read-only state. */
+    uint64_t pubsub : 1;              /* Client is in Pub/Sub mode. */
+    uint64_t prevent_aof_prop : 1;    /* Don't propagate to AOF. */
+    uint64_t prevent_repl_prop : 1;   /* Don't propagate to replicas. */
+    uint64_t prevent_prop : 1;        /* Don't propagate to AOF or replicas. */
+    uint64_t pending_write : 1;       /* Client has output to send but a write handler is yet not installed. */
+    uint64_t reply_off : 1;           /* Don't send replies to client. */
+    uint64_t reply_skip_next : 1;     /* Set CLIENT_REPLY_SKIP for next cmd */
+    uint64_t reply_skip : 1;          /* Don't send just this reply. */
+    uint64_t lua_debug : 1;           /* Run EVAL in debug mode. */
+    uint64_t lua_debug_sync : 1;      /* EVAL debugging without fork() */
+    uint64_t module : 1;              /* Non connected client used by some module. */
+    uint64_t protected : 1;           /* Client should not be freed for now. */
+    uint64_t executing_command : 1;   /* Indicates that the client is currently in the process of handling a command. */
+    uint64_t pending_command : 1;     /* Indicates the client has a fully parsed command ready for execution. */
+    uint64_t tracking : 1;            /* Client enabled keys tracking in order to perform client side caching. */
+    uint64_t tracking_broken_redir : 1; /* Target client is invalid. */
+    uint64_t tracking_bcast : 1;        /* Tracking in BCAST mode. */
+    uint64_t tracking_optin : 1;        /* Tracking in opt-in mode. */
+    uint64_t tracking_optout : 1;       /* Tracking in opt-out mode. */
+    uint64_t tracking_caching : 1;      /* CACHING yes/no was given, depending on optin/optout mode. */
+    uint64_t tracking_noloop : 1;       /* Don't send invalidation messages about writes performed by myself. */
+    uint64_t in_to_table : 1;           /* This client is in the timeout table. */
+    uint64_t protocol_error : 1;        /* Protocol error chatting with it. */
+    uint64_t close_after_command : 1;   /* Close after executing commands and writing entire reply. */
+    uint64_t deny_blocking : 1;         /* Indicate that the client should not be blocked. */
+    uint64_t repl_rdbonly : 1;          /* This client is a replica that only wants RDB without replication buffer. */
+    uint64_t no_evict : 1;              /* This client is protected against client memory eviction. */
+    uint64_t allow_oom : 1; /* Client used by RM_Call is allowed to fully execute scripts even when in OOM */
+    uint64_t no_touch : 1;  /* This client will not touch LFU/LRU stats. */
+    uint64_t pushing : 1;   /* This client is pushing notifications. */
+    uint64_t module_auth_has_result : 1; /* Indicates a client in the middle of module based auth had been authenticated
+                                            from the Module. */
+    uint64_t module_prevent_aof_prop : 1;  /* Module client do not want to propagate to AOF */
+    uint64_t module_prevent_repl_prop : 1; /* Module client do not want to propagate to replica */
+    uint64_t reprocessing_command : 1;     /* The client is re-processing the command. */
+    uint64_t replication_done : 1;         /* Indicate that replication has been done on the client */
+    uint64_t authenticated : 1;            /* Indicate a client has successfully authenticated */
+    uint64_t reserved : 11;                /* Reserved for future use */
 } ClientFlags;
 
 typedef struct client {

From fa01a293659b25bbf6fd396299dd24f3f5deb6b8 Mon Sep 17 00:00:00 2001
From: KarthikSubbarao <karthikrs2021@gmail.com>
Date: Mon, 1 Jul 2024 13:59:06 -0700
Subject: [PATCH 36/53] Allow Module authentication to succeed when cluster is
 down (#693)

Module Authentication using a blocking implementation currently gets
rejected when the "cluster is down" from the client timeout cron job
(`clientsCronHandleTimeout`).

This PR exempts clients blocked on Module Authentication from being
rejected here.

---------

Signed-off-by: KarthikSubbarao <karthikrs2021@gmail.com>
---
 src/cluster.c                    |  8 ++++----
 tests/unit/moduleapi/cluster.tcl | 24 +++++++++++++++++-------
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/src/cluster.c b/src/cluster.c
index c77c4d1ff1..45fde52842 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -1218,6 +1218,10 @@ int clusterRedirectBlockedClientIfNeeded(client *c) {
         dictEntry *de;
         dictIterator *di;
 
+        /* If the client is blocked on module, but not on a specific key,
+         * don't unblock it. */
+        if (c->bstate.btype == BLOCKED_MODULE && !moduleClientIsBlockedOnKeys(c)) return 0;
+
         /* If the cluster is down, unblock the client with the right error.
          * If the cluster is configured to allow reads on cluster down, we
          * still want to emit this error since a write will be required
@@ -1227,10 +1231,6 @@ int clusterRedirectBlockedClientIfNeeded(client *c) {
             return 1;
         }
 
-        /* If the client is blocked on module, but not on a specific key,
-         * don't unblock it (except for the CLUSTER_FAIL case above). */
-        if (c->bstate.btype == BLOCKED_MODULE && !moduleClientIsBlockedOnKeys(c)) return 0;
-
         /* All keys must belong to the same slot, so check first key only. */
         di = dictGetIterator(c->bstate.keys);
         if ((de = dictNext(di)) != NULL) {
diff --git a/tests/unit/moduleapi/cluster.tcl b/tests/unit/moduleapi/cluster.tcl
index 06878c1a01..84c7e274c8 100644
--- a/tests/unit/moduleapi/cluster.tcl
+++ b/tests/unit/moduleapi/cluster.tcl
@@ -8,8 +8,9 @@ tags {tls:skip external:skip cluster modules} {
 set testmodule_nokey [file normalize tests/modules/blockonbackground.so]
 set testmodule_blockedclient [file normalize tests/modules/blockedclient.so]
 set testmodule [file normalize tests/modules/blockonkeys.so]
+set testmodule_auth [file normalize tests/modules/auth.so]
 
-set modules [list loadmodule $testmodule loadmodule $testmodule_nokey loadmodule $testmodule_blockedclient]
+set modules [list loadmodule $testmodule loadmodule $testmodule_nokey loadmodule $testmodule_blockedclient loadmodule $testmodule_auth]
 start_cluster 3 0 [list config_lines $modules] {
 
     set node1 [srv 0 client]
@@ -146,18 +147,27 @@ start_cluster 3 0 [list config_lines $modules] {
         assert_error {*CLUSTERDOWN*} {$node1_rd read}
     }
 
-    test "Verify command (no keys) got unblocked after cluster failure" {
-        assert_error {*CLUSTERDOWN*} {$node2_rd read}
-
-        # verify there are no blocked clients
-        assert_equal [s 0 blocked_clients]  {0}
-        assert_equal [s -1 blocked_clients]  {0}
+    test "Verify command (with no keys) is not unblocked after cluster failure" {
+        assert_no_match {*CLUSTERDOWN*} {$node2_rd read}
+        # verify there are blocked clients
+        assert_equal [s -1 blocked_clients]  {1}
     }
 
     test "Verify command RM_Call is rejected when cluster is down" {
         assert_error "ERR Can not execute a command 'set' while the cluster is down" {$node1 do_rm_call set x 1}
     }
 
+    test "Verify Module Auth Succeeds when cluster is down" {
+        r acl setuser foo >pwd on ~* &* +@all
+        assert_error "*CLUSTERDOWN*" {r set x 1}
+        # Non Blocking Module Auth
+        assert_equal {OK} [r testmoduleone.rm_register_auth_cb]
+        assert_equal {OK} [r AUTH foo allow]
+        # Blocking Module Auth
+        assert_equal {OK} [r testmoduleone.rm_register_blocking_auth_cb]
+        assert_equal {OK} [r AUTH foo block_allow]
+    }
+
     resume_process $node3_pid
     $node1_rd close
     $node2_rd close

From 0cc16d0298995ae4292b0d317e17fa82c338942a Mon Sep 17 00:00:00 2001
From: Binbin <binloveplay1314@qq.com>
Date: Tue, 2 Jul 2024 12:05:21 +0800
Subject: [PATCH 37/53] Fix wrong reserved bits in ClientFlags (#729)

The reserved field should be 10 bits wide. With 11 bits the bit-fields
add up to 65 bits, which makes ClientFlags consume 8 more bytes.
Introduced in #614.

Signed-off-by: Binbin <binloveplay1314@qq.com>
---
 src/server.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/server.h b/src/server.h
index 1bdab048ca..73f68a73d4 100644
--- a/src/server.h
+++ b/src/server.h
@@ -1173,7 +1173,7 @@ typedef struct ClientFlags {
     uint64_t reprocessing_command : 1;     /* The client is re-processing the command. */
     uint64_t replication_done : 1;         /* Indicate that replication has been done on the client */
     uint64_t authenticated : 1;            /* Indicate a client has successfully authenticated */
-    uint64_t reserved : 11;                /* Reserved for future use */
+    uint64_t reserved : 10;                /* Reserved for future use */
 } ClientFlags;
 
 typedef struct client {

From 3323e422ad0c0dc81859017798cb04a9621d6522 Mon Sep 17 00:00:00 2001
From: Lipeng Zhu <lipeng.zhu@intel.com>
Date: Tue, 2 Jul 2024 12:52:43 +0800
Subject: [PATCH 38/53] Introduce thread-local storage variable to update
 thread's own used_memory and sum when reading to reduce atomic contention.
 (#674)

#### Description
This patch introduces a thread-local storage variable so that each
thread updates its own `used_memory` counter; the counters are summed
when read in `zmalloc_used_memory`. This reduces unnecessary `lock add`
contention on the atomic variable. As a safeguard, if more than 132
threads are created, threads with index >= 132 fall back to a shared
atomic counter.

#### Problem Statement
`zmalloc` and `zfree` related functions update `used_memory`
atomically on every operation, and they are called very frequently. From
the benchmark of
[memtier_benchmark-1Mkeys-load-stream-5-fields-with-100B-values-pipeline-10.yml](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-1Mkeys-load-stream-5-fields-with-100B-values-pipeline-10.yml)
, the share of cycles spent in `zmalloc` and `zfree` is high. They are
wrappers around the underlying allocator library and should not take
many cycles. Most of the cycles are contributed by `lock add` and
`lock sub`, which are expensive instructions. Profiling shows that the
metric updates mainly come from the main thread, so using TLS removes
much of the contention.

#### Performance Boost

**Note:** This optimization should benefit a wide range of common
benchmarks. I chose the 2 scenarios below to validate the performance
boost in my local environment.

| Test Suites | Performance Boost |
|-|-|
|[memtier_benchmark-1Mkeys-load-stream-5-fields-with-100B-values-pipeline-10](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-1Mkeys-load-stream-5-fields-with-100B-values-pipeline-10.yml)|8%|
|[memtier_benchmark-1Mkeys-load-string-with-100B-values-pipeline-10](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-1Mkeys-load-string-with-100B-values-pipeline-10.yml)|4%|

##### Test Env
- OS: Ubuntu 22.04.4 LTS
- Platform: Intel Xeon Platinum 8380
- Server and Client in same socket

##### Start Server
```sh
taskset -c 0-3 ~/valkey/src/valkey-server /tmp/valkey_1.conf
port 9001
bind * -::*
daemonize yes
protected-mode no
save ""
```

---------

Signed-off-by: Lipeng Zhu <lipeng.zhu@intel.com>
Co-authored-by: Wangyang Guo <wangyang.guo@intel.com>
---
 src/config.h     | 10 ++++++++
 src/networking.c |  9 --------
 src/zmalloc.c    | 59 ++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 65 insertions(+), 13 deletions(-)

diff --git a/src/config.h b/src/config.h
index e5adb785aa..95c2e84a00 100644
--- a/src/config.h
+++ b/src/config.h
@@ -329,4 +329,14 @@ void setcpuaffinity(const char *cpulist);
 #define HAVE_FADVISE
 #endif
 
+#define IO_THREADS_MAX_NUM 128
+
+#ifndef CACHE_LINE_SIZE
+#if defined(__aarch64__) && defined(__APPLE__)
+#define CACHE_LINE_SIZE 128
+#else
+#define CACHE_LINE_SIZE 64
+#endif
+#endif
+
 #endif
diff --git a/src/networking.c b/src/networking.c
index bb7bab02c3..f017e7c034 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -4222,15 +4222,6 @@ void processEventsWhileBlocked(void) {
  * Threaded I/O
  * ========================================================================== */
 
-#define IO_THREADS_MAX_NUM 128
-#ifndef CACHE_LINE_SIZE
-#if defined(__aarch64__) && defined(__APPLE__)
-#define CACHE_LINE_SIZE 128
-#else
-#define CACHE_LINE_SIZE 64
-#endif
-#endif
-
 typedef struct __attribute__((aligned(CACHE_LINE_SIZE))) threads_pending {
     _Atomic unsigned long value;
 } threads_pending;
diff --git a/src/zmalloc.c b/src/zmalloc.c
index 3ab646dd71..afee8e07a0 100644
--- a/src/zmalloc.c
+++ b/src/zmalloc.c
@@ -88,10 +88,53 @@ void zlibc_free(void *ptr) {
 #define dallocx(ptr, flags) je_dallocx(ptr, flags)
 #endif
 
-#define update_zmalloc_stat_alloc(__n) atomic_fetch_add_explicit(&used_memory, (__n), memory_order_relaxed)
-#define update_zmalloc_stat_free(__n) atomic_fetch_sub_explicit(&used_memory, (__n), memory_order_relaxed)
+#if __STDC_NO_THREADS__
+#define thread_local __thread
+#else
+#include <threads.h>
+#endif
+
+#define MAX_THREADS_NUM (IO_THREADS_MAX_NUM + 3 + 1)
+/* A thread-local storage which keep the current thread's index in the used_memory_thread array. */
+static thread_local int thread_index = -1;
+/* Element in used_memory_thread array should only be written by a single thread which
+ * distinguished by the thread-local storage thread_index. But when an element in
+ * used_memory_thread array was written, it could be read by another thread simultaneously,
+ * the reader will see the inconsistency memory on non x86 architecture potentially.
+ * For the ARM and PowerPC platform, we can solve this issue by make the memory aligned.
+ * For the other architecture, lets fall back to the atomic operation to keep safe. */
+#if defined(__i386__) || defined(__x86_64__) || defined(__amd64__) || defined(__POWERPC__) || defined(__arm__) ||      \
+    defined(__arm64__)
+static __attribute__((aligned(sizeof(size_t)))) size_t used_memory_thread[MAX_THREADS_NUM];
+#else
+static _Atomic size_t used_memory_thread[MAX_THREADS_NUM];
+#endif
+static atomic_int total_active_threads = 0;
+/* This is a simple protection. It's used only if some modules create a lot of threads. */
+static atomic_size_t used_memory_for_additional_threads = 0;
+
+/* Register the thread index in start_routine. */
+static inline void zmalloc_register_thread_index(void) {
+    thread_index = atomic_fetch_add_explicit(&total_active_threads, 1, memory_order_relaxed);
+}
+
+static inline void update_zmalloc_stat_alloc(size_t size) {
+    if (unlikely(thread_index == -1)) zmalloc_register_thread_index();
+    if (unlikely(thread_index >= MAX_THREADS_NUM)) {
+        atomic_fetch_add_explicit(&used_memory_for_additional_threads, size, memory_order_relaxed);
+    } else {
+        used_memory_thread[thread_index] += size;
+    }
+}
 
-static _Atomic size_t used_memory = 0;
+static inline void update_zmalloc_stat_free(size_t size) {
+    if (unlikely(thread_index == -1)) zmalloc_register_thread_index();
+    if (unlikely(thread_index >= MAX_THREADS_NUM)) {
+        atomic_fetch_sub_explicit(&used_memory_for_additional_threads, size, memory_order_relaxed);
+    } else {
+        used_memory_thread[thread_index] -= size;
+    }
+}
 
 static void zmalloc_default_oom(size_t size) {
     fprintf(stderr, "zmalloc: Out of memory trying to allocate %zu bytes\n", size);
@@ -415,7 +458,15 @@ char *zstrdup(const char *s) {
 }
 
 size_t zmalloc_used_memory(void) {
-    size_t um = atomic_load_explicit(&used_memory, memory_order_relaxed);
+    size_t um = 0;
+    int threads_num = total_active_threads;
+    if (unlikely(total_active_threads > MAX_THREADS_NUM)) {
+        um += atomic_load_explicit(&used_memory_for_additional_threads, memory_order_relaxed);
+        threads_num = MAX_THREADS_NUM;
+    }
+    for (int i = 0; i < threads_num; i++) {
+        um += used_memory_thread[i];
+    }
     return um;
 }
 

From eff45f546762577bd1877dfeece41c53b39926ad Mon Sep 17 00:00:00 2001
From: Sankar <1890648+srgsanky@users.noreply.github.com>
Date: Mon, 1 Jul 2024 22:27:38 -0700
Subject: [PATCH 39/53] Fix flakiness of cluster-multiple-meets and
 cluster-reliable-meet (#728)

Tests in cluster-multiple-meets were flaky as reported by @madolson

*
https://github.com/valkey-io/valkey/actions/runs/9688455588/job/26776953320
*
https://github.com/valkey-io/valkey/actions/runs/9688455588/job/26776953585

I wasn't able to reproduce this locally, but I suspect that the
flakiness is coming from the fact that nodes are reported as "connected"
as long as there is an outgoing link. An outgoing link is created before
MEET is sent out.

Signed-off-by: Sankar <1890648+srgsanky@users.noreply.github.com>
---
 tests/unit/cluster/cluster-multiple-meets.tcl | 8 +++++++-
 tests/unit/cluster/cluster-reliable-meet.tcl  | 8 +++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/tests/unit/cluster/cluster-multiple-meets.tcl b/tests/unit/cluster/cluster-multiple-meets.tcl
index 07a2582133..059f03fbe4 100644
--- a/tests/unit/cluster/cluster-multiple-meets.tcl
+++ b/tests/unit/cluster/cluster-multiple-meets.tcl
@@ -51,7 +51,13 @@ tags {tls:skip external:skip cluster} {
             }
 
             # 0 will be connected to 1, but 1 won't see that 0 is connected
-            assert {[llength [get_cluster_nodes 1 connected]] == 1}
+            # Using a wait condition here as an assert can be flaky - especially
+            # when cluster nodes is processed when the link is established to send MEET.
+            wait_for_condition 1000 50 {
+                [llength [get_cluster_nodes 1 connected]] == 1
+            } else {
+                fail "Node 1 recognizes node 0 even though it drops PONGs from node 0"
+            }
             assert {[llength [get_cluster_nodes 0 connected]] == 2}
 
             # Drop incoming and outgoing links from/to 1
diff --git a/tests/unit/cluster/cluster-reliable-meet.tcl b/tests/unit/cluster/cluster-reliable-meet.tcl
index 41da97ab9b..45f5a6dc89 100644
--- a/tests/unit/cluster/cluster-reliable-meet.tcl
+++ b/tests/unit/cluster/cluster-reliable-meet.tcl
@@ -50,7 +50,13 @@ tags {tls:skip external:skip cluster} {
             }
 
             # Make sure the nodes still don't know about each other
-            assert {[llength [get_cluster_nodes 1 connected]] == 1}
+            # Using a wait condition here as an assert can be flaky - especially
+            # when cluster nodes is processed when the link is established to send MEET.
+            wait_for_condition 1000 50 {
+                [llength [get_cluster_nodes 1 connected]] == 1
+            } else {
+                fail "Node 1 recognizes node 0 even though node 0 drops MEETs from node 1"
+            }
             assert {[llength [get_cluster_nodes 0 connected]] == 1}
 
             R 0 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE

From 1ea49e5845a11250a13273c725720822c26860f1 Mon Sep 17 00:00:00 2001
From: Binbin <binloveplay1314@qq.com>
Date: Tue, 2 Jul 2024 23:32:34 +0800
Subject: [PATCH 40/53] Make valkey compatible with redis-sentinel to start
 sentinel (#731)

We already have similar changes to check-rdb / check-aof, apply
this change to sentinel.

Fixes #719.

Signed-off-by: Binbin <binloveplay1314@qq.com>
---
 src/server.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/server.c b/src/server.c
index 228307e3cc..574423fdee 100644
--- a/src/server.c
+++ b/src/server.c
@@ -6380,6 +6380,9 @@ void memtest(size_t megabytes, int passes);
 int checkForSentinelMode(int argc, char **argv, char *exec_name) {
     if (strstr(exec_name, "valkey-sentinel") != NULL) return 1;
 
+    /* valkey may install symlinks like redis-sentinel -> valkey-sentinel. */
+    if (strstr(exec_name, "redis-sentinel") != NULL) return 1;
+
     for (int j = 1; j < argc; j++)
         if (!strcmp(argv[j], "--sentinel")) return 1;
     return 0;

From 8faf2788a207373b2cb356188f939accaaf6ce37 Mon Sep 17 00:00:00 2001
From: Harkrishn Patro <harkrisp@amazon.com>
Date: Tue, 2 Jul 2024 15:45:37 -0700
Subject: [PATCH 41/53] Embed key into dict entry (#541)

This PR incorporates the key-embedding changes described in
https://github.com/redis/redis/issues/12216
With this change there is no `key` pointer; the key is embedded
within the `dictEntry`. 1 byte is used for additional bookkeeping.
Overall the saving would be 7 bytes on average.

Key changes:

New dict entry type introduced, which is now used as an entry for the
main dictionary:

```c
typedef struct {
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;  /* Next entry in the same hash bucket. */
    uint8_t key_header_size; /* offset into key_buf where the key is located at. */
    unsigned char key_buf[]; /* buffer with embedded key. */
} embeddedDictEntry;
```

One new function has been added to the dictType:

```c
size_t (*embedKey)(unsigned char *buf, size_t buf_len, const void *key, unsigned char *header_size);
```


The change is opt-in per dict type, hence sets, hashes and other types
that use a dictionary are not impacted.
With this change the main dictionary now owns the data, so the copy on
insert in dbAdd is no longer needed.

### Benchmarking results

TL;DR: Around 9-10% reduction in overall memory usage for scenarios
with 16-byte keys and values of 8 bytes and 16 bytes. The throughput
per second varies but is similar or greater in most of the runs with
the changes compared to unstable (ae2d421).

---------

Signed-off-by: Harkrishn Patro <harkrisp@amazon.com>
Signed-off-by: Madelyn Olson <madelyneolson@gmail.com>
Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
---
 src/db.c      |  16 +++--
 src/debug.c   |   2 +-
 src/defrag.c  |  54 ++++++++++------
 src/dict.c    | 166 ++++++++++++++++++++++++++++++++++++++------------
 src/dict.h    |  21 ++++---
 src/kvstore.c |  26 +++++---
 src/kvstore.h |   1 -
 src/object.c  |   9 ++-
 src/rdb.c     |   3 +
 src/sds.c     |  19 ++++++
 src/sds.h     |   1 +
 src/server.c  |   8 ++-
 12 files changed, 240 insertions(+), 86 deletions(-)

diff --git a/src/db.c b/src/db.c
index c33f10f90e..5a6562a1e2 100644
--- a/src/db.c
+++ b/src/db.c
@@ -190,7 +190,11 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) {
     return o;
 }
 
-/* Add the key to the DB. It's up to the caller to increment the reference
+/* Add the key to the DB.
+ *
+ * In this case a copy of `key` is copied in kvstore, the caller must ensure the `key` is properly freed.
+ *
+ * It's up to the caller to increment the reference
  * counter of the value if needed.
  *
  * If the update_if_existing argument is false, the program is aborted
@@ -204,7 +208,6 @@ static void dbAddInternal(serverDb *db, robj *key, robj *val, int update_if_exis
         return;
     }
     serverAssertWithInfo(NULL, key, de != NULL);
-    kvstoreDictSetKey(db->keys, slot, de, sdsdup(key->ptr));
     initObjectLRUOrLFU(val);
     kvstoreDictSetVal(db->keys, slot, de, val);
     signalKeyAsReady(db, key, val->type);
@@ -240,15 +243,16 @@ int getKeySlot(sds key) {
 
 /* This is a special version of dbAdd() that is used only when loading
  * keys from the RDB file: the key is passed as an SDS string that is
- * retained by the function (and not freed by the caller).
+ * copied by the function and freed by the caller.
  *
  * Moreover this function will not abort if the key is already busy, to
  * give more control to the caller, nor will signal the key as ready
  * since it is not useful in this context.
  *
- * The function returns 1 if the key was added to the database, taking
- * ownership of the SDS string, otherwise 0 is returned, and is up to the
- * caller to free the SDS string. */
+ * The function returns 1 if the key was added to the database, otherwise 0 is returned.
+ *
+ * In this case a copy of `key` is copied in kvstore, the caller must ensure the `key` is properly freed.
+ */
 int dbAddRDBLoad(serverDb *db, sds key, robj *val) {
     int slot = getKeySlot(key);
     dictEntry *de = kvstoreDictAddRaw(db->keys, slot, key, NULL);
diff --git a/src/debug.c b/src/debug.c
index 6eaabe40f4..36c425a4f4 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -864,7 +864,7 @@ void debugCommand(client *c) {
         sds sizes = sdsempty();
         sizes = sdscatprintf(sizes, "bits:%d ", (sizeof(void *) == 8) ? 64 : 32);
         sizes = sdscatprintf(sizes, "robj:%d ", (int)sizeof(robj));
-        sizes = sdscatprintf(sizes, "dictentry:%d ", (int)dictEntryMemUsage());
+        sizes = sdscatprintf(sizes, "dictentry:%d ", (int)dictEntryMemUsage(NULL));
         sizes = sdscatprintf(sizes, "sdshdr5:%d ", (int)sizeof(struct sdshdr5));
         sizes = sdscatprintf(sizes, "sdshdr8:%d ", (int)sizeof(struct sdshdr8));
         sizes = sdscatprintf(sizes, "sdshdr16:%d ", (int)sizeof(struct sdshdr16));
diff --git a/src/defrag.c b/src/defrag.c
index 2de1c061e8..5a54875864 100644
--- a/src/defrag.c
+++ b/src/defrag.c
@@ -41,6 +41,7 @@
 typedef struct defragCtx {
     void *privdata;
     int slot;
+    void *aux;
 } defragCtx;
 
 typedef struct defragPubSubCtx {
@@ -75,6 +76,36 @@ void *activeDefragAlloc(void *ptr) {
     return newptr;
 }
 
+/* This method captures the expiry db dict entry which refers to data stored in keys db dict entry. */
+void defragEntryStartCbForKeys(void *ctx, void *oldptr) {
+    defragCtx *defragctx = (defragCtx *)ctx;
+    serverDb *db = defragctx->privdata;
+    sds oldsds = (sds)dictGetKey((dictEntry *)oldptr);
+    int slot = defragctx->slot;
+    if (kvstoreDictSize(db->expires, slot)) {
+        dictEntry *expire_de = kvstoreDictFind(db->expires, slot, oldsds);
+        defragctx->aux = expire_de;
+    }
+}
+
+/* This method updates the key of expiry db dict entry. The key might be no longer valid
+ * as it could have been cleaned up during the defrag-realloc of the main dictionary. */
+void defragEntryFinishCbForKeys(void *ctx, void *newptr) {
+    defragCtx *defragctx = (defragCtx *)ctx;
+    dictEntry *expire_de = (dictEntry *)defragctx->aux;
+    /* Item doesn't have TTL associated to it. */
+    if (!expire_de) return;
+    /* No reallocation happened. */
+    if (!newptr) {
+        expire_de = NULL;
+        return;
+    }
+    serverDb *db = defragctx->privdata;
+    sds newsds = (sds)dictGetKey((dictEntry *)newptr);
+    int slot = defragctx->slot;
+    kvstoreDictSetKey(db->expires, slot, expire_de, newsds);
+}
+
 /*Defrag helper for sds strings
  *
  * returns NULL in case the allocation wasn't moved.
@@ -650,25 +681,10 @@ void defragModule(serverDb *db, dictEntry *kde) {
 /* for each key we scan in the main dict, this function will attempt to defrag
  * all the various pointers it has. */
 void defragKey(defragCtx *ctx, dictEntry *de) {
-    sds keysds = dictGetKey(de);
-    robj *newob, *ob;
-    unsigned char *newzl;
-    sds newsds;
     serverDb *db = ctx->privdata;
     int slot = ctx->slot;
-    /* Try to defrag the key name. */
-    newsds = activeDefragSds(keysds);
-    if (newsds) {
-        kvstoreDictSetKey(db->keys, slot, de, newsds);
-        if (kvstoreDictSize(db->expires, slot)) {
-            /* We can't search in db->expires for that key after we've released
-             * the pointer it holds, since it won't be able to do the string
-             * compare, but we can find the entry using key hash and pointer. */
-            uint64_t hash = kvstoreGetHash(db->expires, newsds);
-            dictEntry *expire_de = kvstoreDictFindEntryByPtrAndHash(db->expires, slot, keysds, hash);
-            if (expire_de) kvstoreDictSetKey(db->expires, slot, expire_de, newsds);
-        }
-    }
+    robj *newob, *ob;
+    unsigned char *newzl;
 
     /* Try to defrag robj and / or string value. */
     ob = dictGetVal(de);
@@ -984,7 +1000,9 @@ void activeDefragCycle(void) {
     endtime = start + timelimit;
     latencyStartMonitor(latency);
 
-    dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc};
+    dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc,
+                                     .defragEntryStartCb = defragEntryStartCbForKeys,
+                                     .defragEntryFinishCb = defragEntryFinishCbForKeys};
     do {
         /* if we're not continuing a scan from the last call or loop, start a new one */
         if (!defrag_stage && !defrag_cursor && (slot < 0)) {
diff --git a/src/dict.c b/src/dict.c
index bc92d49564..b6a06eb36a 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -35,6 +35,7 @@
 
 #include "fmacros.h"
 
+#include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
@@ -48,6 +49,10 @@
 #include "serverassert.h"
 #include "monotonic.h"
 
+#ifndef static_assert
+#define static_assert(expr, lit) _Static_assert(expr, lit)
+#endif
+
 #define UNUSED(V) ((void)V)
 
 /* Using dictSetResizeEnabled() we make possible to disable
@@ -76,6 +81,33 @@ struct dictEntry {
     struct dictEntry *next; /* Next entry in the same hash bucket. */
 };
 
+typedef struct {
+    union {
+        void *val;
+        uint64_t u64;
+        int64_t s64;
+        double d;
+    } v;
+    struct dictEntry *next;  /* Next entry in the same hash bucket. */
+    uint8_t key_header_size; /* offset into key_buf where the key is located at. */
+    unsigned char key_buf[]; /* buffer with embedded key. */
+} embeddedDictEntry;
+
+/* Validation and helper for `embeddedDictEntry` */
+
+static_assert(offsetof(embeddedDictEntry, v) == 0, "unexpected field offset");
+static_assert(offsetof(embeddedDictEntry, next) == sizeof(double), "unexpected field offset");
+static_assert(offsetof(embeddedDictEntry, key_header_size) == sizeof(double) + sizeof(void *),
+              "unexpected field offset");
+/* key_buf is located after a union with a double value  `v.d`, a pointer `next` and uint8_t field `key_header_size` */
+static_assert(offsetof(embeddedDictEntry, key_buf) == sizeof(double) + sizeof(void *) + sizeof(uint8_t),
+              "unexpected field offset");
+
+/* The minimum amount of bytes required for embedded dict entry. */
+static inline size_t compactSizeEmbeddedDictEntry(void) {
+    return offsetof(embeddedDictEntry, key_buf);
+}
+
 typedef struct {
     void *key;
     dictEntry *next;
@@ -91,6 +123,19 @@ static dictEntry *dictGetNext(const dictEntry *de);
 static dictEntry **dictGetNextRef(dictEntry *de);
 static void dictSetNext(dictEntry *de, dictEntry *next);
 
+/* -------------------------- Utility functions -------------------------------- */
+
+/* Validates dict type members dependencies. */
+static inline void validateDictType(dictType *type) {
+    if (type->embedded_entry) {
+        assert(type->embedKey);
+        assert(!type->keyDup);
+        assert(!type->keyDestructor);
+    } else {
+        assert(!type->embedKey);
+    }
+}
+
 /* -------------------------- hash functions -------------------------------- */
 
 static uint8_t dict_hash_function_seed[16];
@@ -126,6 +171,8 @@ uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) {
 #define ENTRY_PTR_MASK 7     /* 111 */
 #define ENTRY_PTR_NORMAL 0   /* 000 */
 #define ENTRY_PTR_NO_VALUE 2 /* 010 */
+#define ENTRY_PTR_EMBEDDED 4 /* 100 */
+/*      ENTRY_PTR_IS_KEY        xx1 */
 
 /* Returns 1 if the entry pointer is a pointer to a key, rather than to an
  * allocated entry. Returns 0 otherwise. */
@@ -145,12 +192,9 @@ static inline int entryIsNoValue(const dictEntry *de) {
     return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_NO_VALUE;
 }
 
-/* Creates an entry without a value field. */
-static inline dictEntry *createEntryNoValue(void *key, dictEntry *next) {
-    dictEntryNoValue *entry = zmalloc(sizeof(*entry));
-    entry->key = key;
-    entry->next = next;
-    return (dictEntry *)(void *)((uintptr_t)(void *)entry | ENTRY_PTR_NO_VALUE);
+
+static inline int entryIsEmbedded(const dictEntry *de) {
+    return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_EMBEDDED;
 }
 
 static inline dictEntry *encodeMaskedPtr(const void *ptr, unsigned int bits) {
@@ -163,15 +207,40 @@ static inline void *decodeMaskedPtr(const dictEntry *de) {
     return (void *)((uintptr_t)(void *)de & ~ENTRY_PTR_MASK);
 }
 
+/* Creates an entry without a value field. */
+static inline dictEntry *createEntryNoValue(void *key, dictEntry *next) {
+    dictEntryNoValue *entry = zmalloc(sizeof(*entry));
+    entry->key = key;
+    entry->next = next;
+    return encodeMaskedPtr(entry, ENTRY_PTR_NO_VALUE);
+}
+
+static inline dictEntry *createEmbeddedEntry(void *key, dictEntry *next, dictType *dt) {
+    size_t key_len = dt->embedKey(NULL, 0, key, NULL);
+    embeddedDictEntry *entry = zmalloc(compactSizeEmbeddedDictEntry() + key_len);
+    dt->embedKey(entry->key_buf, key_len, key, &entry->key_header_size);
+    entry->next = next;
+    return encodeMaskedPtr(entry, ENTRY_PTR_EMBEDDED);
+}
+
+static inline void *getEmbeddedKey(const dictEntry *de) {
+    embeddedDictEntry *entry = (embeddedDictEntry *)decodeMaskedPtr(de);
+    return &entry->key_buf[entry->key_header_size];
+}
+
 /* Decodes the pointer to an entry without value, when you know it is an entry
  * without value. Hint: Use entryIsNoValue to check. */
 static inline dictEntryNoValue *decodeEntryNoValue(const dictEntry *de) {
     return decodeMaskedPtr(de);
 }
 
+static inline embeddedDictEntry *decodeEmbeddedEntry(const dictEntry *de) {
+    return decodeMaskedPtr(de);
+}
+
 /* Returns 1 if the entry has a value field and 0 otherwise. */
 static inline int entryHasValue(const dictEntry *de) {
-    return entryIsNormal(de);
+    return entryIsNormal(de) || entryIsEmbedded(de);
 }
 
 /* ----------------------------- API implementation ------------------------- */
@@ -185,6 +254,7 @@ static void _dictReset(dict *d, int htidx) {
 
 /* Create a new hash table */
 dict *dictCreate(dictType *type) {
+    validateDictType(type);
     size_t metasize = type->dictMetadataBytes ? type->dictMetadataBytes(NULL) : 0;
     dict *d = zmalloc(sizeof(*d) + metasize);
     if (metasize > 0) {
@@ -473,6 +543,10 @@ int dictAdd(dict *d, void *key, void *val) {
  * with the existing entry if existing is not NULL.
  *
  * If key was added, the hash entry is returned to be manipulated by the caller.
+ *
+ * The dict handles `key` based on `dictType` during initialization:
+ * - If `dictType.embedded-entry` is 1, it clones the `key`.
+ * - Otherwise, it assumes ownership of the `key`.
  */
 dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) {
     /* Get the position for the new key or NULL if the key already exists. */
@@ -511,6 +585,8 @@ dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) {
             /* Allocate an entry without value. */
             entry = createEntryNoValue(key, *bucket);
         }
+    } else if (d->type->embedded_entry) {
+        entry = createEmbeddedEntry(key, *bucket, d->type);
     } else {
         /* Allocate the memory and store the new entry.
          * Insert the element in top, with the assumption that in a database
@@ -658,6 +734,7 @@ void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {
     if (he == NULL) return;
     dictFreeKey(d, he);
     dictFreeVal(d, he);
+    /* Clear the dictEntry */
     if (!entryIsKey(he)) zfree(decodeMaskedPtr(he));
 }
 
@@ -804,7 +881,11 @@ void dictSetKey(dict *d, dictEntry *de, void *key) {
 void dictSetVal(dict *d, dictEntry *de, void *val) {
     UNUSED(d);
     assert(entryHasValue(de));
-    de->v.val = val;
+    if (entryIsEmbedded(de)) {
+        decodeEmbeddedEntry(de)->v.val = val;
+    } else {
+        de->v.val = val;
+    }
 }
 
 void dictSetSignedIntegerVal(dictEntry *de, int64_t val) {
@@ -840,11 +921,15 @@ double dictIncrDoubleVal(dictEntry *de, double val) {
 void *dictGetKey(const dictEntry *de) {
     if (entryIsKey(de)) return (void *)de;
     if (entryIsNoValue(de)) return decodeEntryNoValue(de)->key;
+    if (entryIsEmbedded(de)) return getEmbeddedKey(de);
     return de->key;
 }
 
 void *dictGetVal(const dictEntry *de) {
     assert(entryHasValue(de));
+    if (entryIsEmbedded(de)) {
+        return decodeEmbeddedEntry(de)->v.val;
+    }
     return de->v.val;
 }
 
@@ -874,6 +959,7 @@ double *dictGetDoubleValPtr(dictEntry *de) {
 static dictEntry *dictGetNext(const dictEntry *de) {
     if (entryIsKey(de)) return NULL; /* there's no next */
     if (entryIsNoValue(de)) return decodeEntryNoValue(de)->next;
+    if (entryIsEmbedded(de)) return decodeEmbeddedEntry(de)->next;
     return de->next;
 }
 
@@ -882,14 +968,16 @@ static dictEntry *dictGetNext(const dictEntry *de) {
 static dictEntry **dictGetNextRef(dictEntry *de) {
     if (entryIsKey(de)) return NULL;
     if (entryIsNoValue(de)) return &decodeEntryNoValue(de)->next;
+    if (entryIsEmbedded(de)) return &decodeEmbeddedEntry(de)->next;
     return &de->next;
 }
 
 static void dictSetNext(dictEntry *de, dictEntry *next) {
     assert(!entryIsKey(de));
     if (entryIsNoValue(de)) {
-        dictEntryNoValue *entry = decodeEntryNoValue(de);
-        entry->next = next;
+        decodeEntryNoValue(de)->next = next;
+    } else if (entryIsEmbedded(de)) {
+        decodeEmbeddedEntry(de)->next = next;
     } else {
         de->next = next;
     }
@@ -901,8 +989,20 @@ size_t dictMemUsage(const dict *d) {
     return dictSize(d) * sizeof(dictEntry) + dictBuckets(d) * sizeof(dictEntry *);
 }
 
-size_t dictEntryMemUsage(void) {
-    return sizeof(dictEntry);
+/* Returns the memory usage in bytes of dictEntry based on the type. if `de` is NULL, return the size of
+ * regular dict entry else return based on the type. */
+size_t dictEntryMemUsage(dictEntry *de) {
+    if (de == NULL || entryIsNormal(de))
+        return sizeof(dictEntry);
+    else if (entryIsKey(de))
+        return 0;
+    else if (entryIsNoValue(de))
+        return sizeof(dictEntryNoValue);
+    else if (entryIsEmbedded(de))
+        return zmalloc_size(decodeEmbeddedEntry(de));
+    else
+        assert("Entry type not supported");
+    return 0;
 }
 
 /* A fingerprint is a 64 bit number that represents the state of the dictionary
@@ -1172,7 +1272,7 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
 
 /* Reallocate the dictEntry, key and value allocations in a bucket using the
  * provided allocation functions in order to defrag them. */
-static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragfns) {
+static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragfns, void *privdata) {
     dictDefragAllocFunction *defragalloc = defragfns->defragAlloc;
     dictDefragAllocFunction *defragkey = defragfns->defragKey;
     dictDefragAllocFunction *defragval = defragfns->defragVal;
@@ -1190,6 +1290,17 @@ static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragf
                 entry = newentry;
             }
             if (newkey) entry->key = newkey;
+        } else if (entryIsEmbedded(de)) {
+            defragfns->defragEntryStartCb(privdata, de);
+            embeddedDictEntry *entry = decodeEmbeddedEntry(de), *newentry;
+            if ((newentry = defragalloc(entry))) {
+                newde = encodeMaskedPtr(newentry, ENTRY_PTR_EMBEDDED);
+                entry = newentry;
+                defragfns->defragEntryFinishCb(privdata, newde);
+            } else {
+                defragfns->defragEntryFinishCb(privdata, NULL);
+            }
+            if (newval) entry->v.val = newval;
         } else {
             assert(entryIsNormal(de));
             newde = defragalloc(de);
@@ -1353,7 +1464,7 @@ dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctio
 
         /* Emit entries at cursor */
         if (defragfns) {
-            dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns);
+            dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns, privdata);
         }
         de = d->ht_table[htidx0][v & m0];
         while (de) {
@@ -1386,7 +1497,7 @@ dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctio
 
         /* Emit entries at cursor */
         if (defragfns) {
-            dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns);
+            dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns, privdata);
         }
         de = d->ht_table[htidx0][v & m0];
         while (de) {
@@ -1400,7 +1511,7 @@ dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctio
         do {
             /* Emit entries at cursor */
             if (defragfns) {
-                dictDefragBucket(&d->ht_table[htidx1][v & m1], defragfns);
+                dictDefragBucket(&d->ht_table[htidx1][v & m1], defragfns, privdata);
             }
             de = d->ht_table[htidx1][v & m1];
             while (de) {
@@ -1573,29 +1684,6 @@ uint64_t dictGetHash(dict *d, const void *key) {
     return dictHashKey(d, key);
 }
 
-/* Finds the dictEntry using pointer and pre-calculated hash.
- * oldkey is a dead pointer and should not be accessed.
- * the hash value should be provided using dictGetHash.
- * no string / key comparison is performed.
- * return value is a pointer to the dictEntry if found, or NULL if not found. */
-dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) {
-    dictEntry *he;
-    unsigned long idx, table;
-
-    if (dictSize(d) == 0) return NULL; /* dict is empty */
-    for (table = 0; table <= 1; table++) {
-        idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
-        if (table == 0 && (long)idx < d->rehashidx) continue;
-        he = d->ht_table[table][idx];
-        while (he) {
-            if (oldptr == dictGetKey(he)) return he;
-            he = dictGetNext(he);
-        }
-        if (!dictIsRehashing(d)) return NULL;
-    }
-    return NULL;
-}
-
 /* Provides the old and new ht size for a given dictionary during rehashing. This method
  * should only be invoked during initialization/rehashing. */
 void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size) {
diff --git a/src/dict.h b/src/dict.h
index 723e5a54c2..a7c5c71826 100644
--- a/src/dict.h
+++ b/src/dict.h
@@ -66,6 +66,10 @@ typedef struct dictType {
     /* Allow a dict to carry extra caller-defined metadata. The
      * extra memory is initialized to 0 when a dict is allocated. */
     size_t (*dictMetadataBytes)(dict *d);
+    /* Method for copying a given key into a buffer of buf_len. Also used for
+     * computing the length of the key + header when buf is NULL. */
+    size_t (*embedKey)(unsigned char *buf, size_t buf_len, const void *key, unsigned char *header_size);
+
 
     /* Data */
     void *userdata;
@@ -80,8 +84,9 @@ typedef struct dictType {
      * enables one more optimization: to store a key without an allocated
      * dictEntry. */
     unsigned int keys_are_odd : 1;
-    /* TODO: Add a 'keys_are_even' flag and use a similar optimization if that
-     * flag is set. */
+    /* If embedded_entry flag is set, it indicates that a copy of the key is created and the key is embedded
+     * as part of the dict entry. */
+    unsigned int embedded_entry : 1;
 } dictType;
 
 #define DICTHT_SIZE(exp) ((exp) == -1 ? 0 : (unsigned long)1 << (exp))
@@ -127,10 +132,13 @@ typedef struct dictStats {
 
 typedef void(dictScanFunction)(void *privdata, const dictEntry *de);
 typedef void *(dictDefragAllocFunction)(void *ptr);
+typedef void(dictDefragEntryCb)(void *privdata, void *ptr);
 typedef struct {
-    dictDefragAllocFunction *defragAlloc; /* Used for entries etc. */
-    dictDefragAllocFunction *defragKey;   /* Defrag-realloc keys (optional) */
-    dictDefragAllocFunction *defragVal;   /* Defrag-realloc values (optional) */
+    dictDefragAllocFunction *defragAlloc;   /* Used for entries etc. */
+    dictDefragAllocFunction *defragKey;     /* Defrag-realloc keys (optional) */
+    dictDefragAllocFunction *defragVal;     /* Defrag-realloc values (optional) */
+    dictDefragEntryCb *defragEntryStartCb;  /* Callback invoked prior to the start of defrag of dictEntry. */
+    dictDefragEntryCb *defragEntryFinishCb; /* Callback invoked after the defrag of dictEntry is tried. */
 } dictDefragFunctions;
 
 /* This is the initial size of every hash table */
@@ -212,7 +220,7 @@ uint64_t dictGetUnsignedIntegerVal(const dictEntry *de);
 double dictGetDoubleVal(const dictEntry *de);
 double *dictGetDoubleValPtr(dictEntry *de);
 size_t dictMemUsage(const dict *d);
-size_t dictEntryMemUsage(void);
+size_t dictEntryMemUsage(dictEntry *de);
 dictIterator *dictGetIterator(dict *d);
 dictIterator *dictGetSafeIterator(dict *d);
 void dictInitIterator(dictIterator *iter, dict *d);
@@ -236,7 +244,6 @@ unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *pri
 unsigned long
 dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata);
 uint64_t dictGetHash(dict *d, const void *key);
-dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);
 void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size);
 
 size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full);
diff --git a/src/kvstore.c b/src/kvstore.c
index a43b72e1e1..16cc8e4822 100644
--- a/src/kvstore.c
+++ b/src/kvstore.c
@@ -241,7 +241,12 @@ static size_t kvstoreDictMetadataSize(dict *d) {
 
 /* Create an array of dictionaries
  * num_dicts_bits is the log2 of the amount of dictionaries needed (e.g. 0 for 1 dict,
- * 3 for 8 dicts, etc.) */
+ * 3 for 8 dicts, etc.)
+ *
+ * The kvstore handles `key` based on `dictType` during initialization:
+ * - If `dictType.embedded-entry` is 1, it clones the `key`.
+ * - Otherwise, it assumes ownership of the `key`.
+ */
 kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags) {
     /* We can't support more than 2^16 dicts because we want to save 48 bits
      * for the dict cursor, see kvstoreScan */
@@ -340,7 +345,7 @@ size_t kvstoreMemUsage(kvstore *kvs) {
     size_t mem = sizeof(*kvs);
 
     unsigned long long keys_count = kvstoreSize(kvs);
-    mem += keys_count * dictEntryMemUsage() + kvstoreBuckets(kvs) * sizeof(dictEntry *) +
+    mem += keys_count * dictEntryMemUsage(NULL) + kvstoreBuckets(kvs) * sizeof(dictEntry *) +
            kvs->allocated_dicts * (sizeof(dict) + kvstoreDictMetadataSize(NULL));
 
     /* Values are dict* shared with kvs->dicts */
@@ -717,12 +722,6 @@ dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx) {
     return dictGetFairRandomKey(d);
 }
 
-dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash) {
-    dict *d = kvstoreGetDict(kvs, didx);
-    if (!d) return NULL;
-    return dictFindEntryByPtrAndHash(d, oldptr, hash);
-}
-
 unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count) {
     dict *d = kvstoreGetDict(kvs, didx);
     if (!d) return 0;
@@ -776,6 +775,17 @@ dictEntry *kvstoreDictFind(kvstore *kvs, int didx, void *key) {
     return dictFind(d, key);
 }
 
+/*
+ * The kvstore handles `key` based on `dictType` during initialization:
+ * - If `dictType.embedded-entry` is 1, it clones the `key`.
+ * - Otherwise, it assumes ownership of the `key`.
+ * The caller must ensure the `key` is properly freed.
+ *
+ * kvstore current usage:
+ *
+ * 1. keyspace (db.keys) kvstore - creates a copy of the key.
+ * 2. expiry (db.expires), pubsub_channels and pubsubshard_channels kvstore - takes ownership of the key.
+ */
 dictEntry *kvstoreDictAddRaw(kvstore *kvs, int didx, void *key, dictEntry **existing) {
     dict *d = createDictIfNeeded(kvs, didx);
     dictEntry *ret = dictAddRaw(d, key, existing);
diff --git a/src/kvstore.h b/src/kvstore.h
index e7e21f8aa9..a94f366b6b 100644
--- a/src/kvstore.h
+++ b/src/kvstore.h
@@ -58,7 +58,6 @@ void kvstoreReleaseDictIterator(kvstoreDictIterator *kvs_id);
 dictEntry *kvstoreDictIteratorNext(kvstoreDictIterator *kvs_di);
 dictEntry *kvstoreDictGetRandomKey(kvstore *kvs, int didx);
 dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx);
-dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash);
 unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count);
 int kvstoreDictExpand(kvstore *kvs, int didx, unsigned long size);
 unsigned long kvstoreDictScanDefrag(kvstore *kvs,
diff --git a/src/object.c b/src/object.c
index ea56b38dd2..6e5d1f460b 100644
--- a/src/object.c
+++ b/src/object.c
@@ -1010,7 +1010,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
             asize = sizeof(*o) + sizeof(dict) + (sizeof(struct dictEntry *) * dictBuckets(d));
             while ((de = dictNext(di)) != NULL && samples < sample_size) {
                 ele = dictGetKey(de);
-                elesize += dictEntryMemUsage() + sdsZmallocSize(ele);
+                elesize += dictEntryMemUsage(de) + sdsZmallocSize(ele);
                 samples++;
             }
             dictReleaseIterator(di);
@@ -1033,7 +1033,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
                     (sizeof(struct dictEntry *) * dictBuckets(d)) + zmalloc_size(zsl->header);
             while (znode != NULL && samples < sample_size) {
                 elesize += sdsZmallocSize(znode->ele);
-                elesize += dictEntryMemUsage() + zmalloc_size(znode);
+                elesize += dictEntryMemUsage(NULL) + zmalloc_size(znode);
                 samples++;
                 znode = znode->level[0].forward;
             }
@@ -1052,7 +1052,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
                 ele = dictGetKey(de);
                 ele2 = dictGetVal(de);
                 elesize += sdsZmallocSize(ele) + sdsZmallocSize(ele2);
-                elesize += dictEntryMemUsage();
+                elesize += dictEntryMemUsage(de);
                 samples++;
             }
             dictReleaseIterator(di);
@@ -1552,8 +1552,7 @@ NULL
             return;
         }
         size_t usage = objectComputeSize(c->argv[2], dictGetVal(de), samples, c->db->id);
-        usage += sdsZmallocSize(dictGetKey(de));
-        usage += dictEntryMemUsage();
+        usage += dictEntryMemUsage(de);
         addReplyLongLong(c, usage);
     } else if (!strcasecmp(c->argv[1]->ptr, "stats") && c->argc == 2) {
         struct serverMemOverhead *mh = getMemoryOverheadData();
diff --git a/src/rdb.c b/src/rdb.c
index 53623f84a8..8b1037ab93 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -3324,6 +3324,9 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin
 
             /* call key space notification on key loaded for modules only */
             moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, db->id);
+
+            /* Release key (sds), dictEntry stores a copy of it in embedded data */
+            sdsfree(key);
         }
 
         /* Loading the database more slowly is useful in order to test
diff --git a/src/sds.c b/src/sds.c
index 1c0ddd559d..ba3362e88a 100644
--- a/src/sds.c
+++ b/src/sds.c
@@ -192,6 +192,25 @@ sds sdsdup(const sds s) {
     return sdsnewlen(s, sdslen(s));
 }
 
+/*
+ * This method returns the minimum amount of bytes required to store the sds (header + data + NULL terminator).
+ */
+static inline size_t sdsminlen(sds s) {
+    return sdslen(s) + sdsHdrSize(s[-1]) + 1;
+}
+
+/* This method copies the sds `s` into `buf` which is the target character buffer. */
+size_t sdscopytobuffer(unsigned char *buf, size_t buf_len, sds s, uint8_t *hdr_size) {
+    size_t required_keylen = sdsminlen(s);
+    if (buf == NULL) {
+        return required_keylen;
+    }
+    assert(buf_len >= required_keylen);
+    memcpy(buf, sdsAllocPtr(s), required_keylen);
+    *hdr_size = sdsHdrSize(s[-1]);
+    return required_keylen;
+}
+
 /* Free an sds string. No operation is performed if 's' is NULL. */
 void sdsfree(sds s) {
     if (s == NULL) return;
diff --git a/src/sds.h b/src/sds.h
index 20d598829a..a12b8dd89e 100644
--- a/src/sds.h
+++ b/src/sds.h
@@ -182,6 +182,7 @@ sds sdstrynewlen(const void *init, size_t initlen);
 sds sdsnew(const char *init);
 sds sdsempty(void);
 sds sdsdup(const sds s);
+size_t sdscopytobuffer(unsigned char *buf, size_t buf_len, sds s, uint8_t *hdr_size);
 void sdsfree(sds s);
 sds sdsgrowzero(sds s, size_t len);
 sds sdscatlen(sds s, const void *t, size_t len);
diff --git a/src/server.c b/src/server.c
index 574423fdee..57456c6597 100644
--- a/src/server.c
+++ b/src/server.c
@@ -289,6 +289,10 @@ int dictSdsKeyCompare(dict *d, const void *key1, const void *key2) {
     return memcmp(key1, key2, l1) == 0;
 }
 
+size_t dictSdsEmbedKey(unsigned char *buf, size_t buf_len, const void *key, uint8_t *key_offset) {
+    return sdscopytobuffer(buf, buf_len, (sds)key, key_offset);
+}
+
 /* A case insensitive version used for the command lookup table and other
  * places where case insensitive non binary-safe comparison is needed. */
 int dictSdsKeyCaseCompare(dict *d, const void *key1, const void *key2) {
@@ -468,9 +472,11 @@ dictType dbDictType = {
     dictSdsHash,          /* hash function */
     NULL,                 /* key dup */
     dictSdsKeyCompare,    /* key compare */
-    dictSdsDestructor,    /* key destructor */
+    NULL,                 /* key is embedded in the dictEntry and freed internally */
     dictObjectDestructor, /* val destructor */
     dictResizeAllowed,    /* allow to resize */
+    .embedKey = dictSdsEmbedKey,
+    .embedded_entry = 1,
 };
 
 /* Db->expires */

From b298dfd6ef5c8572965c8154526ea76381fa29d2 Mon Sep 17 00:00:00 2001
From: AlanZhang1204 <1014810735@qq.com>
Date: Wed, 3 Jul 2024 22:41:27 +0800
Subject: [PATCH 42/53] Use 'primary' instead of 'master' in Sentinel tcl
 testing. (#724)

Use 'primary' instead of 'master' in Sentinel tcl testing.

---------

Signed-off-by: z00808363 <zhangtianlun2@huawei.com>
Co-authored-by: z00808363 <zhangtianlun2@huawei.com>
---
 tests/sentinel/tests/00-base.tcl             | 16 ++++++++--------
 tests/sentinel/tests/01-conf-update.tcl      |  2 +-
 tests/sentinel/tests/02-slaves-reconf.tcl    |  4 ++--
 tests/sentinel/tests/03-runtime-reconf.tcl   |  8 ++++----
 tests/sentinel/tests/05-manual.tcl           |  6 +++---
 tests/sentinel/tests/07-down-conditions.tcl  |  2 +-
 tests/sentinel/tests/12-master-reboot.tcl    |  8 ++++----
 tests/sentinel/tests/includes/init-tests.tcl |  8 ++++----
 8 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/tests/sentinel/tests/00-base.tcl b/tests/sentinel/tests/00-base.tcl
index ee88469ec3..c9f23a5c34 100644
--- a/tests/sentinel/tests/00-base.tcl
+++ b/tests/sentinel/tests/00-base.tcl
@@ -56,7 +56,7 @@ test "SENTINEL PENDING-SCRIPTS returns the information about pending scripts" {
     assert_morethan_equal [llength [S 0 SENTINEL PENDING-SCRIPTS]] 0
 }
 
-test "SENTINEL MASTERS returns a list of monitored masters" {
+test "SENTINEL PRIMARIES returns a list of monitored primaries" {
     assert_match "*mymaster*" [S 0 SENTINEL MASTERS]
     assert_morethan_equal [llength [S 0 SENTINEL MASTERS]] 1
 }
@@ -75,7 +75,7 @@ test "SENTINEL SIMULATE-FAILURE HELP list supported flags" {
     assert_equal {crash-after-election crash-after-promotion} $res
 }
 
-test "Basic failover works if the master is down" {
+test "Basic failover works if the primary is down" {
     set old_port [RPort $master_id]
     set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
     assert {[lindex $addr 1] == $old_port}
@@ -94,11 +94,11 @@ test "Basic failover works if the master is down" {
     set master_id [get_instance_id_by_port valkey [lindex $addr 1]]
 }
 
-test "New master [join $addr {:}] role matches" {
+test "New primary [join $addr {:}] role matches" {
     assert {[RI $master_id role] eq {master}}
 }
 
-test "All the other slaves now point to the new master" {
+test "All the other slaves now point to the new primary" {
     foreach_valkey_id id {
         if {$id != $master_id && $id != 0} {
             wait_for_condition 1000 50 {
@@ -110,7 +110,7 @@ test "All the other slaves now point to the new master" {
     }
 }
 
-test "The old master eventually gets reconfigured as a slave" {
+test "The old primary eventually gets reconfigured as a slave" {
     wait_for_condition 1000 50 {
         [RI 0 master_port] == [lindex $addr 1]
     } else {
@@ -190,12 +190,12 @@ test "Failover works if we configure for absolute agreement" {
     }
 }
 
-test "New master [join $addr {:}] role matches" {
+test "New primary [join $addr {:}] role matches" {
     assert {[RI $master_id role] eq {master}}
 }
 
-test "SENTINEL RESET can resets the master" {
-    # After SENTINEL RESET, sometimes the sentinel can sense the master again,
+test "SENTINEL RESET can resets the primary" {
+    # After SENTINEL RESET, sometimes the sentinel can sense the primary again,
     # causing the test to fail. Here we give it a few more chances.
     for {set j 0} {$j < 10} {incr j} {
         assert_equal 1 [S 0 SENTINEL RESET mymaster]
diff --git a/tests/sentinel/tests/01-conf-update.tcl b/tests/sentinel/tests/01-conf-update.tcl
index a531137297..e8550e9e33 100644
--- a/tests/sentinel/tests/01-conf-update.tcl
+++ b/tests/sentinel/tests/01-conf-update.tcl
@@ -34,7 +34,7 @@ test "After Sentinel 1 is restarted, its config gets updated" {
     }
 }
 
-test "New master [join $addr {:}] role matches" {
+test "New primary [join $addr {:}] role matches" {
     assert {[RI $master_id role] eq {master}}
 }
 
diff --git a/tests/sentinel/tests/02-slaves-reconf.tcl b/tests/sentinel/tests/02-slaves-reconf.tcl
index 32e18d0eb2..7fb2e615a9 100644
--- a/tests/sentinel/tests/02-slaves-reconf.tcl
+++ b/tests/sentinel/tests/02-slaves-reconf.tcl
@@ -9,7 +9,7 @@ source "../tests/includes/init-tests.tcl"
 
 proc 02_test_slaves_replication {} {
     uplevel 1 {
-        test "Check that slaves replicate from current master" {
+        test "Check that slaves replicate from current primary" {
             set master_port [RPort $master_id]
             foreach_valkey_id id {
                 if {$id == $master_id} continue
@@ -27,7 +27,7 @@ proc 02_test_slaves_replication {} {
 
 proc 02_crash_and_failover {} {
     uplevel 1 {
-        test "Crash the master and force a failover" {
+        test "Crash the primary and force a failover" {
             set old_port [RPort $master_id]
             set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
             assert {[lindex $addr 1] == $old_port}
diff --git a/tests/sentinel/tests/03-runtime-reconf.tcl b/tests/sentinel/tests/03-runtime-reconf.tcl
index c43333067a..46043adf1f 100644
--- a/tests/sentinel/tests/03-runtime-reconf.tcl
+++ b/tests/sentinel/tests/03-runtime-reconf.tcl
@@ -63,11 +63,11 @@ proc wait_for_sentinels_connect_servers { {is_connect 1} } {
     }
 }
 
-test "Sentinels (re)connection following SENTINEL SET mymaster auth-pass" {
+test "Sentinels (re)connection following SENTINEL SET myprimary auth-pass" {
     # 3 types of sentinels to test:
-    # (re)started while master changed pwd. Manage to connect only after setting pwd
+    # (re)started while primary changed pwd. Manage to connect only after setting pwd
     set sent2re 0
-    # (up)dated in advance with master new password
+    # (up)dated in advance with primary new password
     set sent2up 1
     # (un)touched. Yet manage to maintain (old) connection
     set sent2un 2
@@ -98,7 +98,7 @@ test "Sentinels (re)connection following SENTINEL SET mymaster auth-pass" {
     verify_sentinel_auto_discovery
 }
 
-test "Sentinels (re)connection following master ACL change" {
+test "Sentinels (re)connection following primary ACL change" {
     # Three types of sentinels to test during ACL change:
     # 1. (re)started Sentinel. Manage to connect only after setting new pwd
     # 2. (up)dated Sentinel, get just before ACL change the new password
diff --git a/tests/sentinel/tests/05-manual.tcl b/tests/sentinel/tests/05-manual.tcl
index 1834085645..ba5572c1a9 100644
--- a/tests/sentinel/tests/05-manual.tcl
+++ b/tests/sentinel/tests/05-manual.tcl
@@ -38,11 +38,11 @@ test "Manual failover works" {
     set master_id [get_instance_id_by_port valkey [lindex $addr 1]]
 }
 
-test "New master [join $addr {:}] role matches" {
+test "New primary [join $addr {:}] role matches" {
     assert {[RI $master_id role] eq {master}}
 }
 
-test "All the other slaves now point to the new master" {
+test "All the other slaves now point to the new primary" {
     foreach_valkey_id id {
         if {$id != $master_id && $id != 0} {
             wait_for_condition 1000 50 {
@@ -54,7 +54,7 @@ test "All the other slaves now point to the new master" {
     }
 }
 
-test "The old master eventually gets reconfigured as a slave" {
+test "The old primary eventually gets reconfigured as a slave" {
     wait_for_condition 1000 50 {
         [RI 0 master_port] == [lindex $addr 1]
     } else {
diff --git a/tests/sentinel/tests/07-down-conditions.tcl b/tests/sentinel/tests/07-down-conditions.tcl
index 611772013e..1068cd18fb 100644
--- a/tests/sentinel/tests/07-down-conditions.tcl
+++ b/tests/sentinel/tests/07-down-conditions.tcl
@@ -64,7 +64,7 @@ test "SDOWN is triggered by crashed instance" {
     ensure_master_up
 }
 
-test "SDOWN is triggered by masters advertising as slaves" {
+test "SDOWN is triggered by primaries advertising as slaves" {
     ensure_master_up
     R 0 slaveof 127.0.0.1 34567
     ensure_master_down
diff --git a/tests/sentinel/tests/12-master-reboot.tcl b/tests/sentinel/tests/12-master-reboot.tcl
index ad536f7e37..c684d4505d 100644
--- a/tests/sentinel/tests/12-master-reboot.tcl
+++ b/tests/sentinel/tests/12-master-reboot.tcl
@@ -35,7 +35,7 @@ proc reboot_instance {type id} {
 }
 
 
-test "Master reboot in very short time" {
+test "Primary reboot in very short time" {
     set old_port [RPort $master_id]
     set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
     assert {[lindex $addr 1] == $old_port}
@@ -78,11 +78,11 @@ test "Master reboot in very short time" {
     }
 }
 
-test "New master [join $addr {:}] role matches" {
+test "New primary [join $addr {:}] role matches" {
     assert {[RI $master_id role] eq {master}}
 }
 
-test "All the other slaves now point to the new master" {
+test "All the other slaves now point to the new primary" {
     foreach_valkey_id id {
         if {$id != $master_id && $id != 0} {
             wait_for_condition 1000 50 {
@@ -94,7 +94,7 @@ test "All the other slaves now point to the new master" {
     }
 }
 
-test "The old master eventually gets reconfigured as a slave" {
+test "The old primary eventually gets reconfigured as a slave" {
     wait_for_condition 1000 50 {
         [RI 0 master_port] == [lindex $addr 1]
     } else {
diff --git a/tests/sentinel/tests/includes/init-tests.tcl b/tests/sentinel/tests/includes/init-tests.tcl
index 4f67cb9aa7..8635e0e255 100644
--- a/tests/sentinel/tests/includes/init-tests.tcl
+++ b/tests/sentinel/tests/includes/init-tests.tcl
@@ -5,19 +5,19 @@ test "(init) Restart killed instances" {
     restart_killed_instances
 }
 
-test "(init) Remove old master entry from sentinels" {
+test "(init) Remove old primary entry from sentinels" {
     foreach_sentinel_id id {
         catch {S $id SENTINEL REMOVE mymaster}
     }
 }
 
 set redis_slaves [expr $::instances_count - 1]
-test "(init) Create a master-slaves cluster of [expr $redis_slaves+1] instances" {
+test "(init) Create a primary-slaves cluster of [expr $redis_slaves+1] instances" {
     create_valkey_master_slave_cluster [expr {$redis_slaves+1}]
 }
 set master_id 0
 
-test "(init) Sentinels can start monitoring a master" {
+test "(init) Sentinels can start monitoring a primary" {
     set sentinels [llength $::sentinel_instances]
     set quorum [expr {$sentinels/2+1}]
     foreach_sentinel_id id {
@@ -38,7 +38,7 @@ test "(init) Sentinels can start monitoring a master" {
     }
 }
 
-test "(init) Sentinels can talk with the master" {
+test "(init) Sentinels can talk with the primary" {
     foreach_sentinel_id id {
         wait_for_condition 1000 50 {
             [catch {S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster}] == 0

From 2d6791bb1157af52e20ffd1fb4ca025c283153d3 Mon Sep 17 00:00:00 2001
From: Binbin <binloveplay1314@qq.com>
Date: Thu, 4 Jul 2024 02:42:25 +0800
Subject: [PATCH 43/53] Use clusterNodeIsVotingPrimary function to check the
 voting right (#735)

Minor cleanups.

---------

Signed-off-by: Binbin <binloveplay1314@qq.com>
---
 src/cluster_legacy.c | 16 ++++++++--------
 src/cluster_legacy.h |  6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index dd95cc6bb7..f2f980d58c 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -1902,10 +1902,10 @@ void clearNodeFailureIfNeeded(clusterNode *node) {
 
     serverAssert(nodeFailed(node));
 
-    /* For replicas we always clear the FAIL flag if we can contact the
-     * node again. */
-    if (nodeIsReplica(node) || node->numslots == 0) {
-        serverLog(LL_NOTICE, "Clear FAIL state for node %.40s (%s):%s is reachable again.", node->name,
+    /* For replicas or primaries without slots, that is, nodes without voting
+     * right, we always clear the FAIL flag if we can contact the node again. */
+    if (!clusterNodeIsVotingPrimary(node)) {
+        serverLog(LL_NOTICE, "Clear FAIL state for node %.40s (%s): %s is reachable again.", node->name,
                   node->human_nodename, nodeIsReplica(node) ? "replica" : "primary without slots");
         node->flags &= ~CLUSTER_NODE_FAIL;
         clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_SAVE_CONFIG);
@@ -4006,9 +4006,9 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
 
     /* IF we are not a primary serving at least 1 slot, we don't have the
      * right to vote, as the cluster size is the number
-     * of primariies serving at least one slot, and quorum is the cluster
+     * of primaries serving at least one slot, and quorum is the cluster
      * size + 1 */
-    if (nodeIsReplica(myself) || myself->numslots == 0) return;
+    if (!clusterNodeIsVotingPrimary(myself)) return;
 
     /* Request epoch must be >= our currentEpoch.
      * Note that it is impossible for it to actually be greater since
@@ -4086,7 +4086,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
 }
 
 /* This function returns the "rank" of this instance, a replica, in the context
- * of its primar-replicas ring. The rank of the replica is given by the number of
+ * of its primary-replicas ring. The rank of the replica is given by the number of
  * other replicas for the same primary that have a better replication offset
  * compared to the local one (better means, greater, so they claim more data).
  *
@@ -6022,7 +6022,7 @@ void clusterCommandSetSlot(client *c) {
      * 3. Upon replication completion, primary B executes `SETSLOT n NODE B` and
      *    returns success to client C.
      * 4. The following steps can happen in parallel:
-     *   a. Client C issues `SETSLOT n NODE B` against parimary A.
+     *   a. Client C issues `SETSLOT n NODE B` against primary A.
      *   b. Primary B gossips its new slot ownership to the cluster (including A, A', etc.).
      *
      * This ensures that all replicas have the latest topology information, enabling
diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h
index fb80f45eec..d054d86017 100644
--- a/src/cluster_legacy.h
+++ b/src/cluster_legacy.h
@@ -287,18 +287,18 @@ struct _clusterNode {
     uint16_t *slot_info_pairs;              /* Slots info represented as (start/end) pair (consecutive index). */
     int slot_info_pairs_count;              /* Used number of slots in slot_info_pairs */
     int numslots;                           /* Number of slots handled by this node */
-    int num_replicas;                       /* Number of replica nodes, if this is a primar */
+    int num_replicas;                       /* Number of replica nodes, if this is a primary */
     clusterNode **replicas;                 /* pointers to replica nodes */
     clusterNode *replicaof;                 /* pointer to the primary node. Note that it
                                              may be NULL even if the node is a replica
-                                             if we don't have the parimary node in our
+                                             if we don't have the primary node in our
                                              tables. */
     unsigned long long last_in_ping_gossip; /* The number of the last carried in the ping gossip section */
     mstime_t ping_sent;                     /* Unix time we sent latest ping */
     mstime_t pong_received;                 /* Unix time we received the pong */
     mstime_t data_received;                 /* Unix time we received any data */
     mstime_t fail_time;                     /* Unix time when FAIL flag was set */
-    mstime_t voted_time;                    /* Last time we voted for a replica of this parimary */
+    mstime_t voted_time;                    /* Last time we voted for a replica of this primary */
     mstime_t repl_offset_time;              /* Unix time we received offset for this node */
     mstime_t orphaned_time;                 /* Starting time of orphaned primary condition */
     long long repl_offset;                  /* Last known repl offset for this node. */

From 6bf1d02edf962674cb32dc2e4f2f13d3d8cece3f Mon Sep 17 00:00:00 2001
From: Binbin <binloveplay1314@qq.com>
Date: Thu, 4 Jul 2024 03:27:45 +0800
Subject: [PATCH 44/53] Nested MULTI or WATCH in MULTI now will abort the
 transaction (#723)

Currently, for nested MULTI or executing WATCH in MULTI, we will return
an error but we will not abort the transaction.

```
127.0.0.1:6379> multi
OK
127.0.0.1:6379(TX)> multi
(error) ERR MULTI calls can not be nested
127.0.0.1:6379(TX)> set key value
QUEUED
127.0.0.1:6379(TX)> exec
1) OK

127.0.0.1:6379> multi
OK
127.0.0.1:6379(TX)> watch key
(error) ERR WATCH inside MULTI is not allowed
127.0.0.1:6379(TX)> set key value
QUEUED
127.0.0.1:6379(TX)> exec
1) OK
```

This behavior is unexpected: such an error should abort the transaction.
In addition, the number of elements returned by EXEC doesn't match the
number of commands sent after MULTI.
Add the NO_MULTI flag to MULTI and WATCH so that they are rejected in
processCommand, and rejectCommand aborts the transaction.

So there are two visible changes:

- The error message wording changes (it is now "Command not allowed inside
a transaction").
- EXEC returns an error (EXECABORT) instead of executing the queued commands.

Signed-off-by: Binbin <binloveplay1314@qq.com>
---
 src/commands.def        |  4 ++--
 src/commands/multi.json |  1 +
 src/commands/watch.json |  1 +
 src/multi.c             |  9 ---------
 tests/unit/multi.tcl    | 16 ++++++----------
 5 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/src/commands.def b/src/commands.def
index 99f4872f0e..4559c0aefe 100644
--- a/src/commands.def
+++ b/src/commands.def
@@ -11022,8 +11022,8 @@ struct COMMAND_STRUCT serverCommandTable[] = {
 /* transactions */
 {MAKE_CMD("discard","Discards a transaction.","O(N), when N is the number of queued commands","2.0.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,DISCARD_History,0,DISCARD_Tips,0,discardCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,DISCARD_Keyspecs,0,NULL,0)},
 {MAKE_CMD("exec","Executes all commands in a transaction.","Depends on commands in the transaction","1.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,EXEC_History,0,EXEC_Tips,0,execCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SKIP_SLOWLOG,ACL_CATEGORY_TRANSACTION,EXEC_Keyspecs,0,NULL,0)},
-{MAKE_CMD("multi","Starts a transaction.","O(1)","1.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,MULTI_History,0,MULTI_Tips,0,multiCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,MULTI_Keyspecs,0,NULL,0)},
+{MAKE_CMD("multi","Starts a transaction.","O(1)","1.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,MULTI_History,0,MULTI_Tips,0,multiCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_NO_MULTI|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,MULTI_Keyspecs,0,NULL,0)},
 {MAKE_CMD("unwatch","Forgets about watched keys of a transaction.","O(1)","2.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,UNWATCH_History,0,UNWATCH_Tips,0,unwatchCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,UNWATCH_Keyspecs,0,NULL,0)},
-{MAKE_CMD("watch","Monitors changes to keys to determine the execution of a transaction.","O(1) for every key.","2.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,WATCH_History,0,WATCH_Tips,0,watchCommand,-2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,WATCH_Keyspecs,1,NULL,1),.args=WATCH_Args},
+{MAKE_CMD("watch","Monitors changes to keys to determine the execution of a transaction.","O(1) for every key.","2.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,WATCH_History,0,WATCH_Tips,0,watchCommand,-2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_NO_MULTI|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,WATCH_Keyspecs,1,NULL,1),.args=WATCH_Args},
 {0}
 };
diff --git a/src/commands/multi.json b/src/commands/multi.json
index 5f17a1da29..e66fff0174 100644
--- a/src/commands/multi.json
+++ b/src/commands/multi.json
@@ -11,6 +11,7 @@
             "LOADING",
             "STALE",
             "FAST",
+            "NO_MULTI",
             "ALLOW_BUSY"
         ],
         "acl_categories": [
diff --git a/src/commands/watch.json b/src/commands/watch.json
index 9faab2b917..588cea72b4 100644
--- a/src/commands/watch.json
+++ b/src/commands/watch.json
@@ -11,6 +11,7 @@
             "LOADING",
             "STALE",
             "FAST",
+            "NO_MULTI",
             "ALLOW_BUSY"
         ],
         "acl_categories": [
diff --git a/src/multi.c b/src/multi.c
index 24311c9982..074060269c 100644
--- a/src/multi.c
+++ b/src/multi.c
@@ -109,12 +109,7 @@ void flagTransaction(client *c) {
 }
 
 void multiCommand(client *c) {
-    if (c->flag.multi) {
-        addReplyError(c, "MULTI calls can not be nested");
-        return;
-    }
     c->flag.multi = 1;
-
     addReply(c, shared.ok);
 }
 
@@ -459,10 +454,6 @@ void touchAllWatchedKeysInDb(serverDb *emptied, serverDb *replaced_with) {
 void watchCommand(client *c) {
     int j;
 
-    if (c->flag.multi) {
-        addReplyError(c, "WATCH inside MULTI is not allowed");
-        return;
-    }
     /* No point in watching if the client is already dirty. */
     if (c->flag.dirty_cas) {
         addReply(c, shared.ok);
diff --git a/tests/unit/multi.tcl b/tests/unit/multi.tcl
index 0e2e74c2b6..dafbc66c10 100644
--- a/tests/unit/multi.tcl
+++ b/tests/unit/multi.tcl
@@ -34,12 +34,10 @@ start_server {tags {"multi"}} {
     } {QUEUED OK {a b c}}
 
     test {Nested MULTI are not allowed} {
-        set err {}
         r multi
-        catch {[r multi]} err
-        r exec
-        set _ $err
-    } {*ERR MULTI*}
+        assert_error "ERR*" {r multi}
+        assert_error "EXECABORT*" {r exec}
+    }
 
     test {MULTI where commands alter argc/argv} {
         r sadd myset a
@@ -49,12 +47,10 @@ start_server {tags {"multi"}} {
     } {a 0}
 
     test {WATCH inside MULTI is not allowed} {
-        set err {}
         r multi
-        catch {[r watch x]} err
-        r exec
-        set _ $err
-    } {*ERR WATCH*}
+        assert_error "ERR*" {r watch x}
+        assert_error "EXECABORT*" {r exec}
+    }
 
     test {EXEC fails if there are errors while queueing commands #1} {
         r del foo1{t} foo2{t}

From 16803788451bfe8d5e6b757e5263653650cbd274 Mon Sep 17 00:00:00 2001
From: Wen Hui <wen.hui.ware@gmail.com>
Date: Thu, 4 Jul 2024 11:54:58 -0400
Subject: [PATCH 45/53] Update redis keyword to valkey in some sentinel
 functions (Redis Legacy) (#706)

This PR updates Redis/redis keywords to Valkey/valkey in src/sentinel.c,
including variable names, comments, and function names.

All sentinel test cases passed.

---------

Signed-off-by: hwware <wen.hui.ware@gmail.com>
---
 src/sentinel.c | 396 ++++++++++++++++++++++++-------------------------
 1 file changed, 198 insertions(+), 198 deletions(-)

diff --git a/src/sentinel.c b/src/sentinel.c
index 71b548debc..a095b8f48a 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -49,7 +49,7 @@ extern SSL_CTX *valkey_tls_ctx;
 extern SSL_CTX *valkey_tls_client_ctx;
 #endif
 
-#define REDIS_SENTINEL_PORT 26379
+#define VALKEY_SENTINEL_PORT 26379
 
 /* ======================== Sentinel global state =========================== */
 
@@ -79,7 +79,7 @@ typedef struct sentinelAddr {
 #define SRI_FORCE_FAILOVER (1 << 11)   /* Force failover with primary up. */
 #define SRI_SCRIPT_KILL_SENT (1 << 12) /* SCRIPT KILL already sent on -BUSY */
 #define SRI_PRIMARY_REBOOT (1 << 13)   /* Primary was detected as rebooting */
-/* Note: when adding new flags, please check the flags section in addReplySentinelRedisInstance. */
+/* Note: when adding new flags, please check the flags section in addReplySentinelValkeyInstance. */
 
 /* Note: times are in milliseconds. */
 #define SENTINEL_PING_PERIOD 1000
@@ -140,7 +140,7 @@ static mstime_t sentinel_default_failover_timeout = 60 * 3 * 1000;
 #define SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION (1 << 0)
 #define SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION (1 << 1)
 
-/* The link to a sentinelRedisInstance. When we have the same set of Sentinels
+/* The link to a sentinelValkeyInstance. When we have the same set of Sentinels
  * monitoring many primaries, we have different instances representing the
  * same Sentinels, one per primary, and we need to share the hiredis connections
  * among them. Otherwise if 5 Sentinels are monitoring 100 primaries we create
@@ -155,7 +155,7 @@ static mstime_t sentinel_default_failover_timeout = 60 * 3 * 1000;
  * Links are shared only for Sentinels: primary and replica instances have
  * a link with refcount = 1, always. */
 typedef struct instanceLink {
-    int refcount;              /* Number of sentinelRedisInstance owners. */
+    int refcount;              /* Number of sentinelValkeyInstance owners. */
     int disconnected;          /* Non-zero if we need to reconnect cc or pc. */
     int pending_commands;      /* Number of commands sent waiting for a reply. */
     redisAsyncContext *cc;     /* Hiredis context for commands. */
@@ -181,7 +181,7 @@ typedef struct instanceLink {
                                   the link was down. */
 } instanceLink;
 
-typedef struct sentinelRedisInstance {
+typedef struct sentinelValkeyInstance {
     int flags;                                 /* See SRI_... defines */
     char *name;                                /* Primary name from the point of view of this sentinel. */
     char *runid;                               /* Run ID of this instance, or unique ID if is a Sentinel.*/
@@ -227,7 +227,7 @@ typedef struct sentinelRedisInstance {
     int replica_priority;                   /* Replica priority according to its INFO output. */
     int replica_announced;                  /* Replica announcing according to its INFO output. */
     mstime_t replica_reconf_sent_time;      /* Time at which we sent REPLICA OF <new> */
-    struct sentinelRedisInstance *primary;  /* Primary instance if it's replica. */
+    struct sentinelValkeyInstance *primary; /* Primary instance if it's replica. */
     char *replica_primary_host;             /* Primary host as reported by INFO */
     int replica_primary_port;               /* Primary port as reported by INFO */
     int replica_primary_link_status;        /* Primary link status as reported by INFO */
@@ -241,25 +241,25 @@ typedef struct sentinelRedisInstance {
     uint64_t failover_epoch; /* Epoch of the currently started failover. */
     int failover_state;      /* See SENTINEL_FAILOVER_STATE_* defines. */
     mstime_t failover_state_change_time;
-    mstime_t failover_start_time;                   /* Last failover attempt start time. */
-    mstime_t failover_timeout;                      /* Max time to refresh failover state. */
-    mstime_t failover_delay_logged;                 /* For what failover_start_time value we
-                                                       logged the failover delay. */
-    struct sentinelRedisInstance *promoted_replica; /* Promoted replica instance. */
+    mstime_t failover_start_time;                    /* Last failover attempt start time. */
+    mstime_t failover_timeout;                       /* Max time to refresh failover state. */
+    mstime_t failover_delay_logged;                  /* For what failover_start_time value we
+                                                      * logged the failover delay. */
+    struct sentinelValkeyInstance *promoted_replica; /* Promoted replica instance. */
     /* Scripts executed to notify admin or reconfigure clients: when they
      * are set to NULL no script is executed. */
     char *notification_script;
     char *client_reconfig_script;
     sds info; /* cached INFO output */
-} sentinelRedisInstance;
+} sentinelValkeyInstance;
 
 /* Main state. */
 struct sentinelState {
     char myid[CONFIG_RUN_ID_SIZE + 1]; /* This sentinel ID. */
     uint64_t current_epoch;            /* Current epoch. */
-    dict *primaries;                   /* Dictionary of primary sentinelRedisInstances.
+    dict *primaries;                   /* Dictionary of primary sentinelValkeyInstances.
                                         Key is the instance name, value is the
-                                        sentinelRedisInstance structure pointer. */
+                                        sentinelValkeyInstance structure pointer. */
     int tilt;                          /* Are we in TILT mode? */
     int running_scripts;               /* Number of scripts in execution right now. */
     mstime_t tilt_start_time;          /* When TITL started. */
@@ -296,19 +296,19 @@ typedef struct sentinelScriptJob {
  * we have our modified copy for Sentinel in order to use our allocator
  * and to have full control over how the adapter works. */
 
-typedef struct redisAeEvents {
+typedef struct ValkeyAeEvents {
     redisAsyncContext *context;
     aeEventLoop *loop;
     int fd;
     int reading, writing;
-} redisAeEvents;
+} ValkeyAeEvents;
 
 static void redisAeReadEvent(aeEventLoop *el, int fd, void *privdata, int mask) {
     ((void)el);
     ((void)fd);
     ((void)mask);
 
-    redisAeEvents *e = (redisAeEvents *)privdata;
+    ValkeyAeEvents *e = (ValkeyAeEvents *)privdata;
     redisAsyncHandleRead(e->context);
 }
 
@@ -317,12 +317,12 @@ static void redisAeWriteEvent(aeEventLoop *el, int fd, void *privdata, int mask)
     ((void)fd);
     ((void)mask);
 
-    redisAeEvents *e = (redisAeEvents *)privdata;
+    ValkeyAeEvents *e = (ValkeyAeEvents *)privdata;
     redisAsyncHandleWrite(e->context);
 }
 
 static void redisAeAddRead(void *privdata) {
-    redisAeEvents *e = (redisAeEvents *)privdata;
+    ValkeyAeEvents *e = (ValkeyAeEvents *)privdata;
     aeEventLoop *loop = e->loop;
     if (!e->reading) {
         e->reading = 1;
@@ -331,7 +331,7 @@ static void redisAeAddRead(void *privdata) {
 }
 
 static void redisAeDelRead(void *privdata) {
-    redisAeEvents *e = (redisAeEvents *)privdata;
+    ValkeyAeEvents *e = (ValkeyAeEvents *)privdata;
     aeEventLoop *loop = e->loop;
     if (e->reading) {
         e->reading = 0;
@@ -340,7 +340,7 @@ static void redisAeDelRead(void *privdata) {
 }
 
 static void redisAeAddWrite(void *privdata) {
-    redisAeEvents *e = (redisAeEvents *)privdata;
+    ValkeyAeEvents *e = (ValkeyAeEvents *)privdata;
     aeEventLoop *loop = e->loop;
     if (!e->writing) {
         e->writing = 1;
@@ -349,7 +349,7 @@ static void redisAeAddWrite(void *privdata) {
 }
 
 static void redisAeDelWrite(void *privdata) {
-    redisAeEvents *e = (redisAeEvents *)privdata;
+    ValkeyAeEvents *e = (ValkeyAeEvents *)privdata;
     aeEventLoop *loop = e->loop;
     if (e->writing) {
         e->writing = 0;
@@ -358,7 +358,7 @@ static void redisAeDelWrite(void *privdata) {
 }
 
 static void redisAeCleanup(void *privdata) {
-    redisAeEvents *e = (redisAeEvents *)privdata;
+    ValkeyAeEvents *e = (ValkeyAeEvents *)privdata;
     redisAeDelRead(privdata);
     redisAeDelWrite(privdata);
     zfree(e);
@@ -366,13 +366,13 @@ static void redisAeCleanup(void *privdata) {
 
 static int redisAeAttach(aeEventLoop *loop, redisAsyncContext *ac) {
     redisContext *c = &(ac->c);
-    redisAeEvents *e;
+    ValkeyAeEvents *e;
 
     /* Nothing should be attached when something is already attached */
     if (ac->ev.data != NULL) return C_ERR;
 
     /* Create container for context and r/w events */
-    e = (redisAeEvents *)zmalloc(sizeof(*e));
+    e = (ValkeyAeEvents *)zmalloc(sizeof(*e));
     e->context = ac;
     e->loop = loop;
     e->fd = c->fd;
@@ -394,38 +394,38 @@ static int redisAeAttach(aeEventLoop *loop, redisAsyncContext *ac) {
 void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status);
 void sentinelDisconnectCallback(const redisAsyncContext *c, int status);
 void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privdata);
-sentinelRedisInstance *sentinelGetPrimaryByName(char *name);
-char *sentinelGetSubjectiveLeader(sentinelRedisInstance *primary);
-char *sentinelGetObjectiveLeader(sentinelRedisInstance *primary);
+sentinelValkeyInstance *sentinelGetPrimaryByName(char *name);
+char *sentinelGetSubjectiveLeader(sentinelValkeyInstance *primary);
+char *sentinelGetObjectiveLeader(sentinelValkeyInstance *primary);
 void instanceLinkConnectionError(const redisAsyncContext *c);
-const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri);
-void sentinelAbortFailover(sentinelRedisInstance *ri);
-void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, const char *fmt, ...);
-sentinelRedisInstance *sentinelSelectReplica(sentinelRedisInstance *primary);
+const char *sentinelValkeyInstanceTypeStr(sentinelValkeyInstance *ri);
+void sentinelAbortFailover(sentinelValkeyInstance *ri);
+void sentinelEvent(int level, char *type, sentinelValkeyInstance *ri, const char *fmt, ...);
+sentinelValkeyInstance *sentinelSelectReplica(sentinelValkeyInstance *primary);
 void sentinelScheduleScriptExecution(char *path, ...);
-void sentinelStartFailover(sentinelRedisInstance *primary);
+void sentinelStartFailover(sentinelValkeyInstance *primary);
 void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privdata);
-int sentinelSendReplicaOf(sentinelRedisInstance *ri, const sentinelAddr *addr);
-char *sentinelVoteLeader(sentinelRedisInstance *primary, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch);
+int sentinelSendReplicaOf(sentinelValkeyInstance *ri, const sentinelAddr *addr);
+char *sentinelVoteLeader(sentinelValkeyInstance *primary, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch);
 int sentinelFlushConfig(void);
 void sentinelGenerateInitialMonitorEvents(void);
-int sentinelSendPing(sentinelRedisInstance *ri);
-int sentinelForceHelloUpdateForPrimary(sentinelRedisInstance *primary);
-sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *ip, int port, char *runid);
+int sentinelSendPing(sentinelValkeyInstance *ri);
+int sentinelForceHelloUpdateForPrimary(sentinelValkeyInstance *primary);
+sentinelValkeyInstance *getSentinelValkeyInstanceByAddrAndRunID(dict *instances, char *ip, int port, char *runid);
 void sentinelSimFailureCrash(void);
 
 /* ========================= Dictionary types =============================== */
 
-void releaseSentinelRedisInstance(sentinelRedisInstance *ri);
+void releaseSentinelValkeyInstance(sentinelValkeyInstance *ri);
 
 void dictInstancesValDestructor(dict *d, void *obj) {
     UNUSED(d);
-    releaseSentinelRedisInstance(obj);
+    releaseSentinelValkeyInstance(obj);
 }
 
-/* Instance name (sds) -> instance (sentinelRedisInstance pointer)
+/* Instance name (sds) -> instance (sentinelValkeyInstance pointer)
  *
- * also used for: sentinelRedisInstance->sentinels dictionary that maps
+ * also used for: sentinelValkeyInstance->sentinels dictionary that maps
  * sentinels ip:port to last seen time in Pub/Sub hello message. */
 dictType instancesDictType = {
     dictSdsHash,                /* hash function */
@@ -474,7 +474,7 @@ const char *preMonitorCfgName[] = {"announce-ip",   "announce-port",     "deny-s
 /* This function overwrites a few normal server config default with Sentinel
  * specific defaults. */
 void initSentinelConfig(void) {
-    server.port = REDIS_SENTINEL_PORT;
+    server.port = VALKEY_SENTINEL_PORT;
     server.protected_mode = 0; /* Sentinel must be exposed. */
 }
 
@@ -654,21 +654,21 @@ sds announceSentinelAddrAndPort(const sentinelAddr *a) {
  *
  *  Any other specifier after "%@" is processed by printf itself.
  */
-void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, const char *fmt, ...) {
+void sentinelEvent(int level, char *type, sentinelValkeyInstance *ri, const char *fmt, ...) {
     va_list ap;
     char msg[LOG_MAX_LEN];
     robj *channel, *payload;
 
     /* Handle %@ */
     if (fmt[0] == '%' && fmt[1] == '@') {
-        sentinelRedisInstance *primary = (ri->flags & SRI_PRIMARY) ? NULL : ri->primary;
+        sentinelValkeyInstance *primary = (ri->flags & SRI_PRIMARY) ? NULL : ri->primary;
 
         if (primary) {
-            snprintf(msg, sizeof(msg), "%s %s %s %d @ %s %s %d", sentinelRedisInstanceTypeStr(ri), ri->name,
+            snprintf(msg, sizeof(msg), "%s %s %s %d @ %s %s %d", sentinelValkeyInstanceTypeStr(ri), ri->name,
                      announceSentinelAddr(ri->addr), ri->addr->port, primary->name, announceSentinelAddr(primary->addr),
                      primary->addr->port);
         } else {
-            snprintf(msg, sizeof(msg), "%s %s %s %d", sentinelRedisInstanceTypeStr(ri), ri->name,
+            snprintf(msg, sizeof(msg), "%s %s %s %d", sentinelValkeyInstanceTypeStr(ri), ri->name,
                      announceSentinelAddr(ri->addr), ri->addr->port);
         }
         fmt += 2;
@@ -697,7 +697,7 @@ void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, const char
 
     /* Call the notification script if applicable. */
     if (level == LL_WARNING && ri != NULL) {
-        sentinelRedisInstance *primary = (ri->flags & SRI_PRIMARY) ? ri : ri->primary;
+        sentinelValkeyInstance *primary = (ri->flags & SRI_PRIMARY) ? ri : ri->primary;
         if (primary && primary->notification_script) {
             sentinelScheduleScriptExecution(primary->notification_script, type, msg, NULL);
         }
@@ -714,7 +714,7 @@ void sentinelGenerateInitialMonitorEvents(void) {
 
     di = dictGetIterator(sentinel.primaries);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *ri = dictGetVal(de);
+        sentinelValkeyInstance *ri = dictGetVal(de);
         sentinelEvent(LL_WARNING, "+monitor", ri, "%@ quorum %d", ri->quorum);
     }
     dictReleaseIterator(di);
@@ -967,7 +967,7 @@ void sentinelPendingScriptsCommand(client *c) {
  *
  * from/to fields are respectively primary -> promoted replica addresses for
  * "start" and "end". */
-void sentinelCallClientReconfScript(sentinelRedisInstance *primary,
+void sentinelCallClientReconfScript(sentinelValkeyInstance *primary,
                                     int role,
                                     char *state,
                                     sentinelAddr *from,
@@ -1030,7 +1030,7 @@ void instanceLinkCloseConnection(instanceLink *link, redisAsyncContext *c) {
  * pending requests in link->cc (hiredis connection for commands) to a
  * callback that will just ignore them. This is useful to avoid processing
  * replies for an instance that no longer exists. */
-instanceLink *releaseInstanceLink(instanceLink *link, sentinelRedisInstance *ri) {
+instanceLink *releaseInstanceLink(instanceLink *link, sentinelValkeyInstance *ri) {
     serverAssert(link->refcount > 0);
     link->refcount--;
     if (link->refcount != 0) {
@@ -1073,7 +1073,7 @@ instanceLink *releaseInstanceLink(instanceLink *link, sentinelRedisInstance *ri)
  * Return C_OK if a matching Sentinel was found in the context of a
  * different primary and sharing was performed. Otherwise C_ERR
  * is returned. */
-int sentinelTryConnectionSharing(sentinelRedisInstance *ri) {
+int sentinelTryConnectionSharing(sentinelValkeyInstance *ri) {
     serverAssert(ri->flags & SRI_SENTINEL);
     dictIterator *di;
     dictEntry *de;
@@ -1083,11 +1083,11 @@ int sentinelTryConnectionSharing(sentinelRedisInstance *ri) {
 
     di = dictGetIterator(sentinel.primaries);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *primary = dictGetVal(de), *match;
+        sentinelValkeyInstance *primary = dictGetVal(de), *match;
         /* We want to share with the same physical Sentinel referenced
          * in other primaries, so skip our primary. */
         if (primary == ri->primary) continue;
-        match = getSentinelRedisInstanceByAddrAndRunID(primary->sentinels, NULL, 0, ri->runid);
+        match = getSentinelValkeyInstanceByAddrAndRunID(primary->sentinels, NULL, 0, ri->runid);
         if (match == NULL) continue; /* No match. */
         if (match == ri) continue;   /* Should never happen but... safer. */
 
@@ -1104,7 +1104,7 @@ int sentinelTryConnectionSharing(sentinelRedisInstance *ri) {
 }
 
 /* Disconnect the relevant primary and its replicas. */
-void dropInstanceConnections(sentinelRedisInstance *ri) {
+void dropInstanceConnections(sentinelValkeyInstance *ri) {
     serverAssert(ri->flags & SRI_PRIMARY);
 
     /* Disconnect with the primary. */
@@ -1114,7 +1114,7 @@ void dropInstanceConnections(sentinelRedisInstance *ri) {
     /* Disconnect with all replicas. */
     dictIterator *di;
     dictEntry *de;
-    sentinelRedisInstance *repl_ri;
+    sentinelValkeyInstance *repl_ri;
     di = dictGetIterator(ri->replicas);
     while ((de = dictNext(di)) != NULL) {
         repl_ri = dictGetVal(de);
@@ -1136,10 +1136,10 @@ int sentinelDropConnections(void) {
         dictIterator *sdi;
         dictEntry *sde;
 
-        sentinelRedisInstance *ri = dictGetVal(de);
+        sentinelValkeyInstance *ri = dictGetVal(de);
         sdi = dictGetIterator(ri->sentinels);
         while ((sde = dictNext(sdi)) != NULL) {
-            sentinelRedisInstance *si = dictGetVal(sde);
+            sentinelValkeyInstance *si = dictGetVal(sde);
             if (!si->link->disconnected) {
                 instanceLinkCloseConnection(si->link, si->link->pc);
                 instanceLinkCloseConnection(si->link, si->link->cc);
@@ -1159,7 +1159,7 @@ int sentinelDropConnections(void) {
  * will be updated.
  *
  * Return the number of updated Sentinel addresses. */
-int sentinelUpdateSentinelAddressInAllPrimaries(sentinelRedisInstance *ri) {
+int sentinelUpdateSentinelAddressInAllPrimaries(sentinelValkeyInstance *ri) {
     serverAssert(ri->flags & SRI_SENTINEL);
     dictIterator *di;
     dictEntry *de;
@@ -1167,8 +1167,8 @@ int sentinelUpdateSentinelAddressInAllPrimaries(sentinelRedisInstance *ri) {
 
     di = dictGetIterator(sentinel.primaries);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *primary = dictGetVal(de), *match;
-        match = getSentinelRedisInstanceByAddrAndRunID(primary->sentinels, NULL, 0, ri->runid);
+        sentinelValkeyInstance *primary = dictGetVal(de), *match;
+        match = getSentinelValkeyInstanceByAddrAndRunID(primary->sentinels, NULL, 0, ri->runid);
         /* If there is no match, this primary does not know about this
          * Sentinel, try with the next one. */
         if (match == NULL) continue;
@@ -1222,7 +1222,7 @@ void sentinelDisconnectCallback(const redisAsyncContext *c, int status) {
     instanceLinkConnectionError(c);
 }
 
-/* ========================== sentinelRedisInstance ========================= */
+/* ========================== sentinelValkeyInstance ========================= */
 
 /* Create an instance of the server, the following fields must be populated by the
  * caller if needed:
@@ -1246,13 +1246,13 @@ void sentinelDisconnectCallback(const redisAsyncContext *c, int status) {
  * a primary with the same name, a replica with the same address, or a sentinel
  * with the same ID already exists. */
 
-sentinelRedisInstance *createSentinelRedisInstance(char *name,
-                                                   int flags,
-                                                   char *hostname,
-                                                   int port,
-                                                   int quorum,
-                                                   sentinelRedisInstance *primary) {
-    sentinelRedisInstance *ri;
+sentinelValkeyInstance *createSentinelValkeyInstance(char *name,
+                                                     int flags,
+                                                     char *hostname,
+                                                     int port,
+                                                     int quorum,
+                                                     sentinelValkeyInstance *primary) {
+    sentinelValkeyInstance *ri;
     sentinelAddr *addr;
     dict *table = NULL;
     sds sdsname;
@@ -1350,7 +1350,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name,
  * This function does not take care of unlinking the instance from the main
  * primaries table (if it is a primary) or from its primary sentinels/replicas table
  * if it is a replica or sentinel. */
-void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
+void releaseSentinelValkeyInstance(sentinelValkeyInstance *ri) {
     /* Release all its replicas or sentinels if any. */
     dictRelease(ri->sentinels);
     dictRelease(ri->replicas);
@@ -1378,9 +1378,9 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
 }
 
 /* Lookup a replica in a primary instance, by ip and port. */
-sentinelRedisInstance *sentinelRedisInstanceLookupReplica(sentinelRedisInstance *ri, char *replica_addr, int port) {
+sentinelValkeyInstance *sentinelValkeyInstanceLookupReplica(sentinelValkeyInstance *ri, char *replica_addr, int port) {
     sds key;
-    sentinelRedisInstance *replica;
+    sentinelValkeyInstance *replica;
     sentinelAddr *addr;
 
     serverAssert(ri->flags & SRI_PRIMARY);
@@ -1400,7 +1400,7 @@ sentinelRedisInstance *sentinelRedisInstanceLookupReplica(sentinelRedisInstance
 }
 
 /* Return the name of the type of the instance as a string. */
-const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri) {
+const char *sentinelValkeyInstanceTypeStr(sentinelValkeyInstance *ri) {
     if (ri->flags & SRI_PRIMARY)
         return "master";
     else if (ri->flags & SRI_REPLICA)
@@ -1422,7 +1422,7 @@ const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri) {
  *
  * The function returns 1 if the matching Sentinel was removed, otherwise
  * 0 if there was no Sentinel with this ID. */
-int removeMatchingSentinelFromPrimary(sentinelRedisInstance *primary, char *runid) {
+int removeMatchingSentinelFromPrimary(sentinelValkeyInstance *primary, char *runid) {
     dictIterator *di;
     dictEntry *de;
     int removed = 0;
@@ -1431,7 +1431,7 @@ int removeMatchingSentinelFromPrimary(sentinelRedisInstance *primary, char *runi
 
     di = dictGetSafeIterator(primary->sentinels);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *ri = dictGetVal(de);
+        sentinelValkeyInstance *ri = dictGetVal(de);
 
         if (ri->runid && strcmp(ri->runid, runid) == 0) {
             dictDelete(primary->sentinels, ri->name);
@@ -1448,10 +1448,10 @@ int removeMatchingSentinelFromPrimary(sentinelRedisInstance *primary, char *runi
  *
  * runid or addr can be NULL. In such a case the search is performed only
  * by the non-NULL field. */
-sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *addr, int port, char *runid) {
+sentinelValkeyInstance *getSentinelValkeyInstanceByAddrAndRunID(dict *instances, char *addr, int port, char *runid) {
     dictIterator *di;
     dictEntry *de;
-    sentinelRedisInstance *instance = NULL;
+    sentinelValkeyInstance *instance = NULL;
     sentinelAddr *ri_addr = NULL;
 
     serverAssert(addr || runid); /* User must pass at least one search param. */
@@ -1464,7 +1464,7 @@ sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, c
     }
     di = dictGetIterator(instances);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *ri = dictGetVal(de);
+        sentinelValkeyInstance *ri = dictGetVal(de);
 
         if (runid && !ri->runid) continue;
         if ((runid == NULL || strcmp(ri->runid, runid) == 0) &&
@@ -1480,8 +1480,8 @@ sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, c
 }
 
 /* Primary lookup by name */
-sentinelRedisInstance *sentinelGetPrimaryByName(char *name) {
-    sentinelRedisInstance *ri;
+sentinelValkeyInstance *sentinelGetPrimaryByName(char *name) {
+    sentinelValkeyInstance *ri;
     sds sdsname = sdsnew(name);
 
     ri = dictFetchValue(sentinel.primaries, sdsname);
@@ -1501,7 +1501,7 @@ sentinelRedisInstance *sentinelGetPrimaryByName(char *name) {
  */
 
 #define SENTINEL_RESET_NO_SENTINELS (1 << 0)
-void sentinelResetPrimary(sentinelRedisInstance *ri, int flags) {
+void sentinelResetPrimary(sentinelValkeyInstance *ri, int flags) {
     serverAssert(ri->flags & SRI_PRIMARY);
     dictRelease(ri->replicas);
     ri->replicas = dictCreate(&instancesDictType);
@@ -1542,7 +1542,7 @@ int sentinelResetPrimariesByPattern(char *pattern, int flags) {
 
     di = dictGetIterator(sentinel.primaries);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *ri = dictGetVal(de);
+        sentinelValkeyInstance *ri = dictGetVal(de);
 
         if (ri->name) {
             if (stringmatch(pattern, ri->name, 0)) {
@@ -1562,7 +1562,7 @@ int sentinelResetPrimariesByPattern(char *pattern, int flags) {
  *
  * The function returns C_ERR if the address can't be resolved for some
  * reason. Otherwise C_OK is returned.  */
-int sentinelResetPrimaryAndChangeAddress(sentinelRedisInstance *primary, char *hostname, int port) {
+int sentinelResetPrimaryAndChangeAddress(sentinelValkeyInstance *primary, char *hostname, int port) {
     sentinelAddr *oldaddr, *newaddr;
     sentinelAddr **replicas = NULL;
     int num_replicas = 0, j;
@@ -1580,7 +1580,7 @@ int sentinelResetPrimaryAndChangeAddress(sentinelRedisInstance *primary, char *h
     /* Don't include the one having the address we are switching to. */
     di = dictGetIterator(primary->replicas);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *replica = dictGetVal(de);
+        sentinelValkeyInstance *replica = dictGetVal(de);
 
         if (sentinelAddrOrHostnameEqual(replica->addr, newaddr)) continue;
         replicas[num_replicas++] = dupSentinelAddr(replica->addr);
@@ -1603,10 +1603,10 @@ int sentinelResetPrimaryAndChangeAddress(sentinelRedisInstance *primary, char *h
 
     /* Add replicas back. */
     for (j = 0; j < num_replicas; j++) {
-        sentinelRedisInstance *replica;
+        sentinelValkeyInstance *replica;
 
-        replica = createSentinelRedisInstance(NULL, SRI_REPLICA, replicas[j]->hostname, replicas[j]->port,
-                                              primary->quorum, primary);
+        replica = createSentinelValkeyInstance(NULL, SRI_REPLICA, replicas[j]->hostname, replicas[j]->port,
+                                               primary->quorum, primary);
         releaseSentinelAddr(replicas[j]);
         if (replica) sentinelEvent(LL_NOTICE, "+slave", replica, "%@");
     }
@@ -1621,7 +1621,7 @@ int sentinelResetPrimaryAndChangeAddress(sentinelRedisInstance *primary, char *h
 
 /* Return non-zero if there was no SDOWN or ODOWN error associated to this
  * instance in the latest 'ms' milliseconds. */
-int sentinelRedisInstanceNoDownFor(sentinelRedisInstance *ri, mstime_t ms) {
+int sentinelValkeyInstanceNoDownFor(sentinelValkeyInstance *ri, mstime_t ms) {
     mstime_t most_recent;
 
     most_recent = ri->s_down_since_time;
@@ -1631,7 +1631,7 @@ int sentinelRedisInstanceNoDownFor(sentinelRedisInstance *ri, mstime_t ms) {
 
 /* Return the current primary address, that is, its address or the address
  * of the promoted replica if already operational. */
-sentinelAddr *sentinelGetCurrentPrimaryAddress(sentinelRedisInstance *primary) {
+sentinelAddr *sentinelGetCurrentPrimaryAddress(sentinelValkeyInstance *primary) {
     /* If we are failing over the primary, and the state is already
      * SENTINEL_FAILOVER_STATE_RECONF_REPLICAS or greater, it means that we
      * already have the new configuration epoch in the primary, and the
@@ -1647,7 +1647,7 @@ sentinelAddr *sentinelGetCurrentPrimaryAddress(sentinelRedisInstance *primary) {
 
 /* This function sets the down_after_period field value in 'primary' to all
  * the replicas and sentinel instances connected to this primary. */
-void sentinelPropagateDownAfterPeriod(sentinelRedisInstance *primary) {
+void sentinelPropagateDownAfterPeriod(sentinelValkeyInstance *primary) {
     dictIterator *di;
     dictEntry *de;
     int j;
@@ -1656,7 +1656,7 @@ void sentinelPropagateDownAfterPeriod(sentinelRedisInstance *primary) {
     for (j = 0; d[j]; j++) {
         di = dictGetIterator(d[j]);
         while ((de = dictNext(di)) != NULL) {
-            sentinelRedisInstance *ri = dictGetVal(de);
+            sentinelValkeyInstance *ri = dictGetVal(de);
             ri->down_after_period = primary->down_after_period;
         }
         dictReleaseIterator(di);
@@ -1670,7 +1670,7 @@ void sentinelPropagateDownAfterPeriod(sentinelRedisInstance *primary) {
  * we check the one of the primary), and map the command that we should send
  * to the set of renamed commands. However, if the command was not renamed,
  * we just return "command" itself. */
-char *sentinelInstanceMapCommand(sentinelRedisInstance *ri, char *command) {
+char *sentinelInstanceMapCommand(sentinelValkeyInstance *ri, char *command) {
     sds sc = sdsnew(command);
     if (ri->primary) ri = ri->primary;
     char *retval = dictFetchValue(ri->renamed_commands, sc);
@@ -1810,14 +1810,14 @@ void loadSentinelConfigFromQueue(void) {
 }
 
 const char *sentinelHandleConfiguration(char **argv, int argc) {
-    sentinelRedisInstance *ri;
+    sentinelValkeyInstance *ri;
 
     if (!strcasecmp(argv[0], "monitor") && argc == 5) {
         /* monitor <name> <host> <port> <quorum> */
         int quorum = atoi(argv[4]);
 
         if (quorum <= 0) return "Quorum must be 1 or greater.";
-        if (createSentinelRedisInstance(argv[1], SRI_PRIMARY, argv[2], atoi(argv[3]), quorum, NULL) == NULL) {
+        if (createSentinelValkeyInstance(argv[1], SRI_PRIMARY, argv[2], atoi(argv[3]), quorum, NULL) == NULL) {
             return sentinelCheckCreateInstanceErrors(SRI_PRIMARY);
         }
     } else if (!strcasecmp(argv[0], "down-after-milliseconds") && argc == 3) {
@@ -1884,23 +1884,23 @@ const char *sentinelHandleConfiguration(char **argv, int argc) {
         if (!ri) return "No such master with specified name.";
         ri->leader_epoch = strtoull(argv[2], NULL, 10);
     } else if ((!strcasecmp(argv[0], "known-slave") || !strcasecmp(argv[0], "known-replica")) && argc == 4) {
-        sentinelRedisInstance *replica;
+        sentinelValkeyInstance *replica;
 
         /* known-replica <name> <ip> <port> */
         ri = sentinelGetPrimaryByName(argv[1]);
         if (!ri) return "No such master with specified name.";
-        if ((replica = createSentinelRedisInstance(NULL, SRI_REPLICA, argv[2], atoi(argv[3]), ri->quorum, ri)) ==
+        if ((replica = createSentinelValkeyInstance(NULL, SRI_REPLICA, argv[2], atoi(argv[3]), ri->quorum, ri)) ==
             NULL) {
             return sentinelCheckCreateInstanceErrors(SRI_REPLICA);
         }
     } else if (!strcasecmp(argv[0], "known-sentinel") && (argc == 4 || argc == 5)) {
-        sentinelRedisInstance *si;
+        sentinelValkeyInstance *si;
 
         if (argc == 5) { /* Ignore the old form without runid. */
             /* known-sentinel <name> <ip> <port> [runid] */
             ri = sentinelGetPrimaryByName(argv[1]);
             if (!ri) return "No such master with specified name.";
-            if ((si = createSentinelRedisInstance(argv[4], SRI_SENTINEL, argv[2], atoi(argv[3]), ri->quorum, ri)) ==
+            if ((si = createSentinelValkeyInstance(argv[4], SRI_SENTINEL, argv[2], atoi(argv[3]), ri->quorum, ri)) ==
                 NULL) {
                 return sentinelCheckCreateInstanceErrors(SRI_SENTINEL);
             }
@@ -1993,7 +1993,7 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
     /* For every primary emit a "sentinel monitor" config entry. */
     di = dictGetIterator(sentinel.primaries);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *primary, *ri;
+        sentinelValkeyInstance *primary, *ri;
         sentinelAddr *primary_addr;
 
         /* sentinel monitor */
@@ -2233,7 +2233,7 @@ static void sentinelFlushConfigAndReply(client *c) {
  * We don't check at all if the command was successfully transmitted
  * to the instance as if it fails Sentinel will detect the instance down,
  * will disconnect and reconnect the link and so forth. */
-void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
+void sentinelSendAuthIfNeeded(sentinelValkeyInstance *ri, redisAsyncContext *c) {
     char *auth_pass = NULL;
     char *auth_user = NULL;
 
@@ -2276,7 +2276,7 @@ void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
  *
  * This makes it possible to list all the sentinel instances connected
  * to a server with CLIENT LIST, grepping for a specific name format. */
-void sentinelSetClientName(sentinelRedisInstance *ri, redisAsyncContext *c, char *type) {
+void sentinelSetClientName(sentinelValkeyInstance *ri, redisAsyncContext *c, char *type) {
     char name[64];
 
     snprintf(name, sizeof(name), "sentinel-%.8s-%s", sentinel.myid, type);
@@ -2305,7 +2305,7 @@ static int instanceLinkNegotiateTLS(redisAsyncContext *context) {
 /* Create the async connections for the instance link if the link
  * is disconnected. Note that link->disconnected is true even if just
  * one of the two links (commands and pub/sub) is missing. */
-void sentinelReconnectInstance(sentinelRedisInstance *ri) {
+void sentinelReconnectInstance(sentinelValkeyInstance *ri) {
     if (ri->link->disconnected == 0) return;
     if (ri->addr->port == 0) return; /* port == 0 means invalid address. */
     instanceLink *link = ri->link;
@@ -2396,14 +2396,14 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
  * 2) It reports itself as a primary.
  * 3) It is not SDOWN or ODOWN.
  * 4) We obtained last INFO no more than two times the INFO period time ago. */
-int sentinelPrimaryLooksSane(sentinelRedisInstance *primary) {
+int sentinelPrimaryLooksSane(sentinelValkeyInstance *primary) {
     return primary->flags & SRI_PRIMARY && primary->role_reported == SRI_PRIMARY &&
            (primary->flags & (SRI_S_DOWN | SRI_O_DOWN)) == 0 &&
            (mstime() - primary->info_refresh) < sentinel_info_period * 2;
 }
 
 /* Process the INFO output from primaries. */
-void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
+void sentinelRefreshInstanceInfo(sentinelValkeyInstance *ri, const char *info) {
     sds *lines;
     int numlines, j;
     int role = 0;
@@ -2419,7 +2419,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
     /* Process line by line. */
     lines = sdssplitlen(info, strlen(info), "\r\n", 2, &numlines);
     for (j = 0; j < numlines; j++) {
-        sentinelRedisInstance *replica;
+        sentinelValkeyInstance *replica;
         sds l = lines[j];
 
         /* run_id:<40 hex chars>*/
@@ -2475,8 +2475,8 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
 
             /* Check if we already have this replica into our table,
              * otherwise add it. */
-            if (sentinelRedisInstanceLookupReplica(ri, ip, atoi(port)) == NULL) {
-                if ((replica = createSentinelRedisInstance(NULL, SRI_REPLICA, ip, atoi(port), ri->quorum, ri)) !=
+            if (sentinelValkeyInstanceLookupReplica(ri, ip, atoi(port)) == NULL) {
+                if ((replica = createSentinelValkeyInstance(NULL, SRI_REPLICA, ip, atoi(port), ri->quorum, ri)) !=
                     NULL) {
                     sentinelEvent(LL_NOTICE, "+slave", replica, "%@");
                     sentinelFlushConfig();
@@ -2588,7 +2588,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
             mstime_t wait_time = sentinel_publish_period * 4;
 
             if (!(ri->flags & SRI_PROMOTED) && sentinelPrimaryLooksSane(ri->primary) &&
-                sentinelRedisInstanceNoDownFor(ri, wait_time) && mstime() - ri->role_reported_time > wait_time) {
+                sentinelValkeyInstanceNoDownFor(ri, wait_time) && mstime() - ri->role_reported_time > wait_time) {
                 int retval = sentinelSendReplicaOf(ri, ri->primary->addr);
                 if (retval == C_OK) sentinelEvent(LL_NOTICE, "+convert-to-slave", ri, "%@");
             }
@@ -2603,7 +2603,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
 
         /* Make sure the primary is sane before reconfiguring this instance
          * into a replica. */
-        if (sentinelPrimaryLooksSane(ri->primary) && sentinelRedisInstanceNoDownFor(ri, wait_time) &&
+        if (sentinelPrimaryLooksSane(ri->primary) && sentinelValkeyInstanceNoDownFor(ri, wait_time) &&
             mstime() - ri->replica_conf_change_time > wait_time) {
             int retval = sentinelSendReplicaOf(ri, ri->primary->addr);
             if (retval == C_OK) sentinelEvent(LL_NOTICE, "+fix-slave-config", ri, "%@");
@@ -2632,7 +2632,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
 }
 
 void sentinelInfoReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
-    sentinelRedisInstance *ri = privdata;
+    sentinelValkeyInstance *ri = privdata;
     instanceLink *link = c->data;
     redisReply *r;
 
@@ -2656,7 +2656,7 @@ void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privd
 }
 
 void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
-    sentinelRedisInstance *ri = privdata;
+    sentinelValkeyInstance *ri = privdata;
     instanceLink *link = c->data;
     redisReply *r;
 
@@ -2692,7 +2692,7 @@ void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata
 /* This is called when we get the reply about the PUBLISH command we send
  * to the primary to advertise this sentinel. */
 void sentinelPublishReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
-    sentinelRedisInstance *ri = privdata;
+    sentinelValkeyInstance *ri = privdata;
     instanceLink *link = c->data;
     redisReply *r;
 
@@ -2717,7 +2717,7 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
     int numtokens, port, removed, primary_port;
     uint64_t current_epoch, primary_config_epoch;
     char **token = sdssplitlen(hello, hello_len, ",", 1, &numtokens);
-    sentinelRedisInstance *si, *primary;
+    sentinelValkeyInstance *si, *primary;
 
     if (numtokens == 8) {
         /* Obtain a reference to the primary this hello message is about */
@@ -2727,7 +2727,7 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
         /* First, try to see if we already have this sentinel. */
         port = atoi(token[1]);
         primary_port = atoi(token[6]);
-        si = getSentinelRedisInstanceByAddrAndRunID(primary->sentinels, token[0], port, token[2]);
+        si = getSentinelValkeyInstanceByAddrAndRunID(primary->sentinels, token[0], port, token[2]);
         current_epoch = strtoull(token[3], NULL, 10);
         primary_config_epoch = strtoull(token[7], NULL, 10);
 
@@ -2744,8 +2744,8 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
                  * new one is reporting. What we do if this happens is to set its
                  * port to 0, to signal the address is invalid. We'll update it
                  * later if we get an HELLO message. */
-                sentinelRedisInstance *other =
-                    getSentinelRedisInstanceByAddrAndRunID(primary->sentinels, token[0], port, NULL);
+                sentinelValkeyInstance *other =
+                    getSentinelValkeyInstanceByAddrAndRunID(primary->sentinels, token[0], port, NULL);
                 if (other) {
                     /* If there is already other sentinel with same address (but
                      * different runid) then remove the old one across all primaries */
@@ -2758,7 +2758,7 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
 
                     di = dictGetIterator(sentinel.primaries);
                     while ((de = dictNext(di)) != NULL) {
-                        sentinelRedisInstance *primary = dictGetVal(de);
+                        sentinelValkeyInstance *primary = dictGetVal(de);
                         removeMatchingSentinelFromPrimary(primary, runid_obsolete);
                     }
                     dictReleaseIterator(di);
@@ -2767,7 +2767,7 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
             }
 
             /* Add the new sentinel. */
-            si = createSentinelRedisInstance(token[2], SRI_SENTINEL, token[0], port, primary->quorum, primary);
+            si = createSentinelValkeyInstance(token[2], SRI_SENTINEL, token[0], port, primary->quorum, primary);
 
             if (si) {
                 if (!removed) sentinelEvent(LL_NOTICE, "+sentinel", si, "%@");
@@ -2817,7 +2817,7 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
 /* This is our Pub/Sub callback for the Hello channel. It's useful in order
  * to discover other sentinels attached at the same primary. */
 void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privdata) {
-    sentinelRedisInstance *ri = privdata;
+    sentinelValkeyInstance *ri = privdata;
     redisReply *r;
     UNUSED(c);
 
@@ -2855,13 +2855,13 @@ void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privd
  *
  * Returns C_OK if the PUBLISH was queued correctly, otherwise
  * C_ERR is returned. */
-int sentinelSendHello(sentinelRedisInstance *ri) {
+int sentinelSendHello(sentinelValkeyInstance *ri) {
     char ip[NET_IP_STR_LEN];
     char payload[NET_IP_STR_LEN + 1024];
     int retval;
     char *announce_ip;
     int announce_port;
-    sentinelRedisInstance *primary = (ri->flags & SRI_PRIMARY) ? ri : ri->primary;
+    sentinelValkeyInstance *primary = (ri->flags & SRI_PRIMARY) ? ri : ri->primary;
     sentinelAddr *primary_addr = sentinelGetCurrentPrimaryAddress(primary);
 
     if (ri->link->disconnected) return C_ERR;
@@ -2898,13 +2898,13 @@ int sentinelSendHello(sentinelRedisInstance *ri) {
 
 /* Reset last_pub_time in all the instances in the specified dictionary
  * in order to force the delivery of a Hello update ASAP. */
-void sentinelForceHelloUpdateDictOfRedisInstances(dict *instances) {
+void sentinelForceHelloUpdateDictOfValkeyInstances(dict *instances) {
     dictIterator *di;
     dictEntry *de;
 
     di = dictGetSafeIterator(instances);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *ri = dictGetVal(de);
+        sentinelValkeyInstance *ri = dictGetVal(de);
         if (ri->last_pub_time >= (sentinel_publish_period + 1)) ri->last_pub_time -= (sentinel_publish_period + 1);
     }
     dictReleaseIterator(di);
@@ -2918,12 +2918,12 @@ void sentinelForceHelloUpdateDictOfRedisInstances(dict *instances) {
  * with a period of SENTINEL_PUBLISH_PERIOD milliseconds, however when a
  * Sentinel upgrades a configuration it is a good idea to deliver an update
  * to the other Sentinels ASAP. */
-int sentinelForceHelloUpdateForPrimary(sentinelRedisInstance *primary) {
+int sentinelForceHelloUpdateForPrimary(sentinelValkeyInstance *primary) {
     if (!(primary->flags & SRI_PRIMARY)) return C_ERR;
     if (primary->last_pub_time >= (sentinel_publish_period + 1))
         primary->last_pub_time -= (sentinel_publish_period + 1);
-    sentinelForceHelloUpdateDictOfRedisInstances(primary->sentinels);
-    sentinelForceHelloUpdateDictOfRedisInstances(primary->replicas);
+    sentinelForceHelloUpdateDictOfValkeyInstances(primary->sentinels);
+    sentinelForceHelloUpdateDictOfValkeyInstances(primary->replicas);
     return C_OK;
 }
 
@@ -2932,7 +2932,7 @@ int sentinelForceHelloUpdateForPrimary(sentinelRedisInstance *primary) {
  *
  * On error zero is returned, and we can't consider the PING command
  * queued in the connection. */
-int sentinelSendPing(sentinelRedisInstance *ri) {
+int sentinelSendPing(sentinelValkeyInstance *ri) {
     int retval =
         redisAsyncCommand(ri->link->cc, sentinelPingReplyCallback, ri, "%s", sentinelInstanceMapCommand(ri, "PING"));
     if (retval == C_OK) {
@@ -2950,7 +2950,7 @@ int sentinelSendPing(sentinelRedisInstance *ri) {
 
 /* Send periodic PING, INFO, and PUBLISH to the Hello channel to
  * the specified primary or replica instance. */
-void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
+void sentinelSendPeriodicCommands(sentinelValkeyInstance *ri) {
     mstime_t now = mstime();
     mstime_t info_period, ping_period;
     int retval;
@@ -3214,7 +3214,7 @@ const char *sentinelFailoverStateStr(int state) {
 }
 
 /* Server instance to RESP representation. */
-void addReplySentinelRedisInstance(client *c, sentinelRedisInstance *ri) {
+void addReplySentinelValkeyInstance(client *c, sentinelValkeyInstance *ri) {
     char *flags = sdsempty();
     void *mbl;
     int fields = 0;
@@ -3613,7 +3613,7 @@ void addReplySentinelDebugInfo(client *c) {
 
 /* Output a number of instances contained inside a dictionary as
  * RESP. */
-void addReplyDictOfRedisInstances(client *c, dict *instances) {
+void addReplyDictOfValkeyInstances(client *c, dict *instances) {
     dictIterator *di;
     dictEntry *de;
     long replicas = 0;
@@ -3621,11 +3621,11 @@ void addReplyDictOfRedisInstances(client *c, dict *instances) {
 
     di = dictGetIterator(instances);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *ri = dictGetVal(de);
+        sentinelValkeyInstance *ri = dictGetVal(de);
 
         /* don't announce unannounced replicas */
         if (ri->flags & SRI_REPLICA && !ri->replica_announced) continue;
-        addReplySentinelRedisInstance(c, ri);
+        addReplySentinelValkeyInstance(c, ri);
         replicas++;
     }
     dictReleaseIterator(di);
@@ -3635,8 +3635,8 @@ void addReplyDictOfRedisInstances(client *c, dict *instances) {
 /* Lookup the named primary into sentinel.primaries.
  * If the primary is not found reply to the client with an error and returns
  * NULL. */
-sentinelRedisInstance *sentinelGetPrimaryByNameOrReplyError(client *c, robj *name) {
-    sentinelRedisInstance *ri;
+sentinelValkeyInstance *sentinelGetPrimaryByNameOrReplyError(client *c, robj *name) {
+    sentinelValkeyInstance *ri;
 
     ri = dictFetchValue(sentinel.primaries, name->ptr);
     if (!ri) {
@@ -3649,7 +3649,7 @@ sentinelRedisInstance *sentinelGetPrimaryByNameOrReplyError(client *c, robj *nam
 #define SENTINEL_ISQR_OK 0
 #define SENTINEL_ISQR_NOQUORUM (1 << 0)
 #define SENTINEL_ISQR_NOAUTH (1 << 1)
-int sentinelIsQuorumReachable(sentinelRedisInstance *primary, int *usableptr) {
+int sentinelIsQuorumReachable(sentinelValkeyInstance *primary, int *usableptr) {
     dictIterator *di;
     dictEntry *de;
     int usable = 1; /* Number of usable Sentinels. Init to 1 to count myself. */
@@ -3658,7 +3658,7 @@ int sentinelIsQuorumReachable(sentinelRedisInstance *primary, int *usableptr) {
 
     di = dictGetIterator(primary->sentinels);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *ri = dictGetVal(de);
+        sentinelValkeyInstance *ri = dictGetVal(de);
 
         if (ri->flags & (SRI_S_DOWN | SRI_O_DOWN)) continue;
         usable++;
@@ -3728,28 +3728,28 @@ NULL
     } else if (!strcasecmp(c->argv[1]->ptr, "masters")) {
         /* SENTINEL PRIMARIES */
         if (c->argc != 2) goto numargserr;
-        addReplyDictOfRedisInstances(c, sentinel.primaries);
+        addReplyDictOfValkeyInstances(c, sentinel.primaries);
     } else if (!strcasecmp(c->argv[1]->ptr, "master")) {
         /* SENTINEL PRIMARY <name> */
-        sentinelRedisInstance *ri;
+        sentinelValkeyInstance *ri;
 
         if (c->argc != 3) goto numargserr;
         if ((ri = sentinelGetPrimaryByNameOrReplyError(c, c->argv[2])) == NULL) return;
-        addReplySentinelRedisInstance(c, ri);
+        addReplySentinelValkeyInstance(c, ri);
     } else if (!strcasecmp(c->argv[1]->ptr, "slaves") || !strcasecmp(c->argv[1]->ptr, "replicas")) {
         /* SENTINEL REPLICAS <primary-name> */
-        sentinelRedisInstance *ri;
+        sentinelValkeyInstance *ri;
 
         if (c->argc != 3) goto numargserr;
         if ((ri = sentinelGetPrimaryByNameOrReplyError(c, c->argv[2])) == NULL) return;
-        addReplyDictOfRedisInstances(c, ri->replicas);
+        addReplyDictOfValkeyInstances(c, ri->replicas);
     } else if (!strcasecmp(c->argv[1]->ptr, "sentinels")) {
         /* SENTINEL SENTINELS <primary-name> */
-        sentinelRedisInstance *ri;
+        sentinelValkeyInstance *ri;
 
         if (c->argc != 3) goto numargserr;
         if ((ri = sentinelGetPrimaryByNameOrReplyError(c, c->argv[2])) == NULL) return;
-        addReplyDictOfRedisInstances(c, ri->sentinels);
+        addReplyDictOfValkeyInstances(c, ri->sentinels);
     } else if (!strcasecmp(c->argv[1]->ptr, "myid") && c->argc == 2) {
         /* SENTINEL MYID */
         addReplyBulkCBuffer(c, sentinel.myid, CONFIG_RUN_ID_SIZE);
@@ -3771,7 +3771,7 @@ NULL
          * in order to elect the failover leader. Otherwise it is set to the
          * runid we want the Sentinel to vote if it did not already voted.
          */
-        sentinelRedisInstance *ri;
+        sentinelValkeyInstance *ri;
         long long req_epoch;
         uint64_t leader_epoch = 0;
         char *leader = NULL;
@@ -3782,7 +3782,7 @@ NULL
         if (getLongFromObjectOrReply(c, c->argv[3], &port, NULL) != C_OK ||
             getLongLongFromObjectOrReply(c, c->argv[4], &req_epoch, NULL) != C_OK)
             return;
-        ri = getSentinelRedisInstanceByAddrAndRunID(sentinel.primaries, c->argv[2]->ptr, port, NULL);
+        ri = getSentinelValkeyInstanceByAddrAndRunID(sentinel.primaries, c->argv[2]->ptr, port, NULL);
 
         /* It exists? Is actually a primary? Is subjectively down? It's down.
          * Note: if we are in tilt mode we always reply with "0". */
@@ -3807,7 +3807,7 @@ NULL
         addReplyLongLong(c, sentinelResetPrimariesByPattern(c->argv[2]->ptr, SENTINEL_GENERATE_EVENT));
     } else if (!strcasecmp(c->argv[1]->ptr, "get-master-addr-by-name")) {
         /* SENTINEL GET-PRIMARY-ADDR-BY-NAME <primary-name> */
-        sentinelRedisInstance *ri;
+        sentinelValkeyInstance *ri;
 
         if (c->argc != 3) goto numargserr;
         ri = sentinelGetPrimaryByName(c->argv[2]->ptr);
@@ -3822,7 +3822,7 @@ NULL
         }
     } else if (!strcasecmp(c->argv[1]->ptr, "failover")) {
         /* SENTINEL FAILOVER <primary-name> */
-        sentinelRedisInstance *ri;
+        sentinelValkeyInstance *ri;
 
         if (c->argc != 3) goto numargserr;
         if ((ri = sentinelGetPrimaryByNameOrReplyError(c, c->argv[2])) == NULL) return;
@@ -3845,7 +3845,7 @@ NULL
         sentinelPendingScriptsCommand(c);
     } else if (!strcasecmp(c->argv[1]->ptr, "monitor")) {
         /* SENTINEL MONITOR <name> <ip> <port> <quorum> */
-        sentinelRedisInstance *ri;
+        sentinelValkeyInstance *ri;
         long quorum, port;
         char ip[NET_IP_STR_LEN];
 
@@ -3868,7 +3868,7 @@ NULL
         }
 
         /* Parameters are valid. Try to create the primary instance. */
-        ri = createSentinelRedisInstance(c->argv[2]->ptr, SRI_PRIMARY, c->argv[3]->ptr, port, quorum, NULL);
+        ri = createSentinelValkeyInstance(c->argv[2]->ptr, SRI_PRIMARY, c->argv[3]->ptr, port, quorum, NULL);
         if (ri == NULL) {
             addReplyError(c, sentinelCheckCreateInstanceErrors(SRI_PRIMARY));
         } else {
@@ -3881,7 +3881,7 @@ NULL
         return;
     } else if (!strcasecmp(c->argv[1]->ptr, "remove")) {
         /* SENTINEL REMOVE <name> */
-        sentinelRedisInstance *ri;
+        sentinelValkeyInstance *ri;
 
         if (c->argc != 3) goto numargserr;
         if ((ri = sentinelGetPrimaryByNameOrReplyError(c, c->argv[2])) == NULL) return;
@@ -3890,7 +3890,7 @@ NULL
         sentinelFlushConfigAndReply(c);
     } else if (!strcasecmp(c->argv[1]->ptr, "ckquorum")) {
         /* SENTINEL CKQUORUM <name> */
-        sentinelRedisInstance *ri;
+        sentinelValkeyInstance *ri;
         int usable;
 
         if (c->argc != 3) goto numargserr;
@@ -3939,7 +3939,7 @@ NULL
             primaries_local = dictCreate(&copy_keeper);
 
             for (int i = 2; i < c->argc; i++) {
-                sentinelRedisInstance *ri;
+                sentinelValkeyInstance *ri;
                 ri = sentinelGetPrimaryByName(c->argv[i]->ptr);
                 if (!ri) continue; /* ignore non-existing names */
                 dictAdd(primaries_local, ri->name, ri);
@@ -3963,7 +3963,7 @@ NULL
         dictEntry *de;
         di = dictGetIterator(primaries_local);
         while ((de = dictNext(di)) != NULL) {
-            sentinelRedisInstance *ri = dictGetVal(de);
+            sentinelValkeyInstance *ri = dictGetVal(de);
             addReplyBulkCBuffer(c, ri->name, strlen(ri->name));
             addReplyArrayLen(c, dictSize(ri->replicas) + 1); /* +1 for self */
             addReplyArrayLen(c, 2);
@@ -3977,7 +3977,7 @@ NULL
             dictEntry *sde;
             sdi = dictGetIterator(ri->replicas);
             while ((sde = dictNext(sdi)) != NULL) {
-                sentinelRedisInstance *sri = dictGetVal(sde);
+                sentinelValkeyInstance *sri = dictGetVal(sde);
                 addReplyArrayLen(c, 2);
                 addReplyLongLong(c, ri->info_refresh ? (now - sri->info_refresh) : 0);
                 if (sri->info)
@@ -4085,7 +4085,7 @@ void sentinelInfoCommand(client *c) {
 
         di = dictGetIterator(sentinel.primaries);
         while ((de = dictNext(di)) != NULL) {
-            sentinelRedisInstance *ri = dictGetVal(de);
+            sentinelValkeyInstance *ri = dictGetVal(de);
             char *status = "ok";
 
             if (ri->flags & SRI_O_DOWN)
@@ -4116,7 +4116,7 @@ void sentinelRoleCommand(client *c) {
 
     di = dictGetIterator(sentinel.primaries);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *ri = dictGetVal(de);
+        sentinelValkeyInstance *ri = dictGetVal(de);
 
         addReplyBulkCString(c, ri->name);
     }
@@ -4125,7 +4125,7 @@ void sentinelRoleCommand(client *c) {
 
 /* SENTINEL SET <primaryname> [<option> <value> ...] */
 void sentinelSetCommand(client *c) {
-    sentinelRedisInstance *ri;
+    sentinelValkeyInstance *ri;
     int j, changes = 0;
     int badarg = 0; /* Bad argument position for error reporting. */
     char *option;
@@ -4309,7 +4309,7 @@ void sentinelPublishCommand(client *c) {
 /* ===================== SENTINEL availability checks ======================= */
 
 /* Is this instance down from our point of view? */
-void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
+void sentinelCheckSubjectivelyDown(sentinelValkeyInstance *ri) {
     mstime_t elapsed = 0;
 
     if (ri->link->act_ping_time)
@@ -4374,7 +4374,7 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
  * reported in a given time range that the instance was not reachable.
  * However messages can be delayed so there are no strong guarantees about
  * N instances agreeing at the same time about the down state. */
-void sentinelCheckObjectivelyDown(sentinelRedisInstance *primary) {
+void sentinelCheckObjectivelyDown(sentinelValkeyInstance *primary) {
     dictIterator *di;
     dictEntry *de;
     unsigned int quorum = 0, odown = 0;
@@ -4385,7 +4385,7 @@ void sentinelCheckObjectivelyDown(sentinelRedisInstance *primary) {
         /* Count all the other sentinels. */
         di = dictGetIterator(primary->sentinels);
         while ((de = dictNext(di)) != NULL) {
-            sentinelRedisInstance *ri = dictGetVal(de);
+            sentinelValkeyInstance *ri = dictGetVal(de);
 
             if (ri->flags & SRI_PRIMARY_DOWN) quorum++;
         }
@@ -4411,7 +4411,7 @@ void sentinelCheckObjectivelyDown(sentinelRedisInstance *primary) {
 /* Receive the SENTINEL is-primary-down-by-addr reply, see the
  * sentinelAskPrimariesStateToOtherSentinels() function for more information. */
 void sentinelReceiveIsPrimaryDownReply(redisAsyncContext *c, void *reply, void *privdata) {
-    sentinelRedisInstance *ri = privdata;
+    sentinelValkeyInstance *ri = privdata;
     instanceLink *link = c->data;
     redisReply *r;
 
@@ -4448,13 +4448,13 @@ void sentinelReceiveIsPrimaryDownReply(redisAsyncContext *c, void *reply, void *
  * in order to get the replies that allow to reach the quorum
  * needed to mark the primary in ODOWN state and trigger a failover. */
 #define SENTINEL_ASK_FORCED (1 << 0)
-void sentinelAskPrimaryStateToOtherSentinels(sentinelRedisInstance *primary, int flags) {
+void sentinelAskPrimaryStateToOtherSentinels(sentinelValkeyInstance *primary, int flags) {
     dictIterator *di;
     dictEntry *de;
 
     di = dictGetIterator(primary->sentinels);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *ri = dictGetVal(de);
+        sentinelValkeyInstance *ri = dictGetVal(de);
         mstime_t elapsed = mstime() - ri->last_primary_down_reply_time;
         char port[32];
         int retval;
@@ -4500,7 +4500,7 @@ void sentinelSimFailureCrash(void) {
  *
  * If a vote is not available returns NULL, otherwise return the Sentinel
  * runid and populate the leader_epoch with the epoch of the vote. */
-char *sentinelVoteLeader(sentinelRedisInstance *primary, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch) {
+char *sentinelVoteLeader(sentinelValkeyInstance *primary, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch) {
     if (req_epoch > sentinel.current_epoch) {
         sentinel.current_epoch = req_epoch;
         sentinelFlushConfig();
@@ -4554,7 +4554,7 @@ int sentinelLeaderIncr(dict *counters, char *runid) {
  * To be a leader for a given epoch, we should have the majority of
  * the Sentinels we know (ever seen since the last SENTINEL RESET) that
  * reported the same instance as leader for the same epoch. */
-char *sentinelGetLeader(sentinelRedisInstance *primary, uint64_t epoch) {
+char *sentinelGetLeader(sentinelValkeyInstance *primary, uint64_t epoch) {
     dict *counters;
     dictIterator *di;
     dictEntry *de;
@@ -4572,7 +4572,7 @@ char *sentinelGetLeader(sentinelRedisInstance *primary, uint64_t epoch) {
     /* Count other sentinels votes */
     di = dictGetIterator(primary->sentinels);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *ri = dictGetVal(de);
+        sentinelValkeyInstance *ri = dictGetVal(de);
         if (ri->leader != NULL && ri->leader_epoch == sentinel.current_epoch) sentinelLeaderIncr(counters, ri->leader);
     }
     dictReleaseIterator(di);
@@ -4627,7 +4627,7 @@ char *sentinelGetLeader(sentinelRedisInstance *primary, uint64_t epoch) {
  * The command returns C_OK if the REPLICAOF command was accepted for
  * (later) delivery otherwise C_ERR. The command replies are just
  * discarded. */
-int sentinelSendReplicaOf(sentinelRedisInstance *ri, const sentinelAddr *addr) {
+int sentinelSendReplicaOf(sentinelValkeyInstance *ri, const sentinelAddr *addr) {
     char portstr[32];
     const char *host;
     int retval;
@@ -4688,7 +4688,7 @@ int sentinelSendReplicaOf(sentinelRedisInstance *ri, const sentinelAddr *addr) {
 }
 
 /* Setup the primary state to start a failover. */
-void sentinelStartFailover(sentinelRedisInstance *primary) {
+void sentinelStartFailover(sentinelValkeyInstance *primary) {
     serverAssert(primary->flags & SRI_PRIMARY);
 
     primary->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
@@ -4711,7 +4711,7 @@ void sentinelStartFailover(sentinelRedisInstance *primary) {
  * start the failover but that we'll not be able to act.
  *
  * Return non-zero if a failover was started. */
-int sentinelStartFailoverIfNeeded(sentinelRedisInstance *primary) {
+int sentinelStartFailoverIfNeeded(sentinelValkeyInstance *primary) {
     /* We can't failover if the primary is not in O_DOWN state. */
     if (!(primary->flags & SRI_O_DOWN)) return 0;
 
@@ -4769,7 +4769,7 @@ int sentinelStartFailoverIfNeeded(sentinelRedisInstance *primary) {
  * sort suitable replicas in a "better first" order, to take the first of
  * the list. */
 int compareReplicasForPromotion(const void *a, const void *b) {
-    sentinelRedisInstance **sa = (sentinelRedisInstance **)a, **sb = (sentinelRedisInstance **)b;
+    sentinelValkeyInstance **sa = (sentinelValkeyInstance **)a, **sb = (sentinelValkeyInstance **)b;
     char *sa_runid, *sb_runid;
 
     if ((*sa)->replica_priority != (*sb)->replica_priority) return (*sa)->replica_priority - (*sb)->replica_priority;
@@ -4797,9 +4797,9 @@ int compareReplicasForPromotion(const void *a, const void *b) {
     return strcasecmp(sa_runid, sb_runid);
 }
 
-sentinelRedisInstance *sentinelSelectReplica(sentinelRedisInstance *primary) {
-    sentinelRedisInstance **instance = zmalloc(sizeof(instance[0]) * dictSize(primary->replicas));
-    sentinelRedisInstance *selected = NULL;
+sentinelValkeyInstance *sentinelSelectReplica(sentinelValkeyInstance *primary) {
+    sentinelValkeyInstance **instance = zmalloc(sizeof(instance[0]) * dictSize(primary->replicas));
+    sentinelValkeyInstance *selected = NULL;
     int instances = 0;
     dictIterator *di;
     dictEntry *de;
@@ -4811,7 +4811,7 @@ sentinelRedisInstance *sentinelSelectReplica(sentinelRedisInstance *primary) {
     di = dictGetIterator(primary->replicas);
 
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *replica = dictGetVal(de);
+        sentinelValkeyInstance *replica = dictGetVal(de);
         mstime_t info_validity_time;
 
         if (replica->flags & (SRI_S_DOWN | SRI_O_DOWN)) continue;
@@ -4832,7 +4832,7 @@ sentinelRedisInstance *sentinelSelectReplica(sentinelRedisInstance *primary) {
     }
     dictReleaseIterator(di);
     if (instances) {
-        qsort(instance, instances, sizeof(sentinelRedisInstance *), compareReplicasForPromotion);
+        qsort(instance, instances, sizeof(sentinelValkeyInstance *), compareReplicasForPromotion);
         selected = instance[0];
     }
     zfree(instance);
@@ -4840,7 +4840,7 @@ sentinelRedisInstance *sentinelSelectReplica(sentinelRedisInstance *primary) {
 }
 
 /* ---------------- Failover state machine implementation ------------------- */
-void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
+void sentinelFailoverWaitStart(sentinelValkeyInstance *ri) {
     char *leader;
     int isleader;
 
@@ -4871,8 +4871,8 @@ void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
     sentinelEvent(LL_WARNING, "+failover-state-select-slave", ri, "%@");
 }
 
-void sentinelFailoverSelectReplica(sentinelRedisInstance *ri) {
-    sentinelRedisInstance *replica = sentinelSelectReplica(ri);
+void sentinelFailoverSelectReplica(sentinelValkeyInstance *ri) {
+    sentinelValkeyInstance *replica = sentinelSelectReplica(ri);
 
     /* We don't handle the timeout in this state as the function aborts
      * the failover or go forward in the next state. */
@@ -4889,7 +4889,7 @@ void sentinelFailoverSelectReplica(sentinelRedisInstance *ri) {
     }
 }
 
-void sentinelFailoverSendReplicaOfNoOne(sentinelRedisInstance *ri) {
+void sentinelFailoverSendReplicaOfNoOne(sentinelValkeyInstance *ri) {
     int retval;
 
     /* We can't send the command to the promoted replica if it is now
@@ -4916,7 +4916,7 @@ void sentinelFailoverSendReplicaOfNoOne(sentinelRedisInstance *ri) {
 
 /* We actually wait for promotion indirectly checking with INFO when the
  * replica turns into a primary. */
-void sentinelFailoverWaitPromotion(sentinelRedisInstance *ri) {
+void sentinelFailoverWaitPromotion(sentinelValkeyInstance *ri) {
     /* Just handle the timeout. Switching to the next state is handled
      * by the function parsing the INFO command of the promoted replica. */
     if (mstime() - ri->failover_state_change_time > ri->failover_timeout) {
@@ -4925,7 +4925,7 @@ void sentinelFailoverWaitPromotion(sentinelRedisInstance *ri) {
     }
 }
 
-void sentinelFailoverDetectEnd(sentinelRedisInstance *primary) {
+void sentinelFailoverDetectEnd(sentinelValkeyInstance *primary) {
     int not_reconfigured = 0, timeout = 0;
     dictIterator *di;
     dictEntry *de;
@@ -4939,7 +4939,7 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *primary) {
      * configured. */
     di = dictGetIterator(primary->replicas);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *replica = dictGetVal(de);
+        sentinelValkeyInstance *replica = dictGetVal(de);
 
         if (replica->flags & (SRI_PROMOTED | SRI_RECONF_DONE)) continue;
         if (replica->flags & SRI_S_DOWN) continue;
@@ -4969,7 +4969,7 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *primary) {
 
         di = dictGetIterator(primary->replicas);
         while ((de = dictNext(di)) != NULL) {
-            sentinelRedisInstance *replica = dictGetVal(de);
+            sentinelValkeyInstance *replica = dictGetVal(de);
             int retval;
 
             if (replica->flags & (SRI_PROMOTED | SRI_RECONF_DONE | SRI_RECONF_SENT)) continue;
@@ -4987,14 +4987,14 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *primary) {
 
 /* Send REPLICAOF <new primary address> to all the remaining replicas that
  * still don't appear to have the configuration updated. */
-void sentinelFailoverReconfNextReplica(sentinelRedisInstance *primary) {
+void sentinelFailoverReconfNextReplica(sentinelValkeyInstance *primary) {
     dictIterator *di;
     dictEntry *de;
     int in_progress = 0;
 
     di = dictGetIterator(primary->replicas);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *replica = dictGetVal(de);
+        sentinelValkeyInstance *replica = dictGetVal(de);
 
         if (replica->flags & (SRI_RECONF_SENT | SRI_RECONF_INPROG)) in_progress++;
     }
@@ -5002,7 +5002,7 @@ void sentinelFailoverReconfNextReplica(sentinelRedisInstance *primary) {
 
     di = dictGetIterator(primary->replicas);
     while (in_progress < primary->parallel_syncs && (de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *replica = dictGetVal(de);
+        sentinelValkeyInstance *replica = dictGetVal(de);
         int retval;
 
         /* Skip the promoted replica, and already configured replicas. */
@@ -5042,8 +5042,8 @@ void sentinelFailoverReconfNextReplica(sentinelRedisInstance *primary) {
 /* This function is called when the replica is in
  * SENTINEL_FAILOVER_STATE_UPDATE_CONFIG state. In this state we need
  * to remove it from the primary table and add the promoted replica instead. */
-void sentinelFailoverSwitchToPromotedReplica(sentinelRedisInstance *primary) {
-    sentinelRedisInstance *ref = primary->promoted_replica ? primary->promoted_replica : primary;
+void sentinelFailoverSwitchToPromotedReplica(sentinelValkeyInstance *primary) {
+    sentinelValkeyInstance *ref = primary->promoted_replica ? primary->promoted_replica : primary;
 
     sentinelEvent(LL_WARNING, "+switch-master", primary, "%s %s %d %s %d", primary->name,
                   announceSentinelAddr(primary->addr), primary->addr->port, announceSentinelAddr(ref->addr),
@@ -5052,7 +5052,7 @@ void sentinelFailoverSwitchToPromotedReplica(sentinelRedisInstance *primary) {
     sentinelResetPrimaryAndChangeAddress(primary, ref->addr->hostname, ref->addr->port);
 }
 
-void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
+void sentinelFailoverStateMachine(sentinelValkeyInstance *ri) {
     serverAssert(ri->flags & SRI_PRIMARY);
 
     if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS)) return;
@@ -5071,7 +5071,7 @@ void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
  * This function can only be called before the promoted replica acknowledged
  * the replica -> primary switch. Otherwise the failover can't be aborted and
  * will reach its end (possibly by timeout). */
-void sentinelAbortFailover(sentinelRedisInstance *ri) {
+void sentinelAbortFailover(sentinelValkeyInstance *ri) {
     serverAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS);
     serverAssert(ri->failover_state <= SENTINEL_FAILOVER_STATE_WAIT_PROMOTION);
 
@@ -5090,7 +5090,7 @@ void sentinelAbortFailover(sentinelRedisInstance *ri) {
  * -------------------------------------------------------------------------- */
 
 /* Perform scheduled operations for the specified instance. */
-void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
+void sentinelHandleValkeyInstance(sentinelValkeyInstance *ri) {
     /* ========== MONITORING HALF ============ */
     /* Every kind of instance */
     sentinelReconnectInstance(ri);
@@ -5120,20 +5120,20 @@ void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
 
 /* Perform scheduled operations for all the instances in the dictionary.
  * Recursively call the function against dictionaries of replicas. */
-void sentinelHandleDictOfRedisInstances(dict *instances) {
+void sentinelHandleDictOfValkeyInstances(dict *instances) {
     dictIterator *di;
     dictEntry *de;
-    sentinelRedisInstance *switch_to_promoted = NULL;
+    sentinelValkeyInstance *switch_to_promoted = NULL;
 
     /* There are a number of things we need to perform against every primary. */
     di = dictGetIterator(instances);
     while ((de = dictNext(di)) != NULL) {
-        sentinelRedisInstance *ri = dictGetVal(de);
+        sentinelValkeyInstance *ri = dictGetVal(de);
 
-        sentinelHandleRedisInstance(ri);
+        sentinelHandleValkeyInstance(ri);
         if (ri->flags & SRI_PRIMARY) {
-            sentinelHandleDictOfRedisInstances(ri->replicas);
-            sentinelHandleDictOfRedisInstances(ri->sentinels);
+            sentinelHandleDictOfValkeyInstances(ri->replicas);
+            sentinelHandleDictOfValkeyInstances(ri->sentinels);
             if (ri->failover_state == SENTINEL_FAILOVER_STATE_UPDATE_CONFIG) {
                 switch_to_promoted = ri;
             }
@@ -5176,7 +5176,7 @@ void sentinelCheckTiltCondition(void) {
 
 void sentinelTimer(void) {
     sentinelCheckTiltCondition();
-    sentinelHandleDictOfRedisInstances(sentinel.primaries);
+    sentinelHandleDictOfValkeyInstances(sentinel.primaries);
     sentinelRunPendingScripts();
     sentinelCollectTerminatedScripts();
     sentinelKillTimedoutScripts();

From f2bbd1ff0f650182e7df1fda1055551389bb79c2 Mon Sep 17 00:00:00 2001
From: bentotten <59932872+bentotten@users.noreply.github.com>
Date: Thu, 4 Jul 2024 16:55:55 -0700
Subject: [PATCH 46/53] Fix minor memory leak in clusterLoadConfig (#741)

We forgot to call sdsfreesplitres in the error path during a
nodes.conf corruption check. This function exits on the error
paths, so this is just a cleanup.

Signed-off-by: bentotten <59932872+bentotten@users.noreply.github.com>
---
 src/cluster_legacy.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index f2f980d58c..61b5af8e29 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -578,6 +578,7 @@ int clusterLoadConfig(char *filename) {
                        memcmp(primary->shard_id, n->shard_id, CLUSTER_NAMELEN) != 0) {
                 /* If the primary has been added to a shard, make sure this
                  * node has the same persisted shard id as the primary. */
+                sdsfreesplitres(argv, argc);
                 goto fmterr;
             }
             n->replicaof = primary;

From 5f0ccf1478452598490683be4f78cf3e69ee259f Mon Sep 17 00:00:00 2001
From: nitaicaro <42576749+nitaicaro@users.noreply.github.com>
Date: Sun, 7 Jul 2024 18:44:48 +0300
Subject: [PATCH 47/53] Remove duplicate definition of UNUSED(V) (#755)

Signed-off-by: Nitai Caro <caronita@amazon.com>
Co-authored-by: Nitai Caro <caronita@amazon.com>
---
 src/dict.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/dict.c b/src/dict.c
index b6a06eb36a..280f0b6abc 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -1806,7 +1806,6 @@ void dictGetStats(char *buf, size_t bufsize, dict *d, int full) {
 #ifdef SERVER_TEST
 #include "testhelp.h"
 
-#define UNUSED(V) ((void)V)
 #define TEST(name) printf("test — %s\n", name);
 
 uint64_t hashCallback(const void *key) {

From bbfd041895ee7d7d8dd59b1c75aa44f5b473e483 Mon Sep 17 00:00:00 2001
From: uriyage <78144248+uriyage@users.noreply.github.com>
Date: Tue, 9 Jul 2024 06:01:39 +0300
Subject: [PATCH 48/53] Async IO threads (#758)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR is 1 of 3 PRs intended to achieve the goal of 1 million requests
per second, as detailed by [dan touitou](https://github.com/touitou-dan)
in https://github.com/valkey-io/valkey/issues/22. This PR modifies the
IO threads to be fully asynchronous, which is a first and necessary step
to allow more work offloading and better utilization of the IO threads.

### Current IO threads state:

Valkey IO threads were introduced in Redis 6.0 to allow better
utilization of multi-core machines. Before this, Redis was
single-threaded and could only use one CPU core for network and command
processing. The introduction of IO threads helps in offloading the IO
operations to multiple threads.

**Current IO Threads flow:**

1. Initialization: When Redis starts, it initializes a specified number
of IO threads. These threads are in addition to the main thread, each
thread starts with an empty list, the main thread will populate that
list in each event-loop with pending-read-clients or
pending-write-clients.
2. Read Phase: The main thread accepts incoming connections and reads
requests from clients. The reading of requests is offloaded to IO
threads. The main thread puts the ready-to-read clients in a list and
sets the global io_threads_op to IO_THREADS_OP_READ; the IO threads pick
the clients up, perform the read operation and parse the first incoming
command.
3. Command Processing: After reading the requests, command processing is
still single-threaded and handled by the main thread.
4. Write Phase: Similar to the read phase, the write phase is also
offloaded to IO threads. The main thread prepares the response in the
client’s output buffer, then puts the client in the list and sets the
global io_threads_op to IO_THREADS_OP_WRITE. The IO threads then pick
the clients up and perform the write operation to send the responses
back to clients.
5. Synchronization: The main thread communicates to the IO threads how
many jobs are left for each thread using an atomic counter. The main
thread doesn’t access the clients while they are being handled by the IO
threads.

**Issues with current implementation:**

* Underutilized Cores: The current implementation of IO-threads leads to
the underutilization of CPU cores.
* The main thread remains responsible for a significant portion of
IO-related tasks that could be offloaded to IO-threads.
* When the main-thread is processing client’s commands, the IO threads
are idle for a considerable amount of time.
* Notably, the main thread's performance during the IO-related tasks is
constrained by the speed of the slowest IO-thread.
* Limited Offloading: Currently, since the main thread waits
synchronously for the IO threads, the threads perform only read-parse
and write operations, with parsing done only for the first command. If
the threads can do work asynchronously, we may offload more work to the
threads, reducing the load on the main thread.
* TLS: Currently, we don't support IO threads with TLS (where offloading
IO would be more beneficial) since TLS read/write operations are not
thread-safe with the current implementation.

### Suggested change

Non-blocking main thread - The main thread and IO threads will operate
in parallel to maximize efficiency. The main thread will not be blocked
by IO operations. It will continue to process commands independently of
the IO thread's activities.

**Implementation details**

**Inter-thread communication.**

* We use a static, lock-free ring buffer of fixed size (2048 jobs) for
the main thread to send jobs and for the IO to receive them. If the ring
buffer fills up, the main thread will handle the task itself, acting as
back pressure (in case IO operations are more expensive than command
processing). A static ring buffer is a better candidate than a dynamic
job queue as it eliminates the need for allocation/freeing per job.
* An IO job will be in the format: `[void* function-call-back | void
*data]`, where data is a client to read from or write to and the
function-ptr is the function to be called with the data, for example
readQueryFromClient. Using this format, we can later offload other
types of work to the IO threads.
* The ring buffer is one-way, from the main thread to the IO thread.
Upon a read/write event the main thread will send a read/write job;
then, in before-sleep, it will iterate over the pending read/write
clients, checking for each client whether the IO thread has already
finished handling it. The IO thread signals it has finished handling a
client read/write by toggling an atomic flag read_state / write_state on
the client struct.

**Thread Safety**

As suggested in this solution, the IO threads are reading from and
writing to the clients' buffers while the main thread may access those
clients.
We must ensure no race conditions or unsafe access occurs while keeping
the Valkey code simple and lock free.

Minimal Action in the IO Threads
The main change is to limit the IO thread operations to the bare
minimum. The IO thread will access only the client's struct and only the
necessary fields in this struct.
The IO threads will be responsible for the following:

* Read Operation: The IO thread will only read and parse a single
command. It will not update the server stats, handle read errors, or
parsing errors. These tasks will be taken care of by the main thread.
* Write Operation: The IO thread will only write the available data. It
will not free the client's replies, handle write errors, or update the
server statistics.


To achieve this without code duplication, the read/write code has been
refactored into smaller, independent components:

* Functions that perform only the read/parse/write calls.
* Functions that handle the read/parse/write results.

This refactor accounts for the majority of the modifications in this PR.

**Client Struct Safe Access**

As we ensure that the IO threads access memory only within the client
struct, we need to ensure thread safety only for the client's struct's
shared fields.

* Query Buffer
* Command parsing - The main thread will not try to parse a command from
the query buffer when a client is offloaded to the IO thread.
* Client's memory checks in client-cron - The main thread will not
access the client query buffer if it is offloaded and will handle the
querybuf grow/shrink when the client is back.
* CLIENT LIST command - The main thread will busy-wait for the IO thread
to finish handling the client, falling back to the current behavior
where the main thread waits for the IO thread to finish their
processing.
* Output Buffer
* The IO thread will not change the client's bufpos and won't free the
client's reply lists. These actions will be done by the main thread on
the client's return from the IO thread.
* bufpos / block→used: As the main thread may change the bufpos, the
reply-block→used, or add/delete blocks to the reply list while the IO
thread writes, we add two fields to the client struct: io_last_bufpos
and io_last_reply_block. The IO thread will write until the
io_last_bufpos, which was set by the main-thread before sending the
client to the IO thread. If more data has been added to the cob in
between, it will be written in the next write-job. In addition, the main
thread will not trim or merge reply blocks while the client is
offloaded.
* Parsing Fields
    * Client's cmd, argc, argv, reqtype, etc., are set during parsing.
* The main thread will indicate to the IO thread not to parse a cmd if
the client is not reset. In this case, the IO thread will only read from
the network and won't attempt to parse a new command.
* The main thread won't access the c→cmd/c→argv in the CLIENT LIST
command as stated before it will busy wait for the IO threads.
* Client Flags
* c→flags, which may be changed by the main thread in multiple places,
won't be accessed by the IO thread. Instead, the main thread will set
the c→io_flags with the information necessary for the IO thread to know
the client's state.
* Client Close
* On freeClient, the main thread will busy wait for the IO thread to
finish processing the client's read/write before proceeding to free the
client.
* Client's Memory Limits
* The IO thread won't handle the qb/cob limits. In case a client crosses
the qb limit, the IO thread will stop reading for it, letting the main
thread know that the client crossed the limit.

**TLS**

TLS is currently not supported with IO threads for the following
reasons:

1. Pending reads - If SSL has pending data that has already been read
from the socket, there is a risk of not calling the read handler again.
To handle this, a list is used to hold the pending clients. With IO
threads, multiple threads can access the list concurrently.
2. Event loop modification - Currently, the TLS code
registers/unregisters the file descriptor from the event loop depending
on the read/write results. With IO threads, multiple threads can modify
the event loop struct simultaneously.
3. The same client can be sent to 2 different threads concurrently
(https://github.com/redis/redis/issues/12540).

Those issues were handled in the current PR:

1. The IO thread only performs the read operation. The main thread will
check for pending reads after the client returns from the IO thread and
will be the only one to access the pending list.
2. The registering/unregistering of events will be similarly postponed
and handled by the main thread only.
3. Each client is being sent to the same dedicated thread (c→id %
num_of_threads).


**Sending Replies Immediately with IO threads.**

Currently, after processing a command, we add the client to the
pending_writes_list. Only after processing all the clients do we send
all the replies. Since the IO threads are now working asynchronously, we
can send the reply immediately after processing the client’s requests,
reducing the command latency. However, if we are using AOF=always, we
must wait for the AOF buffer to be written, in which case we revert to
the current behavior.

**IO threads dynamic adjustment**

Currently, we use an all-or-nothing approach when activating the IO
threads. The current logic is as follows: if the number of pending write
clients is greater than twice the number of threads (including the main
thread), we enable all threads; otherwise, we enable none. For example,
if 8 IO threads are defined, we enable all 8 threads if there are 16
pending clients; else, we enable none.
It makes more sense to enable partial activation of the IO threads. If
we have 10 pending clients, we will enable 5 threads, and so on. This
approach allows for a more granular and efficient allocation of
resources based on the current workload.

In addition, the user will now be able to change the number of I/O
threads at runtime. For example, when decreasing the number of threads
from 4 to 2, threads 3 and 4 will be closed after flushing their job
queues.

**Tests**

Currently, we run the io-threads tests with 4 IO threads
(https://github.com/valkey-io/valkey/blob/443d80f1686377ad42cbf92d98ecc6d240325ee1/.github/workflows/daily.yml#L353).
This means that we will not activate the IO threads unless there are 8
(threads * 2) pending write clients per single loop, which is unlikely
to happen in most tests, meaning the IO threads are not currently
being tested.

To force the main thread to always offload work to the IO threads,
regardless of the number of pending events, we add an
events-per-io-thread configuration with a default value of 2. When set
to 0, this configuration will force the main thread to always offload
work to the IO threads.

When we offload every single read/write operation to the IO threads, the
IO threads run at 100% CPU. When running multiple tests concurrently,
some tests fail as a result of larger-than-expected command latencies.
To address this issue, we have to add some after or wait_for calls to
some of the tests to ensure they pass with IO threads as well.

Signed-off-by: Uri Yagelnik <uriy@amazon.com>
---
 .github/workflows/daily.yml            |    4 +-
 src/Makefile                           |    2 +-
 src/ae.c                               |    4 +-
 src/ae.h                               |    5 +-
 src/blocked.c                          |    2 +-
 src/config.c                           |    6 +-
 src/config.h                           |   11 +-
 src/connection.h                       |   18 +
 src/debug.c                            |    2 +
 src/eval.c                             |    2 +-
 src/io_threads.c                       |  377 ++++++
 src/io_threads.h                       |   13 +
 src/networking.c                       | 1569 ++++++++++++------------
 src/rdb.c                              |    2 +-
 src/replication.c                      |   22 +-
 src/server.c                           |  112 +-
 src/server.h                           |  107 +-
 src/socket.c                           |    2 +
 src/tls.c                              |   55 +-
 src/unix.c                             |    2 +
 tests/integration/failover.tcl         |    6 +
 tests/integration/replication.tcl      |    1 +
 tests/integration/shutdown.tcl         |   12 +
 tests/integration/valkey-cli.tcl       |    1 +
 tests/unit/client-eviction.tcl         |   23 +-
 tests/unit/cluster/pubsubshard.tcl     |    8 +-
 tests/unit/dump.tcl                    |    1 +
 tests/unit/info.tcl                    |   83 +-
 tests/unit/maxmemory.tcl               |    1 +
 tests/unit/memefficiency.tcl           |    3 +
 tests/unit/moduleapi/blockedclient.tcl |    2 +-
 tests/unit/pubsub.tcl                  |   12 +
 tests/unit/pubsubshard.tcl             |    8 +
 tests/unit/querybuf.tcl                |    2 +-
 tests/unit/type/list.tcl               |    7 +
 tests/unit/type/stream-cgroups.tcl     |   11 +-
 tests/unit/type/zset.tcl               |    1 +
 valkey.conf                            |   21 +-
 38 files changed, 1551 insertions(+), 969 deletions(-)
 create mode 100644 src/io_threads.c
 create mode 100644 src/io_threads.h

diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml
index 7679856d1d..91dbb26fab 100644
--- a/.github/workflows/daily.yml
+++ b/.github/workflows/daily.yml
@@ -358,10 +358,10 @@ jobs:
         run: sudo apt-get install tcl8.6 tclx
       - name: test
         if: true && !contains(github.event.inputs.skiptests, 'valkey')
-        run: ./runtest --config io-threads 4 --config io-threads-do-reads yes --accurate --verbose --tags network --dump-logs ${{github.event.inputs.test_args}}
+        run: ./runtest --config io-threads 2 --config events-per-io-thread 0 --accurate --verbose --tags network --dump-logs ${{github.event.inputs.test_args}}
       - name: cluster tests
         if: true && !contains(github.event.inputs.skiptests, 'cluster')
-        run: ./runtest-cluster --config io-threads 4 --config io-threads-do-reads yes ${{github.event.inputs.cluster_test_args}}
+        run: ./runtest-cluster --config io-threads 2 --config events-per-io-thread 0 ${{github.event.inputs.cluster_test_args}}
 
   test-ubuntu-reclaim-cache:
     runs-on: ubuntu-latest
diff --git a/src/Makefile b/src/Makefile
index 18e5527eff..4e8c34b253 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -401,7 +401,7 @@ endif
 ENGINE_NAME=valkey
 SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX)
 ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX)
-ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o
+ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o
 ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX)
 ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o
 ENGINE_BENCHMARK_NAME=$(ENGINE_NAME)-benchmark$(PROG_SUFFIX)
diff --git a/src/ae.c b/src/ae.c
index 62031cbeea..28b50c660f 100644
--- a/src/ae.c
+++ b/src/ae.c
@@ -392,7 +392,7 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags) {
         }
 
         /* After sleep callback. */
-        if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP) eventLoop->aftersleep(eventLoop);
+        if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP) eventLoop->aftersleep(eventLoop, numevents);
 
         for (j = 0; j < numevents; j++) {
             int fd = eventLoop->fired[j].fd;
@@ -489,6 +489,6 @@ void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep
     eventLoop->beforesleep = beforesleep;
 }
 
-void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep) {
+void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeAfterSleepProc *aftersleep) {
     eventLoop->aftersleep = aftersleep;
 }
diff --git a/src/ae.h b/src/ae.h
index a6dcbce50d..3b1c96a01d 100644
--- a/src/ae.h
+++ b/src/ae.h
@@ -68,6 +68,7 @@ typedef void aeFileProc(struct aeEventLoop *eventLoop, int fd, void *clientData,
 typedef int aeTimeProc(struct aeEventLoop *eventLoop, long long id, void *clientData);
 typedef void aeEventFinalizerProc(struct aeEventLoop *eventLoop, void *clientData);
 typedef void aeBeforeSleepProc(struct aeEventLoop *eventLoop);
+typedef void aeAfterSleepProc(struct aeEventLoop *eventLoop, int numevents);
 
 /* File event structure */
 typedef struct aeFileEvent {
@@ -107,7 +108,7 @@ typedef struct aeEventLoop {
     int stop;
     void *apidata; /* This is used for polling API specific data */
     aeBeforeSleepProc *beforesleep;
-    aeBeforeSleepProc *aftersleep;
+    aeAfterSleepProc *aftersleep;
     int flags;
 } aeEventLoop;
 
@@ -130,7 +131,7 @@ int aeWait(int fd, int mask, long long milliseconds);
 void aeMain(aeEventLoop *eventLoop);
 char *aeGetApiName(void);
 void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep);
-void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep);
+void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeAfterSleepProc *aftersleep);
 int aeGetSetSize(aeEventLoop *eventLoop);
 int aeResizeSetSize(aeEventLoop *eventLoop, int setsize);
 void aeSetDontWait(aeEventLoop *eventLoop, int noWait);
diff --git a/src/blocked.c b/src/blocked.c
index 15ef39af3b..a1d5306dad 100644
--- a/src/blocked.c
+++ b/src/blocked.c
@@ -146,7 +146,7 @@ void processUnblockedClients(void) {
         if (!c->flag.blocked) {
             /* If we have a queued command, execute it now. */
             if (processPendingCommandAndInputBuffer(c) == C_ERR) {
-                c = NULL;
+                continue;
             }
         }
         beforeNextClient(c);
diff --git a/src/config.c b/src/config.c
index f8784413f9..32e6018ff2 100644
--- a/src/config.c
+++ b/src/config.c
@@ -590,6 +590,9 @@ void loadServerConfigFromString(char *config) {
     if (server.config_hz < CONFIG_MIN_HZ) server.config_hz = CONFIG_MIN_HZ;
     if (server.config_hz > CONFIG_MAX_HZ) server.config_hz = CONFIG_MAX_HZ;
 
+    /* To ensure backward compatibility when io_threads_num is according to the previous maximum of 128. */
+    if (server.io_threads_num > IO_THREADS_MAX_NUM) server.io_threads_num = IO_THREADS_MAX_NUM;
+
     sdsfreesplitres(lines, totlines);
     reading_config_file = 0;
     return;
@@ -3023,7 +3026,7 @@ standardConfig static_configs[] = {
     /* Bool configs */
     createBoolConfig("rdbchecksum", NULL, IMMUTABLE_CONFIG, server.rdb_checksum, 1, NULL, NULL),
     createBoolConfig("daemonize", NULL, IMMUTABLE_CONFIG, server.daemonize, 0, NULL, NULL),
-    createBoolConfig("io-threads-do-reads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, server.io_threads_do_reads, 0, NULL, NULL), /* Read + parse from threads? */
+    createBoolConfig("io-threads-do-reads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, server.io_threads_do_reads, 1, NULL, NULL), /* Read + parse from threads */
     createBoolConfig("always-show-logo", NULL, IMMUTABLE_CONFIG, server.always_show_logo, 0, NULL, NULL),
     createBoolConfig("protected-mode", NULL, MODIFIABLE_CONFIG, server.protected_mode, 1, NULL, NULL),
     createBoolConfig("rdbcompression", NULL, MODIFIABLE_CONFIG, server.rdb_compression, 1, NULL, NULL),
@@ -3124,6 +3127,7 @@ standardConfig static_configs[] = {
     createIntConfig("databases", NULL, IMMUTABLE_CONFIG, 1, INT_MAX, server.dbnum, 16, INTEGER_CONFIG, NULL, NULL),
     createIntConfig("port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.port, 6379, INTEGER_CONFIG, NULL, updatePort), /* TCP port. */
     createIntConfig("io-threads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, 1, 128, server.io_threads_num, 1, INTEGER_CONFIG, NULL, NULL), /* Single threaded by default */
+    createIntConfig("events-per-io-thread", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.events_per_io_thread, 2, INTEGER_CONFIG, NULL, NULL),
     createIntConfig("auto-aof-rewrite-percentage", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.aof_rewrite_perc, 100, INTEGER_CONFIG, NULL, NULL),
     createIntConfig("cluster-replica-validity-factor", "cluster-slave-validity-factor", MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_replica_validity_factor, 10, INTEGER_CONFIG, NULL, NULL), /* replica max data age factor. */
     createIntConfig("list-max-listpack-size", "list-max-ziplist-size", MODIFIABLE_CONFIG, INT_MIN, INT_MAX, server.list_max_listpack_size, -2, INTEGER_CONFIG, NULL, NULL),
diff --git a/src/config.h b/src/config.h
index 95c2e84a00..201e421976 100644
--- a/src/config.h
+++ b/src/config.h
@@ -264,6 +264,15 @@ void setproctitle(const char *fmt, ...);
 #error "Undefined or invalid BYTE_ORDER"
 #endif
 
+/* Cache line alignment */
+#ifndef CACHE_LINE_SIZE
+#if defined(__aarch64__) && defined(__APPLE__)
+#define CACHE_LINE_SIZE 128
+#else
+#define CACHE_LINE_SIZE 64
+#endif /* __aarch64__ && __APPLE__ */
+#endif /* CACHE_LINE_SIZE */
+
 #if (__i386 || __amd64 || __powerpc__) && __GNUC__
 #define GNUC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
 #if defined(__clang__)
@@ -329,7 +338,7 @@ void setcpuaffinity(const char *cpulist);
 #define HAVE_FADVISE
 #endif
 
-#define IO_THREADS_MAX_NUM 128
+#define IO_THREADS_MAX_NUM 16
 
 #ifndef CACHE_LINE_SIZE
 #if defined(__aarch64__) && defined(__APPLE__)
diff --git a/src/connection.h b/src/connection.h
index 3de581b417..c6466c2d4c 100644
--- a/src/connection.h
+++ b/src/connection.h
@@ -112,6 +112,12 @@ typedef struct ConnectionType {
     int (*has_pending_data)(void);
     int (*process_pending_data)(void);
 
+    /* Postpone update state - with IO threads & TLS we don't want the IO threads to update the event loop events - let
+     * the main-thread do it */
+    void (*postpone_update_state)(struct connection *conn, int);
+    /* Called by the main-thread */
+    void (*update_state)(struct connection *conn);
+
     /* TLS specified methods */
     sds (*get_peer_cert)(struct connection *conn);
 } ConnectionType;
@@ -456,4 +462,16 @@ static inline int connIsTLS(connection *conn) {
     return conn && conn->type == connectionTypeTls();
 }
 
+static inline void connUpdateState(connection *conn) {
+    if (conn->type->update_state) {
+        conn->type->update_state(conn);
+    }
+}
+
+static inline void connSetPostponeUpdateState(connection *conn, int on) {
+    if (conn->type->postpone_update_state) {
+        conn->type->postpone_update_state(conn, on);
+    }
+}
+
 #endif /* __REDIS_CONNECTION_H */
diff --git a/src/debug.c b/src/debug.c
index 36c425a4f4..9501b8a658 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -37,6 +37,7 @@
 #include "fpconv_dtoa.h"
 #include "cluster.h"
 #include "threads_mngr.h"
+#include "io_threads.h"
 
 #include <arpa/inet.h>
 #include <signal.h>
@@ -2159,6 +2160,7 @@ void removeSigSegvHandlers(void) {
 }
 
 void printCrashReport(void) {
+    server.crashed = 1;
     /* Log INFO and CLIENT LIST */
     logServerInfo();
 
diff --git a/src/eval.c b/src/eval.c
index e4e51f7da5..2afbf445f5 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -928,7 +928,7 @@ void ldbEndSession(client *c) {
 
     /* If it's a fork()ed session, we just exit. */
     if (ldb.forked) {
-        writeToClient(c, 0);
+        writeToClient(c);
         serverLog(LL_NOTICE, "Lua debugging session child exiting");
         exitFromChild(0);
     } else {
diff --git a/src/io_threads.c b/src/io_threads.c
new file mode 100644
index 0000000000..6149febabc
--- /dev/null
+++ b/src/io_threads.c
@@ -0,0 +1,377 @@
+#include "io_threads.h"
+
+static __thread int thread_id = 0; /* Thread local var */
+static pthread_t io_threads[IO_THREADS_MAX_NUM] = {0};
+static pthread_mutex_t io_threads_mutex[IO_THREADS_MAX_NUM];
+
+/* IO jobs queue functions - Used to send jobs from the main-thread to the IO thread. */
+typedef void (*job_handler)(void *);
+typedef struct iojob {
+    job_handler handler;
+    void *data;
+} iojob;
+
+typedef struct IOJobQueue {
+    iojob *ring_buffer;
+    size_t size;
+    _Atomic size_t head __attribute__((aligned(CACHE_LINE_SIZE))); /* Next write index for producer (main-thread) */
+    _Atomic size_t tail __attribute__((aligned(CACHE_LINE_SIZE))); /* Next read index for consumer  (IO-thread) */
+} IOJobQueue;
+IOJobQueue io_jobs[IO_THREADS_MAX_NUM] = {0};
+
+/* Initialize the job queue with a specified number of items. */
+static void IOJobQueue_init(IOJobQueue *jq, size_t item_count) {
+    debugServerAssertWithInfo(NULL, NULL, inMainThread());
+    jq->ring_buffer = zcalloc(item_count * sizeof(iojob));
+    jq->size = item_count; /* Total number of items */
+    jq->head = 0;
+    jq->tail = 0;
+}
+
+/* Clean up the job queue and free allocated memory. */
+static void IOJobQueue_cleanup(IOJobQueue *jq) {
+    debugServerAssertWithInfo(NULL, NULL, inMainThread());
+    zfree(jq->ring_buffer);
+    memset(jq, 0, sizeof(*jq));
+}
+
+static int IOJobQueue_isFull(const IOJobQueue *jq) {
+    debugServerAssertWithInfo(NULL, NULL, inMainThread());
+    size_t current_head = atomic_load_explicit(&jq->head, memory_order_relaxed);
+    /* We don't use memory_order_acquire for the tail due to performance reasons,
+     * In the worst case we will just assume wrongly the buffer is full and the main thread will do the job by itself. */
+    size_t current_tail = atomic_load_explicit(&jq->tail, memory_order_relaxed);
+    size_t next_head = (current_head + 1) % jq->size;
+    return next_head == current_tail;
+}
+
+/* Attempt to push a new job to the queue from the main thread.
+ * the caller must ensure the queue is not full before calling this function. */
+static void IOJobQueue_push(IOJobQueue *jq, job_handler handler, void *data) {
+    debugServerAssertWithInfo(NULL, NULL, inMainThread());
+    /* Assert the queue is not full - should not happen as the caller should check for it before. */
+    serverAssert(!IOJobQueue_isFull(jq));
+
+    /* No need to use atomic acquire for the head, as the main thread is the only one that writes to the head index. */
+    size_t current_head = atomic_load_explicit(&jq->head, memory_order_relaxed);
+    size_t next_head = (current_head + 1) % jq->size;
+
+    /* We store directly the job's fields to avoid allocating a new iojob structure. */
+    serverAssert(jq->ring_buffer[current_head].data == NULL);
+    serverAssert(jq->ring_buffer[current_head].handler == NULL);
+    jq->ring_buffer[current_head].data = data;
+    jq->ring_buffer[current_head].handler = handler;
+
+    /* memory_order_release to make sure the data is visible to the consumer (the IO thread). */
+    atomic_store_explicit(&jq->head, next_head, memory_order_release);
+}
+
+/* Returns the number of jobs currently available for consumption in the given job queue.
+ *
+ * This function ensures memory visibility for the available jobs by
+ * loading the head index with memory_order_acquire. */
+static size_t IOJobQueue_availableJobs(const IOJobQueue *jq) {
+    debugServerAssertWithInfo(NULL, NULL, !inMainThread());
+    /* We use memory_order_acquire to make sure the head and the job's fields are visible to the consumer (IO thread). */
+    size_t current_head = atomic_load_explicit(&jq->head, memory_order_acquire);
+    size_t current_tail = atomic_load_explicit(&jq->tail, memory_order_relaxed);
+
+    if (current_head >= current_tail) {
+        return current_head - current_tail;
+    } else {
+        return jq->size - (current_tail - current_head);
+    }
+}
+
+/* Checks if the job Queue is empty.
+ * returns 1 if the buffer is currently empty, 0 otherwise.
+ * Called by the main-thread only.
+ * This function uses relaxed memory order, so the caller needs to issue an acquire
+ * memory fence before calling this function to be sure it has the latest index
+ * from the other thread, especially when called repeatedly. */
+static int IOJobQueue_isEmpty(const IOJobQueue *jq) {
+    size_t current_head = atomic_load_explicit(&jq->head, memory_order_relaxed);
+    size_t current_tail = atomic_load_explicit(&jq->tail, memory_order_relaxed);
+    return current_head == current_tail;
+}
+
+/* Removes the next job from the given job queue by advancing the tail index.
+ * Called by the IO thread.
+ * The caller must ensure that the queue is not empty before calling this function.
+ * This function uses relaxed memory order, so the caller needs to use a release memory fence
+ * after calling this function to make sure the updated tail is visible to the producer (main thread). */
+static void IOJobQueue_removeJob(IOJobQueue *jq) {
+    debugServerAssertWithInfo(NULL, NULL, !inMainThread());
+    size_t current_tail = atomic_load_explicit(&jq->tail, memory_order_relaxed);
+    jq->ring_buffer[current_tail].data = NULL;
+    jq->ring_buffer[current_tail].handler = NULL;
+    atomic_store_explicit(&jq->tail, (current_tail + 1) % jq->size, memory_order_relaxed);
+}
+
+/* Retrieves the next job handler and data from the job queue without removal.
+ * Called by the consumer (IO thread). Caller must ensure queue is not empty.*/
+static void IOJobQueue_peek(const IOJobQueue *jq, job_handler *handler, void **data) {
+    debugServerAssertWithInfo(NULL, NULL, !inMainThread());
+    size_t current_tail = atomic_load_explicit(&jq->tail, memory_order_relaxed);
+    iojob *job = &jq->ring_buffer[current_tail];
+    *handler = job->handler;
+    *data = job->data;
+}
+
+/* End of IO job queue functions */
+
+int inMainThread(void) {
+    return thread_id == 0;
+}
+
+/* Wait until the IO-thread is done with the client */
+void waitForClientIO(client *c) {
+    /* No need to wait if the client was not offloaded to the IO thread. */
+    if (c->io_read_state == CLIENT_IDLE && c->io_write_state == CLIENT_IDLE) return;
+
+    /* Wait for read operation to complete if pending. */
+    while (c->io_read_state == CLIENT_PENDING_IO) {
+        atomic_thread_fence(memory_order_acquire);
+    }
+
+    /* Wait for write operation to complete if pending. */
+    while (c->io_write_state == CLIENT_PENDING_IO) {
+        atomic_thread_fence(memory_order_acquire);
+    }
+
+    /* Final memory barrier to ensure all changes are visible */
+    atomic_thread_fence(memory_order_acquire);
+}
+
+/** Adjusts the number of active I/O threads based on the current event load.
+ * If increase_only is non-zero, only allows increasing the number of threads.*/
+void adjustIOThreadsByEventLoad(int numevents, int increase_only) {
+    if (server.io_threads_num == 1) return; /* All I/O is being done by the main thread. */
+    debugServerAssertWithInfo(NULL, NULL, server.io_threads_num > 1);
+
+    int target_threads =
+        server.events_per_io_thread == 0 ? server.io_threads_num : numevents / server.events_per_io_thread;
+
+    target_threads = max(1, min(target_threads, server.io_threads_num));
+
+    if (target_threads == server.active_io_threads_num) return;
+
+    if (target_threads < server.active_io_threads_num) {
+        if (increase_only) return;
+
+        int threads_to_deactivate_num = server.active_io_threads_num - target_threads;
+        for (int i = 0; i < threads_to_deactivate_num; i++) {
+            int tid = server.active_io_threads_num - 1;
+            IOJobQueue *jq = &io_jobs[tid];
+            /* We can't lock the thread if it may have pending jobs */
+            if (!IOJobQueue_isEmpty(jq)) return;
+            pthread_mutex_lock(&io_threads_mutex[tid]);
+            server.active_io_threads_num--;
+        }
+    } else {
+        int threads_to_activate_num = target_threads - server.active_io_threads_num;
+        for (int i = 0; i < threads_to_activate_num; i++) {
+            pthread_mutex_unlock(&io_threads_mutex[server.active_io_threads_num]);
+            server.active_io_threads_num++;
+        }
+    }
+}
+
+static void *IOThreadMain(void *myid) {
+    /* The ID is the thread ID number (from 1 to server.io_threads_num-1). ID 0 is the main thread. */
+    long id = (long)myid;
+    char thdname[32];
+
+    serverAssert(server.io_threads_num > 0);
+    serverAssert(id > 0 && id < server.io_threads_num);
+    snprintf(thdname, sizeof(thdname), "io_thd_%ld", id);
+    valkey_set_thread_title(thdname);
+    serverSetCpuAffinity(server.server_cpulist);
+    makeThreadKillable();
+    initSharedQueryBuf();
+
+    thread_id = (int)id;
+    size_t jobs_to_process = 0;
+    IOJobQueue *jq = &io_jobs[id];
+    while (1) {
+        /* Wait for jobs */
+        for (int j = 0; j < 1000000; j++) {
+            jobs_to_process = IOJobQueue_availableJobs(jq);
+            if (jobs_to_process) break;
+        }
+
+        /* Give the main thread a chance to stop this thread. */
+        if (jobs_to_process == 0) {
+            pthread_mutex_lock(&io_threads_mutex[id]);
+            pthread_mutex_unlock(&io_threads_mutex[id]);
+            continue;
+        }
+
+        for (size_t j = 0; j < jobs_to_process; j++) {
+            job_handler handler;
+            void *data;
+            /* We keep the job in the queue until it's processed. This ensures that if the main thread checks
+             * and finds the queue empty, it can be certain that the IO thread is not currently handling any job. */
+            IOJobQueue_peek(jq, &handler, &data);
+            handler(data);
+            /* Remove the job after it was processed */
+            IOJobQueue_removeJob(jq);
+        }
+        /* Memory barrier to make sure the main thread sees the updated tail index.
+         * We do it once per loop and not per tail-update for optimization reasons.
+         * As the main-thread main concern is to check if the queue is empty, it's enough to do it once at the end. */
+        atomic_thread_fence(memory_order_release);
+    }
+    freeSharedQueryBuf();
+    return NULL;
+}
+
+#define IO_JOB_QUEUE_SIZE 2048
+/* Creates IO thread 'id' while holding its mutex, so the thread blocks once idle. Exits the process on failure. */
+static void createIOThread(int id) {
+    pthread_mutex_init(&io_threads_mutex[id], NULL);
+    IOJobQueue_init(&io_jobs[id], IO_JOB_QUEUE_SIZE);
+    pthread_mutex_lock(&io_threads_mutex[id]); /* Thread will be stopped. */
+    int err = pthread_create(&io_threads[id], NULL, IOThreadMain, (void *)(long)id);
+    if (err != 0) { /* pthread_create() returns the error number; it does not set errno. */
+        serverLog(LL_WARNING, "Fatal: Can't initialize IO thread, pthread_create failed with: %s", strerror(err));
+        exit(1);
+    }
+}
+
+/* Terminates the IO thread specified by id.
+ * Called on server shutdown */
+static void shutdownIOThread(int id) {
+    int err;
+    pthread_t tid = io_threads[id];
+    if (tid == pthread_self()) return;
+    if (tid == 0) return;
+
+    pthread_cancel(tid);
+
+    if ((err = pthread_join(tid, NULL)) != 0) {
+        serverLog(LL_WARNING, "IO thread(tid:%lu) can not be joined: %s", (unsigned long)tid, strerror(err));
+    } else {
+        serverLog(LL_NOTICE, "IO thread(tid:%lu) terminated", (unsigned long)tid);
+    }
+
+    IOJobQueue_cleanup(&io_jobs[id]);
+}
+
+void killIOThreads(void) {
+    for (int j = 1; j < server.io_threads_num; j++) { /* We don't kill thread 0, which is the main thread. */
+        shutdownIOThread(j);
+    }
+}
+
+/* Initialize the data structures needed for I/O threads. */
+void initIOThreads(void) {
+    server.active_io_threads_num = 1; /* We start with threads not active. */
+
+    /* Don't spawn any thread if the user selected a single thread:
+     * we'll handle I/O directly from the main thread. */
+    if (server.io_threads_num == 1) return;
+
+    serverAssert(server.io_threads_num <= IO_THREADS_MAX_NUM);
+
+    /* Spawn and initialize the I/O threads. */
+    for (int i = 1; i < server.io_threads_num; i++) {
+        createIOThread(i);
+    }
+}
+
+int trySendReadToIOThreads(client *c) {
+    if (server.active_io_threads_num <= 1) return C_ERR;
+    if (!server.io_threads_do_reads) return C_ERR;
+    /* If an IO thread is already reading, return C_OK to make sure the main thread will not handle it. */
+    if (c->io_read_state != CLIENT_IDLE) return C_OK;
+    /* Currently, replica/primary reads are not offloaded and are processed synchronously. */
+    if (c->flag.primary || getClientType(c) == CLIENT_TYPE_REPLICA) return C_ERR;
+    /* With Lua debug client we may call connWrite directly in the main thread */
+    if (c->flag.lua_debug) return C_ERR;
+    /* For simplicity let the main-thread handle the blocked clients */
+    if (c->flag.blocked || c->flag.unblocked) return C_ERR;
+    if (c->flag.close_asap) return C_ERR;
+    size_t tid = (c->id % (server.active_io_threads_num - 1)) + 1;
+
+    /* Handle case where client has a pending IO write job on a different thread:
+     * 1. A write job is still pending (io_write_state == CLIENT_PENDING_IO)
+     * 2. The pending job is on a different thread (c->cur_tid != tid)
+     *
+     * This situation can occur if active_io_threads_num increased since the
+     * original job assignment. In this case, we keep the job on its current
+     * thread to ensure the same thread handles the client's I/O operations. */
+    if (c->io_write_state == CLIENT_PENDING_IO && c->cur_tid != (uint8_t)tid) tid = c->cur_tid;
+
+    IOJobQueue *jq = &io_jobs[tid];
+    if (IOJobQueue_isFull(jq)) return C_ERR;
+
+    c->cur_tid = tid;
+    c->read_flags = canParseCommand(c) ? 0 : READ_FLAGS_DONT_PARSE;
+    c->read_flags |= authRequired(c) ? READ_FLAGS_AUTH_REQUIRED : 0;
+
+    c->io_read_state = CLIENT_PENDING_IO;
+    connSetPostponeUpdateState(c->conn, 1);
+    IOJobQueue_push(jq, ioThreadReadQueryFromClient, c);
+    c->flag.pending_read = 1;
+    listLinkNodeTail(server.clients_pending_io_read, &c->pending_read_list_node);
+    return C_OK;
+}
+
+/* This function attempts to offload the client's write to an I/O thread.
+ * Returns C_OK if the client's writes were successfully offloaded to an I/O thread,
+ * or C_ERR if the client is not eligible for offloading. */
+int trySendWriteToIOThreads(client *c) {
+    if (server.active_io_threads_num <= 1) return C_ERR;
+    /* The I/O thread is already writing for this client. */
+    if (c->io_write_state != CLIENT_IDLE) return C_OK;
+    /* Nothing to write */
+    if (!clientHasPendingReplies(c)) return C_ERR;
+    /* Currently, replica/primary writes are not offloaded and are processed synchronously. */
+    if (c->flag.primary || getClientType(c) == CLIENT_TYPE_REPLICA) return C_ERR;
+    /* We can't offload debugged clients as the main-thread may read at the same time  */
+    if (c->flag.lua_debug) return C_ERR;
+
+    size_t tid = (c->id % (server.active_io_threads_num - 1)) + 1;
+    /* Handle case where client has a pending IO read job on a different thread:
+     * 1. A read job is still pending (io_read_state == CLIENT_PENDING_IO)
+     * 2. The pending job is on a different thread (c->cur_tid != tid)
+     *
+     * This situation can occur if active_io_threads_num increased since the
+     * original job assignment. In this case, we keep the job on its current
+     * thread to ensure the same thread handles the client's I/O operations. */
+    if (c->io_read_state == CLIENT_PENDING_IO && c->cur_tid != (uint8_t)tid) tid = c->cur_tid;
+
+    IOJobQueue *jq = &io_jobs[tid];
+    if (IOJobQueue_isFull(jq)) return C_ERR;
+
+    c->cur_tid = tid;
+    if (c->flag.pending_write) {
+        /* We move the client to the io pending write queue */
+        listUnlinkNode(server.clients_pending_write, &c->clients_pending_write_node);
+    } else {
+        c->flag.pending_write = 1;
+    }
+    serverAssert(c->clients_pending_write_node.prev == NULL && c->clients_pending_write_node.next == NULL);
+    listLinkNodeTail(server.clients_pending_io_write, &c->clients_pending_write_node);
+
+    /* Save the last block of the reply list to io_last_reply_block and the used
+     * position to io_last_bufpos. The I/O thread will write only up to
+     * io_last_bufpos, regardless of the c->bufpos value. This is to prevent I/O
+     * threads from reading data that might be invalid in their local CPU cache. */
+    c->io_last_reply_block = listLast(c->reply);
+    if (c->io_last_reply_block) {
+        c->io_last_bufpos = ((clientReplyBlock *)listNodeValue(c->io_last_reply_block))->used;
+    } else {
+        c->io_last_bufpos = (size_t)c->bufpos;
+    }
+    serverAssert(c->bufpos > 0 || c->io_last_bufpos > 0);
+
+    /* The main-thread will update the client state after the I/O thread completes the write. */
+    connSetPostponeUpdateState(c->conn, 1);
+    c->write_flags = 0;
+    c->io_write_state = CLIENT_PENDING_IO;
+
+    IOJobQueue_push(jq, ioThreadWriteToClient, c);
+    return C_OK;
+}
diff --git a/src/io_threads.h b/src/io_threads.h
new file mode 100644
index 0000000000..30d1cdad79
--- /dev/null
+++ b/src/io_threads.h
@@ -0,0 +1,13 @@
+#ifndef IO_THREADS_H
+#define IO_THREADS_H
+
+#include "server.h"
+
+void initIOThreads(void);
+void killIOThreads(void);
+int inMainThread(void);
+int trySendReadToIOThreads(client *c);
+int trySendWriteToIOThreads(client *c);
+void adjustIOThreadsByEventLoad(int numevents, int increase_only);
+
+#endif /* IO_THREADS_H */
diff --git a/src/networking.c b/src/networking.c
index f017e7c034..b249aa61f3 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -33,6 +33,7 @@
 #include "fpconv_dtoa.h"
 #include "fmtargs.h"
 #include <strings.h>
+#include "io_threads.h"
 #include <sys/socket.h>
 #include <sys/uio.h>
 #include <math.h>
@@ -46,6 +47,8 @@ char *getClientSockname(client *c);
 int ProcessingEventsWhileBlocked = 0; /* See processEventsWhileBlocked(). */
 __thread sds thread_shared_qb = NULL;
 
+typedef enum { PARSE_OK = 0, PARSE_ERR = -1, PARSE_NEEDMORE = -2 } parseResult;
+
 /* Return the size consumed from the allocator, for the specified SDS string,
  * including internal fragmentation. This function is used in order to compute
  * the client output buffer size. */
@@ -158,6 +161,9 @@ client *createClient(connection *conn) {
     c->argv_len_sum = 0;
     c->original_argc = 0;
     c->original_argv = NULL;
+    c->nread = 0;
+    c->read_flags = 0;
+    c->write_flags = 0;
     c->cmd = c->lastcmd = c->realcmd = NULL;
     c->cur_script = NULL;
     c->multibulklen = 0;
@@ -199,7 +205,9 @@ client *createClient(connection *conn) {
     c->sockname = NULL;
     c->client_list_node = NULL;
     c->postponed_list_node = NULL;
-    c->pending_read_list_node = NULL;
+    c->io_read_state = CLIENT_IDLE;
+    c->io_write_state = CLIENT_IDLE;
+    c->nwritten = 0;
     c->client_tracking_redirection = 0;
     c->client_tracking_prefixes = NULL;
     c->last_memory_usage = 0;
@@ -210,6 +218,7 @@ client *createClient(connection *conn) {
     c->auth_callback_privdata = NULL;
     c->auth_module = NULL;
     listInitNode(&c->clients_pending_write_node, c);
+    listInitNode(&c->pending_read_list_node, c);
     c->mem_usage_bucket = NULL;
     c->mem_usage_bucket_node = NULL;
     if (conn) linkClient(c);
@@ -300,13 +309,8 @@ int prepareClientToWrite(client *c) {
     if (!c->conn) return C_ERR; /* Fake client for AOF loading. */
 
     /* Schedule the client to write the output buffers to the socket, unless
-     * it should already be setup to do so (it has already pending data).
-     *
-     * If CLIENT_PENDING_READ is set, we're in an IO thread and should
-     * not put the client in pending write queue. Instead, it will be
-     * done by handleClientsWithPendingReadsUsingThreads() upon return.
-     */
-    if (!clientHasPendingReplies(c) && io_threads_op == IO_THREADS_OP_IDLE) putClientInPendingWriteQueue(c);
+     * it should already be setup to do so (it has already pending data). */
+    if (!clientHasPendingReplies(c)) putClientInPendingWriteQueue(c);
 
     /* Authorize the caller to queue in the output buffer of this client. */
     return C_OK;
@@ -745,7 +749,8 @@ void trimReplyUnusedTailSpace(client *c) {
      * allocation), otherwise there's a high chance realloc will NOP.
      * Also, to avoid large memmove which happens as part of realloc, we only do
      * that if the used part is small.  */
-    if (tail->size - tail->used > tail->size / 4 && tail->used < PROTO_REPLY_CHUNK_BYTES) {
+    if (tail->size - tail->used > tail->size / 4 && tail->used < PROTO_REPLY_CHUNK_BYTES &&
+        c->io_write_state != CLIENT_PENDING_IO) {
         size_t usable_size;
         size_t old_size = tail->size;
         tail = zrealloc_usable(tail, tail->used + sizeof(clientReplyBlock), &usable_size);
@@ -804,8 +809,10 @@ void setDeferredReply(client *c, void *node, const char *s, size_t length) {
      * - The prev node is non-NULL and has space in it or
      * - The next node is non-NULL,
      * - It has enough room already allocated
-     * - And not too large (avoid large memmove) */
-    if (ln->prev != NULL && (prev = listNodeValue(ln->prev)) && prev->size - prev->used > 0) {
+     * - And not too large (avoid large memmove)
+     * - And the client is not in a pending I/O state */
+    if (ln->prev != NULL && (prev = listNodeValue(ln->prev)) && prev->size - prev->used > 0 &&
+        c->io_write_state != CLIENT_PENDING_IO) {
         size_t len_to_copy = prev->size - prev->used;
         if (len_to_copy > length) len_to_copy = length;
         memcpy(prev->buf + prev->used, s, len_to_copy);
@@ -819,7 +826,7 @@ void setDeferredReply(client *c, void *node, const char *s, size_t length) {
     }
 
     if (ln->next != NULL && (next = listNodeValue(ln->next)) && next->size - next->used >= length &&
-        next->used < PROTO_REPLY_CHUNK_BYTES * 4) {
+        next->used < PROTO_REPLY_CHUNK_BYTES * 4 && c->io_write_state != CLIENT_PENDING_IO) {
         memmove(next->buf + length, next->buf, next->used);
         memcpy(next->buf, s, length);
         next->used += length;
@@ -1498,15 +1505,19 @@ void unlinkClient(client *c) {
     /* Remove from the list of pending writes if needed. */
     if (c->flag.pending_write) {
         serverAssert(&c->clients_pending_write_node.next != NULL || &c->clients_pending_write_node.prev != NULL);
-        listUnlinkNode(server.clients_pending_write, &c->clients_pending_write_node);
+        if (c->io_write_state == CLIENT_IDLE) {
+            listUnlinkNode(server.clients_pending_write, &c->clients_pending_write_node);
+        } else {
+            listUnlinkNode(server.clients_pending_io_write, &c->clients_pending_write_node);
+        }
         c->flag.pending_write = 0;
     }
 
     /* Remove from the list of pending reads if needed. */
-    serverAssert(!c->conn || io_threads_op == IO_THREADS_OP_IDLE);
-    if (c->pending_read_list_node != NULL) {
-        listDelNode(server.clients_pending_read, c->pending_read_list_node);
-        c->pending_read_list_node = NULL;
+    serverAssert(c->io_read_state != CLIENT_PENDING_IO && c->io_write_state != CLIENT_PENDING_IO);
+    if (c->flag.pending_read) {
+        listUnlinkNode(server.clients_pending_io_read, &c->pending_read_list_node);
+        c->flag.pending_read = 0;
     }
 
 
@@ -1585,6 +1596,9 @@ void freeClient(client *c) {
         return;
     }
 
+    /* Wait for IO operations to be done before proceeding */
+    waitForClientIO(c);
+
     /* For connected clients, call the disconnection event of modules hooks. */
     if (c->conn) {
         moduleFireServerEvent(VALKEYMODULE_EVENT_CLIENT_CHANGE, VALKEYMODULE_SUBEVENT_CLIENT_CHANGE_DISCONNECTED, c);
@@ -1735,22 +1749,9 @@ void freeClient(client *c) {
  * a context where calling freeClient() is not possible, because the client
  * should be valid for the continuation of the flow of the program. */
 void freeClientAsync(client *c) {
-    /* We need to handle concurrent access to the server.clients_to_close list
-     * only in the freeClientAsync() function, since it's the only function that
-     * may access the list while the server uses I/O threads. All the other accesses
-     * are in the context of the main thread while the other threads are
-     * idle. */
     if (c->flag.close_asap || c->flag.script) return;
     c->flag.close_asap = 1;
-    if (server.io_threads_num == 1) {
-        /* no need to bother with locking if there's just one thread (the main thread) */
-        listAddNodeTail(server.clients_to_close, c);
-        return;
-    }
-    static pthread_mutex_t async_free_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
-    pthread_mutex_lock(&async_free_queue_mutex);
     listAddNodeTail(server.clients_to_close, c);
-    pthread_mutex_unlock(&async_free_queue_mutex);
 }
 
 /* Log errors for invalid use and free the client in async way.
@@ -1769,31 +1770,90 @@ void logInvalidUseAndFreeClientAsync(client *c, const char *fmt, ...) {
     freeClientAsync(c);
 }
 
-/* Perform processing of the client before moving on to processing the next client
- * this is useful for performing operations that affect the global state but can't
- * wait until we're done with all clients. In other words can't wait until beforeSleep()
- * return C_ERR in case client is no longer valid after call.
- * The input client argument: c, may be NULL in case the previous client was
- * freed before the call. */
-int beforeNextClient(client *c) {
+/* Resets the shared query buffer used by the given client.
+ * If any data remained in the buffer, the client will take ownership of the buffer
+ * and a new empty buffer will be allocated for the shared buffer. */
+void resetSharedQueryBuf(client *c) {
+    serverAssert(c->querybuf == thread_shared_qb);
+    size_t remaining = sdslen(c->querybuf) - c->qb_pos;
+
+    if (remaining > 0) {
+        /* Let the client take ownership of the shared buffer. */
+        initSharedQueryBuf();
+        return;
+    }
+
+    c->querybuf = NULL;
+    sdsclear(thread_shared_qb);
+    c->qb_pos = 0;
+}
+
+/* Trims the client query buffer to the current position. */
+void trimClientQueryBuffer(client *c) {
+    if (c->querybuf == thread_shared_qb) {
+        resetSharedQueryBuf(c);
+    }
+
+    if (c->querybuf == NULL) {
+        return;
+    }
+
+    serverAssert(c->qb_pos <= sdslen(c->querybuf));
+
+    if (c->qb_pos > 0) {
+        sdsrange(c->querybuf, c->qb_pos, -1);
+        c->qb_pos = 0;
+    }
+}
+
+/* Perform processing of the client before moving on to processing the next client.
+ * This is useful for performing operations that affect the global state but can't
+ * wait until we're done with all clients. In other words, it can't wait until beforeSleep().
+ * With IO threads enabled, this function offloads the write to the IO threads if possible. */
+void beforeNextClient(client *c) {
     /* Notice, this code is also called from 'processUnblockedClients'.
      * But in case of a module blocked client (see RM_Call 'K' flag) we do not reach this code path.
      * So whenever we change the code here we need to consider if we need this change on module
      * blocked client as well */
 
-    /* Skip the client processing if we're in an IO thread, in that case we'll perform
-       this operation later (this function is called again) in the fan-in stage of the threading mechanism */
-    if (io_threads_op != IO_THREADS_OP_IDLE) return C_OK;
+    /* Trim the query buffer to the current position. */
+    if (c->flag.primary) {
+        /* If the client is a primary, trim the querybuf to repl_applied,
+         * since primary client is very special, its querybuf not only
+         * used to parse command, but also proxy to sub-replicas.
+         *
+         * Here are some scenarios we cannot trim to qb_pos:
+         * 1. we don't receive complete command from primary
+         * 2. primary client blocked cause of client pause
+         * 3. io threads operate read, primary client flagged with CLIENT_PENDING_COMMAND
+         *
+         * In these scenarios, qb_pos points to the part of the current command
+         * or the beginning of next command, and the current command is not applied yet,
+         * so the repl_applied is not equal to qb_pos. */
+        if (c->repl_applied) {
+            sdsrange(c->querybuf, c->repl_applied, -1);
+            c->qb_pos -= c->repl_applied;
+            c->repl_applied = 0;
+        }
+    } else {
+        trimClientQueryBuffer(c);
+    }
     /* Handle async frees */
     /* Note: this doesn't make the server.clients_to_close list redundant because of
      * cases where we want an async free of a client other than myself. For example
      * in ACL modifications we disconnect clients authenticated to non-existent
      * users (see ACL LOAD). */
-    if (c && (c->flag.close_asap)) {
+    if (c->flag.close_asap) {
         freeClient(c);
-        return C_ERR;
+        return;
+    }
+
+    updateClientMemUsageAndBucket(c);
+    /* If IO threads are enabled, try to write the reply immediately instead of waiting for beforeSleep,
+     * unless aof_fsync is set to always, in which case we need to wait for beforeSleep after writing the aof buffer. */
+    if (server.aof_fsync != AOF_FSYNC_ALWAYS) {
+        trySendWriteToIOThreads(c);
     }
-    return C_OK;
 }
 
 /* Free the clients marked as CLOSE_ASAP, return the number of clients
@@ -1827,57 +1887,204 @@ client *lookupClientByID(uint64_t id) {
     return c;
 }
 
+void writeToReplica(client *c) {
+    /* Can be called from main-thread only as replica write offload is not supported yet */
+    serverAssert(inMainThread());
+    int nwritten = 0;
+    serverAssert(c->bufpos == 0 && listLength(c->reply) == 0);
+    while (clientHasPendingReplies(c)) {
+        replBufBlock *o = listNodeValue(c->ref_repl_buf_node);
+        serverAssert(o->used >= c->ref_block_pos);
+
+        /* Send current block if it is not fully sent. */
+        if (o->used > c->ref_block_pos) {
+            nwritten = connWrite(c->conn, o->buf + c->ref_block_pos, o->used - c->ref_block_pos);
+            if (nwritten <= 0) {
+                c->write_flags |= WRITE_FLAGS_WRITE_ERROR;
+                return;
+            }
+            c->nwritten += nwritten;
+            c->ref_block_pos += nwritten;
+        }
+
+        /* If we fully sent the object on head, go to the next one. */
+        listNode *next = listNextNode(c->ref_repl_buf_node);
+        if (next && c->ref_block_pos == o->used) {
+            o->refcount--;
+            ((replBufBlock *)(listNodeValue(next)))->refcount++;
+            c->ref_repl_buf_node = next;
+            c->ref_block_pos = 0;
+            incrementalTrimReplicationBacklog(REPL_BACKLOG_TRIM_BLOCKS_PER_CALL);
+        }
+    }
+}
+
 /* This function should be called from _writeToClient when the reply list is not empty,
  * it gathers the scattered buffers from reply list and sends them away with connWritev.
- * If we write successfully, it returns C_OK, otherwise, C_ERR is returned,
- * and 'nwritten' is an output parameter, it means how many bytes server write
- * to client. */
-static int _writevToClient(client *c, ssize_t *nwritten) {
+ * If we write successfully, it returns C_OK, otherwise, C_ERR is returned.
+ * Sets c->nwritten to the number of bytes the server wrote to the client.
+ * Can be called from the main thread or an I/O thread. */
+static int writevToClient(client *c) {
     int iovcnt = 0;
     int iovmax = min(IOV_MAX, c->conn->iovcnt);
-    struct iovec iov[iovmax];
-    size_t iov_bytes_len = 0;
+    struct iovec iov_arr[iovmax];
+    struct iovec *iov = iov_arr;
+    ssize_t bufpos, iov_bytes_len = 0;
+    listNode *lastblock;
+
+    if (inMainThread()) {
+        lastblock = listLast(c->reply);
+        bufpos = c->bufpos;
+    } else {
+        lastblock = c->io_last_reply_block;
+        bufpos = lastblock ? (size_t)c->bufpos : c->io_last_bufpos;
+    }
+
     /* If the static reply buffer is not empty,
      * add it to the iov array for writev() as well. */
-    if (c->bufpos > 0) {
+    if (bufpos > 0) {
         iov[iovcnt].iov_base = c->buf + c->sentlen;
-        iov[iovcnt].iov_len = c->bufpos - c->sentlen;
+        iov[iovcnt].iov_len = bufpos - c->sentlen;
         iov_bytes_len += iov[iovcnt++].iov_len;
     }
     /* The first node of reply list might be incomplete from the last call,
      * thus it needs to be calibrated to get the actual data address and length. */
-    size_t offset = c->bufpos > 0 ? 0 : c->sentlen;
+    size_t sentlen = bufpos > 0 ? 0 : c->sentlen;
     listIter iter;
     listNode *next;
     clientReplyBlock *o;
+    size_t used;
     listRewind(c->reply, &iter);
     while ((next = listNext(&iter)) && iovcnt < iovmax && iov_bytes_len < NET_MAX_WRITES_PER_EVENT) {
         o = listNodeValue(next);
-        if (o->used == 0) { /* empty node, just release it and skip. */
-            c->reply_bytes -= o->size;
-            listDelNode(c->reply, next);
-            offset = 0;
+
+        used = o->used;
+        /* Use c->io_last_bufpos as the currently used portion of the block.
+         * We use io_last_bufpos instead of o->used to ensure that we only access data guaranteed to be visible to the
+         * current thread. Using o->used, which may have been updated by the main thread, could lead to accessing data
+         * that may not yet be visible to the current thread. */
+        if (!inMainThread() && next == lastblock) used = c->io_last_bufpos;
+
+        if (used == 0) { /* empty node, skip over it. */
+            if (next == lastblock) break;
+            sentlen = 0;
             continue;
         }
 
-        iov[iovcnt].iov_base = o->buf + offset;
-        iov[iovcnt].iov_len = o->used - offset;
+        iov[iovcnt].iov_base = o->buf + sentlen;
+        iov[iovcnt].iov_len = used - sentlen;
         iov_bytes_len += iov[iovcnt++].iov_len;
-        offset = 0;
+
+        sentlen = 0;
+        if (next == lastblock) break;
     }
-    if (iovcnt == 0) return C_OK;
-    *nwritten = connWritev(c->conn, iov, iovcnt);
-    if (*nwritten <= 0) return C_ERR;
+
+    serverAssert(iovcnt != 0);
+
+    ssize_t totwritten = 0;
+    while (1) {
+        int nwritten = connWritev(c->conn, iov, iovcnt);
+        if (nwritten <= 0) {
+            c->write_flags |= WRITE_FLAGS_WRITE_ERROR;
+            totwritten = totwritten > 0 ? totwritten : nwritten;
+            break;
+        }
+        totwritten += nwritten;
+
+        if (totwritten == iov_bytes_len) break;
+
+        if (totwritten > NET_MAX_WRITES_PER_EVENT) {
+            /* Note that we avoid sending more than NET_MAX_WRITES_PER_EVENT
+             * bytes, since it's a good idea to serve
+             * other clients as well, even if a very large request comes from
+             * super fast link that is always able to accept data (in real world
+             * scenario think about 'KEYS *' against the loopback interface).
+             *
+             * However if we are over the maxmemory limit we ignore that and
+             * just deliver as much data as it is possible to deliver. */
+            int ignore_max_write_limit = server.maxmemory > 0 && zmalloc_used_memory() > server.maxmemory;
+            if (!ignore_max_write_limit) {
+                break;
+            }
+        }
+
+        /* proceed to the unwritten blocks */
+        while (nwritten > 0) {
+            if ((size_t)nwritten < iov[0].iov_len) {
+                iov[0].iov_base = (char *)iov[0].iov_base + nwritten;
+                iov[0].iov_len -= nwritten;
+                break;
+            }
+            nwritten -= iov[0].iov_len;
+            iov++;
+            iovcnt--;
+        }
+    }
+
+    c->nwritten = totwritten;
+    return totwritten > 0 ? C_OK : C_ERR;
+}
+
+/* Writes the pending output buffers of a non-replica client; called by writeToClient and, per the !inMainThread()
+ * branch, presumably from I/O threads too. Returns C_OK if any bytes were written, otherwise C_ERR; c->nwritten is
+ * set to the byte count (or the failing result) and WRITE_FLAGS_WRITE_ERROR is raised when a write fails. */
+int _writeToClient(client *c) {
+    listNode *lastblock;
+    size_t bufpos;
+
+    if (inMainThread()) {
+        /* In the main thread, access bufpos and lastblock directly */
+        lastblock = listLast(c->reply);
+        bufpos = (size_t)c->bufpos;
+    } else {
+        /* If there is a last block, use bufpos directly; otherwise, use io_last_bufpos */
+        bufpos = c->io_last_reply_block ? (size_t)c->bufpos : c->io_last_bufpos;
+        lastblock = c->io_last_reply_block;
+    }
+
+    /* If the reply list is not empty, use writev to save system calls and TCP packets */
+    if (lastblock) return writevToClient(c);
+
+    ssize_t bytes_to_write = bufpos - c->sentlen;
+    ssize_t tot_written = 0;
+    /* c->sentlen is not advanced inside this loop, so offset each retry by the bytes already written. */
+    while (tot_written < bytes_to_write) {
+        int nwritten = connWrite(c->conn, c->buf + c->sentlen + tot_written, bytes_to_write - tot_written);
+        if (nwritten <= 0) {
+            c->write_flags |= WRITE_FLAGS_WRITE_ERROR;
+            tot_written = tot_written > 0 ? tot_written : nwritten;
+            break;
+        }
+        tot_written += nwritten;
+    }
+
+    c->nwritten = tot_written;
+    return tot_written > 0 ? C_OK : C_ERR;
+}
+
+static void postWriteToReplica(client *c) {
+    serverAssert(inMainThread()); /* Replica writes are main-thread only (see writeToReplica). */
+    if (c->nwritten > 0) server.stat_net_repl_output_bytes += c->nwritten; /* Client total is added by the caller. */
+}
+
+static void _postWriteToClient(client *c) {
+    if (c->nwritten <= 0) return;
+
+    listIter iter;
+    listNode *next;
+    clientReplyBlock *o;
+
+    server.stat_net_output_bytes += c->nwritten;
 
     /* Locate the new node which has leftover data and
      * release all nodes in front of it. */
-    ssize_t remaining = *nwritten;
-    if (c->bufpos > 0) { /* deal with static reply buffer first. */
+    ssize_t remaining = c->nwritten;
+    if (c->bufpos > 0) { /* Deal with static reply buffer first. */
         int buf_len = c->bufpos - c->sentlen;
-        c->sentlen += remaining;
+        c->sentlen += c->nwritten;
         /* If the buffer was sent, set bufpos to zero to continue with
          * the remainder of the reply. */
-        if (remaining >= buf_len) {
+        if (c->nwritten >= buf_len) {
             c->bufpos = 0;
             c->sentlen = 0;
         }
@@ -1896,116 +2103,31 @@ static int _writevToClient(client *c, ssize_t *nwritten) {
         listDelNode(c->reply, next);
         c->sentlen = 0;
     }
-
-    return C_OK;
-}
-
-/* This function does actual writing output buffers to different types of
- * clients, it is called by writeToClient.
- * If we write successfully, it returns C_OK, otherwise, C_ERR is returned,
- * and 'nwritten' is an output parameter, it means how many bytes server write
- * to client. */
-int _writeToClient(client *c, ssize_t *nwritten) {
-    *nwritten = 0;
-    if (getClientType(c) == CLIENT_TYPE_REPLICA) {
-        serverAssert(c->bufpos == 0 && listLength(c->reply) == 0);
-
-        replBufBlock *o = listNodeValue(c->ref_repl_buf_node);
-        serverAssert(o->used >= c->ref_block_pos);
-        /* Send current block if it is not fully sent. */
-        if (o->used > c->ref_block_pos) {
-            *nwritten = connWrite(c->conn, o->buf + c->ref_block_pos, o->used - c->ref_block_pos);
-            if (*nwritten <= 0) return C_ERR;
-            c->ref_block_pos += *nwritten;
-        }
-
-        /* If we fully sent the object on head, go to the next one. */
-        listNode *next = listNextNode(c->ref_repl_buf_node);
-        if (next && c->ref_block_pos == o->used) {
-            o->refcount--;
-            ((replBufBlock *)(listNodeValue(next)))->refcount++;
-            c->ref_repl_buf_node = next;
-            c->ref_block_pos = 0;
-            incrementalTrimReplicationBacklog(REPL_BACKLOG_TRIM_BLOCKS_PER_CALL);
-        }
-        return C_OK;
-    }
-
-    /* When the reply list is not empty, it's better to use writev to save us some
-     * system calls and TCP packets. */
-    if (listLength(c->reply) > 0) {
-        int ret = _writevToClient(c, nwritten);
-        if (ret != C_OK) return ret;
-
-        /* If there are no longer objects in the list, we expect
-         * the count of reply bytes to be exactly zero. */
-        if (listLength(c->reply) == 0) serverAssert(c->reply_bytes == 0);
-    } else if (c->bufpos > 0) {
-        *nwritten = connWrite(c->conn, c->buf + c->sentlen, c->bufpos - c->sentlen);
-        if (*nwritten <= 0) return C_ERR;
-        c->sentlen += *nwritten;
-
-        /* If the buffer was sent, set bufpos to zero to continue with
-         * the remainder of the reply. */
-        if ((int)c->sentlen == c->bufpos) {
-            c->bufpos = 0;
-            c->sentlen = 0;
-        }
-    }
-
-    return C_OK;
 }
 
-/* Write data in output buffers to client. Return C_OK if the client
- * is still valid after the call, C_ERR if it was freed because of some
- * error.  If handler_installed is set, it will attempt to clear the
- * write event.
- *
- * This function is called by threads, but always with handler_installed
- * set to 0. So when handler_installed is set to 0 the function must be
- * thread safe. */
-int writeToClient(client *c, int handler_installed) {
+/* Updates the client's memory usage and bucket and server stats after writing.
+ * If a write handler is installed, it will attempt to clear the write event.
+ * If the client is no longer valid, it will return C_ERR, otherwise C_OK. */
+int postWriteToClient(client *c) {
+    c->io_last_reply_block = NULL;
+    c->io_last_bufpos = 0;
     /* Update total number of writes on server */
-    atomic_fetch_add_explicit(&server.stat_total_writes_processed, 1, memory_order_relaxed);
-
-    ssize_t nwritten = 0, totwritten = 0;
-
-    while (clientHasPendingReplies(c)) {
-        int ret = _writeToClient(c, &nwritten);
-        if (ret == C_ERR) break;
-        totwritten += nwritten;
-        /* Note that we avoid to send more than NET_MAX_WRITES_PER_EVENT
-         * bytes, in a single threaded server it's a good idea to serve
-         * other clients as well, even if a very large request comes from
-         * super fast link that is always able to accept data (in real world
-         * scenario think about 'KEYS *' against the loopback interface).
-         *
-         * However if we are over the maxmemory limit we ignore that and
-         * just deliver as much data as it is possible to deliver.
-         *
-         * Moreover, we also send as much as possible if the client is
-         * a replica or a monitor (otherwise, on high-speed traffic, the
-         * replication/output buffer will grow indefinitely) */
-        if (totwritten > NET_MAX_WRITES_PER_EVENT &&
-            (server.maxmemory == 0 || zmalloc_used_memory() < server.maxmemory) && !c->flag.replica)
-            break;
-    }
-
+    server.stat_total_writes_processed++;
     if (getClientType(c) == CLIENT_TYPE_REPLICA) {
-        atomic_fetch_add_explicit(&server.stat_net_repl_output_bytes, totwritten, memory_order_relaxed);
+        postWriteToReplica(c);
     } else {
-        atomic_fetch_add_explicit(&server.stat_net_output_bytes, totwritten, memory_order_relaxed);
+        _postWriteToClient(c);
     }
-    c->net_output_bytes += totwritten;
 
-    if (nwritten == -1) {
+    if (c->write_flags & WRITE_FLAGS_WRITE_ERROR) {
         if (connGetState(c->conn) != CONN_STATE_CONNECTED) {
             serverLog(LL_VERBOSE, "Error writing to client: %s", connGetLastError(c->conn));
             freeClientAsync(c);
             return C_ERR;
         }
     }
-    if (totwritten > 0) {
+    if (c->nwritten > 0) {
+        c->net_output_bytes += c->nwritten;
         /* For clients representing primaries we don't count sending data
          * as an interaction, since we always send REPLCONF ACK commands
          * that take some time to just fill the socket output buffer.
@@ -2014,12 +2136,7 @@ int writeToClient(client *c, int handler_installed) {
     }
     if (!clientHasPendingReplies(c)) {
         c->sentlen = 0;
-        /* Note that writeToClient() is called in a threaded way, but
-         * aeDeleteFileEvent() is not thread safe: however writeToClient()
-         * is always called with handler_installed set to 0 from threads
-         * so we are fine. */
-        if (handler_installed) {
-            serverAssert(io_threads_op == IO_THREADS_OP_IDLE);
+        if (connHasWriteHandler(c->conn)) {
             connSetWriteHandler(c->conn, NULL);
         }
 
@@ -2029,17 +2146,239 @@ int writeToClient(client *c, int handler_installed) {
             return C_ERR;
         }
     }
-    /* Update client's memory usage after writing.
-     * Since this isn't thread safe we do this conditionally. In case of threaded writes this is done in
-     * handleClientsWithPendingWritesUsingThreads(). */
-    if (io_threads_op == IO_THREADS_OP_IDLE) updateClientMemUsageAndBucket(c);
+    /* Update client's memory usage after writing. */
+    updateClientMemUsageAndBucket(c);
     return C_OK;
 }
 
+/* Write data in output buffers to client. Return C_OK if the client
+ * is still valid after the call, C_ERR if it was freed because of some
+ * error.
+ *
+ * This function is called by main-thread only */
+int writeToClient(client *c) {
+    if (c->io_write_state != CLIENT_IDLE || c->io_read_state != CLIENT_IDLE) return C_OK;
+
+    c->nwritten = 0;
+    c->write_flags = 0;
+
+    if (getClientType(c) == CLIENT_TYPE_REPLICA) {
+        writeToReplica(c);
+    } else {
+        _writeToClient(c);
+    }
+
+    return postWriteToClient(c);
+}
+
 /* Write event handler. Just send data to the client. */
 void sendReplyToClient(connection *conn) {
     client *c = connGetPrivateData(conn);
-    writeToClient(c, 1);
+    if (trySendWriteToIOThreads(c) == C_OK) return;
+    writeToClient(c);
+}
+
+void handleQbLimitReached(client *c) {
+    sds ci = catClientInfoString(sdsempty(), c), bytes = sdsempty();
+    bytes = sdscatrepr(bytes, c->querybuf, 64);
+    serverLog(LL_WARNING, "Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci,
+              bytes);
+    sdsfree(ci);
+    sdsfree(bytes);
+    freeClientAsync(c);
+    server.stat_client_qbuf_limit_disconnections++;
+}
+
+/* Handle read errors and update statistics.
+ *
+ * Called only from the main thread.
+ * If the read was done in an I/O thread, this function is invoked after the
+ * read job has completed, in the main thread context.
+ *
+ * Returns:
+ *   - C_OK if the querybuf can be further processed.
+ *   - C_ERR if not. */
+int handleReadResult(client *c) {
+    serverAssert(inMainThread());
+    server.stat_total_reads_processed++;
+    if (c->nread <= 0) {
+        if (c->nread == -1) {
+            if (connGetState(c->conn) != CONN_STATE_CONNECTED) {
+                serverLog(LL_VERBOSE, "Reading from client: %s", connGetLastError(c->conn));
+                freeClientAsync(c);
+            }
+        } else if (c->nread == 0) {
+            if (server.verbosity <= LL_VERBOSE) {
+                sds info = catClientInfoString(sdsempty(), c);
+                serverLog(LL_VERBOSE, "Client closed connection %s", info);
+                sdsfree(info);
+            }
+            freeClientAsync(c);
+        }
+        return C_ERR;
+    }
+
+    c->last_interaction = server.unixtime;
+    c->net_input_bytes += c->nread;
+    if (c->flag.primary) {
+        c->read_reploff += c->nread;
+        server.stat_net_repl_input_bytes += c->nread;
+    } else {
+        server.stat_net_input_bytes += c->nread;
+    }
+
+    /* Handle QB limit */
+    if (c->read_flags & READ_FLAGS_QB_LIMIT_REACHED) {
+        handleQbLimitReached(c);
+        return C_ERR;
+    }
+    return C_OK;
+}
+
+/* Handle the parse error set in c->read_flags: add an error reply (except to a primary) and record the protocol error. */
+void handleParseError(client *c) {
+    int flags = c->read_flags;
+    if (flags & READ_FLAGS_ERROR_BIG_INLINE_REQUEST) {
+        addReplyError(c, "Protocol error: too big inline request");
+        setProtocolError("too big inline request", c);
+    } else if (flags & READ_FLAGS_ERROR_BIG_MULTIBULK) {
+        addReplyError(c, "Protocol error: too big mbulk count string");
+        setProtocolError("too big mbulk count string", c);
+    } else if (flags & READ_FLAGS_ERROR_INVALID_MULTIBULK_LEN) {
+        addReplyError(c, "Protocol error: invalid multibulk length");
+        setProtocolError("invalid mbulk count", c);
+    } else if (flags & READ_FLAGS_ERROR_UNAUTHENTICATED_MULTIBULK_LEN) {
+        addReplyError(c, "Protocol error: unauthenticated multibulk length");
+        setProtocolError("unauth mbulk count", c);
+    } else if (flags & READ_FLAGS_ERROR_UNAUTHENTICATED_BULK_LEN) {
+        addReplyError(c, "Protocol error: unauthenticated bulk length");
+        setProtocolError("unauth bulk length", c);
+    } else if (flags & READ_FLAGS_ERROR_BIG_BULK_COUNT) {
+        addReplyError(c, "Protocol error: too big bulk count string");
+        setProtocolError("too big bulk count string", c);
+    } else if (flags & READ_FLAGS_ERROR_MBULK_UNEXPECTED_CHARACTER) {
+        addReplyErrorFormat(c, "Protocol error: expected '$', got '%c'", c->querybuf[c->qb_pos]);
+        setProtocolError("expected $ but got something else", c);
+    } else if (flags & READ_FLAGS_ERROR_MBULK_INVALID_BULK_LEN) {
+        addReplyError(c, "Protocol error: invalid bulk length");
+        setProtocolError("invalid bulk length", c);
+    } else if (flags & READ_FLAGS_ERROR_UNBALANCED_QUOTES) {
+        addReplyError(c, "Protocol error: unbalanced quotes in request");
+        setProtocolError("unbalanced quotes in inline request", c);
+    } else if (flags & READ_FLAGS_ERROR_UNEXPECTED_INLINE_FROM_PRIMARY) {
+        serverLog(LL_WARNING, "WARNING: Receiving inline protocol from primary, primary stream corruption? Closing the "
+                              "primary connection and discarding the cached primary.");
+        setProtocolError("Primary using the inline protocol. Desync?", c);
+    } else {
+        serverAssertWithInfo(c, NULL, 0 && "Unknown parsing error"); /* a bare string literal is always true */
+    }
+}
+
+int isParsingError(client *c) {
+    return c->read_flags & (READ_FLAGS_ERROR_BIG_INLINE_REQUEST | READ_FLAGS_ERROR_BIG_MULTIBULK |
+                            READ_FLAGS_ERROR_INVALID_MULTIBULK_LEN | READ_FLAGS_ERROR_UNAUTHENTICATED_MULTIBULK_LEN |
+                            READ_FLAGS_ERROR_UNAUTHENTICATED_BULK_LEN | READ_FLAGS_ERROR_MBULK_INVALID_BULK_LEN |
+                            READ_FLAGS_ERROR_BIG_BULK_COUNT | READ_FLAGS_ERROR_MBULK_UNEXPECTED_CHARACTER |
+                            READ_FLAGS_ERROR_UNEXPECTED_INLINE_FROM_PRIMARY | READ_FLAGS_ERROR_UNBALANCED_QUOTES);
+}
+
+/* This function is called after the query-buffer was parsed.
+ * It is used to handle parsing errors and to update the client state.
+ * Returns PARSE_ERR on a protocol error, PARSE_NEEDMORE if the command is incomplete, otherwise PARSE_OK. */
+parseResult handleParseResults(client *c) {
+    if (isParsingError(c)) {
+        handleParseError(c);
+        return PARSE_ERR;
+    }
+
+    if (c->read_flags & READ_FLAGS_INLINE_ZERO_QUERY_LEN && getClientType(c) == CLIENT_TYPE_REPLICA) {
+        c->repl_ack_time = server.unixtime;
+    }
+
+    if (c->read_flags & READ_FLAGS_INLINE_ZERO_QUERY_LEN) {
+        /* In case the client's query was an empty line, we ignore it and proceed to process the rest of the buffer,
+         * if any. */
+        resetClient(c);
+        return PARSE_OK;
+    }
+
+    if (c->read_flags & READ_FLAGS_PARSING_NEGATIVE_MBULK_LEN) {
+        /* Multibulk processing could see a <= 0 length. */
+        resetClient(c);
+        return PARSE_OK;
+    }
+
+    if (c->read_flags & READ_FLAGS_PARSING_COMPLETED) {
+        return PARSE_OK;
+    } else {
+        return PARSE_NEEDMORE;
+    }
+}
+
+/* Process the completion of an IO write operation for a client.
+ * This function handles various post-write tasks, including updating client state.
+ * Returns 1 if the client was processed, 0 if processing was skipped. */
+int processClientIOWriteDone(client *c) {
+    /* memory barrier acquire to get the latest client state */
+    atomic_thread_fence(memory_order_acquire);
+    /* If a client is protected, don't proceed to check the write results as it may trigger conn close. */
+    if (c->flag.protected) return 0;
+
+    listUnlinkNode(server.clients_pending_io_write, &c->clients_pending_write_node);
+    c->flag.pending_write = 0;
+    c->io_write_state = CLIENT_IDLE;
+
+    /* Don't post-process-writes to clients that are going to be closed anyway. */
+    if (c->flag.close_asap) return 0;
+
+    /* Update processed count on server */
+    server.stat_io_writes_processed += 1;
+
+    connSetPostponeUpdateState(c->conn, 0);
+    connUpdateState(c->conn);
+    if (postWriteToClient(c) == C_ERR) {
+        return 1;
+    }
+
+    if (clientHasPendingReplies(c)) {
+        if (c->write_flags & WRITE_FLAGS_WRITE_ERROR) {
+            /* Install the write handler if there are pending writes in some of the clients as a result of not being
+             * able to write everything in one go. */
+            installClientWriteHandler(c);
+        } else {
+            /* If we can send the client to the I/O thread, let it handle the write. */
+            if (trySendWriteToIOThreads(c) == C_OK) return 1;
+            /* Try again in the next eventloop */
+            putClientInPendingWriteQueue(c);
+        }
+    }
+
+    return 1;
+}
+
+/* This function handles the post-processing of I/O write operations that have been
+ * completed for clients. It iterates through the list of clients with pending I/O
+ * writes and performs necessary actions based on their current state.
+ *
+ * Returns the number of clients processed during this function call. */
+int processIOThreadsWriteDone(void) {
+    if (listLength(server.clients_pending_io_write) == 0) return 0;
+    int processed = 0;
+    listNode *ln;
+
+    listNode *next = listFirst(server.clients_pending_io_write);
+    while (next) {
+        ln = next;
+        next = listNextNode(ln);
+        client *c = listNodeValue(ln);
+
+        /* Client is still waiting for a pending I/O - skip it */
+        if (c->io_write_state == CLIENT_PENDING_IO || c->io_read_state == CLIENT_PENDING_IO) continue;
+
+        processed += processClientIOWriteDone(c);
+    }
+
+    return processed;
 }
 
 /* This function is called just before entering the event loop, in the hope
@@ -2047,10 +2386,16 @@ void sendReplyToClient(connection *conn) {
  * need to use a syscall in order to install the writable event handler,
  * get it called, and so forth. */
 int handleClientsWithPendingWrites(void) {
+    int processed = 0;
+    int pending_writes = listLength(server.clients_pending_write);
+    if (pending_writes == 0) return processed; /* Return ASAP if there are no clients. */
+
+    /* Adjust the number of I/O threads based on the number of pending writes. This is required in case
+     * pending_writes > poll_events (for example in pubsub). */
+    adjustIOThreadsByEventLoad(pending_writes, 1);
+
     listIter li;
     listNode *ln;
-    int processed = listLength(server.clients_pending_write);
-
     listRewind(server.clients_pending_write, &li);
     while ((ln = listNext(&li))) {
         client *c = listNodeValue(ln);
@@ -2064,8 +2409,18 @@ int handleClientsWithPendingWrites(void) {
         /* Don't write to clients that are going to be closed anyway. */
         if (c->flag.close_asap) continue;
 
+        if (!clientHasPendingReplies(c)) continue;
+
+        /* If we can send the client to the I/O thread, let it handle the write. */
+        if (trySendWriteToIOThreads(c) == C_OK) continue;
+
+        /* We can't write to the client while IO operation is in progress. */
+        if (c->io_write_state != CLIENT_IDLE || c->io_read_state != CLIENT_IDLE) continue;
+
+        processed++;
+
         /* Try to write buffers to the client socket. */
-        if (writeToClient(c, 0) == C_ERR) continue;
+        if (writeToClient(c) == C_ERR) continue;
 
         /* If after the synchronous writes above we still have data to
          * output to the client, we need to install the writable handler. */
@@ -2113,52 +2468,21 @@ void resetClient(client *c) {
     c->flag.reply_skip = 0;
     if (c->flag.reply_skip_next) {
         c->flag.reply_skip = 1;
-        c->flag.reply_skip_next = 0;
-    }
-}
-
-/* Initializes the shared query buffer to a new sds with the default capacity */
-void initSharedQueryBuf(void) {
-    thread_shared_qb = sdsnewlen(NULL, PROTO_IOBUF_LEN);
-    sdsclear(thread_shared_qb);
-}
-
-/* Resets the shared query buffer used by the given client.
- * If any data remained in the buffer, the client will take ownership of the buffer
- * and a new empty buffer will be allocated for the shared buffer. */
-void resetSharedQueryBuf(client *c) {
-    serverAssert(c->querybuf == thread_shared_qb);
-    size_t remaining = sdslen(c->querybuf) - c->qb_pos;
-
-    if (remaining > 0) {
-        /* Let the client take ownership of the shared buffer. */
-        initSharedQueryBuf();
-        return;
-    }
-
-    c->querybuf = NULL;
-    sdsclear(thread_shared_qb);
-    c->qb_pos = 0;
-}
-
-/* Trims the client query buffer to the current position. */
-void trimClientQueryBuffer(client *c) {
-    if (c->querybuf == thread_shared_qb) {
-        resetSharedQueryBuf(c);
-    }
-
-    if (c->querybuf == NULL) {
-        return;
-    }
-
-    serverAssert(c->qb_pos <= sdslen(c->querybuf));
-
-    if (c->qb_pos > 0) {
-        sdsrange(c->querybuf, c->qb_pos, -1);
-        c->qb_pos = 0;
+        c->flag.reply_skip_next = 0;
     }
 }
 
+/* Initializes the shared query buffer to a new sds with the default capacity */
+void initSharedQueryBuf(void) {
+    thread_shared_qb = sdsnewlen(NULL, PROTO_IOBUF_LEN);
+    sdsclear(thread_shared_qb);
+}
+
+void freeSharedQueryBuf(void) {
+    sdsfree(thread_shared_qb);
+    thread_shared_qb = NULL;
+}
+
 /* This function is used when we want to re-enter the event loop but there
  * is the risk that the client we are dealing with will be freed in some
  * way. This happens for instance in:
@@ -2193,16 +2517,14 @@ void unprotectClient(client *c) {
 
 /* Like processMultibulkBuffer(), but for the inline protocol instead of RESP,
  * this function consumes the client query buffer and creates a command ready
- * to be executed inside the client structure. Returns C_OK if the command
- * is ready to be executed, or C_ERR if there is still protocol to read to
- * have a well formed command. The function also returns C_ERR when there is
- * a protocol error: in such a case the client structure is setup to reply
- * with the error and close the connection. */
-int processInlineBuffer(client *c) {
+ * to be executed inside the client structure.
+ * Sets the client read_flags to indicate the parsing outcome. */
+void processInlineBuffer(client *c) {
     char *newline;
     int argc, j, linefeed_chars = 1;
     sds *argv, aux;
     size_t querylen;
+    int is_primary = c->read_flags & READ_FLAGS_PRIMARY;
 
     /* Search for end of line */
     newline = strchr(c->querybuf + c->qb_pos, '\n');
@@ -2210,10 +2532,9 @@ int processInlineBuffer(client *c) {
     /* Nothing to do without a \r\n */
     if (newline == NULL) {
         if (sdslen(c->querybuf) - c->qb_pos > PROTO_INLINE_MAX_SIZE) {
-            addReplyError(c, "Protocol error: too big inline request");
-            setProtocolError("too big inline request", c);
+            c->read_flags |= READ_FLAGS_ERROR_BIG_INLINE_REQUEST;
         }
-        return C_ERR;
+        return;
     }
 
     /* Handle the \r\n case. */
@@ -2225,15 +2546,13 @@ int processInlineBuffer(client *c) {
     argv = sdssplitargs(aux, &argc);
     sdsfree(aux);
     if (argv == NULL) {
-        addReplyError(c, "Protocol error: unbalanced quotes in request");
-        setProtocolError("unbalanced quotes in inline request", c);
-        return C_ERR;
+        c->read_flags |= READ_FLAGS_ERROR_UNBALANCED_QUOTES;
+        return;
     }
 
-    /* Newline from replicas can be used to refresh the last ACK time.
-     * This is useful for a replica to ping back while loading a big
-     * RDB file. */
-    if (querylen == 0 && getClientType(c) == CLIENT_TYPE_REPLICA) c->repl_ack_time = server.unixtime;
+    if (querylen == 0) {
+        c->read_flags |= READ_FLAGS_INLINE_ZERO_QUERY_LEN;
+    }
 
     /* Primaries should never send us inline protocol to run actual
      * commands. If this happens, it is likely due to a bug in the server where
@@ -2242,12 +2561,10 @@ int processInlineBuffer(client *c) {
      *
      * However there is an exception: primaries may send us just a newline
      * to keep the connection active. */
-    if (querylen != 0 && c->flag.primary) {
+    if (querylen != 0 && is_primary) {
         sdsfreesplitres(argv, argc);
-        serverLog(LL_WARNING, "WARNING: Receiving inline protocol from primary, primary stream corruption? Closing the "
-                              "primary connection and discarding the cached primary.");
-        setProtocolError("Primary using the inline protocol. Desync?", c);
-        return C_ERR;
+        c->read_flags |= READ_FLAGS_ERROR_UNEXPECTED_INLINE_FROM_PRIMARY;
+        return;
     }
 
     /* Move querybuffer position to the next query in the buffer. */
@@ -2268,7 +2585,7 @@ int processInlineBuffer(client *c) {
         c->argv_len_sum += sdslen(argv[j]);
     }
     zfree(argv);
-    return C_OK;
+    c->read_flags |= READ_FLAGS_PARSING_COMPLETED;
 }
 
 /* Helper function. Record protocol error details in server log,
@@ -2281,9 +2598,10 @@ static void setProtocolError(const char *errstr, client *c) {
 
         /* Sample some protocol to given an idea about what was inside. */
         char buf[256];
-        if (sdslen(c->querybuf) - c->qb_pos < PROTO_DUMP_LEN) {
+        buf[0] = '\0';
+        if (c->querybuf && sdslen(c->querybuf) - c->qb_pos < PROTO_DUMP_LEN) {
             snprintf(buf, sizeof(buf), "Query buffer during protocol error: '%s'", c->querybuf + c->qb_pos);
-        } else {
+        } else if (c->querybuf) {
             snprintf(buf, sizeof(buf), "Query buffer during protocol error: '%.*s' (... more %zu bytes ...) '%.*s'",
                      PROTO_DUMP_LEN / 2, c->querybuf + c->qb_pos, sdslen(c->querybuf) - c->qb_pos - PROTO_DUMP_LEN,
                      PROTO_DUMP_LEN / 2, c->querybuf + sdslen(c->querybuf) - PROTO_DUMP_LEN / 2);
@@ -2306,20 +2624,18 @@ static void setProtocolError(const char *errstr, client *c) {
 }
 
 /* Process the query buffer for client 'c', setting up the client argument
- * vector for command execution. Returns C_OK if after running the function
- * the client has a well-formed ready to be processed command, otherwise
- * C_ERR if there is still to read more buffer to get the full command.
- * The function also returns C_ERR when there is a protocol error: in such a
- * case the client structure is setup to reply with the error and close
- * the connection.
+ * vector for command execution.
+ * Sets the client's read_flags to indicate the parsing outcome.
  *
  * This function is called if processInputBuffer() detects that the next
  * command is in RESP format, so the first byte in the command is found
  * to be '*'. Otherwise for inline commands processInlineBuffer() is called. */
-int processMultibulkBuffer(client *c) {
+void processMultibulkBuffer(client *c) {
     char *newline = NULL;
     int ok;
     long long ll;
+    int is_primary = c->read_flags & READ_FLAGS_PRIMARY;
+    int auth_required = c->read_flags & READ_FLAGS_AUTH_REQUIRED;
 
     if (c->multibulklen == 0) {
         /* The client should have been reset */
@@ -2329,32 +2645,32 @@ int processMultibulkBuffer(client *c) {
         newline = strchr(c->querybuf + c->qb_pos, '\r');
         if (newline == NULL) {
             if (sdslen(c->querybuf) - c->qb_pos > PROTO_INLINE_MAX_SIZE) {
-                addReplyError(c, "Protocol error: too big mbulk count string");
-                setProtocolError("too big mbulk count string", c);
+                c->read_flags |= READ_FLAGS_ERROR_BIG_MULTIBULK;
             }
-            return C_ERR;
+            return;
         }
 
         /* Buffer should also contain \n */
-        if (newline - (c->querybuf + c->qb_pos) > (ssize_t)(sdslen(c->querybuf) - c->qb_pos - 2)) return C_ERR;
+        if (newline - (c->querybuf + c->qb_pos) > (ssize_t)(sdslen(c->querybuf) - c->qb_pos - 2)) return;
 
         /* We know for sure there is a whole line since newline != NULL,
          * so go ahead and find out the multi bulk length. */
         serverAssertWithInfo(c, NULL, c->querybuf[c->qb_pos] == '*');
         ok = string2ll(c->querybuf + 1 + c->qb_pos, newline - (c->querybuf + 1 + c->qb_pos), &ll);
         if (!ok || ll > INT_MAX) {
-            addReplyError(c, "Protocol error: invalid multibulk length");
-            setProtocolError("invalid mbulk count", c);
-            return C_ERR;
-        } else if (ll > 10 && authRequired(c)) {
-            addReplyError(c, "Protocol error: unauthenticated multibulk length");
-            setProtocolError("unauth mbulk count", c);
-            return C_ERR;
+            c->read_flags |= READ_FLAGS_ERROR_INVALID_MULTIBULK_LEN;
+            return;
+        } else if (ll > 10 && auth_required) {
+            c->read_flags |= READ_FLAGS_ERROR_UNAUTHENTICATED_MULTIBULK_LEN;
+            return;
         }
 
         c->qb_pos = (newline - c->querybuf) + 2;
 
-        if (ll <= 0) return C_OK;
+        if (ll <= 0) {
+            c->read_flags |= READ_FLAGS_PARSING_NEGATIVE_MBULK_LEN;
+            return;
+        }
 
         c->multibulklen = ll;
 
@@ -2372,9 +2688,8 @@ int processMultibulkBuffer(client *c) {
             newline = strchr(c->querybuf + c->qb_pos, '\r');
             if (newline == NULL) {
                 if (sdslen(c->querybuf) - c->qb_pos > PROTO_INLINE_MAX_SIZE) {
-                    addReplyError(c, "Protocol error: too big bulk count string");
-                    setProtocolError("too big bulk count string", c);
-                    return C_ERR;
+                    c->read_flags |= READ_FLAGS_ERROR_BIG_BULK_COUNT;
+                    return;
                 }
                 break;
             }
@@ -2383,24 +2698,21 @@ int processMultibulkBuffer(client *c) {
             if (newline - (c->querybuf + c->qb_pos) > (ssize_t)(sdslen(c->querybuf) - c->qb_pos - 2)) break;
 
             if (c->querybuf[c->qb_pos] != '$') {
-                addReplyErrorFormat(c, "Protocol error: expected '$', got '%c'", c->querybuf[c->qb_pos]);
-                setProtocolError("expected $ but got something else", c);
-                return C_ERR;
+                c->read_flags |= READ_FLAGS_ERROR_MBULK_UNEXPECTED_CHARACTER;
+                return;
             }
 
             ok = string2ll(c->querybuf + c->qb_pos + 1, newline - (c->querybuf + c->qb_pos + 1), &ll);
-            if (!ok || ll < 0 || (!c->flag.primary && ll > server.proto_max_bulk_len)) {
-                addReplyError(c, "Protocol error: invalid bulk length");
-                setProtocolError("invalid bulk length", c);
-                return C_ERR;
-            } else if (ll > 16384 && authRequired(c)) {
-                addReplyError(c, "Protocol error: unauthenticated bulk length");
-                setProtocolError("unauth bulk length", c);
-                return C_ERR;
+            if (!ok || ll < 0 || (!(is_primary) && ll > server.proto_max_bulk_len)) {
+                c->read_flags |= READ_FLAGS_ERROR_MBULK_INVALID_BULK_LEN;
+                return;
+            } else if (ll > 16384 && auth_required) {
+                c->read_flags |= READ_FLAGS_ERROR_UNAUTHENTICATED_BULK_LEN;
+                return;
             }
 
             c->qb_pos = newline - c->querybuf + 2;
-            if (!c->flag.primary && ll >= PROTO_MBULK_BIG_ARG) {
+            if (!(is_primary) && ll >= PROTO_MBULK_BIG_ARG) {
                 /* When the client is not a primary client (because primary
                  * client's querybuf can only be trimmed after data applied
                  * and sent to replicas).
@@ -2446,7 +2758,7 @@ int processMultibulkBuffer(client *c) {
             /* Optimization: if a non-primary client's buffer contains JUST our bulk element
              * instead of creating a new object by *copying* the sds we
              * just use the current sds string. */
-            if (!c->flag.primary && c->qb_pos == 0 && c->bulklen >= PROTO_MBULK_BIG_ARG &&
+            if (!is_primary && c->qb_pos == 0 && c->bulklen >= PROTO_MBULK_BIG_ARG &&
                 sdslen(c->querybuf) == (size_t)(c->bulklen + 2)) {
                 c->argv[c->argc++] = createObject(OBJ_STRING, c->querybuf);
                 c->argv_len_sum += c->bulklen;
@@ -2466,10 +2778,7 @@ int processMultibulkBuffer(client *c) {
     }
 
     /* We're done when c->multibulk == 0 */
-    if (c->multibulklen == 0) return C_OK;
-
-    /* Still not ready to process the command */
-    return C_ERR;
+    if (c->multibulklen == 0) c->read_flags |= READ_FLAGS_PARSING_COMPLETED;
 }
 
 /* Perform necessary tasks after a command was executed:
@@ -2572,122 +2881,103 @@ int processPendingCommandAndInputBuffer(client *c) {
     return C_OK;
 }
 
-/* This function is called every time, in the client structure 'c', there is
- * more query buffer to process, because we read more data from the socket
- * or because a client was blocked and later reactivated, so there could be
- * pending query buffer, already representing a full command, to process.
- * return C_ERR in case the client was freed during the processing */
+/* Parse a single command from the query buffer.
+ *
+ * This function may be called from the main thread or from the I/O thread.
+ *
+ * Sets the client's read_flags to indicate the parsing outcome. */
+void parseCommand(client *c) {
+    /* Determine request type when unknown. */
+    if (!c->reqtype) {
+        if (c->querybuf[c->qb_pos] == '*') {
+            c->reqtype = PROTO_REQ_MULTIBULK;
+        } else {
+            c->reqtype = PROTO_REQ_INLINE;
+        }
+    }
+
+    if (c->reqtype == PROTO_REQ_INLINE) {
+        processInlineBuffer(c);
+    } else if (c->reqtype == PROTO_REQ_MULTIBULK) {
+        processMultibulkBuffer(c);
+    } else {
+        serverPanic("Unknown request type");
+    }
+}
+
+int canParseCommand(client *c) {
+    if (c->cmd != NULL) return 0;
+
+    /* Don't parse a command while the client is blocked or has just been unblocked. */
+    if (c->flag.blocked || c->flag.unblocked) return 0;
+
+    /* Don't process more buffers from clients that have already pending
+     * commands to execute in c->argv. */
+    if (c->flag.pending_command) return 0;
+
+    /* Don't process input from the primary while there is a busy script
+     * condition on the replica. We want just to accumulate the replication
+     * stream (instead of replying -BUSY like we do with other clients) and
+     * later resume the processing. */
+    if (isInsideYieldingLongCommand() && c->flag.primary) return 0;
+
+    /* CLIENT_CLOSE_AFTER_REPLY closes the connection once the reply is
+     * written to the client. Make sure to not let the reply grow after
+     * this flag has been set (i.e. don't process more commands).
+     *
+     * The same applies for clients we want to terminate ASAP. */
+    if (c->flag.close_after_reply || c->flag.close_asap) return 0;
+
+    return 1;
+}
+
 int processInputBuffer(client *c) {
-    /* Keep processing while there is something in the input buffer */
+    /* Parse the query buffer. */
     while (c->querybuf && c->qb_pos < sdslen(c->querybuf)) {
-        /* Immediately abort if the client is in the middle of something. */
-        if (c->flag.blocked) break;
-
-        /* Don't process more buffers from clients that have already pending
-         * commands to execute in c->argv. */
-        if (c->flag.pending_command) break;
-
-        /* Don't process input from the primary while there is a busy script
-         * condition on the replica. We want just to accumulate the replication
-         * stream (instead of replying -BUSY like we do with other clients) and
-         * later resume the processing. */
-        if (isInsideYieldingLongCommand() && c->flag.primary) break;
-
-        /* CLIENT_CLOSE_AFTER_REPLY closes the connection once the reply is
-         * written to the client. Make sure to not let the reply grow after
-         * this flag has been set (i.e. don't process more commands).
-         *
-         * The same applies for clients we want to terminate ASAP. */
-        if (c->flag.close_after_reply || c->flag.close_asap) break;
-
-        /* Determine request type when unknown. */
-        if (!c->reqtype) {
-            if (c->querybuf[c->qb_pos] == '*') {
-                c->reqtype = PROTO_REQ_MULTIBULK;
-            } else {
-                c->reqtype = PROTO_REQ_INLINE;
-            }
+        if (!canParseCommand(c)) {
+            break;
         }
 
-        if (c->reqtype == PROTO_REQ_INLINE) {
-            if (processInlineBuffer(c) != C_OK) break;
-        } else if (c->reqtype == PROTO_REQ_MULTIBULK) {
-            if (processMultibulkBuffer(c) != C_OK) break;
-        } else {
-            serverPanic("Unknown request type");
+        c->read_flags = c->flag.primary ? READ_FLAGS_PRIMARY : 0;
+        c->read_flags |= authRequired(c) ? READ_FLAGS_AUTH_REQUIRED : 0;
+
+        parseCommand(c);
+
+        if (handleParseResults(c) != PARSE_OK) {
+            break;
         }
 
-        /* Multibulk processing could see a <= 0 length. */
         if (c->argc == 0) {
-            resetClient(c);
-        } else {
-            /* If we are in the context of an I/O thread, we can't really
-             * execute the command here. All we can do is to flag the client
-             * as one that needs to process the command. */
-            if (io_threads_op != IO_THREADS_OP_IDLE) {
-                serverAssert(io_threads_op == IO_THREADS_OP_READ);
-                c->flag.pending_command = 1;
-                break;
-            }
-
-            if (c->querybuf == thread_shared_qb) {
-                /* Before processing the command, reset the shared query buffer to its default state.
-                 * This avoids unintentionally modifying the shared qb during processCommand as we may use
-                 * the shared qb for other clients during processEventsWhileBlocked */
-                resetSharedQueryBuf(c);
-            }
+            /* No command to process - continue parsing the query buf. */
+            continue;
+        }
 
-            /* We are finally ready to execute the command. */
-            if (processCommandAndResetClient(c) == C_ERR) {
-                /* If the client is no longer valid, we avoid exiting this
-                 * loop and trimming the client buffer later. So we return
-                 * ASAP in that case. */
-                return C_ERR;
-            }
+        if (c->querybuf == thread_shared_qb) {
+            /* Before processing the command, reset the shared query buffer to its default state.
+             * This avoids unintentionally modifying the shared qb during processCommand as we may use
+             * the shared qb for other clients during processEventsWhileBlocked */
+            resetSharedQueryBuf(c);
         }
-    }
 
-    if (c->flag.primary) {
-        /* If the client is a primary, trim the querybuf to repl_applied,
-         * since primary client is very special, its querybuf not only
-         * used to parse command, but also proxy to sub-replicas.
-         *
-         * Here are some scenarios we cannot trim to qb_pos:
-         * 1. we don't receive complete command from primary
-         * 2. primary client blocked cause of client pause
-         * 3. io threads operate read, primary client flagged with CLIENT_PENDING_COMMAND
-         *
-         * In these scenarios, qb_pos points to the part of the current command
-         * or the beginning of next command, and the current command is not applied yet,
-         * so the repl_applied is not equal to qb_pos. */
-        if (c->repl_applied) {
-            sdsrange(c->querybuf, c->repl_applied, -1);
-            c->qb_pos -= c->repl_applied;
-            c->repl_applied = 0;
+        /* We are finally ready to execute the command. */
+        if (processCommandAndResetClient(c) == C_ERR) {
+            /* If the client is no longer valid, we avoid exiting this
+             * loop and trimming the client buffer later. So we return
+             * ASAP in that case. */
+            return C_ERR;
         }
-    } else {
-        trimClientQueryBuffer(c);
     }
 
-    /* Update client memory usage after processing the query buffer, this is
-     * important in case the query buffer is big and wasn't drained during
-     * the above loop (because of partially sent big commands). */
-    if (io_threads_op == IO_THREADS_OP_IDLE) updateClientMemUsageAndBucket(c);
-
     return C_OK;
 }
 
-void readQueryFromClient(connection *conn) {
-    client *c = connGetPrivateData(conn);
-    int nread, big_arg = 0;
+/* This function can be called from the main thread or from an I/O thread.
+ * It allocates the client's query buffer if required and reads into it from the network.
+ * It sets c->nread to the value returned by connRead() (the number of bytes read when positive). */
+void readToQueryBuf(client *c) {
+    int big_arg = 0;
     size_t qblen, readlen;
-
-    /* Check if we want to read from the client later when exiting from
-     * the event loop. This is the case if threaded I/O is enabled. */
-    if (postponeClientRead(c)) return;
-
-    /* Update total number of reads on server */
-    atomic_fetch_add_explicit(&server.stat_total_reads_processed, 1, memory_order_relaxed);
+    int is_primary = c->read_flags & READ_FLAGS_PRIMARY;
 
     readlen = PROTO_IOBUF_LEN;
     qblen = c->querybuf ? sdslen(c->querybuf) : 0;
@@ -2717,7 +3007,7 @@ void readQueryFromClient(connection *conn) {
         qblen = sdslen(c->querybuf);
     }
 
-    if (!c->flag.primary && // primary client's querybuf can grow greedy.
+    if (!is_primary && // primary client's querybuf can grow greedy.
         (big_arg || sdsalloc(c->querybuf) < PROTO_IOBUF_LEN)) {
         /* When reading a BIG_ARG we won't be reading more than that one arg
          * into the query buffer, so we don't need to pre-allocate more than we
@@ -2734,65 +3024,38 @@ void readQueryFromClient(connection *conn) {
         /* Read as much as possible from the socket to save read(2) system calls. */
         readlen = sdsavail(c->querybuf);
     }
-    nread = connRead(c->conn, c->querybuf + qblen, readlen);
-    if (nread == -1) {
-        if (connGetState(conn) == CONN_STATE_CONNECTED) {
-            goto done;
-        } else {
-            serverLog(LL_VERBOSE, "Reading from client: %s", connGetLastError(c->conn));
-            freeClientAsync(c);
-            goto done;
-        }
-    } else if (nread == 0) {
-        if (server.verbosity <= LL_VERBOSE) {
-            sds info = catClientInfoString(sdsempty(), c);
-            serverLog(LL_VERBOSE, "Client closed connection %s", info);
-            sdsfree(info);
-        }
-        freeClientAsync(c);
-        goto done;
+    c->nread = connRead(c->conn, c->querybuf + qblen, readlen);
+    if (c->nread <= 0) {
+        return;
     }
 
-    sdsIncrLen(c->querybuf, nread);
+    sdsIncrLen(c->querybuf, c->nread);
     qblen = sdslen(c->querybuf);
     if (c->querybuf_peak < qblen) c->querybuf_peak = qblen;
-
-    c->last_interaction = server.unixtime;
-    if (c->flag.primary) {
-        c->read_reploff += nread;
-        atomic_fetch_add_explicit(&server.stat_net_repl_input_bytes, nread, memory_order_relaxed);
-    } else {
-        atomic_fetch_add_explicit(&server.stat_net_input_bytes, nread, memory_order_relaxed);
-    }
-    c->net_input_bytes += nread;
-
-    if (!c->flag.primary &&
+    if (!is_primary) {
         /* The commands cached in the MULTI/EXEC queue have not been executed yet,
          * so they are also considered a part of the query buffer in a broader sense.
          *
          * For unauthenticated clients, the query buffer cannot exceed 1MB at most. */
-        (c->mstate.argv_len_sums + sdslen(c->querybuf) > server.client_max_querybuf_len ||
-         (c->mstate.argv_len_sums + sdslen(c->querybuf) > 1024 * 1024 && authRequired(c)))) {
-        sds ci = catClientInfoString(sdsempty(), c), bytes = sdsempty();
-
-        bytes = sdscatrepr(bytes, c->querybuf, 64);
-        serverLog(LL_WARNING, "Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci,
-                  bytes);
-        sdsfree(ci);
-        sdsfree(bytes);
-        freeClientAsync(c);
-        atomic_fetch_add_explicit(&server.stat_client_qbuf_limit_disconnections, 1, memory_order_relaxed);
-        goto done;
+        size_t qb_memory = sdslen(c->querybuf) + c->mstate.argv_len_sums;
+        if (qb_memory > server.client_max_querybuf_len ||
+            (qb_memory > 1024 * 1024 && (c->read_flags & READ_FLAGS_AUTH_REQUIRED))) {
+            c->read_flags |= READ_FLAGS_QB_LIMIT_REACHED;
+        }
     }
+}
 
-    /* There is more data in the client input buffer, continue parsing it
-     * and check if there is a full command to execute. */
-    if (processInputBuffer(c) == C_ERR) c = NULL;
+void readQueryFromClient(connection *conn) {
+    client *c = connGetPrivateData(conn);
+    /* Check if we can send the client to be handled by the IO-thread */
+    if (postponeClientRead(c)) return;
 
-done:
-    if (c && c->querybuf == thread_shared_qb) {
-        sdsclear(thread_shared_qb);
-        c->querybuf = NULL;
+    if (c->io_write_state != CLIENT_IDLE || c->io_read_state != CLIENT_IDLE) return;
+
+    readToQueryBuf(c);
+
+    if (handleReadResult(c) == C_OK) {
+        if (processInputBuffer(c) == C_ERR) return;
     }
     beforeNextClient(c);
 }
@@ -2849,6 +3112,7 @@ char *getClientSockname(client *c) {
 /* Concatenate a string representing the state of a client in a human
  * readable format, into the sds string 's'. */
 sds catClientInfoString(sds s, client *client) {
+    if (!server.crashed) waitForClientIO(client);
     char flags[17], events[3], conninfo[CONN_INFO_LEN], *p;
 
     p = flags;
@@ -4056,7 +4320,7 @@ void flushReplicasOutputBuffers(void) {
          */
         if (replica->repl_state == REPLICA_STATE_ONLINE && !(replica->flag.close_asap) && can_receive_writes &&
             !replica->repl_start_cmd_stream_on_ack && clientHasPendingReplies(replica)) {
-            writeToClient(replica, 0);
+            writeToClient(replica);
         }
     }
 }
@@ -4218,375 +4482,79 @@ void processEventsWhileBlocked(void) {
     server.cmd_time_snapshot = prev_cmd_time_snapshot;
 }
 
-/* ==========================================================================
- * Threaded I/O
- * ========================================================================== */
-
-typedef struct __attribute__((aligned(CACHE_LINE_SIZE))) threads_pending {
-    _Atomic unsigned long value;
-} threads_pending;
-
-pthread_t io_threads[IO_THREADS_MAX_NUM];
-pthread_mutex_t io_threads_mutex[IO_THREADS_MAX_NUM];
-threads_pending io_threads_pending[IO_THREADS_MAX_NUM];
-int io_threads_op;
-/* IO_THREADS_OP_IDLE, IO_THREADS_OP_READ or IO_THREADS_OP_WRITE. */ // TODO: should access to this be atomic??!
-
-/* This is the list of clients each thread will serve when threaded I/O is
- * used. We spawn io_threads_num-1 threads, since one is the main thread
- * itself. */
-list *io_threads_list[IO_THREADS_MAX_NUM];
-
-static inline unsigned long getIOPendingCount(int i) {
-    unsigned long count = atomic_load(&io_threads_pending[i].value);
-    return count;
-}
-
-static inline void setIOPendingCount(int i, unsigned long count) {
-    atomic_store(&io_threads_pending[i].value, count);
-}
-
-void *IOThreadMain(void *myid) {
-    /* The ID is the thread number (from 0 to server.io_threads_num-1), and is
-     * used by the thread to just manipulate a single sub-array of clients. */
-    long id = (unsigned long)myid;
-    char thdname[16];
-
-    snprintf(thdname, sizeof(thdname), "io_thd_%ld", id);
-    valkey_set_thread_title(thdname);
-    serverSetCpuAffinity(server.server_cpulist);
-    makeThreadKillable();
-    initSharedQueryBuf();
-
-    while (1) {
-        /* Wait for start */
-        for (int j = 0; j < 1000000; j++) {
-            if (getIOPendingCount(id) != 0) break;
-        }
-
-        /* Give the main thread a chance to stop this thread. */
-        if (getIOPendingCount(id) == 0) {
-            pthread_mutex_lock(&io_threads_mutex[id]);
-            pthread_mutex_unlock(&io_threads_mutex[id]);
-            continue;
-        }
+/* Return 1 if the client read is handled using threaded I/O.
+ * 0 otherwise. */
+int postponeClientRead(client *c) {
+    if (ProcessingEventsWhileBlocked) return 0;
 
-        serverAssert(getIOPendingCount(id) != 0);
-
-        /* Process: note that the main thread will never touch our list
-         * before we drop the pending count to 0. */
-        listIter li;
-        listNode *ln;
-        listRewind(io_threads_list[id], &li);
-        while ((ln = listNext(&li))) {
-            client *c = listNodeValue(ln);
-            if (io_threads_op == IO_THREADS_OP_WRITE) {
-                writeToClient(c, 0);
-            } else if (io_threads_op == IO_THREADS_OP_READ) {
-                readQueryFromClient(c->conn);
-            } else {
-                serverPanic("io_threads_op value is unknown");
-            }
-        }
-        listEmpty(io_threads_list[id]);
-        setIOPendingCount(id, 0);
-    }
+    return (trySendReadToIOThreads(c) == C_OK);
 }
 
-/* Initialize the data structures needed for threaded I/O. */
-void initThreadedIO(void) {
-    server.io_threads_active = 0; /* We start with threads not active. */
-
-    /* Indicate that io-threads are currently idle */
-    io_threads_op = IO_THREADS_OP_IDLE;
-
-    /* Don't spawn any thread if the user selected a single thread:
-     * we'll handle I/O directly from the main thread. */
-    if (server.io_threads_num == 1) return;
+int processIOThreadsReadDone(void) {
+    if (listLength(server.clients_pending_io_read) == 0) return 0;
+    int processed = 0;
+    listNode *ln;
 
-    if (server.io_threads_num > IO_THREADS_MAX_NUM) {
-        serverLog(LL_WARNING,
-                  "Fatal: too many I/O threads configured. "
-                  "The maximum number is %d.",
-                  IO_THREADS_MAX_NUM);
-        exit(1);
-    }
-
-    /* Spawn and initialize the I/O threads. */
-    for (int i = 0; i < server.io_threads_num; i++) {
-        /* Things we do for all the threads including the main thread. */
-        io_threads_list[i] = listCreate();
-        if (i == 0) continue; /* Thread 0 is the main thread. */
-
-        /* Things we do only for the additional threads. */
-        pthread_t tid;
-        pthread_mutex_init(&io_threads_mutex[i], NULL);
-        setIOPendingCount(i, 0);
-        pthread_mutex_lock(&io_threads_mutex[i]); /* Thread will be stopped. */
-        if (pthread_create(&tid, NULL, IOThreadMain, (void *)(long)i) != 0) {
-            serverLog(LL_WARNING, "Fatal: Can't initialize IO thread.");
-            exit(1);
-        }
-        io_threads[i] = tid;
-    }
-}
+    listNode *next = listFirst(server.clients_pending_io_read);
+    while (next) {
+        ln = next;
+        next = listNextNode(ln);
+        client *c = listNodeValue(ln);
 
-void killIOThreads(void) {
-    int err, j;
-    for (j = 0; j < server.io_threads_num; j++) {
-        if (io_threads[j] == pthread_self()) continue;
-        if (io_threads[j] && pthread_cancel(io_threads[j]) == 0) {
-            if ((err = pthread_join(io_threads[j], NULL)) != 0) {
-                serverLog(LL_WARNING, "IO thread(tid:%lu) can not be joined: %s", (unsigned long)io_threads[j],
-                          strerror(err));
-            } else {
-                serverLog(LL_WARNING, "IO thread(tid:%lu) terminated", (unsigned long)io_threads[j]);
-            }
+        /* Client is still waiting for a pending I/O - skip it */
+        if (c->io_write_state == CLIENT_PENDING_IO || c->io_read_state == CLIENT_PENDING_IO) continue;
+        /* If the write job is done, process it ASAP to free the buffer and handle connection errors */
+        if (c->io_write_state == CLIENT_COMPLETED_IO) {
+            processClientIOWriteDone(c);
         }
-    }
-}
-
-void startThreadedIO(void) {
-    serverAssert(server.io_threads_active == 0);
-    for (int j = 1; j < server.io_threads_num; j++) pthread_mutex_unlock(&io_threads_mutex[j]);
-    server.io_threads_active = 1;
-}
-
-void stopThreadedIO(void) {
-    /* We may have still clients with pending reads when this function
-     * is called: handle them before stopping the threads. */
-    handleClientsWithPendingReadsUsingThreads();
-    serverAssert(server.io_threads_active == 1);
-    for (int j = 1; j < server.io_threads_num; j++) pthread_mutex_lock(&io_threads_mutex[j]);
-    server.io_threads_active = 0;
-}
-
-/* This function checks if there are not enough pending clients to justify
- * taking the I/O threads active: in that case I/O threads are stopped if
- * currently active. We track the pending writes as a measure of clients
- * we need to handle in parallel, however the I/O threading is disabled
- * globally for reads as well if we have too little pending clients.
- *
- * The function returns 0 if the I/O threading should be used because there
- * are enough active threads, otherwise 1 is returned and the I/O threads
- * could be possibly stopped (if already active) as a side effect. */
-int stopThreadedIOIfNeeded(void) {
-    int pending = listLength(server.clients_pending_write);
-
-    /* Return ASAP if IO threads are disabled (single threaded mode). */
-    if (server.io_threads_num == 1) return 1;
-
-    if (pending < (server.io_threads_num * 2)) {
-        if (server.io_threads_active) stopThreadedIO();
-        return 1;
-    } else {
-        return 0;
-    }
-}
-
-/* This function achieves thread safety using a fan-out -> fan-in paradigm:
- * Fan out: The main thread fans out work to the io-threads which block until
- * setIOPendingCount() is called with a value larger than 0 by the main thread.
- * Fan in: The main thread waits until getIOPendingCount() returns 0. Then
- * it can safely perform post-processing and return to normal synchronous
- * work. */
-int handleClientsWithPendingWritesUsingThreads(void) {
-    int processed = listLength(server.clients_pending_write);
-    if (processed == 0) return 0; /* Return ASAP if there are no clients. */
+        /* memory barrier acquire to get the updated client state */
+        atomic_thread_fence(memory_order_acquire);
+        /* Don't post-process reads of clients that are going to be closed anyway. */
+        if (c->flag.close_asap) continue;
+        /* If a client is protected, don't do anything that may trigger
+         * a read/write error or recreate the handler. */
+        if (c->flag.protected) continue;
 
-    /* If I/O threads are disabled or we have few clients to serve, don't
-     * use I/O threads, but the boring synchronous code. */
-    if (server.io_threads_num == 1 || stopThreadedIOIfNeeded()) {
-        return handleClientsWithPendingWrites();
-    }
+        listUnlinkNode(server.clients_pending_io_read, ln);
+        c->flag.pending_read = 0;
+        c->io_read_state = CLIENT_IDLE;
 
-    /* Start threads if needed. */
-    if (!server.io_threads_active) startThreadedIO();
+        processed++;
+        server.stat_io_reads_processed++;
 
-    /* Distribute the clients across N different lists. */
-    listIter li;
-    listNode *ln;
-    listRewind(server.clients_pending_write, &li);
-    int item_id = 0;
-    while ((ln = listNext(&li))) {
-        client *c = listNodeValue(ln);
-        c->flag.pending_write = 0;
+        connSetPostponeUpdateState(c->conn, 0);
+        connUpdateState(c->conn);
 
-        /* Remove clients from the list of pending writes since
-         * they are going to be closed ASAP. */
-        if (c->flag.close_asap) {
-            listUnlinkNode(server.clients_pending_write, ln);
+        /* On read error - stop here. */
+        if (handleReadResult(c) == C_ERR) {
             continue;
         }
 
-        /* Since all replicas and replication backlog use global replication
-         * buffer, to guarantee data accessing thread safe, we must put all
-         * replicas client into io_threads_list[0] i.e. main thread handles
-         * sending the output buffer of all replicas. */
-        if (getClientType(c) == CLIENT_TYPE_REPLICA) {
-            listAddNodeTail(io_threads_list[0], c);
-            continue;
+        if (!(c->read_flags & READ_FLAGS_DONT_PARSE)) {
+            parseResult res = handleParseResults(c);
+            /* On parse error - stop here. */
+            if (res == PARSE_ERR) {
+                continue;
+            } else if (res == PARSE_NEEDMORE) {
+                beforeNextClient(c);
+                continue;
+            }
         }
 
-        int target_id = item_id % server.io_threads_num;
-        listAddNodeTail(io_threads_list[target_id], c);
-        item_id++;
-    }
-
-    /* Give the start condition to the waiting threads, by setting the
-     * start condition atomic var. */
-    io_threads_op = IO_THREADS_OP_WRITE;
-    for (int j = 1; j < server.io_threads_num; j++) {
-        int count = listLength(io_threads_list[j]);
-        setIOPendingCount(j, count);
-    }
-
-    /* Also use the main thread to process a slice of clients. */
-    listRewind(io_threads_list[0], &li);
-    while ((ln = listNext(&li))) {
-        client *c = listNodeValue(ln);
-        writeToClient(c, 0);
-    }
-    listEmpty(io_threads_list[0]);
-
-    /* Wait for all the other threads to end their work. */
-    while (1) {
-        unsigned long pending = 0;
-        for (int j = 1; j < server.io_threads_num; j++) pending += getIOPendingCount(j);
-        if (pending == 0) break;
-    }
-
-    io_threads_op = IO_THREADS_OP_IDLE;
-
-    /* Run the list of clients again to install the write handler where
-     * needed. */
-    listRewind(server.clients_pending_write, &li);
-    while ((ln = listNext(&li))) {
-        client *c = listNodeValue(ln);
-
-        /* Update the client in the mem usage after we're done processing it in the io-threads */
-        updateClientMemUsageAndBucket(c);
-
-        /* Install the write handler if there are pending writes in some
-         * of the clients. */
-        if (clientHasPendingReplies(c)) {
-            installClientWriteHandler(c);
+        if (c->argc > 0) {
+            c->flag.pending_command = 1;
         }
-    }
-    while (listLength(server.clients_pending_write) > 0) {
-        listUnlinkNode(server.clients_pending_write, server.clients_pending_write->head);
-    }
-
-    /* Update processed count on server */
-    server.stat_io_writes_processed += processed;
-
-    return processed;
-}
-
-/* Return 1 if we want to handle the client read later using threaded I/O.
- * This is called by the readable handler of the event loop.
- * As a side effect of calling this function the client is put in the
- * pending read clients and flagged as such. */
-int postponeClientRead(client *c) {
-    if (server.io_threads_active && server.io_threads_do_reads && !ProcessingEventsWhileBlocked &&
-        !(c->flag.primary || c->flag.replica || c->flag.blocked) && io_threads_op == IO_THREADS_OP_IDLE) {
-        listAddNodeHead(server.clients_pending_read, c);
-        c->pending_read_list_node = listFirst(server.clients_pending_read);
-        return 1;
-    } else {
-        return 0;
-    }
-}
-
-/* When threaded I/O is also enabled for the reading + parsing side, the
- * readable handler will just put normal clients into a queue of clients to
- * process (instead of serving them synchronously). This function runs
- * the queue using the I/O threads, and process them in order to accumulate
- * the reads in the buffers, and also parse the first command available
- * rendering it in the client structures.
- * This function achieves thread safety using a fan-out -> fan-in paradigm:
- * Fan out: The main thread fans out work to the io-threads which block until
- * setIOPendingCount() is called with a value larger than 0 by the main thread.
- * Fan in: The main thread waits until getIOPendingCount() returns 0. Then
- * it can safely perform post-processing and return to normal synchronous
- * work. */
-int handleClientsWithPendingReadsUsingThreads(void) {
-    if (!server.io_threads_active || !server.io_threads_do_reads) return 0;
-    int processed = listLength(server.clients_pending_read);
-    if (processed == 0) return 0;
-
-    /* Distribute the clients across N different lists. */
-    listIter li;
-    listNode *ln;
-    listRewind(server.clients_pending_read, &li);
-    int item_id = 0;
-    while ((ln = listNext(&li))) {
-        client *c = listNodeValue(ln);
-        int target_id = item_id % server.io_threads_num;
-        listAddNodeTail(io_threads_list[target_id], c);
-        item_id++;
-    }
-
-    /* Give the start condition to the waiting threads, by setting the
-     * start condition atomic var. */
-    io_threads_op = IO_THREADS_OP_READ;
-    for (int j = 1; j < server.io_threads_num; j++) {
-        int count = listLength(io_threads_list[j]);
-        setIOPendingCount(j, count);
-    }
-
-    /* Also use the main thread to process a slice of clients. */
-    listRewind(io_threads_list[0], &li);
-    while ((ln = listNext(&li))) {
-        client *c = listNodeValue(ln);
-        readQueryFromClient(c->conn);
-    }
-    listEmpty(io_threads_list[0]);
-
-    /* Wait for all the other threads to end their work. */
-    while (1) {
-        unsigned long pending = 0;
-        for (int j = 1; j < server.io_threads_num; j++) pending += getIOPendingCount(j);
-        if (pending == 0) break;
-    }
-
-    io_threads_op = IO_THREADS_OP_IDLE;
-
-    /* Run the list of clients again to process the new buffers. */
-    while (listLength(server.clients_pending_read)) {
-        ln = listFirst(server.clients_pending_read);
-        client *c = listNodeValue(ln);
-        listDelNode(server.clients_pending_read, ln);
-        c->pending_read_list_node = NULL;
-
-        serverAssert(!c->flag.blocked);
 
-        if (beforeNextClient(c) == C_ERR) {
-            /* If the client is no longer valid, we avoid
-             * processing the client later. So we just go
-             * to the next. */
-            continue;
+        size_t list_length_before_command_execute = listLength(server.clients_pending_io_read);
+        if (processPendingCommandAndInputBuffer(c) == C_OK) {
+            beforeNextClient(c);
         }
-
-        /* Once io-threads are idle we can update the client in the mem usage */
-        updateClientMemUsageAndBucket(c);
-
-        if (processPendingCommandAndInputBuffer(c) == C_ERR) {
-            /* If the client is no longer valid, we avoid
-             * processing the client later. So we just go
-             * to the next. */
-            continue;
+        if (list_length_before_command_execute != listLength(server.clients_pending_io_read)) {
+            /* A client was unlinked from the list, possibly making the next node invalid. */
+            next = listFirst(server.clients_pending_io_read);
         }
-
-        /* We may have pending replies if a thread readQueryFromClient() produced
-         * replies and did not put the client in pending write queue (it can't).
-         */
-        if (!c->flag.pending_write && clientHasPendingReplies(c)) putClientInPendingWriteQueue(c);
     }
 
-    /* Update processed count on server */
-    server.stat_io_reads_processed += processed;
-
     return processed;
 }
 
@@ -4640,3 +4608,44 @@ void evictClients(void) {
         }
     }
 }
+
+/* IO threads functions */
+
+void ioThreadReadQueryFromClient(void *data) {
+    client *c = data;
+    serverAssert(c->io_read_state == CLIENT_PENDING_IO);
+
+    /* Read */
+    readToQueryBuf(c);
+
+    /* Check for read errors. */
+    if (c->nread <= 0) {
+        goto done;
+    }
+
+    /* Skip command parsing if the READ_FLAGS_DONT_PARSE flag is set. */
+    if (c->read_flags & READ_FLAGS_DONT_PARSE) {
+        goto done;
+    }
+
+    /* Handle QB limit */
+    if (c->read_flags & READ_FLAGS_QB_LIMIT_REACHED) {
+        goto done;
+    }
+
+    parseCommand(c);
+
+done:
+    trimClientQueryBuffer(c);
+    atomic_thread_fence(memory_order_release);
+    c->io_read_state = CLIENT_COMPLETED_IO;
+}
+
+void ioThreadWriteToClient(void *data) {
+    client *c = data;
+    serverAssert(c->io_write_state == CLIENT_PENDING_IO);
+    c->nwritten = 0;
+    _writeToClient(c);
+    atomic_thread_fence(memory_order_release);
+    c->io_write_state = CLIENT_COMPLETED_IO;
+}
diff --git a/src/rdb.c b/src/rdb.c
index 8b1037ab93..f9ccd676fd 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -2931,7 +2931,7 @@ void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) {
         processModuleLoadingProgressEvent(0);
     }
     if (server.repl_state == REPL_STATE_TRANSFER && rioCheckType(r) == RIO_TYPE_CONN) {
-        atomic_fetch_add_explicit(&server.stat_net_repl_input_bytes, len, memory_order_relaxed);
+        server.stat_net_repl_input_bytes += len;
     }
 }
 
diff --git a/src/replication.c b/src/replication.c
index 6779b4f1b4..21ccb0e92d 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -765,9 +765,11 @@ int primaryTryPartialResynchronization(client *c, long long psync_offset) {
     }
 
     /* If we reached this point, we are able to perform a partial resync:
-     * 1) Set client state to make it a replica.
-     * 2) Inform the client we can continue with +CONTINUE
-     * 3) Send the backlog data (from the offset to the end) to the replica. */
+     * 1) Make sure no IO operations are being performed before changing the client state.
+     * 2) Set client state to make it a replica.
+     * 3) Inform the client we can continue with +CONTINUE
+     * 4) Send the backlog data (from the offset to the end) to the replica. */
+    waitForClientIO(c);
     c->flag.replica = 1;
     c->repl_state = REPLICA_STATE_ONLINE;
     c->repl_ack_time = server.unixtime;
@@ -1009,6 +1011,8 @@ void syncCommand(client *c) {
     c->repl_state = REPLICA_STATE_WAIT_BGSAVE_START;
     if (server.repl_disable_tcp_nodelay) connDisableTcpNoDelay(c->conn); /* Non critical if it fails. */
     c->repldbfd = -1;
+    /* Wait for any pending IO operation to finish before changing the client state. */
+    waitForClientIO(c);
     c->flag.replica = 1;
     listAddNodeTail(server.replicas, c);
 
@@ -1377,7 +1381,7 @@ void sendBulkToReplica(connection *conn) {
             freeClient(replica);
             return;
         }
-        atomic_fetch_add_explicit(&server.stat_net_repl_output_bytes, nwritten, memory_order_relaxed);
+        server.stat_net_repl_output_bytes += nwritten;
         sdsrange(replica->replpreamble, nwritten, -1);
         if (sdslen(replica->replpreamble) == 0) {
             sdsfree(replica->replpreamble);
@@ -1405,7 +1409,7 @@ void sendBulkToReplica(connection *conn) {
         return;
     }
     replica->repldboff += nwritten;
-    atomic_fetch_add_explicit(&server.stat_net_repl_output_bytes, nwritten, memory_order_relaxed);
+    server.stat_net_repl_output_bytes += nwritten;
     if (replica->repldboff == replica->repldbsize) {
         closeRepldbfd(replica);
         connSetWriteHandler(replica->conn, NULL);
@@ -1447,7 +1451,7 @@ void rdbPipeWriteHandler(struct connection *conn) {
         return;
     } else {
         replica->repldboff += nwritten;
-        atomic_fetch_add_explicit(&server.stat_net_repl_output_bytes, nwritten, memory_order_relaxed);
+        server.stat_net_repl_output_bytes += nwritten;
         if (replica->repldboff < server.rdb_pipe_bufflen) {
             replica->repl_last_partial_write = server.unixtime;
             return; /* more data to write.. */
@@ -1520,7 +1524,7 @@ void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData,
                 /* Note: when use diskless replication, 'repldboff' is the offset
                  * of 'rdb_pipe_buff' sent rather than the offset of entire RDB. */
                 replica->repldboff = nwritten;
-                atomic_fetch_add_explicit(&server.stat_net_repl_output_bytes, nwritten, memory_order_relaxed);
+                server.stat_net_repl_output_bytes += nwritten;
             }
             /* If we were unable to write all the data to one of the replicas,
              * setup write handler (and disable pipe read handler, below) */
@@ -1831,7 +1835,7 @@ void readSyncBulkPayload(connection *conn) {
         } else {
             /* nread here is returned by connSyncReadLine(), which calls syncReadLine() and
              * convert "\r\n" to '\0' so 1 byte is lost. */
-            atomic_fetch_add_explicit(&server.stat_net_repl_input_bytes, nread + 1, memory_order_relaxed);
+            server.stat_net_repl_input_bytes += nread + 1;
         }
 
         if (buf[0] == '-') {
@@ -1900,7 +1904,7 @@ void readSyncBulkPayload(connection *conn) {
             cancelReplicationHandshake(1);
             return;
         }
-        atomic_fetch_add_explicit(&server.stat_net_repl_input_bytes, nread, memory_order_relaxed);
+        server.stat_net_repl_input_bytes += nread;
 
         /* When a mark is used, we want to detect EOF asap in order to avoid
          * writing the EOF mark into the file... */
diff --git a/src/server.c b/src/server.c
index 57456c6597..465aa29391 100644
--- a/src/server.c
+++ b/src/server.c
@@ -39,6 +39,7 @@
 #include "syscheck.h"
 #include "threads_mngr.h"
 #include "fmtargs.h"
+#include "io_threads.h"
 
 #include <time.h>
 #include <signal.h>
@@ -754,6 +755,8 @@ int clientsCronResizeQueryBuffer(client *c) {
  * The buffer peak will be reset back to the buffer position every server.reply_buffer_peak_reset_time milliseconds
  * The function always returns 0 as it never terminates the client. */
 int clientsCronResizeOutputBuffer(client *c, mstime_t now_ms) {
+    if (c->io_write_state != CLIENT_IDLE) return 0;
+
     size_t new_buffer_size = 0;
     char *oldbuf = NULL;
     const size_t buffer_target_shrink_size = c->buf_usable_size / 2;
@@ -904,7 +907,6 @@ void removeClientFromMemUsageBucket(client *c, int allow_eviction) {
  * returns 1 if client eviction for this client is allowed, 0 otherwise.
  */
 int updateClientMemUsageAndBucket(client *c) {
-    serverAssert(io_threads_op == IO_THREADS_OP_IDLE && c->conn);
     int allow_eviction = clientEvictionAllowed(c);
     removeClientFromMemUsageBucket(c, allow_eviction);
 
@@ -997,6 +999,7 @@ void clientsCron(void) {
         head = listFirst(server.clients);
         c = listNodeValue(head);
         listRotateHeadToTail(server.clients);
+        if (c->io_read_state != CLIENT_IDLE || c->io_write_state != CLIENT_IDLE) continue;
         /* The following functions do different service checks on the client.
          * The protocol is that they return non-zero if the client was
          * terminated. */
@@ -1075,8 +1078,7 @@ void databasesCron(void) {
 static inline void updateCachedTimeWithUs(int update_daylight_info, const long long ustime) {
     server.ustime = ustime;
     server.mstime = server.ustime / 1000;
-    time_t unixtime = server.mstime / 1000;
-    atomic_store_explicit(&server.unixtime, unixtime, memory_order_relaxed);
+    server.unixtime = server.mstime / 1000;
 
     /* To get information about daylight saving time, we need to call
      * localtime_r and cache the result. However calling localtime_r in this
@@ -1257,23 +1259,18 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
     monotime cron_start = getMonotonicUs();
 
     run_with_period(100) {
-        long long stat_net_input_bytes, stat_net_output_bytes;
-        long long stat_net_repl_input_bytes, stat_net_repl_output_bytes;
-
-        stat_net_input_bytes = atomic_load_explicit(&server.stat_net_input_bytes, memory_order_relaxed);
-        stat_net_output_bytes = atomic_load_explicit(&server.stat_net_output_bytes, memory_order_relaxed);
-        stat_net_repl_input_bytes = atomic_load_explicit(&server.stat_net_repl_input_bytes, memory_order_relaxed);
-        stat_net_repl_output_bytes = atomic_load_explicit(&server.stat_net_repl_output_bytes, memory_order_relaxed);
-
         monotime current_time = getMonotonicUs();
         long long factor = 1000000; // us
         trackInstantaneousMetric(STATS_METRIC_COMMAND, server.stat_numcommands, current_time, factor);
-        trackInstantaneousMetric(STATS_METRIC_NET_INPUT, stat_net_input_bytes + stat_net_repl_input_bytes, current_time,
-                                 factor);
-        trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT, stat_net_output_bytes + stat_net_repl_output_bytes,
+        trackInstantaneousMetric(STATS_METRIC_NET_INPUT, server.stat_net_input_bytes + server.stat_net_repl_input_bytes,
                                  current_time, factor);
-        trackInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION, stat_net_repl_input_bytes, current_time, factor);
-        trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION, stat_net_repl_output_bytes, current_time, factor);
+        trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT,
+                                 server.stat_net_output_bytes + server.stat_net_repl_output_bytes, current_time,
+                                 factor);
+        trackInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION, server.stat_net_repl_input_bytes, current_time,
+                                 factor);
+        trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION, server.stat_net_repl_output_bytes, current_time,
+                                 factor);
         trackInstantaneousMetric(STATS_METRIC_EL_CYCLE, server.duration_stats[EL_DURATION_TYPE_EL].cnt, current_time,
                                  factor);
         trackInstantaneousMetric(STATS_METRIC_EL_DURATION, server.duration_stats[EL_DURATION_TYPE_EL].sum,
@@ -1433,9 +1430,6 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
         migrateCloseTimedoutSockets();
     }
 
-    /* Stop the I/O threads if we don't have enough pending work. */
-    stopThreadedIOIfNeeded();
-
     /* Resize tracking keys table if needed. This is also done at every
      * command execution, but we want to be sure that if the last command
      * executed changes the value via CONFIG SET, the server will perform
@@ -1580,23 +1574,31 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
      * events to handle. */
     if (ProcessingEventsWhileBlocked) {
         uint64_t processed = 0;
-        processed += handleClientsWithPendingReadsUsingThreads();
+        processed += processIOThreadsReadDone();
         processed += connTypeProcessPendingData();
         if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) flushAppendOnlyFile(0);
         processed += handleClientsWithPendingWrites();
+        int last_procssed = 0;
+        do {
+            /* Try to process all the pending IO events. */
+            last_procssed = processIOThreadsReadDone() + processIOThreadsWriteDone();
+            processed += last_procssed;
+        } while (last_procssed != 0);
         processed += freeClientsInAsyncFreeQueue();
         server.events_processed_while_blocked += processed;
         return;
     }
 
     /* We should handle pending reads clients ASAP after event loop. */
-    handleClientsWithPendingReadsUsingThreads();
+    processIOThreadsReadDone();
 
     /* Handle pending data(typical TLS). (must be done before flushAppendOnlyFile) */
     connTypeProcessPendingData();
 
-    /* If any connection type(typical TLS) still has pending unread data don't sleep at all. */
-    int dont_sleep = connTypeHasPendingData();
+    /* If any connection type(typical TLS) still has pending unread data or if there are clients
+     * with pending IO reads/writes, don't sleep at all. */
+    int dont_sleep = connTypeHasPendingData() || listLength(server.clients_pending_io_read) > 0 ||
+                     listLength(server.clients_pending_io_write) > 0;
 
     /* Call the Cluster before sleep function. Note that this function
      * may change the state of Cluster (from ok to fail or vice versa),
@@ -1659,7 +1661,7 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
     long long prev_fsynced_reploff = server.fsynced_reploff;
 
     /* Write the AOF buffer on disk,
-     * must be done before handleClientsWithPendingWritesUsingThreads,
+     * must be done before handleClientsWithPendingWrites,
      * in case of appendfsync=always. */
     if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) flushAppendOnlyFile(0);
 
@@ -1679,7 +1681,14 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
     }
 
     /* Handle writes with pending output buffers. */
-    handleClientsWithPendingWritesUsingThreads();
+    handleClientsWithPendingWrites();
+
+    /* Try to process more IO reads that are ready to be processed. */
+    if (server.aof_fsync != AOF_FSYNC_ALWAYS) {
+        processIOThreadsReadDone();
+    }
+
+    processIOThreadsWriteDone();
 
     /* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing above. */
     monotime cron_start_time_after_write = getMonotonicUs();
@@ -1729,7 +1738,7 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
 /* This function is called immediately after the event loop multiplexing
  * API returned, and the control is going to soon return to the server by invoking
  * the different events callbacks. */
-void afterSleep(struct aeEventLoop *eventLoop) {
+void afterSleep(struct aeEventLoop *eventLoop, int numevents) {
     UNUSED(eventLoop);
     /********************* WARNING ********************
      * Do NOT add anything above moduleAcquireGIL !!! *
@@ -1761,6 +1770,8 @@ void afterSleep(struct aeEventLoop *eventLoop) {
     if (!ProcessingEventsWhileBlocked) {
         server.cmd_time_snapshot = server.mstime;
     }
+
+    adjustIOThreadsByEventLoad(numevents, 0);
 }
 
 /* =========================== Server initialization ======================== */
@@ -2478,10 +2489,10 @@ void resetServerStats(void) {
     server.stat_sync_partial_ok = 0;
     server.stat_sync_partial_err = 0;
     server.stat_io_reads_processed = 0;
-    atomic_store_explicit(&server.stat_total_reads_processed, 0, memory_order_relaxed);
+    server.stat_total_reads_processed = 0;
     server.stat_io_writes_processed = 0;
-    atomic_store_explicit(&server.stat_total_writes_processed, 0, memory_order_relaxed);
-    atomic_store_explicit(&server.stat_client_qbuf_limit_disconnections, 0, memory_order_relaxed);
+    server.stat_total_writes_processed = 0;
+    server.stat_client_qbuf_limit_disconnections = 0;
     server.stat_client_outbuf_limit_disconnections = 0;
     for (j = 0; j < STATS_METRIC_COUNT; j++) {
         server.inst_metric[j].idx = 0;
@@ -2492,10 +2503,10 @@ void resetServerStats(void) {
     server.stat_aof_rewrites = 0;
     server.stat_rdb_saves = 0;
     server.stat_aofrw_consecutive_failures = 0;
-    atomic_store_explicit(&server.stat_net_input_bytes, 0, memory_order_relaxed);
-    atomic_store_explicit(&server.stat_net_output_bytes, 0, memory_order_relaxed);
-    atomic_store_explicit(&server.stat_net_repl_input_bytes, 0, memory_order_relaxed);
-    atomic_store_explicit(&server.stat_net_repl_output_bytes, 0, memory_order_relaxed);
+    server.stat_net_input_bytes = 0;
+    server.stat_net_output_bytes = 0;
+    server.stat_net_repl_input_bytes = 0;
+    server.stat_net_repl_output_bytes = 0;
     server.stat_unexpected_error_replies = 0;
     server.stat_total_error_replies = 0;
     server.stat_dump_payload_sanitizations = 0;
@@ -2545,7 +2556,8 @@ void initServer(void) {
     server.replicas = listCreate();
     server.monitors = listCreate();
     server.clients_pending_write = listCreate();
-    server.clients_pending_read = listCreate();
+    server.clients_pending_io_write = listCreate();
+    server.clients_pending_io_read = listCreate();
     server.clients_timeout_table = raxNew();
     server.replication_allowed = 1;
     server.replicas_eldb = -1; /* Force to emit the first SELECT command. */
@@ -2641,6 +2653,7 @@ void initServer(void) {
     server.rdb_last_load_keys_expired = 0;
     server.rdb_last_load_keys_loaded = 0;
     server.dirty = 0;
+    server.crashed = 0;
     resetServerStats();
     /* A few stats we don't want to reset: server startup time, and peak mem. */
     server.stat_starttime = time(NULL);
@@ -2796,7 +2809,7 @@ void initListeners(void) {
  * see: https://sourceware.org/bugzilla/show_bug.cgi?id=19329 */
 void InitServerLast(void) {
     bioInit();
-    initThreadedIO();
+    initIOThreads();
     set_jemalloc_bg_thread(server.jemalloc_bg_thread);
     server.initial_memory_usage = zmalloc_used_memory();
 }
@@ -5395,7 +5408,7 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
             "lru_clock:%u\r\n", server.lruclock,
             "executable:%s\r\n", server.executable ? server.executable : "",
             "config_file:%s\r\n", server.configfile ? server.configfile : "",
-            "io_threads_active:%i\r\n", server.io_threads_active,
+            "io_threads_active:%i\r\n", server.active_io_threads_num > 1,
             "availability_zone:%s\r\n", server.availability_zone));
         /* clang-format on */
 
@@ -5630,23 +5643,10 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
 
     /* Stats */
     if (all_sections || (dictFind(section_dict, "stats") != NULL)) {
-        long long stat_total_reads_processed, stat_total_writes_processed;
-        long long stat_net_input_bytes, stat_net_output_bytes;
-        long long stat_net_repl_input_bytes, stat_net_repl_output_bytes;
         long long current_eviction_exceeded_time =
             server.stat_last_eviction_exceeded_time ? (long long)elapsedUs(server.stat_last_eviction_exceeded_time) : 0;
         long long current_active_defrag_time =
             server.stat_last_active_defrag_time ? (long long)elapsedUs(server.stat_last_active_defrag_time) : 0;
-        long long stat_client_qbuf_limit_disconnections;
-
-        stat_total_reads_processed = atomic_load_explicit(&server.stat_total_reads_processed, memory_order_relaxed);
-        stat_total_writes_processed = atomic_load_explicit(&server.stat_total_writes_processed, memory_order_relaxed);
-        stat_net_input_bytes = atomic_load_explicit(&server.stat_net_input_bytes, memory_order_relaxed);
-        stat_net_output_bytes = atomic_load_explicit(&server.stat_net_output_bytes, memory_order_relaxed);
-        stat_net_repl_input_bytes = atomic_load_explicit(&server.stat_net_repl_input_bytes, memory_order_relaxed);
-        stat_net_repl_output_bytes = atomic_load_explicit(&server.stat_net_repl_output_bytes, memory_order_relaxed);
-        stat_client_qbuf_limit_disconnections =
-            atomic_load_explicit(&server.stat_client_qbuf_limit_disconnections, memory_order_relaxed);
 
         if (sections++) info = sdscat(info, "\r\n");
         /* clang-format off */
@@ -5654,10 +5654,10 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
             "total_connections_received:%lld\r\n", server.stat_numconnections,
             "total_commands_processed:%lld\r\n", server.stat_numcommands,
             "instantaneous_ops_per_sec:%lld\r\n", getInstantaneousMetric(STATS_METRIC_COMMAND),
-            "total_net_input_bytes:%lld\r\n", stat_net_input_bytes + stat_net_repl_input_bytes,
-            "total_net_output_bytes:%lld\r\n", stat_net_output_bytes + stat_net_repl_output_bytes,
-            "total_net_repl_input_bytes:%lld\r\n", stat_net_repl_input_bytes,
-            "total_net_repl_output_bytes:%lld\r\n", stat_net_repl_output_bytes,
+            "total_net_input_bytes:%lld\r\n", server.stat_net_input_bytes + server.stat_net_repl_input_bytes,
+            "total_net_output_bytes:%lld\r\n", server.stat_net_output_bytes + server.stat_net_repl_output_bytes,
+            "total_net_repl_input_bytes:%lld\r\n", server.stat_net_repl_input_bytes,
+            "total_net_repl_output_bytes:%lld\r\n", server.stat_net_repl_output_bytes,
             "instantaneous_input_kbps:%.2f\r\n", (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT)/1024,
             "instantaneous_output_kbps:%.2f\r\n", (float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT)/1024,
             "instantaneous_input_repl_kbps:%.2f\r\n", (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION)/1024,
@@ -5696,11 +5696,11 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
             "unexpected_error_replies:%lld\r\n", server.stat_unexpected_error_replies,
             "total_error_replies:%lld\r\n", server.stat_total_error_replies,
             "dump_payload_sanitizations:%lld\r\n", server.stat_dump_payload_sanitizations,
-            "total_reads_processed:%lld\r\n", stat_total_reads_processed,
-            "total_writes_processed:%lld\r\n", stat_total_writes_processed,
+            "total_reads_processed:%lld\r\n", server.stat_total_reads_processed,
+            "total_writes_processed:%lld\r\n", server.stat_total_writes_processed,
             "io_threaded_reads_processed:%lld\r\n", server.stat_io_reads_processed,
             "io_threaded_writes_processed:%lld\r\n", server.stat_io_writes_processed,
-            "client_query_buffer_limit_disconnections:%lld\r\n", stat_client_qbuf_limit_disconnections,
+            "client_query_buffer_limit_disconnections:%lld\r\n", server.stat_client_qbuf_limit_disconnections,
             "client_output_buffer_limit_disconnections:%lld\r\n", server.stat_client_outbuf_limit_disconnections,
             "reply_buffer_shrinks:%lld\r\n", server.stat_reply_buffer_shrinks,
             "reply_buffer_expands:%lld\r\n", server.stat_reply_buffer_expands,
diff --git a/src/server.h b/src/server.h
index 73f68a73d4..36a4b641e7 100644
--- a/src/server.h
+++ b/src/server.h
@@ -1117,6 +1117,12 @@ typedef struct {
 } clientReqResInfo;
 #endif
 
+typedef enum {
+    CLIENT_IDLE = 0,        /* Initial state: client is idle. */
+    CLIENT_PENDING_IO = 1,  /* Main-thread sets this state when client is sent to IO-thread for read/write. */
+    CLIENT_COMPLETED_IO = 2 /* IO-thread sets this state after completing IO operation. */
+} clientIOState;
+
 typedef struct ClientFlags {
     uint64_t primary : 1;             /* This client is a primary */
     uint64_t replica : 1;             /* This client is a replica */
@@ -1141,6 +1147,7 @@ typedef struct ClientFlags {
     uint64_t prevent_repl_prop : 1;   /* Don't propagate to replicas. */
     uint64_t prevent_prop : 1;        /* Don't propagate to AOF or replicas. */
     uint64_t pending_write : 1;       /* Client has output to send but a write handler is yet not installed. */
+    uint64_t pending_read : 1;        /* Client is linked in the clients_pending_io_read list. */
     uint64_t reply_off : 1;           /* Don't send replies to client. */
     uint64_t reply_skip_next : 1;     /* Set CLIENT_REPLY_SKIP for next cmd */
     uint64_t reply_skip : 1;          /* Don't send just this reply. */
@@ -1173,7 +1180,7 @@ typedef struct ClientFlags {
     uint64_t reprocessing_command : 1;     /* The client is re-processing the command. */
     uint64_t replication_done : 1;         /* Indicate that replication has been done on the client */
     uint64_t authenticated : 1;            /* Indicate a client has successfully authenticated */
-    uint64_t reserved : 10;                /* Reserved for future use */
+    uint64_t reserved : 9;                 /* Reserved for future use */
 } ClientFlags;
 
 typedef struct client {
@@ -1198,6 +1205,13 @@ typedef struct client {
     int original_argc;                   /* Num of arguments of original command if arguments were rewritten. */
     robj **original_argv;                /* Arguments of original command if arguments were rewritten. */
     size_t argv_len_sum;                 /* Sum of lengths of objects in argv list. */
+    volatile uint8_t io_read_state;      /* Indicate the IO read state of the client */
+    volatile uint8_t io_write_state;     /* Indicate the IO write state of the client */
+    uint8_t cur_tid;                     /* ID of IO thread currently performing IO for this client */
+    int nread;                           /* Number of bytes of the last read. */
+    int nwritten;                        /* Number of bytes of the last write. */
+    int read_flags;                      /* Client Read flags - used to communicate the client read state. */
+    uint16_t write_flags;                /* Client Write flags - used to communicate the client write state. */
     struct serverCommand *cmd, *lastcmd; /* Last command executed. */
     struct serverCommand *realcmd;       /* The original command that was executed by the client,
                                            Used to update error stats in case the c->cmd was modified
@@ -1209,6 +1223,7 @@ typedef struct client {
     int multibulklen;                    /* Number of multi bulk arguments left to read. */
     long bulklen;                        /* Length of bulk argument in multi bulk request. */
     list *reply;                         /* List of reply objects to send to the client. */
+    listNode *io_last_reply_block;       /* Last client reply block when sent to IO thread */
     unsigned long long reply_bytes;      /* Tot bytes of objects in reply list. */
     list *deferred_reply_errors;         /* Used for module thread safe contexts. */
     size_t sentlen;                      /* Amount of bytes already sent in the current
@@ -1253,7 +1268,6 @@ typedef struct client {
     sds sockname;                        /* Cached connection target address. */
     listNode *client_list_node;          /* list node in client list */
     listNode *postponed_list_node;       /* list node within the postponed list */
-    listNode *pending_read_list_node;    /* list node in clients pending read list */
     void *module_blocked_client;         /* Pointer to the ValkeyModuleBlockedClient associated with this
                                           * client. This is set in case of module authentication before the
                                           * unblocked client is reprocessed to handle reply callbacks. */
@@ -1293,12 +1307,14 @@ typedef struct client {
     size_t ref_block_pos;        /* Access position of referenced buffer block,
                                   * i.e. the next offset to send. */
 
-    /* list node in clients_pending_write list */
+    /* list node in clients_pending_write or in clients_pending_io_write list */
     listNode clients_pending_write_node;
+    listNode pending_read_list_node; /* list node in clients_pending_io_read list */
     /* Response buffer */
     size_t buf_peak;                   /* Peak used size of buffer in last 5 sec interval. */
     mstime_t buf_peak_last_reset_time; /* keeps the last time the buffer peak value was reset */
     int bufpos;
+    size_t io_last_bufpos;  /* The client's bufpos at the time it was sent to the IO thread */
     size_t buf_usable_size; /* Usable size of buffer. */
     char *buf;
 #ifdef LOG_REQ_RES
@@ -1629,7 +1645,8 @@ struct valkeyServer {
     list *clients;                         /* List of active clients */
     list *clients_to_close;                /* Clients to close asynchronously */
     list *clients_pending_write;           /* There is to write or install handler. */
-    list *clients_pending_read;            /* Client has pending read socket buffers. */
+    list *clients_pending_io_read;         /* List of clients with pending read to be processed by I/O threads. */
+    list *clients_pending_io_write;        /* List of clients with pending write to be processed by I/O threads. */
     list *replicas, *monitors;             /* List of replicas and MONITORs */
     client *current_client;                /* The client that triggered the command execution (External or AOF). */
     client *executing_client;              /* The client executing the current command (possibly script or module). */
@@ -1657,7 +1674,8 @@ struct valkeyServer {
     int protected_mode;                       /* Don't accept external connections. */
     int io_threads_num;                       /* Number of IO threads to use. */
     int io_threads_do_reads;                  /* Read and parse from IO threads? */
-    int io_threads_active;                    /* Is IO threads currently active? */
+    int active_io_threads_num;                /* Current number of active IO threads, includes main thread. */
+    int events_per_io_thread;                 /* Number of events on the event loop to trigger IO threads activation. */
     long long events_processed_while_blocked; /* processEventsWhileBlocked() */
     int enable_protected_configs; /* Enable the modification of protected configs, see PROTECTED_ACTION_ALLOWED_* */
     int enable_debug_cmd;         /* Enable DEBUG commands, see PROTECTED_ACTION_ALLOWED_* */
@@ -1710,15 +1728,14 @@ struct valkeyServer {
     long long slowlog_log_slower_than;             /* SLOWLOG time limit (to get logged) */
     unsigned long slowlog_max_len;                 /* SLOWLOG max number of items logged */
     struct malloc_stats cron_malloc_stats;         /* sampled in serverCron(). */
-    _Atomic long long stat_net_input_bytes;        /* Bytes read from network. */
-    _Atomic long long stat_net_output_bytes;       /* Bytes written to network. */
-    _Atomic long long
-        stat_net_repl_input_bytes; /* Bytes read during replication, added to stat_net_input_bytes in 'info'. */
-    _Atomic long long
-        stat_net_repl_output_bytes;    /* Bytes written during replication, added to stat_net_output_bytes in 'info'. */
-    size_t stat_current_cow_peak;      /* Peak size of copy on write bytes. */
-    size_t stat_current_cow_bytes;     /* Copy on write bytes while child is active. */
-    monotime stat_current_cow_updated; /* Last update time of stat_current_cow_bytes */
+    long long stat_net_input_bytes;                /* Bytes read from network. */
+    long long stat_net_output_bytes;               /* Bytes written to network. */
+    long long stat_net_repl_input_bytes; /* Bytes read during replication, added to stat_net_input_bytes in 'info'. */
+    /* Bytes written during replication, added to stat_net_output_bytes in 'info'. */
+    long long stat_net_repl_output_bytes;
+    size_t stat_current_cow_peak;                       /* Peak size of copy on write bytes. */
+    size_t stat_current_cow_bytes;                      /* Copy on write bytes while child is active. */
+    monotime stat_current_cow_updated;                  /* Last update time of stat_current_cow_bytes */
     size_t stat_current_save_keys_processed;            /* Processed keys while child is active. */
     size_t stat_current_save_keys_total;                /* Number of keys when child started. */
     size_t stat_rdb_cow_bytes;                          /* Copy on write bytes during RDB saving. */
@@ -1730,12 +1747,12 @@ struct valkeyServer {
     long long
         stat_unexpected_error_replies;  /* Number of unexpected (aof-loading, replica to primary, etc.) error replies */
     long long stat_total_error_replies; /* Total number of issued error replies ( command + rejected errors ) */
-    long long stat_dump_payload_sanitizations;               /* Number deep dump payloads integrity validations. */
-    long long stat_io_reads_processed;                       /* Number of read events processed by IO / Main threads */
-    long long stat_io_writes_processed;                      /* Number of write events processed by IO / Main threads */
-    _Atomic long long stat_total_reads_processed;            /* Total number of read events processed */
-    _Atomic long long stat_total_writes_processed;           /* Total number of write events processed */
-    _Atomic long long stat_client_qbuf_limit_disconnections; /* Total number of clients reached query buf length limit */
+    long long stat_dump_payload_sanitizations;         /* Number deep dump payloads integrity validations. */
+    long long stat_io_reads_processed;                 /* Number of read events processed by IO threads */
+    long long stat_io_writes_processed;                /* Number of write events processed by IO threads */
+    long long stat_total_reads_processed;              /* Total number of read events processed */
+    long long stat_total_writes_processed;             /* Total number of write events processed */
+    long long stat_client_qbuf_limit_disconnections;   /* Total number of clients reached query buf length limit */
     long long stat_client_outbuf_limit_disconnections; /* Total number of clients reached output buf length limit */
     /* The following two are used to track instantaneous metrics, like
      * number of operations per second, network traffic. */
@@ -1881,6 +1898,8 @@ struct valkeyServer {
     int syslog_facility;   /* Syslog facility */
     int crashlog_enabled;  /* Enable signal handler for crashlog.
                             * disable for clean core dumps. */
+    int crashed;           /* True if the server has crashed, used in catClientInfoString
+                            * to indicate that no wait for IO threads is needed. */
     int memcheck_enabled;  /* Enable memory check on crash. */
     int use_exit_on_panic; /* Use exit() on panic and assert rather than
                             * abort(). useful for Valgrind. */
@@ -2002,7 +2021,7 @@ struct valkeyServer {
     int list_max_listpack_size;
     int list_compress_depth;
     /* time cache */
-    _Atomic time_t unixtime;     /* Unix time sampled every cron cycle. */
+    time_t unixtime;             /* Unix time sampled every cron cycle. */
     time_t timezone;             /* Cached timezone. As set by tzset(). */
     int daylight_active;         /* Currently in daylight saving time. */
     mstime_t mstime;             /* 'unixtime' in milliseconds. */
@@ -2491,11 +2510,6 @@ typedef struct {
 #define OBJ_HASH_KEY 1
 #define OBJ_HASH_VALUE 2
 
-#define IO_THREADS_OP_IDLE 0
-#define IO_THREADS_OP_READ 1
-#define IO_THREADS_OP_WRITE 2
-extern int io_threads_op;
-
 /*-----------------------------------------------------------------------------
  * Extern declarations
  *----------------------------------------------------------------------------*/
@@ -2601,11 +2615,35 @@ void dictVanillaFree(dict *d, void *val);
     (1ULL << 0) /* Indicating that we should not update                                                                \
                    error stats after sending error reply */
 /* networking.c -- Networking and Client related operations */
+
+/* Read flags for various read errors and states */
+#define READ_FLAGS_QB_LIMIT_REACHED (1 << 0)
+#define READ_FLAGS_ERROR_BIG_INLINE_REQUEST (1 << 1)
+#define READ_FLAGS_ERROR_BIG_MULTIBULK (1 << 2)
+#define READ_FLAGS_ERROR_INVALID_MULTIBULK_LEN (1 << 3)
+#define READ_FLAGS_ERROR_UNAUTHENTICATED_MULTIBULK_LEN (1 << 4)
+#define READ_FLAGS_ERROR_UNAUTHENTICATED_BULK_LEN (1 << 5)
+#define READ_FLAGS_ERROR_BIG_BULK_COUNT (1 << 6)
+#define READ_FLAGS_ERROR_MBULK_UNEXPECTED_CHARACTER (1 << 7)
+#define READ_FLAGS_ERROR_MBULK_INVALID_BULK_LEN (1 << 8)
+#define READ_FLAGS_ERROR_UNEXPECTED_INLINE_FROM_PRIMARY (1 << 9)
+#define READ_FLAGS_ERROR_UNBALANCED_QUOTES (1 << 10)
+#define READ_FLAGS_INLINE_ZERO_QUERY_LEN (1 << 11)
+#define READ_FLAGS_PARSING_NEGATIVE_MBULK_LEN (1 << 12)
+#define READ_FLAGS_PARSING_COMPLETED (1 << 13)
+#define READ_FLAGS_PRIMARY (1 << 14)
+#define READ_FLAGS_DONT_PARSE (1 << 15)
+#define READ_FLAGS_AUTH_REQUIRED (1 << 16)
+
+/* Write flags for various write errors and states */
+#define WRITE_FLAGS_WRITE_ERROR (1 << 0)
+
+
 client *createClient(connection *conn);
 void freeClient(client *c);
 void freeClientAsync(client *c);
 void logInvalidUseAndFreeClientAsync(client *c, const char *fmt, ...);
-int beforeNextClient(client *c);
+void beforeNextClient(client *c);
 void clearClientConnectionState(client *c);
 void resetClient(client *c);
 void freeClientOriginalArgv(client *c);
@@ -2698,24 +2736,28 @@ void whileBlockedCron(void);
 void blockingOperationStarts(void);
 void blockingOperationEnds(void);
 int handleClientsWithPendingWrites(void);
-int handleClientsWithPendingWritesUsingThreads(void);
-int handleClientsWithPendingReadsUsingThreads(void);
-int stopThreadedIOIfNeeded(void);
+void adjustThreadedIOIfNeeded(void);
 int clientHasPendingReplies(client *c);
 int updateClientMemUsageAndBucket(client *c);
 void removeClientFromMemUsageBucket(client *c, int allow_eviction);
 void unlinkClient(client *c);
-int writeToClient(client *c, int handler_installed);
+int writeToClient(client *c);
 void linkClient(client *c);
 void protectClient(client *c);
 void unprotectClient(client *c);
-void initThreadedIO(void);
 void initSharedQueryBuf(void);
+void freeSharedQueryBuf(void);
 client *lookupClientByID(uint64_t id);
 int authRequired(client *c);
 void putClientInPendingWriteQueue(client *c);
 client *createCachedResponseClient(int resp);
 void deleteCachedResponseClient(client *recording_client);
+void waitForClientIO(client *c);
+void ioThreadReadQueryFromClient(void *data);
+void ioThreadWriteToClient(void *data);
+int canParseCommand(client *c);
+int processIOThreadsReadDone(void);
+int processIOThreadsWriteDone(void);
 
 /* logreqres.c - logging of requests and responses */
 void reqresReset(client *c, int free_buf);
@@ -3834,7 +3876,6 @@ void xorDigest(unsigned char *digest, const void *ptr, size_t len);
 sds catSubCommandFullname(const char *parent_name, const char *sub_name);
 void commandAddSubcommand(struct serverCommand *parent, struct serverCommand *subcommand, const char *declared_name);
 void debugDelay(int usec);
-void killIOThreads(void);
 void killThreads(void);
 void makeThreadKillable(void);
 void swapMainDbWithTempDb(serverDb *tempDb);
diff --git a/src/socket.c b/src/socket.c
index 5aa3606990..b2f8f1aaec 100644
--- a/src/socket.c
+++ b/src/socket.c
@@ -423,6 +423,8 @@ static ConnectionType CT_Socket = {
     /* pending data */
     .has_pending_data = NULL,
     .process_pending_data = NULL,
+    .postpone_update_state = NULL,
+    .update_state = NULL,
 };
 
 int connBlock(connection *conn) {
diff --git a/src/tls.c b/src/tls.c
index 2d4d6cd0ae..1913d876fa 100644
--- a/src/tls.c
+++ b/src/tls.c
@@ -442,6 +442,7 @@ typedef enum { WANT_READ = 1, WANT_WRITE } WantIOType;
 #define TLS_CONN_FLAG_READ_WANT_WRITE (1 << 0)
 #define TLS_CONN_FLAG_WRITE_WANT_READ (1 << 1)
 #define TLS_CONN_FLAG_FD_SET (1 << 2)
+#define TLS_CONN_FLAG_POSTPONE_UPDATE_STATE (1 << 3)
 
 typedef struct tls_connection {
     connection c;
@@ -596,7 +597,34 @@ static void registerSSLEvent(tls_connection *conn, WantIOType want) {
     }
 }
 
+static void postPoneUpdateSSLState(connection *conn_, int postpone) {
+    tls_connection *conn = (tls_connection *)conn_;
+    if (postpone) {
+        conn->flags |= TLS_CONN_FLAG_POSTPONE_UPDATE_STATE;
+    } else {
+        conn->flags &= ~TLS_CONN_FLAG_POSTPONE_UPDATE_STATE;
+    }
+}
+
+static void updatePendingData(tls_connection *conn) {
+    if (conn->flags & TLS_CONN_FLAG_POSTPONE_UPDATE_STATE) return;
+
+    /* If SSL has pending data, already read from the socket, we're at risk of not calling the read handler again, make
+     * sure to add it to a list of pending connections that should be handled anyway. */
+    if (SSL_pending(conn->ssl) > 0) {
+        if (!conn->pending_list_node) {
+            listAddNodeTail(pending_list, conn);
+            conn->pending_list_node = listLast(pending_list);
+        }
+    } else if (conn->pending_list_node) {
+        listDelNode(pending_list, conn->pending_list_node);
+        conn->pending_list_node = NULL;
+    }
+}
+
 static void updateSSLEvent(tls_connection *conn) {
+    if (conn->flags & TLS_CONN_FLAG_POSTPONE_UPDATE_STATE) return;
+
     int mask = aeGetFileEvents(server.el, conn->c.fd);
     int need_read = conn->c.read_handler || (conn->flags & TLS_CONN_FLAG_WRITE_WANT_READ);
     int need_write = conn->c.write_handler || (conn->flags & TLS_CONN_FLAG_READ_WANT_WRITE);
@@ -610,6 +638,12 @@ static void updateSSLEvent(tls_connection *conn) {
     if (!need_write && (mask & AE_WRITABLE)) aeDeleteFileEvent(server.el, conn->c.fd, AE_WRITABLE);
 }
 
+static void updateSSLState(connection *conn_) {
+    tls_connection *conn = (tls_connection *)conn_;
+    updateSSLEvent(conn);
+    updatePendingData(conn);
+}
+
 static void tlsHandleEvent(tls_connection *conn, int mask) {
     int ret, conn_error;
 
@@ -711,19 +745,8 @@ static void tlsHandleEvent(tls_connection *conn, int mask) {
             if (!callHandler((connection *)conn, conn->c.read_handler)) return;
         }
 
-        /* If SSL has pending that, already read from the socket, we're at
-         * risk of not calling the read handler again, make sure to add it
-         * to a list of pending connection that should be handled anyway. */
-        if ((mask & AE_READABLE)) {
-            if (SSL_pending(conn->ssl) > 0) {
-                if (!conn->pending_list_node) {
-                    listAddNodeTail(pending_list, conn);
-                    conn->pending_list_node = listLast(pending_list);
-                }
-            } else if (conn->pending_list_node) {
-                listDelNode(pending_list, conn->pending_list_node);
-                conn->pending_list_node = NULL;
-            }
+        if (mask & AE_READABLE) {
+            updatePendingData(conn);
         }
 
         break;
@@ -1051,11 +1074,13 @@ static int tlsProcessPendingData(void) {
     listIter li;
     listNode *ln;
 
-    int processed = listLength(pending_list);
+    int processed = 0;
     listRewind(pending_list, &li);
     while ((ln = listNext(&li))) {
         tls_connection *conn = listNodeValue(ln);
+        if (conn->flags & TLS_CONN_FLAG_POSTPONE_UPDATE_STATE) continue;
         tlsHandleEvent(conn, AE_READABLE);
+        processed++;
     }
     return processed;
 }
@@ -1125,6 +1150,8 @@ static ConnectionType CT_TLS = {
     /* pending data */
     .has_pending_data = tlsHasPendingData,
     .process_pending_data = tlsProcessPendingData,
+    .postpone_update_state = postPoneUpdateSSLState,
+    .update_state = updateSSLState,
 
     /* TLS specified methods */
     .get_peer_cert = connTLSGetPeerCert,
diff --git a/src/unix.c b/src/unix.c
index ca38e83ed0..795b2db9f1 100644
--- a/src/unix.c
+++ b/src/unix.c
@@ -198,6 +198,8 @@ static ConnectionType CT_Unix = {
     /* pending data */
     .has_pending_data = NULL,
     .process_pending_data = NULL,
+    .postpone_update_state = NULL,
+    .update_state = NULL,
 };
 
 int RedisRegisterConnectionTypeUnix(void) {
diff --git a/tests/integration/failover.tcl b/tests/integration/failover.tcl
index 70bb66284d..3049cd0ca0 100644
--- a/tests/integration/failover.tcl
+++ b/tests/integration/failover.tcl
@@ -257,6 +257,12 @@ start_server {overrides {save {}}} {
         # during the pause. This write will not be interrupted.
         pause_process [srv -1 pid]
         set rd [valkey_deferring_client]
+        # wait for the client creation
+        wait_for_condition 50 100 {
+            [s connected_clients] == 2
+        } else {
+            fail "Client creation failed"
+        }
         $rd SET FOO BAR
         $node_0 failover to $node_1_host $node_1_port
         resume_process [srv -1 pid]
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl
index f56fe0a1dc..9634f78252 100644
--- a/tests/integration/replication.tcl
+++ b/tests/integration/replication.tcl
@@ -167,6 +167,7 @@ start_server {tags {"repl external:skip"}} {
         test {BLPOP followed by role change, issue #2473} {
             set rd [valkey_deferring_client]
             $rd blpop foo 0 ; # Block while B is a master
+            wait_for_blocked_clients_count 1
 
             # Turn B into master of A
             $A slaveof no one
diff --git a/tests/integration/shutdown.tcl b/tests/integration/shutdown.tcl
index b2fdb845a3..9949afe27c 100644
--- a/tests/integration/shutdown.tcl
+++ b/tests/integration/shutdown.tcl
@@ -156,6 +156,12 @@ test "Shutting down master waits for replica then fails" {
             set rd2 [valkey_deferring_client -1]
             $rd1 shutdown
             $rd2 shutdown
+            wait_for_condition 50 100 {
+                [llength [lsearch -all [split [string trim [$master client list]] "\r\n"] *cmd=shutdown*]] == 2
+            } else {
+                fail "SHUTDOWN not called on all clients"
+            }
+
             set info_clients [$master info clients]
             assert_match "*connected_clients:3*" $info_clients
             assert_match "*blocked_clients:2*" $info_clients
@@ -209,6 +215,12 @@ test "Shutting down master waits for replica then aborted" {
             set rd2 [valkey_deferring_client -1]
             $rd1 shutdown
             $rd2 shutdown
+            wait_for_condition 50 100 {
+                [llength [lsearch -all [split [string trim [$master client list]] "\r\n"] *cmd=shutdown*]] == 2
+            } else {
+                fail "SHUTDOWN not called on all clients"
+            }
+
             set info_clients [$master info clients]
             assert_match "*connected_clients:3*" $info_clients
             assert_match "*blocked_clients:2*" $info_clients
diff --git a/tests/integration/valkey-cli.tcl b/tests/integration/valkey-cli.tcl
index 153c527055..6344215a25 100644
--- a/tests/integration/valkey-cli.tcl
+++ b/tests/integration/valkey-cli.tcl
@@ -65,6 +65,7 @@ start_server {tags {"cli"}} {
 
     proc run_command {fd cmd} {
         write_cli $fd $cmd
+        after 50
         set _ [format_output [read_cli $fd]]
     }
 
diff --git a/tests/unit/client-eviction.tcl b/tests/unit/client-eviction.tcl
index afcdcd1323..ceeb20f7b6 100644
--- a/tests/unit/client-eviction.tcl
+++ b/tests/unit/client-eviction.tcl
@@ -91,17 +91,31 @@ start_server {} {
         lassign [gen_client] rr cname
         # Attempt to fill the query buff with only half the percentage threshold verify we're not disconnected
         set n [expr $maxmemory_clients_actual / 2]
-        $rr write [join [list "*1\r\n\$$n\r\n" [string repeat v $n]] ""]
+        # send incomplete command (n - 1) to make sure we don't use the shared qb
+        $rr write [join [list "*1\r\n\$$n\r\n" [string repeat v [expr {$n - 1}]]] ""]
         $rr flush
+        # Wait for the client to start using a private query buffer. 
+        wait_for_condition 10 10 {
+            [client_field $cname qbuf] > 0
+        } else {
+            fail "client should start using a private query buffer"
+        }
         set tot_mem [client_field $cname tot-mem]
         assert {$tot_mem >= $n && $tot_mem < $maxmemory_clients_actual}
 
         # Attempt to fill the query buff with the percentage threshold of maxmemory and verify we're evicted
         $rr close
         lassign [gen_client] rr cname
+        # send incomplete command (maxmemory_clients_actual - 1) to make sure we don't use the shared qb
         catch {
-            $rr write [join [list "*1\r\n\$$maxmemory_clients_actual\r\n" [string repeat v $maxmemory_clients_actual]] ""]
+            $rr write [join [list "*1\r\n\$$maxmemory_clients_actual\r\n" [string repeat v [expr {$maxmemory_clients_actual - 1}]]] ""]
             $rr flush
+            # Wait for the client to start using a private query buffer. 
+            wait_for_condition 10 10 {
+                [client_field $cname qbuf] > 0
+            } else {
+                fail "client should start using a private query buffer"
+            }
         } e
         assert {![client_exists $cname]}
         $rr close
@@ -399,6 +413,11 @@ start_server {} {
 
         # Decrease maxmemory_clients and expect client eviction
         r config set maxmemory-clients [expr $maxmemory_clients / 2]
+        wait_for_condition 50 10 {
+            [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *name=client*]] < $client_count
+        } else {
+            fail "Failed to evict clients"
+        }
         set connected_clients [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *name=client*]]
         assert {$connected_clients > 0 && $connected_clients < $client_count}
 
diff --git a/tests/unit/cluster/pubsubshard.tcl b/tests/unit/cluster/pubsubshard.tcl
index e32b6a3a0e..d38c22dedb 100644
--- a/tests/unit/cluster/pubsubshard.tcl
+++ b/tests/unit/cluster/pubsubshard.tcl
@@ -62,7 +62,13 @@ test "sunsubscribe without specifying any channel would unsubscribe all shard ch
     set sub_res [ssubscribe $subscribeclient [list "\{channel.0\}1" "\{channel.0\}2" "\{channel.0\}3"]]
     assert_equal [list 1 2 3] $sub_res
     sunsubscribe $subscribeclient
-
+    
+    # wait for the unsubscribe to take effect
+    wait_for_condition 50 10 {
+        [$publishclient spublish "\{channel.0\}1" hello] eq 0
+    } else {
+        fail "unsubscribe did not take effect as expected"
+    }
     assert_equal 0 [$publishclient spublish "\{channel.0\}1" hello]
     assert_equal 0 [$publishclient spublish "\{channel.0\}2" hello]
     assert_equal 0 [$publishclient spublish "\{channel.0\}3" hello]
diff --git a/tests/unit/dump.tcl b/tests/unit/dump.tcl
index 9018270d67..e4c0f9d312 100644
--- a/tests/unit/dump.tcl
+++ b/tests/unit/dump.tcl
@@ -287,6 +287,7 @@ start_server {tags {"dump"}} {
 
             set rd [valkey_deferring_client]
             $rd debug sleep 1.0 ; # Make second server unable to reply.
+            after 100; # wait to make sure DEBUG command was executed.
             set e {}
             catch {r -1 migrate $second_host $second_port key 9 500} e
             assert_match {IOERR*} $e
diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl
index 17dc6a1861..befecae220 100644
--- a/tests/unit/info.tcl
+++ b/tests/unit/info.tcl
@@ -295,47 +295,50 @@ start_server {tags {"info" "external:skip"}} {
             }
         }
 
-        test {stats: eventloop metrics} {
-            set info1 [r info stats]
-            set cycle1 [getInfoProperty $info1 eventloop_cycles]
-            set el_sum1 [getInfoProperty $info1 eventloop_duration_sum]
-            set cmd_sum1 [getInfoProperty $info1 eventloop_duration_cmd_sum]
-            assert_morethan $cycle1 0
-            assert_morethan $el_sum1 0
-            assert_morethan $cmd_sum1 0
-            after 110 ;# default hz is 10, wait for a cron tick. 
-            set info2 [r info stats]
-            set cycle2 [getInfoProperty $info2 eventloop_cycles]
-            set el_sum2 [getInfoProperty $info2 eventloop_duration_sum]
-            set cmd_sum2 [getInfoProperty $info2 eventloop_duration_cmd_sum]
-            if {$::verbose} { puts "eventloop metrics cycle1: $cycle1, cycle2: $cycle2" }
-            assert_morethan $cycle2 $cycle1
-            assert_lessthan $cycle2 [expr $cycle1+10] ;# we expect 2 or 3 cycles here, but allow some tolerance
-            if {$::verbose} { puts "eventloop metrics el_sum1: $el_sum1, el_sum2: $el_sum2" }
-            assert_morethan $el_sum2 $el_sum1
-            assert_lessthan $el_sum2 [expr $el_sum1+30000] ;# we expect roughly 100ms here, but allow some tolerance
-            if {$::verbose} { puts "eventloop metrics cmd_sum1: $cmd_sum1, cmd_sum2: $cmd_sum2" }
-            assert_morethan $cmd_sum2 $cmd_sum1
-            assert_lessthan $cmd_sum2 [expr $cmd_sum1+15000] ;# we expect about tens of ms here, but allow some tolerance
-        }
-
-        test {stats: instantaneous metrics} {
-            r config resetstat
-            set retries 0
-            for {set retries 1} {$retries < 4} {incr retries} {
-                after 1600 ;# hz is 10, wait for 16 cron tick so that sample array is fulfilled
-                set value [s instantaneous_eventloop_cycles_per_sec]
-                if {$value > 0} break
+        # Skip the following 2 tests when IO threads are enabled (io-threads > 1; a value of 1 means main thread only), as the eventloop metrics are different in that case. CONFIG GET replies with a {name value} pair, so extract the value before comparing.
+        if {[lindex [r config get io-threads] 1] == 1} {
+            test {stats: eventloop metrics} {
+                set info1 [r info stats]
+                set cycle1 [getInfoProperty $info1 eventloop_cycles]
+                set el_sum1 [getInfoProperty $info1 eventloop_duration_sum]
+                set cmd_sum1 [getInfoProperty $info1 eventloop_duration_cmd_sum]
+                assert_morethan $cycle1 0
+                assert_morethan $el_sum1 0
+                assert_morethan $cmd_sum1 0
+                after 110 ;# default hz is 10, wait for a cron tick. 
+                set info2 [r info stats]
+                set cycle2 [getInfoProperty $info2 eventloop_cycles]
+                set el_sum2 [getInfoProperty $info2 eventloop_duration_sum]
+                set cmd_sum2 [getInfoProperty $info2 eventloop_duration_cmd_sum]
+                if {$::verbose} { puts "eventloop metrics cycle1: $cycle1, cycle2: $cycle2" }
+                assert_morethan $cycle2 $cycle1
+                assert_lessthan $cycle2 [expr $cycle1+10] ;# we expect 2 or 3 cycles here, but allow some tolerance
+                if {$::verbose} { puts "eventloop metrics el_sum1: $el_sum1, el_sum2: $el_sum2" }
+                assert_morethan $el_sum2 $el_sum1
+                assert_lessthan $el_sum2 [expr $el_sum1+30000] ;# we expect roughly 100ms here, but allow some tolerance
+                if {$::verbose} { puts "eventloop metrics cmd_sum1: $cmd_sum1, cmd_sum2: $cmd_sum2" }
+                assert_morethan $cmd_sum2 $cmd_sum1
+                assert_lessthan $cmd_sum2 [expr $cmd_sum1+15000] ;# we expect about tens of ms here, but allow some tolerance
+            }
+    
+            test {stats: instantaneous metrics} {
+                r config resetstat
+                set retries 0
+                for {set retries 1} {$retries < 4} {incr retries} {
+                    after 1600 ;# hz is 10, wait for 16 cron tick so that sample array is fulfilled
+                    set value [s instantaneous_eventloop_cycles_per_sec]
+                    if {$value > 0} break
+                }
+    
+                assert_lessthan $retries 4
+                if {$::verbose} { puts "instantaneous metrics instantaneous_eventloop_cycles_per_sec: $value" }
+                assert_morethan $value 0
+                assert_lessthan $value [expr $retries*15] ;# default hz is 10
+                set value [s instantaneous_eventloop_duration_usec]
+                if {$::verbose} { puts "instantaneous metrics instantaneous_eventloop_duration_usec: $value" }
+                assert_morethan $value 0
+                assert_lessthan $value [expr $retries*22000] ;# default hz is 10, so duration < 1000 / 10, allow some tolerance
             }
-
-            assert_lessthan $retries 4
-            if {$::verbose} { puts "instantaneous metrics instantaneous_eventloop_cycles_per_sec: $value" }
-            assert_morethan $value 0
-            assert_lessthan $value [expr $retries*15] ;# default hz is 10
-            set value [s instantaneous_eventloop_duration_usec]
-            if {$::verbose} { puts "instantaneous metrics instantaneous_eventloop_duration_usec: $value" }
-            assert_morethan $value 0
-            assert_lessthan $value [expr $retries*22000] ;# default hz is 10, so duration < 1000 / 10, allow some tolerance
         }
 
         test {stats: debug metrics} {
diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl
index ee1232796d..66dae2546a 100644
--- a/tests/unit/maxmemory.tcl
+++ b/tests/unit/maxmemory.tcl
@@ -98,6 +98,7 @@ start_server {tags {"maxmemory" "external:skip"}} {
                         $rr write "\r\n"
                         $rr flush
                     }
+                    after 100; # give the server some time to process the input buffer - this was added to make sure the test pass with io-threads active.
                 }]} {
                     lremove clients $rr
                 }
diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl
index 525db407bf..feb98d9cdd 100644
--- a/tests/unit/memefficiency.tcl
+++ b/tests/unit/memefficiency.tcl
@@ -404,6 +404,8 @@ run_solo {defrag} {
             r save ;# saving an rdb iterates over all the data / pointers
         } {OK}
 
+        # Skip the following two tests when IO threads are enabled (io-threads > 1; a value of 1 means main thread only), as the IO threads allocate the command arguments in a different arena. As a result, fragmentation is not as expected. CONFIG GET replies with a {name value} pair, so extract the value before comparing.
+        if {[lindex [r config get io-threads] 1] == 1} {
         test "Active defrag pubsub: $type" {
             r flushdb
             r config resetstat
@@ -502,6 +504,7 @@ run_solo {defrag} {
             }
             $rd_pubsub close
         }
+        } ;# io-threads
 
         if {$type eq "standalone"} { ;# skip in cluster mode
         test "Active defrag big list: $type" {
diff --git a/tests/unit/moduleapi/blockedclient.tcl b/tests/unit/moduleapi/blockedclient.tcl
index d94ef5c5ba..bb0a15db50 100644
--- a/tests/unit/moduleapi/blockedclient.tcl
+++ b/tests/unit/moduleapi/blockedclient.tcl
@@ -128,7 +128,7 @@ foreach call_type {nested normal} {
         # send another command after the blocked one, to make sure we don't attempt to process it
         $rd ping
         $rd flush
-
+        after 100
         # make sure we get BUSY error, and that we didn't get it too early
         assert_error {*BUSY Slow module operation*} {r ping}
         assert_morethan_equal [expr [clock clicks -milliseconds]-$start] $busy_time_limit
diff --git a/tests/unit/pubsub.tcl b/tests/unit/pubsub.tcl
index 2f336dfcb9..72d0498ce1 100644
--- a/tests/unit/pubsub.tcl
+++ b/tests/unit/pubsub.tcl
@@ -85,6 +85,12 @@ start_server {tags {"pubsub network"}} {
         set rd1 [valkey_deferring_client]
         assert_equal {1 2 3} [subscribe $rd1 {chan1 chan2 chan3}]
         unsubscribe $rd1
+        # wait for the unsubscribe to take effect
+        wait_for_condition 50 100 {
+            [r publish chan1 hello] eq 0
+        } else {
+            fail "unsubscribe did not take effect"
+        }
         assert_equal 0 [r publish chan1 hello]
         assert_equal 0 [r publish chan2 hello]
         assert_equal 0 [r publish chan3 hello]
@@ -158,6 +164,12 @@ start_server {tags {"pubsub network"}} {
         set rd1 [valkey_deferring_client]
         assert_equal {1 2 3} [psubscribe $rd1 {chan1.* chan2.* chan3.*}]
         punsubscribe $rd1
+        # wait for the unsubscribe to take effect
+        wait_for_condition 50 100 {
+            [r publish chan1.hi hello] eq 0
+        } else {
+            fail "unsubscribe did not take effect"
+        }
         assert_equal 0 [r publish chan1.hi hello]
         assert_equal 0 [r publish chan2.hi hello]
         assert_equal 0 [r publish chan3.hi hello]
diff --git a/tests/unit/pubsubshard.tcl b/tests/unit/pubsubshard.tcl
index d56f36ffaa..e19db211f7 100644
--- a/tests/unit/pubsubshard.tcl
+++ b/tests/unit/pubsubshard.tcl
@@ -46,6 +46,14 @@ start_server {tags {"pubsubshard external:skip"}} {
         assert_equal {2} [ssubscribe $rd1 {chan2}]
         assert_equal {3} [ssubscribe $rd1 {chan3}]
         sunsubscribe $rd1
+
+        # wait for the unsubscribe to take effect
+        wait_for_condition 50 100 {
+            [r spublish chan1 hello] eq 0
+        } else {
+            fail "unsubscribe did not take effect"
+        }
+
         assert_equal 0 [r SPUBLISH chan1 hello]
         assert_equal 0 [r SPUBLISH chan2 hello]
         assert_equal 0 [r SPUBLISH chan3 hello]
diff --git a/tests/unit/querybuf.tcl b/tests/unit/querybuf.tcl
index 519743d248..f0f432b38f 100644
--- a/tests/unit/querybuf.tcl
+++ b/tests/unit/querybuf.tcl
@@ -92,7 +92,7 @@ start_server {tags {"querybuf slow"}} {
             # Write something smaller, so query buf peak can shrink
             $rd set x [string repeat A 100]
             set new_test_client_qbuf [client_query_buffer test_client]
-            if {$new_test_client_qbuf < $orig_test_client_qbuf} { break } 
+            if {$new_test_client_qbuf < $orig_test_client_qbuf && $new_test_client_qbuf > 0} { break } 
             if {[expr [clock milliseconds] - $t] > 1000} { break }
             after 10
         }
diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl
index e6c8bb331f..4773a58820 100644
--- a/tests/unit/type/list.tcl
+++ b/tests/unit/type/list.tcl
@@ -1100,6 +1100,13 @@ foreach {pop} {BLPOP BLMPOP_LEFT} {
         $watching_client get somekey{t}
         $watching_client read
         $watching_client exec
+        # wait for exec to be called.
+        wait_for_condition 50 10 {
+            [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *cmd=exec*]] == 1
+        } else {
+            fail "$cmd was not called"
+        }
+
         # Blocked BLPOPLPUSH may create problems, unblock it.
         r lpush srclist{t} element
         set res [$watching_client read]
diff --git a/tests/unit/type/stream-cgroups.tcl b/tests/unit/type/stream-cgroups.tcl
index 2cd812e521..d934e48140 100644
--- a/tests/unit/type/stream-cgroups.tcl
+++ b/tests/unit/type/stream-cgroups.tcl
@@ -520,7 +520,7 @@ start_server {
 
         # Before the fix in #13004, this time would have been 1200+ (i.e. more than 1200ms),
         # now it should be 1000, but in order to avoid timing issues, we increase the range a bit.
-        assert_range [expr $end-$start] 1000 1150
+        assert_range [expr $end-$start] 1000 1199
 
         $rd1 close
         $rd2 close
@@ -931,14 +931,14 @@ start_server {
         set reply [r xinfo consumers mystream mygroup]
         set consumer_info [lindex $reply 0]
         assert_equal [lindex $consumer_info 1] "Alice" ;# consumer name
-        assert {[dict get $consumer_info idle] < 80} ;# consumer idle (seen-time)
-        assert {[dict get $consumer_info inactive] < 80} ;# consumer inactive (active-time)
+        assert {[dict get $consumer_info idle] < 300} ;# consumer idle (seen-time)
+        assert {[dict get $consumer_info inactive] < 300} ;# consumer inactive (active-time)
 
         after 100
         r XREADGROUP GROUP mygroup Alice COUNT 1 STREAMS mystream >
         set reply [r xinfo consumers mystream mygroup]
         set consumer_info [lindex $reply 0]
-        assert {[dict get $consumer_info idle] < 80} ;# consumer idle (seen-time)
+        assert {[dict get $consumer_info idle] < 300} ;# consumer idle (seen-time)
         assert {[dict get $consumer_info inactive] >= 100} ;# consumer inactive (active-time)
 
 
@@ -1324,6 +1324,9 @@ start_server {
             assert_equal [dict get $group entries-read] 3
             assert_equal [dict get $group lag] 0
 
+            # wait for replica offset
+            wait_for_ofs_sync $master $replica
+
             set reply [$replica XINFO STREAM mystream FULL]
             set group [lindex [dict get $reply groups] 0]
             assert_equal [dict get $group entries-read] 3
diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl
index f6c643a5ef..b341bbf69d 100644
--- a/tests/unit/type/zset.tcl
+++ b/tests/unit/type/zset.tcl
@@ -2012,6 +2012,7 @@ start_server {tags {"zset"}} {
             # Before the fix in #13004, this time would have been 1200+ (i.e. more than 1200ms),
             # now it should be 1000, but in order to avoid timing issues, we increase the range a bit.
             assert_range [expr $end-$start] 1000 1150
+            puts "Time: [expr $end-$start]"
 
             r debug set-active-expire 1
             $rd close
diff --git a/valkey.conf b/valkey.conf
index e4ffd0f8ad..8badf1487a 100644
--- a/valkey.conf
+++ b/valkey.conf
@@ -1288,9 +1288,8 @@ lazyfree-lazy-user-flush no
 # to pipelining nor sharding of the instance.
 #
 # By default threading is disabled, we suggest enabling it only in machines
-# that have at least 4 or more cores, leaving at least one spare core.
-# Using more than 8 threads is unlikely to help much. We also recommend using
-# threaded I/O only if you actually have performance problems, with 
+# that have at least 3 or more cores, leaving at least one spare core.
+# We also recommend using threaded I/O only if you actually have performance problems, with 
 # instances being able to use a quite big percentage of CPU time, otherwise
 # there is no point in using this feature.
 #
@@ -1301,19 +1300,9 @@ lazyfree-lazy-user-flush no
 # io-threads 4
 #
 # Setting io-threads to 1 will just use the main thread as usual.
-# When I/O threads are enabled, we only use threads for writes, that is
-# to thread the write(2) syscall and transfer the client buffers to the
-# socket. However it is also possible to enable threading of reads and
-# protocol parsing using the following configuration directive, by setting
-# it to yes:
-#
-# io-threads-do-reads no
-#
-# Usually threading reads doesn't help much.
-#
-# NOTE 1: This configuration directive cannot be changed at runtime via
-# CONFIG SET. Also, this feature currently does not work when SSL is
-# enabled.
+# When I/O threads are enabled, we use threads for both reads and writes, that
+# is, to thread the read and write syscalls, the transfer of the client buffers
+# to the socket, and protocol parsing.
 #
 # NOTE 2: If you want to test the server speedup using valkey-benchmark, make
 # sure you also run the benchmark itself in threaded mode, using the

From 548b4e0ea943da765a4fec46e73484b0742f58f8 Mon Sep 17 00:00:00 2001
From: "K.G. Wang" <wkgcass@hotmail.com>
Date: Tue, 9 Jul 2024 11:29:44 +0800
Subject: [PATCH 49/53] Calculate the actual mask to be removed in the
 eventloop before aeApiDelEvent (#725)

for kqueue:
EV_DELETE fails if the specified fd is not associated with the kqfd. If
EVFILT_WRITE is associated but EVFILT_READ is not, then calling
aeApiDelEvent with mask = -1 or `(AE_READABLE|AE_WRITABLE)` will
cause kevent() to fail with errno = 2 (No such file or directory),
leaving EVFILT_WRITE still associated. So we need to calculate the actual
mask to be removed, instead of passing in whatever the user provides.

for evport:
The comment clearly states that aeApiDelEvent "rely on the fact that our
caller has already updated the mask in the eventLoop".

for epoll:
There's no need to calculate the "actual mask" twice, once in
`aeDeleteFileEvent` and another in `aeApiDelEvent`, let's just use the
mask recorded in the eventLoop.

Fixes #715

Signed-off-by: wkgcass <wkgcass@hotmail.com>
Co-authored-by: Andy Pan <i@andypan.me>
Co-authored-by: Binbin <binloveplay1314@qq.com>
---
 src/ae.c       | 13 ++++++++++++-
 src/ae_epoll.c |  6 ++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/ae.c b/src/ae.c
index 28b50c660f..b6a1ce0b10 100644
--- a/src/ae.c
+++ b/src/ae.c
@@ -183,7 +183,9 @@ void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask) {
      * is removed. */
     if (mask & AE_WRITABLE) mask |= AE_BARRIER;
 
-    aeApiDelEvent(eventLoop, fd, mask);
+    /* Only remove attached events */
+    mask = mask & fe->mask;
+
     fe->mask = fe->mask & (~mask);
     if (fd == eventLoop->maxfd && fe->mask == AE_NONE) {
         /* Update the max fd */
@@ -193,6 +195,15 @@ void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask) {
             if (eventLoop->events[j].mask != AE_NONE) break;
         eventLoop->maxfd = j;
     }
+
+    /* Check whether there are events to be removed.
+     * Note: user may remove the AE_BARRIER without
+     * touching the actual events. */
+    if (mask & (AE_READABLE | AE_WRITABLE)) {
+        /* Must be invoked after the eventLoop mask is modified,
+         * which is required by evport and epoll */
+        aeApiDelEvent(eventLoop, fd, mask);
+    }
 }
 
 void *aeGetFileClientData(aeEventLoop *eventLoop, int fd) {
diff --git a/src/ae_epoll.c b/src/ae_epoll.c
index 78820b99bf..c8b4ac743f 100644
--- a/src/ae_epoll.c
+++ b/src/ae_epoll.c
@@ -87,10 +87,12 @@ static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
     return 0;
 }
 
-static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int delmask) {
+static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) {
     aeApiState *state = eventLoop->apidata;
     struct epoll_event ee = {0}; /* avoid valgrind warning */
-    int mask = eventLoop->events[fd].mask & (~delmask);
+
+    /* We rely on the fact that our caller has already updated the mask in the eventLoop. */
+    mask = eventLoop->events[fd].mask;
 
     ee.events = 0;
     if (mask & AE_READABLE) ee.events |= EPOLLIN;

From b99c7237f45cd75be4db164b483cf1cb2c2febf9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= <viktor.soderqvist@est.tech>
Date: Tue, 9 Jul 2024 21:40:49 +0200
Subject: [PATCH 50/53] Fix unstable test case EVAL+WAITAOF (#766)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Test case "EVAL - Scripts do not block on waitaof" has been observed to fail,
e.g. in
https://github.com/valkey-io/valkey/actions/runs/9860131487/job/27233756421?pr=688

It can happen that the local AOF has been written and 1 is returned here
where 0 is expected. Writing a key inside the EVAL script makes sure
there's no time to write the AOF.

Signed-off-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
---
 tests/unit/scripting.tcl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl
index 9e174c18d1..a9bf242904 100644
--- a/tests/unit/scripting.tcl
+++ b/tests/unit/scripting.tcl
@@ -294,7 +294,7 @@ start_server {tags {"scripting"}} {
     } {0}
 
     test {EVAL - Scripts do not block on waitaof} {
-        run_script {return redis.pcall('waitaof','0','1','0')} 0
+        run_script {redis.call('incr', 'x') return redis.pcall('waitaof','0','1','0')} 0
     } {0 0}
 
     test {EVAL - Scripts do not block on XREAD with BLOCK option} {

From 6a5a11f21c6b6a5c8678c50e96b559919962ba10 Mon Sep 17 00:00:00 2001
From: Brennan <31714723+BCathcart@users.noreply.github.com>
Date: Tue, 9 Jul 2024 13:25:42 -0700
Subject: [PATCH 51/53] Fix ULong config boundary checking (#752)

I noticed in #738 that we don't properly check ULong config boundaries
and made the change there. I'm pulling out that particular commit into
this PR since we don't know if we want to merge the configurable cluster
blacklist TTL yet.

---------

Signed-off-by: Brennan Cathcart <brennancathcart@gmail.com>
Signed-off-by: Madelyn Olson <madelyneolson@gmail.com>
Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
---
 src/config.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/config.c b/src/config.c
index 32e6018ff2..adbfdd43de 100644
--- a/src/config.c
+++ b/src/config.c
@@ -2054,6 +2054,7 @@ static void numericConfigInit(standardConfig *config) {
 
 static int numericBoundaryCheck(standardConfig *config, long long ll, const char **err) {
     if (config->data.numeric.numeric_type == NUMERIC_TYPE_ULONG_LONG ||
+        config->data.numeric.numeric_type == NUMERIC_TYPE_ULONG ||
         config->data.numeric.numeric_type == NUMERIC_TYPE_UINT ||
         config->data.numeric.numeric_type == NUMERIC_TYPE_SIZE_T) {
         /* Boundary check for unsigned types */

From a323dce8900341328114b86a92078c50cec0d9b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= <viktor.soderqvist@est.tech>
Date: Wed, 10 Jul 2024 13:53:52 +0200
Subject: [PATCH 52/53] Dual stack and client-specific IPs in cluster (#736)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New configs:

* `cluster-announce-client-ipv4`
* `cluster-announce-client-ipv6`

New module API function:

* `ValkeyModule_GetClusterNodeInfoForClient`, takes a client id and is
otherwise just like its non-ForClient cousin.

If configured, one of these IP addresses is reported to each client in
CLUSTER SLOTS, CLUSTER SHARDS, CLUSTER NODES and redirects, replacing
the IP (`cluster-announce-ip` or the auto-detected IP) of each node.
Which one is reported to the client depends on whether the client is
connected over IPv4 or IPv6.

Benefits:

* This allows clients using IPv4 to get the IPv4 addresses of all
cluster nodes and IPv6 clients to get the IPv6 addresses.
* This allows the IPs visible to clients to be different to the IPs used
between the cluster nodes due to NAT'ing.

The information is propagated in the cluster bus using new Ping
extensions. (Old nodes without this feature ignore unknown Ping
extensions.)

This adds another dimension to CLUSTER SLOTS reply. It now depends on
the client's use of TLS, the IP address family and RESP version.
Refactoring: The cached connection type definition is moved from
connection.h (it actually has nothing to do with the connection
abstraction) to server.h and is changed to a bitmap, with one bit for
each of TLS, IPv6 and RESP3.

Fixes #337

---------

Signed-off-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
---
 src/cluster.c                             |  27 ++-
 src/cluster.h                             |   6 +-
 src/cluster_legacy.c                      | 283 ++++++++++++++--------
 src/cluster_legacy.h                      |  14 ++
 src/config.c                              |  33 +++
 src/connection.h                          |   2 -
 src/module.c                              |  40 ++-
 src/networking.c                          |  10 +
 src/server.h                              |  14 +-
 src/valkeymodule.h                        |   8 +
 tests/support/cluster_util.tcl            |  21 ++
 tests/support/server.tcl                  |   2 +-
 tests/unit/cluster/announce-client-ip.tcl | 149 ++++++++++++
 valkey.conf                               |  18 +-
 14 files changed, 497 insertions(+), 130 deletions(-)
 create mode 100644 tests/unit/cluster/announce-client-ip.tcl

diff --git a/src/cluster.c b/src/cluster.c
index 45fde52842..dd643af988 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -747,7 +747,7 @@ int verifyClusterNodeId(const char *name, int length) {
 }
 
 int isValidAuxChar(int c) {
-    return isalnum(c) || (strchr("!#$%&()*+:;<>?@[]^{|}~", c) == NULL);
+    return isalnum(c) || (strchr("!#$%&()*+.:;<>?@[]^{|}~", c) == NULL);
 }
 
 int isValidAuxString(char *s, unsigned int length) {
@@ -1194,7 +1194,7 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co
         int port = clusterNodeClientPort(n, shouldReturnTlsInfo());
         addReplyErrorSds(c,
                          sdscatprintf(sdsempty(), "-%s %d %s:%d", (error_code == CLUSTER_REDIR_ASK) ? "ASK" : "MOVED",
-                                      hashslot, clusterNodePreferredEndpoint(n), port));
+                                      hashslot, clusterNodePreferredEndpoint(n, c), port));
     } else {
         serverPanic("getNodeByQuery() unknown error.");
     }
@@ -1267,7 +1267,7 @@ void addNodeToNodeReply(client *c, clusterNode *node) {
     char *hostname = clusterNodeHostname(node);
     addReplyArrayLen(c, 4);
     if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_IP) {
-        addReplyBulkCString(c, clusterNodeIp(node));
+        addReplyBulkCString(c, clusterNodeIp(node, c));
     } else if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_HOSTNAME) {
         if (hostname != NULL && hostname[0] != '\0') {
             addReplyBulkCString(c, hostname);
@@ -1300,7 +1300,7 @@ void addNodeToNodeReply(client *c, clusterNode *node) {
 
     if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_IP) {
         addReplyBulkCString(c, "ip");
-        addReplyBulkCString(c, clusterNodeIp(node));
+        addReplyBulkCString(c, clusterNodeIp(node, c));
         length--;
     }
     if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_HOSTNAME && hostname != NULL &&
@@ -1353,12 +1353,10 @@ void addNodeReplyForClusterSlot(client *c, clusterNode *node, int start_slot, in
 }
 
 void clearCachedClusterSlotsResponse(void) {
-    for (connTypeForCaching conn_type = CACHE_CONN_TCP; conn_type < CACHE_CONN_TYPE_MAX; conn_type++) {
-        for (int resp = 0; resp <= 3; resp++) {
-            if (server.cached_cluster_slot_info[conn_type][resp]) {
-                sdsfree(server.cached_cluster_slot_info[conn_type][resp]);
-                server.cached_cluster_slot_info[conn_type][resp] = NULL;
-            }
+    for (int conn_type = 0; conn_type < CACHE_CONN_TYPE_MAX; conn_type++) {
+        if (server.cached_cluster_slot_info[conn_type]) {
+            sdsfree(server.cached_cluster_slot_info[conn_type]);
+            server.cached_cluster_slot_info[conn_type] = NULL;
         }
     }
 }
@@ -1415,14 +1413,17 @@ void clusterCommandSlots(client *c) {
      *               3) node ID
      *           ... continued until done
      */
-    connTypeForCaching conn_type = shouldReturnTlsInfo();
+    int conn_type = 0;
+    if (connIsTLS(c->conn)) conn_type |= CACHE_CONN_TYPE_TLS;
+    if (isClientConnIpV6(c)) conn_type |= CACHE_CONN_TYPE_IPv6;
+    if (c->resp == 3) conn_type |= CACHE_CONN_TYPE_RESP3;
 
     if (detectAndUpdateCachedNodeHealth()) clearCachedClusterSlotsResponse();
 
-    sds cached_reply = server.cached_cluster_slot_info[conn_type][c->resp];
+    sds cached_reply = server.cached_cluster_slot_info[conn_type];
     if (!cached_reply) {
         cached_reply = generateClusterSlotResponse(c->resp);
-        server.cached_cluster_slot_info[conn_type][c->resp] = cached_reply;
+        server.cached_cluster_slot_info[conn_type] = cached_reply;
     } else {
         debugServerAssertWithInfo(c, NULL, verifyCachedClusterSlotsResponse(cached_reply, c->resp) == 1);
     }
diff --git a/src/cluster.h b/src/cluster.h
index a83b4ac282..d841381088 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -48,6 +48,8 @@ int clusterSendModuleMessageToTarget(const char *target,
 
 void clusterUpdateMyselfFlags(void);
 void clusterUpdateMyselfIp(void);
+void clusterUpdateMyselfClientIpV4(void);
+void clusterUpdateMyselfClientIpV6(void);
 void clusterUpdateMyselfHostname(void);
 void clusterUpdateMyselfAnnouncedPorts(void);
 void clusterUpdateMyselfHumanNodename(void);
@@ -85,7 +87,7 @@ int handleDebugClusterCommand(client *c);
 int clusterNodePending(clusterNode *node);
 int clusterNodeIsPrimary(clusterNode *n);
 char **getClusterNodesList(size_t *numnodes);
-char *clusterNodeIp(clusterNode *node);
+char *clusterNodeIp(clusterNode *node, client *c);
 int clusterNodeIsReplica(clusterNode *node);
 clusterNode *clusterNodeGetPrimary(clusterNode *node);
 char *clusterNodeGetName(clusterNode *node);
@@ -100,7 +102,7 @@ clusterNode *getImportingSlotSource(int slot);
 clusterNode *getNodeBySlot(int slot);
 int clusterNodeClientPort(clusterNode *n, int use_tls);
 char *clusterNodeHostname(clusterNode *node);
-const char *clusterNodePreferredEndpoint(clusterNode *n);
+const char *clusterNodePreferredEndpoint(clusterNode *n, client *c);
 long long clusterNodeReplOffset(clusterNode *node);
 clusterNode *clusterLookupNode(const char *name, int length);
 int detectAndUpdateCachedNodeHealth(void);
diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index 61b5af8e29..035b9fc876 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -98,16 +98,22 @@ unsigned int delKeysInSlot(unsigned int hashslot);
 void clusterAddNodeToShard(const char *shard_id, clusterNode *node);
 list *clusterLookupNodeListByShardId(const char *shard_id);
 void clusterRemoveNodeFromShard(clusterNode *node);
-int auxShardIdSetter(clusterNode *n, void *value, int length);
+int auxShardIdSetter(clusterNode *n, void *value, size_t length);
 sds auxShardIdGetter(clusterNode *n, sds s);
 int auxShardIdPresent(clusterNode *n);
-int auxHumanNodenameSetter(clusterNode *n, void *value, int length);
+int auxHumanNodenameSetter(clusterNode *n, void *value, size_t length);
 sds auxHumanNodenameGetter(clusterNode *n, sds s);
 int auxHumanNodenamePresent(clusterNode *n);
-int auxTcpPortSetter(clusterNode *n, void *value, int length);
+int auxAnnounceClientIpV4Setter(clusterNode *n, void *value, size_t length);
+sds auxAnnounceClientIpV4Getter(clusterNode *n, sds s);
+int auxAnnounceClientIpV4Present(clusterNode *n);
+int auxAnnounceClientIpV6Setter(clusterNode *n, void *value, size_t length);
+sds auxAnnounceClientIpV6Getter(clusterNode *n, sds s);
+int auxAnnounceClientIpV6Present(clusterNode *n);
+int auxTcpPortSetter(clusterNode *n, void *value, size_t length);
 sds auxTcpPortGetter(clusterNode *n, sds s);
 int auxTcpPortPresent(clusterNode *n);
-int auxTlsPortSetter(clusterNode *n, void *value, int length);
+int auxTlsPortSetter(clusterNode *n, void *value, size_t length);
 sds auxTlsPortGetter(clusterNode *n, sds s);
 int auxTlsPortPresent(clusterNode *n);
 static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen);
@@ -190,7 +196,7 @@ dictType clusterSdsToListType = {
 
 /* Aux field setter function prototype
  * return C_OK when the update is successful; C_ERR otherwise */
-typedef int(aux_value_setter)(clusterNode *n, void *value, int length);
+typedef int(aux_value_setter)(clusterNode *n, void *value, size_t length);
 /* Aux field getter function prototype
  * return an sds that is a concatenation of the input sds string and
  * the aux value */
@@ -211,7 +217,9 @@ typedef enum {
     af_human_nodename,
     af_tcp_port,
     af_tls_port,
-    af_count,
+    af_announce_client_ipv4,
+    af_announce_client_ipv6,
+    af_count, /* must be the last field */
 } auxFieldIndex;
 
 /* Note that
@@ -223,9 +231,11 @@ auxFieldHandler auxFieldHandlers[] = {
     {"nodename", auxHumanNodenameSetter, auxHumanNodenameGetter, auxHumanNodenamePresent},
     {"tcp-port", auxTcpPortSetter, auxTcpPortGetter, auxTcpPortPresent},
     {"tls-port", auxTlsPortSetter, auxTlsPortGetter, auxTlsPortPresent},
+    {"client-ipv4", auxAnnounceClientIpV4Setter, auxAnnounceClientIpV4Getter, auxAnnounceClientIpV4Present},
+    {"client-ipv6", auxAnnounceClientIpV6Setter, auxAnnounceClientIpV6Getter, auxAnnounceClientIpV6Present},
 };
 
-int auxShardIdSetter(clusterNode *n, void *value, int length) {
+int auxShardIdSetter(clusterNode *n, void *value, size_t length) {
     if (verifyClusterNodeId(value, length) == C_ERR) {
         return C_ERR;
     }
@@ -249,19 +259,12 @@ int auxShardIdPresent(clusterNode *n) {
     return strlen(n->shard_id);
 }
 
-int auxHumanNodenameSetter(clusterNode *n, void *value, int length) {
-    if (n && !strncmp(value, n->human_nodename, length)) {
-        return C_OK;
-    } else if (!n && (length == 0)) {
+int auxHumanNodenameSetter(clusterNode *n, void *value, size_t length) {
+    if (sdslen(n->human_nodename) == length && !strncmp(value, n->human_nodename, length)) {
         return C_OK;
     }
-    if (n) {
-        n->human_nodename = sdscpylen(n->human_nodename, value, length);
-    } else if (sdslen(n->human_nodename) != 0) {
-        sdsclear(n->human_nodename);
-    } else {
-        return C_ERR;
-    }
+
+    n->human_nodename = sdscpylen(n->human_nodename, value, length);
     return C_OK;
 }
 
@@ -273,7 +276,59 @@ int auxHumanNodenamePresent(clusterNode *n) {
     return sdslen(n->human_nodename);
 }
 
-int auxTcpPortSetter(clusterNode *n, void *value, int length) {
+int auxAnnounceClientIpV4Setter(clusterNode *n, void *value, size_t length) {
+    if (sdslen(n->announce_client_ipv4) == length && !strncmp(value, n->announce_client_ipv4, length)) {
+        /* Unchanged value */
+        return C_OK;
+    }
+
+    if (length != 0) {
+        /* Validate IPv4 address */
+        struct sockaddr_in sa;
+        if (inet_pton(AF_INET, (const char *)value, &(sa.sin_addr)) == 0) {
+            return C_ERR;
+        }
+    }
+
+    n->announce_client_ipv4 = sdscpylen(n->announce_client_ipv4, value, length);
+    return C_OK;
+}
+
+sds auxAnnounceClientIpV4Getter(clusterNode *n, sds s) {
+    return sdscatprintf(s, "%s", n->announce_client_ipv4);
+}
+
+int auxAnnounceClientIpV4Present(clusterNode *n) {
+    return sdslen(n->announce_client_ipv4) != 0;
+}
+
+int auxAnnounceClientIpV6Setter(clusterNode *n, void *value, size_t length) {
+    if (sdslen(n->announce_client_ipv6) == length && !strncmp(value, n->announce_client_ipv6, length)) {
+        /* Unchanged value */
+        return C_OK;
+    }
+
+    if (length != 0) {
+        /* Validate IPv6 address */
+        struct sockaddr_in6 sa;
+        if (inet_pton(AF_INET6, (const char *)value, &(sa.sin6_addr)) == 0) {
+            return C_ERR;
+        }
+    }
+
+    n->announce_client_ipv6 = sdscpylen(n->announce_client_ipv6, value, length);
+    return C_OK;
+}
+
+sds auxAnnounceClientIpV6Getter(clusterNode *n, sds s) {
+    return sdscatprintf(s, "%s", n->announce_client_ipv6);
+}
+
+int auxAnnounceClientIpV6Present(clusterNode *n) {
+    return sdslen(n->announce_client_ipv6) != 0;
+}
+
+int auxTcpPortSetter(clusterNode *n, void *value, size_t length) {
     if (length > 5 || length < 1) {
         return C_ERR;
     }
@@ -292,7 +347,7 @@ int auxTcpPortPresent(clusterNode *n) {
     return n->tcp_port >= 0 && n->tcp_port < 65536;
 }
 
-int auxTlsPortSetter(clusterNode *n, void *value, int length) {
+int auxTlsPortSetter(clusterNode *n, void *value, size_t length) {
     if (length > 5 || length < 1) {
         return C_ERR;
     }
@@ -885,38 +940,37 @@ void clusterUpdateMyselfIp(void) {
     }
 }
 
-/* Update the hostname for the specified node with the provided C string. */
-static void updateAnnouncedHostname(clusterNode *node, char *new) {
-    /* Previous and new hostname are the same, no need to update. */
-    if (new && !strcmp(new, node->hostname)) {
+static void updateSdsExtensionField(char **field, const char *value) {
+    if (value != NULL && !strcmp(value, *field)) {
         return;
-    } else if (!new && (sdslen(node->hostname) == 0)) {
+    } else if (value == NULL && sdslen(*field) == 0) {
         return;
     }
 
-    if (new) {
-        node->hostname = sdscpy(node->hostname, new);
-    } else if (sdslen(node->hostname) != 0) {
-        sdsclear(node->hostname);
+    if (value != NULL) {
+        *field = sdscpy(*field, value);
+    } else {
+        sdsclear(*field);
     }
     clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
 }
 
-static void updateAnnouncedHumanNodename(clusterNode *node, char *new) {
-    if (new && !strcmp(new, node->human_nodename)) {
-        return;
-    } else if (!new && (sdslen(node->human_nodename) == 0)) {
-        return;
-    }
+/* Update the hostname for the specified node with the provided C string. */
+static void updateAnnouncedHostname(clusterNode *node, char *value) {
+    updateSdsExtensionField(&node->hostname, value);
+}
 
-    if (new) {
-        node->human_nodename = sdscpy(node->human_nodename, new);
-    } else if (sdslen(node->human_nodename) != 0) {
-        sdsclear(node->human_nodename);
-    }
-    clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
+static void updateAnnouncedHumanNodename(clusterNode *node, char *value) {
+    updateSdsExtensionField(&node->human_nodename, value);
+}
+
+static void updateAnnouncedClientIpV4(clusterNode *node, char *value) {
+    updateSdsExtensionField(&node->announce_client_ipv4, value);
 }
 
+static void updateAnnouncedClientIpV6(clusterNode *node, char *value) {
+    updateSdsExtensionField(&node->announce_client_ipv6, value);
+}
 
 static void updateShardId(clusterNode *node, const char *shard_id) {
     if (shard_id && memcmp(node->shard_id, shard_id, CLUSTER_NAMELEN) != 0) {
@@ -956,6 +1010,16 @@ void clusterUpdateMyselfHumanNodename(void) {
     updateAnnouncedHumanNodename(myself, server.cluster_announce_human_nodename);
 }
 
+void clusterUpdateMyselfClientIpV4(void) {
+    if (!myself) return;
+    updateAnnouncedClientIpV4(myself, server.cluster_announce_client_ipv4);
+}
+
+void clusterUpdateMyselfClientIpV6(void) {
+    if (!myself) return;
+    updateAnnouncedClientIpV6(myself, server.cluster_announce_client_ipv6);
+}
+
 void clusterInit(void) {
     int saveconf = 0;
 
@@ -1033,14 +1097,14 @@ void clusterInit(void) {
 
     server.cluster->mf_end = 0;
     server.cluster->mf_replica = NULL;
-    for (connTypeForCaching conn_type = CACHE_CONN_TCP; conn_type < CACHE_CONN_TYPE_MAX; conn_type++) {
-        for (int resp = 0; resp <= 3; resp++) {
-            server.cached_cluster_slot_info[conn_type][resp] = NULL;
-        }
+    for (int conn_type = 0; conn_type < CACHE_CONN_TYPE_MAX; conn_type++) {
+        server.cached_cluster_slot_info[conn_type] = NULL;
     }
     resetManualFailover();
     clusterUpdateMyselfFlags();
     clusterUpdateMyselfIp();
+    clusterUpdateMyselfClientIpV4();
+    clusterUpdateMyselfClientIpV6();
     clusterUpdateMyselfHostname();
     clusterUpdateMyselfHumanNodename();
 }
@@ -1344,6 +1408,8 @@ clusterNode *createClusterNode(char *nodename, int flags) {
     node->link = NULL;
     node->inbound_link = NULL;
     memset(node->ip, 0, sizeof(node->ip));
+    node->announce_client_ipv4 = sdsempty();
+    node->announce_client_ipv6 = sdsempty();
     node->hostname = sdsempty();
     node->human_nodename = sdsempty();
     node->tcp_port = 0;
@@ -1515,6 +1581,8 @@ void freeClusterNode(clusterNode *n) {
     sdsfree(nodename);
     sdsfree(n->hostname);
     sdsfree(n->human_nodename);
+    sdsfree(n->announce_client_ipv4);
+    sdsfree(n->announce_client_ipv6);
 
     /* Release links and associated data structures. */
     if (n->link) freeClusterLink(n->link);
@@ -2555,45 +2623,49 @@ static clusterMsgPingExt *getNextPingExt(clusterMsgPingExt *ext) {
 }
 
 /* All PING extensions must be 8-byte aligned */
-uint32_t getAlignedPingExtSize(uint32_t dataSize) {
+static uint32_t getAlignedPingExtSize(uint32_t dataSize) {
     return sizeof(clusterMsgPingExt) + EIGHT_BYTE_ALIGN(dataSize);
 }
 
-uint32_t getHostnamePingExtSize(void) {
-    if (sdslen(myself->hostname) == 0) {
-        return 0;
-    }
-    return getAlignedPingExtSize(sdslen(myself->hostname) + 1);
-}
-
-uint32_t getHumanNodenamePingExtSize(void) {
-    if (sdslen(myself->human_nodename) == 0) {
-        return 0;
-    }
-    return getAlignedPingExtSize(sdslen(myself->human_nodename) + 1);
-}
-
-uint32_t getShardIdPingExtSize(void) {
+static uint32_t getShardIdPingExtSize(void) {
     return getAlignedPingExtSize(sizeof(clusterMsgPingExtShardId));
 }
 
-uint32_t getForgottenNodeExtSize(void) {
+static uint32_t getForgottenNodeExtSize(void) {
     return getAlignedPingExtSize(sizeof(clusterMsgPingExtForgottenNode));
 }
 
-void *preparePingExt(clusterMsgPingExt *ext, uint16_t type, uint32_t length) {
+static void *preparePingExt(clusterMsgPingExt *ext, uint16_t type, uint32_t length) {
     ext->type = htons(type);
     ext->length = htonl(length);
     return &ext->ext[0];
 }
 
+/* If value is nonempty and cursor_ptr points to a non-NULL cursor, writes a
+ * ping extension at the cursor, advances the cursor, increments totlen and
+ * returns 1. If value is nonempty and cursor_ptr points to NULL, just computes
+ * the size, increments totlen and returns 1. If value is empty, returns 0. */
+static uint32_t
+writeSdsPingExtIfNonempty(uint32_t *totlen_ptr, clusterMsgPingExt **cursor_ptr, clusterMsgPingtypes type, sds value) {
+    size_t len = sdslen(value);
+    if (len == 0) return 0;
+    size_t size = getAlignedPingExtSize(len + 1);
+    if (*cursor_ptr != NULL) {
+        void *ext = preparePingExt(*cursor_ptr, type, size);
+        memcpy(ext, value, len);
+        *cursor_ptr = getNextPingExt(*cursor_ptr);
+    }
+    *totlen_ptr += size;
+    return 1;
+}
+
 /* 1. If a NULL hdr is provided, compute the extension size;
- * 2. If a non-NULL hdr is provided, write the hostname ping
- *    extension at the start of the cursor. This function
+ * 2. If a non-NULL hdr is provided, write the ping
+ *    extensions at the start of the cursor. This function
  *    will update the cursor to point to the end of the
  *    written extension and will return the amount of bytes
  *    written. */
-uint32_t writePingExt(clusterMsg *hdr, int gossipcount) {
+static uint32_t writePingExtensions(clusterMsg *hdr, int gossipcount) {
     uint16_t extensions = 0;
     uint32_t totlen = 0;
     clusterMsgPingExt *cursor = NULL;
@@ -2602,36 +2674,14 @@ uint32_t writePingExt(clusterMsg *hdr, int gossipcount) {
         cursor = getInitialPingExt(hdr, gossipcount);
     }
 
-    /* hostname is optional */
-    if (sdslen(myself->hostname) != 0) {
-        if (cursor != NULL) {
-            /* Populate hostname */
-            clusterMsgPingExtHostname *ext =
-                preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_HOSTNAME, getHostnamePingExtSize());
-            memcpy(ext->hostname, myself->hostname, sdslen(myself->hostname));
-
-            /* Move the write cursor */
-            cursor = getNextPingExt(cursor);
-        }
-
-        totlen += getHostnamePingExtSize();
-        extensions++;
-    }
-
-    if (sdslen(myself->human_nodename) != 0) {
-        if (cursor != NULL) {
-            /* Populate human_nodename */
-            clusterMsgPingExtHumanNodename *ext =
-                preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME, getHumanNodenamePingExtSize());
-            memcpy(ext->human_nodename, myself->human_nodename, sdslen(myself->human_nodename));
-
-            /* Move the write cursor */
-            cursor = getNextPingExt(cursor);
-        }
-
-        totlen += getHumanNodenamePingExtSize();
-        extensions++;
-    }
+    /* Write simple optional SDS ping extensions. */
+    extensions += writeSdsPingExtIfNonempty(&totlen, &cursor, CLUSTERMSG_EXT_TYPE_HOSTNAME, myself->hostname);
+    extensions +=
+        writeSdsPingExtIfNonempty(&totlen, &cursor, CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME, myself->human_nodename);
+    extensions +=
+        writeSdsPingExtIfNonempty(&totlen, &cursor, CLUSTERMSG_EXT_TYPE_CLIENT_IPV4, myself->announce_client_ipv4);
+    extensions +=
+        writeSdsPingExtIfNonempty(&totlen, &cursor, CLUSTERMSG_EXT_TYPE_CLIENT_IPV6, myself->announce_client_ipv6);
 
     /* Gossip forgotten nodes */
     if (dictSize(server.cluster->nodes_black_list) > 0) {
@@ -2681,6 +2731,8 @@ void clusterProcessPingExtensions(clusterMsg *hdr, clusterLink *link) {
     clusterNode *sender = link->node ? link->node : clusterLookupNode(hdr->sender, CLUSTER_NAMELEN);
     char *ext_hostname = NULL;
     char *ext_humannodename = NULL;
+    char *ext_clientipv4 = NULL;
+    char *ext_clientipv6 = NULL;
     char *ext_shardid = NULL;
     uint16_t extensions = ntohs(hdr->extensions);
     /* Loop through all the extensions and process them */
@@ -2694,6 +2746,14 @@ void clusterProcessPingExtensions(clusterMsg *hdr, clusterLink *link) {
             clusterMsgPingExtHumanNodename *humannodename_ext =
                 (clusterMsgPingExtHumanNodename *)&(ext->ext[0].human_nodename);
             ext_humannodename = humannodename_ext->human_nodename;
+        } else if (type == CLUSTERMSG_EXT_TYPE_CLIENT_IPV4) {
+            clusterMsgPingExtClientIpV4 *clientipv4_ext =
+                (clusterMsgPingExtClientIpV4 *)&(ext->ext[0].announce_client_ipv4);
+            ext_clientipv4 = clientipv4_ext->announce_client_ipv4;
+        } else if (type == CLUSTERMSG_EXT_TYPE_CLIENT_IPV6) {
+            clusterMsgPingExtClientIpV6 *clientipv6_ext =
+                (clusterMsgPingExtClientIpV6 *)&(ext->ext[0].announce_client_ipv6);
+            ext_clientipv6 = clientipv6_ext->announce_client_ipv6;
         } else if (type == CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE) {
             clusterMsgPingExtForgottenNode *forgotten_node_ext = &(ext->ext[0].forgotten_node);
             clusterNode *n = clusterLookupNode(forgotten_node_ext->name, CLUSTER_NAMELEN);
@@ -2722,6 +2782,8 @@ void clusterProcessPingExtensions(clusterMsg *hdr, clusterLink *link) {
      * set it now. */
     updateAnnouncedHostname(sender, ext_hostname);
     updateAnnouncedHumanNodename(sender, ext_humannodename);
+    updateAnnouncedClientIpV4(sender, ext_clientipv4);
+    updateAnnouncedClientIpV6(sender, ext_clientipv6);
     /* If the node did not send us a shard-id extension, it means the sender
      * does not support it (old version), node->shard_id is randomly generated.
      * A cluster-wide consensus for the node's shard_id is not necessary.
@@ -3681,7 +3743,7 @@ void clusterSendPing(clusterLink *link, int type) {
     estlen = sizeof(clusterMsg) - sizeof(union clusterMsgData);
     estlen += (sizeof(clusterMsgDataGossip) * (wanted + pfail_wanted));
     if (link->node && nodeSupportsExtensions(link->node)) {
-        estlen += writePingExt(NULL, 0);
+        estlen += writePingExtensions(NULL, 0);
     }
     /* Note: clusterBuildMessageHdr() expects the buffer to be always at least
      * sizeof(clusterMsg) or more. */
@@ -3752,7 +3814,7 @@ void clusterSendPing(clusterLink *link, int type) {
     uint32_t totlen = 0;
 
     if (link->node && nodeSupportsExtensions(link->node)) {
-        totlen += writePingExt(hdr, gossipcount);
+        totlen += writePingExtensions(hdr, gossipcount);
     } else {
         serverLog(LL_DEBUG, "Unable to send extensions data, however setting ext data flag to true");
         hdr->mflags[0] |= CLUSTERMSG_FLAG0_EXT_DATA;
@@ -5248,15 +5310,19 @@ sds representSlotInfo(sds ci, uint16_t *slot_info_pairs, int slot_info_pairs_cou
 /* Generate a csv-alike representation of the specified cluster node.
  * See clusterGenNodesDescription() top comment for more information.
  *
+ * If a client is provided, we're creating a reply to the CLUSTER NODES command.
+ * If client is NULL, we are creating the content of nodes.conf.
+ *
  * The function returns the string representation as an SDS string. */
 sds clusterGenNodeDescription(client *c, clusterNode *node, int tls_primary) {
     int j, start;
     sds ci;
     int port = clusterNodeClientPort(node, tls_primary);
+    char *ip = clusterNodeIp(node, c);
 
     /* Node coordinates */
     ci = sdscatlen(sdsempty(), node->name, CLUSTER_NAMELEN);
-    ci = sdscatfmt(ci, " %s:%i@%i", node->ip, port, node->cport);
+    ci = sdscatfmt(ci, " %s:%i@%i", ip, port, node->cport);
     if (sdslen(node->hostname) != 0) {
         ci = sdscatfmt(ci, ",%s", node->hostname);
     }
@@ -5571,11 +5637,11 @@ void addNodeDetailsToShardReply(client *c, clusterNode *node) {
     }
 
     addReplyBulkCString(c, "ip");
-    addReplyBulkCString(c, node->ip);
+    addReplyBulkCString(c, clusterNodeIp(node, c));
     reply_count++;
 
     addReplyBulkCString(c, "endpoint");
-    addReplyBulkCString(c, clusterNodePreferredEndpoint(node));
+    addReplyBulkCString(c, clusterNodePreferredEndpoint(node, c));
     reply_count++;
 
     if (sdslen(node->hostname) != 0) {
@@ -5844,7 +5910,16 @@ int clusterNodePending(clusterNode *node) {
     return node->flags & (CLUSTER_NODE_NOADDR | CLUSTER_NODE_HANDSHAKE);
 }
 
-char *clusterNodeIp(clusterNode *node) {
+/* Returns the IP of the node as seen by the given client, or by the cluster node if c is NULL. */
+char *clusterNodeIp(clusterNode *node, client *c) {
+    if (c == NULL) {
+        return node->ip;
+    }
+    if (isClientConnIpV6(c)) {
+        if (sdslen(node->announce_client_ipv6) != 0) return node->announce_client_ipv6;
+    } else {
+        if (sdslen(node->announce_client_ipv4) != 0) return node->announce_client_ipv4;
+    }
     return node->ip;
 }
 
@@ -6509,10 +6584,10 @@ long long clusterNodeReplOffset(clusterNode *node) {
     return node->repl_offset;
 }
 
-const char *clusterNodePreferredEndpoint(clusterNode *n) {
+const char *clusterNodePreferredEndpoint(clusterNode *n, client *c) {
     char *hostname = clusterNodeHostname(n);
     switch (server.cluster_preferred_endpoint_type) {
-    case CLUSTER_ENDPOINT_TYPE_IP: return clusterNodeIp(n);
+    case CLUSTER_ENDPOINT_TYPE_IP: return clusterNodeIp(n, c);
     case CLUSTER_ENDPOINT_TYPE_HOSTNAME: return (hostname != NULL && hostname[0] != '\0') ? hostname : "?";
     case CLUSTER_ENDPOINT_TYPE_UNKNOWN_ENDPOINT: return "";
     }
diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h
index d054d86017..3c5696273b 100644
--- a/src/cluster_legacy.h
+++ b/src/cluster_legacy.h
@@ -138,6 +138,8 @@ typedef enum {
     CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME,
     CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE,
     CLUSTERMSG_EXT_TYPE_SHARDID,
+    CLUSTERMSG_EXT_TYPE_CLIENT_IPV4,
+    CLUSTERMSG_EXT_TYPE_CLIENT_IPV6,
 } clusterMsgPingtypes;
 
 /* Helper function for making sure extensions are eight byte aligned. */
@@ -162,6 +164,14 @@ typedef struct {
     char shard_id[CLUSTER_NAMELEN]; /* The shard_id, 40 bytes fixed. */
 } clusterMsgPingExtShardId;
 
+typedef struct {
+    char announce_client_ipv4[1]; /* Announced client IPv4, ends with \0. */
+} clusterMsgPingExtClientIpV4;
+
+typedef struct {
+    char announce_client_ipv6[1]; /* Announced client IPv6, ends with \0. */
+} clusterMsgPingExtClientIpV6;
+
 typedef struct {
     uint32_t length; /* Total length of this extension message (including this header) */
     uint16_t type;   /* Type of this extension message (see clusterMsgPingtypes) */
@@ -171,6 +181,8 @@ typedef struct {
         clusterMsgPingExtHumanNodename human_nodename;
         clusterMsgPingExtForgottenNode forgotten_node;
         clusterMsgPingExtShardId shard_id;
+        clusterMsgPingExtClientIpV4 announce_client_ipv4;
+        clusterMsgPingExtClientIpV6 announce_client_ipv6;
     } ext[]; /* Actual extension information, formatted so that the data is 8
               * byte aligned, regardless of its content. */
 } clusterMsgPingExt;
@@ -303,6 +315,8 @@ struct _clusterNode {
     mstime_t orphaned_time;                 /* Starting time of orphaned primary condition */
     long long repl_offset;                  /* Last known repl offset for this node. */
     char ip[NET_IP_STR_LEN];                /* Latest known IP address of this node */
+    sds announce_client_ipv4;               /* IPv4 for clients only. */
+    sds announce_client_ipv6;               /* IPv6 for clients only. */
     sds hostname;                           /* The known hostname for this node */
     sds human_nodename;                     /* The known human readable nodename for this node */
     int tcp_port;                           /* Latest known clients TCP port. */
diff --git a/src/config.c b/src/config.c
index adbfdd43de..3c82f9ee7a 100644
--- a/src/config.c
+++ b/src/config.c
@@ -35,6 +35,7 @@
 
 #include <fcntl.h>
 #include <sys/stat.h>
+#include <arpa/inet.h>
 #include <glob.h>
 #include <string.h>
 #include <locale.h>
@@ -2382,6 +2383,24 @@ static int isValidAnnouncedHostname(char *val, const char **err) {
     return 1;
 }
 
+static int isValidIpV4(char *val, const char **err) {
+    struct sockaddr_in sa; /* Scratch output for inet_pton(); the parsed address is discarded. */
+    if (val[0] != '\0' && inet_pton(AF_INET, val, &(sa.sin_addr)) != 1) {
+        *err = "Invalid IPv4 address"; /* inet_pton() != 1: unparsable (0) or error (-1). */
+        return 0;
+    }
+    return 1; /* Empty string or a well-formed IPv4 address. */
+}
+
+static int isValidIpV6(char *val, const char **err) {
+    struct sockaddr_in6 sa; /* Scratch output for inet_pton(); the parsed address is discarded. */
+    if (val[0] != '\0' && inet_pton(AF_INET6, val, &(sa.sin6_addr)) != 1) {
+        *err = "Invalid IPv6 address"; /* inet_pton() != 1: unparsable (0) or error (-1). */
+        return 0;
+    }
+    return 1; /* Empty string or a well-formed IPv6 address. */
+}
+
 /* Validate specified string is a valid proc-title-template */
 static int isValidProcTitleTemplate(char *val, const char **err) {
     if (!validateProcTitleTemplate(val)) {
@@ -2623,6 +2642,18 @@ static int updateClusterIp(const char **err) {
     return 1;
 }
 
+int updateClusterClientIpV4(const char **err) {
+    UNUSED(err);
+    clusterUpdateMyselfClientIpV4();
+    return 1;
+}
+
+int updateClusterClientIpV6(const char **err) {
+    UNUSED(err);
+    clusterUpdateMyselfClientIpV6();
+    return 1;
+}
+
 int updateClusterHostname(const char **err) {
     UNUSED(err);
     clusterUpdateMyselfHostname();
@@ -3081,6 +3112,8 @@ standardConfig static_configs[] = {
     createStringConfig("replica-announce-ip", "slave-announce-ip", MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.replica_announce_ip, NULL, NULL, NULL),
     createStringConfig("primaryuser", "masteruser", MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.primary_user, NULL, NULL, NULL),
     createStringConfig("cluster-announce-ip", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_ip, NULL, NULL, updateClusterIp),
+    createStringConfig("cluster-announce-client-ipv4", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_client_ipv4, NULL, isValidIpV4, updateClusterClientIpV4),
+    createStringConfig("cluster-announce-client-ipv6", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_client_ipv6, NULL, isValidIpV6, updateClusterClientIpV6),
     createStringConfig("cluster-config-file", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.cluster_configfile, "nodes.conf", isValidClusterConfigFile, NULL),
     createStringConfig("cluster-announce-hostname", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_hostname, NULL, isValidAnnouncedHostname, updateClusterHostname),
     createStringConfig("cluster-announce-human-nodename", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_human_nodename, NULL, isValidAnnouncedNodename, updateClusterHumanNodename),
diff --git a/src/connection.h b/src/connection.h
index c6466c2d4c..d59f7bc7fc 100644
--- a/src/connection.h
+++ b/src/connection.h
@@ -62,8 +62,6 @@ typedef enum {
 #define CONN_TYPE_TLS "tls"
 #define CONN_TYPE_MAX 8 /* 8 is enough to be extendable */
 
-typedef enum connTypeForCaching { CACHE_CONN_TCP, CACHE_CONN_TLS, CACHE_CONN_TYPE_MAX } connTypeForCaching;
-
 typedef void (*ConnectionCallbackFunc)(struct connection *conn);
 
 typedef struct ConnectionType {
diff --git a/src/module.c b/src/module.c
index 5844fcbdea..876b948323 100644
--- a/src/module.c
+++ b/src/module.c
@@ -8950,6 +8950,14 @@ size_t VM_GetClusterSize(void) {
     return getClusterSize();
 }
 
+int moduleGetClusterNodeInfoForClient(ValkeyModuleCtx *ctx,
+                                      client *c,
+                                      const char *node_id,
+                                      char *ip,
+                                      char *primary_id,
+                                      int *port,
+                                      int *flags);
+
 /* Populate the specified info for the node having as ID the specified 'id',
  * then returns VALKEYMODULE_OK. Otherwise if the format of node ID is invalid
  * or the node ID does not exist from the POV of this local node, VALKEYMODULE_ERR
@@ -8971,14 +8979,41 @@ size_t VM_GetClusterSize(void) {
  * * VALKEYMODULE_NODE_NOFAILOVER:   The replica is configured to never failover
  */
 int VM_GetClusterNodeInfo(ValkeyModuleCtx *ctx, const char *id, char *ip, char *primary_id, int *port, int *flags) {
+    return moduleGetClusterNodeInfoForClient(ctx, NULL, id, ip, primary_id, port, flags);
+}
+
+/* Like VM_GetClusterNodeInfo(), but returns IP address specifically for the given
+ * client, depending on whether the client is connected over IPv4 or IPv6.
+ *
+ * See also VM_GetClientId(). */
+int VM_GetClusterNodeInfoForClient(ValkeyModuleCtx *ctx,
+                                   uint64_t client_id,
+                                   const char *node_id,
+                                   char *ip,
+                                   char *primary_id,
+                                   int *port,
+                                   int *flags) {
+    client *c = lookupClientByID(client_id);
+    if (c == NULL) return VALKEYMODULE_ERR;
+    return moduleGetClusterNodeInfoForClient(ctx, c, node_id, ip, primary_id, port, flags);
+}
+
+
+int moduleGetClusterNodeInfoForClient(ValkeyModuleCtx *ctx,
+                                      client *c,
+                                      const char *node_id,
+                                      char *ip,
+                                      char *primary_id,
+                                      int *port,
+                                      int *flags) {
     UNUSED(ctx);
 
-    clusterNode *node = clusterLookupNode(id, strlen(id));
+    clusterNode *node = clusterLookupNode(node_id, strlen(node_id));
     if (node == NULL || clusterNodePending(node)) {
         return VALKEYMODULE_ERR;
     }
 
-    if (ip) valkey_strlcpy(ip, clusterNodeIp(node), NET_IP_STR_LEN);
+    if (ip) valkey_strlcpy(ip, clusterNodeIp(node, c), NET_IP_STR_LEN);
 
     if (primary_id) {
         /* If the information is not available, the function will set the
@@ -13708,6 +13743,7 @@ void moduleRegisterCoreAPI(void) {
     REGISTER_API(RegisterClusterMessageReceiver);
     REGISTER_API(SendClusterMessage);
     REGISTER_API(GetClusterNodeInfo);
+    REGISTER_API(GetClusterNodeInfoForClient);
     REGISTER_API(GetClusterNodesList);
     REGISTER_API(FreeClusterNodesList);
     REGISTER_API(CreateTimer);
diff --git a/src/networking.c b/src/networking.c
index b249aa61f3..0a91dbb645 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -3109,6 +3109,16 @@ char *getClientSockname(client *c) {
     return c->sockname;
 }
 
+int isClientConnIpV6(client *c) {
+    /* The cached client peer id is of the form "[IPv6]:port" for IPv6
+     * addresses, so we just check for '[' here. */
+    if ((c->conn == NULL || c->conn->type == NULL) && server.current_client) {
+        /* Fake client (no usable connection)? Use current client instead. */
+        c = server.current_client;
+    }
+    return getClientPeerId(c)[0] == '[';
+}
+
 /* Concatenate a string representing the state of a client in a human
  * readable format, into the sds string 's'. */
 sds catClientInfoString(sds s, client *client) {
diff --git a/src/server.h b/src/server.h
index 36a4b641e7..66d6d66da5 100644
--- a/src/server.h
+++ b/src/server.h
@@ -1505,6 +1505,15 @@ struct malloc_stats {
     size_t allocator_frag_smallbins_bytes;
 };
 
+/*-----------------------------------------------------------------------------
+ * Cached state per client connection type flags (bitwise or)
+ *-----------------------------------------------------------------------------*/
+
+#define CACHE_CONN_TYPE_TLS (1 << 0)
+#define CACHE_CONN_TYPE_IPv6 (1 << 1)
+#define CACHE_CONN_TYPE_RESP3 (1 << 2)
+#define CACHE_CONN_TYPE_MAX (1 << 3)
+
 /*-----------------------------------------------------------------------------
  * TLS Context Configuration
  *----------------------------------------------------------------------------*/
@@ -2052,6 +2061,8 @@ struct valkeyServer {
     int cluster_replica_no_failover;                       /* Prevent replica from starting a failover
                                                             if the primary is in failure state. */
     char *cluster_announce_ip;                             /* IP address to announce on cluster bus. */
+    char *cluster_announce_client_ipv4;                    /* IPv4 for clients, to announce on cluster bus. */
+    char *cluster_announce_client_ipv6;                    /* IPv6 for clients, to announce on cluster bus. */
     char *cluster_announce_hostname;                       /* hostname to announce on cluster bus. */
     char *cluster_announce_human_nodename;                 /* Human readable node name assigned to a node. */
     int cluster_preferred_endpoint_type;                   /* Use the announced hostname when available. */
@@ -2070,7 +2081,7 @@ struct valkeyServer {
                                                             * dropping packets of a specific type */
     /* Debug config that goes along with cluster_drop_packet_filter. When set, the link is closed on packet drop. */
     uint32_t debug_cluster_close_link_on_packet_drop : 1;
-    sds cached_cluster_slot_info[CACHE_CONN_TYPE_MAX][4]; /* Align to RESP3 */
+    sds cached_cluster_slot_info[CACHE_CONN_TYPE_MAX]; /* Index in array is a bitwise or of CACHE_CONN_TYPE_* */
     /* Scripting */
     mstime_t busy_reply_threshold;  /* Script / module timeout in milliseconds */
     int pre_command_oom_state;      /* OOM before command (script?) was started */
@@ -2707,6 +2718,7 @@ void freeClientReplyValue(void *o);
 void *dupClientReplyValue(void *o);
 char *getClientPeerId(client *client);
 char *getClientSockName(client *client);
+int isClientConnIpV6(client *c);
 sds catClientInfoString(sds s, client *client);
 sds getAllClientsInfoString(int type);
 int clientSetName(client *c, robj *name, const char **err);
diff --git a/src/valkeymodule.h b/src/valkeymodule.h
index 16f7929081..c664a9ed5a 100644
--- a/src/valkeymodule.h
+++ b/src/valkeymodule.h
@@ -1472,6 +1472,13 @@ VALKEYMODULE_API int (*ValkeyModule_GetClusterNodeInfo)(ValkeyModuleCtx *ctx,
                                                         char *primary_id,
                                                         int *port,
                                                         int *flags) VALKEYMODULE_ATTR;
+VALKEYMODULE_API int (*ValkeyModule_GetClusterNodeInfoForClient)(ValkeyModuleCtx *ctx,
+                                                                 uint64_t client_id,
+                                                                 const char *node_id,
+                                                                 char *ip,
+                                                                 char *primary_id,
+                                                                 int *port,
+                                                                 int *flags) VALKEYMODULE_ATTR;
 VALKEYMODULE_API char **(*ValkeyModule_GetClusterNodesList)(ValkeyModuleCtx *ctx, size_t *numnodes)VALKEYMODULE_ATTR;
 VALKEYMODULE_API void (*ValkeyModule_FreeClusterNodesList)(char **ids) VALKEYMODULE_ATTR;
 VALKEYMODULE_API ValkeyModuleTimerID (*ValkeyModule_CreateTimer)(ValkeyModuleCtx *ctx,
@@ -1938,6 +1945,7 @@ static int ValkeyModule_Init(ValkeyModuleCtx *ctx, const char *name, int ver, in
     VALKEYMODULE_GET_API(RegisterClusterMessageReceiver);
     VALKEYMODULE_GET_API(SendClusterMessage);
     VALKEYMODULE_GET_API(GetClusterNodeInfo);
+    VALKEYMODULE_GET_API(GetClusterNodeInfoForClient);
     VALKEYMODULE_GET_API(GetClusterNodesList);
     VALKEYMODULE_GET_API(FreeClusterNodesList);
     VALKEYMODULE_GET_API(CreateTimer);
diff --git a/tests/support/cluster_util.tcl b/tests/support/cluster_util.tcl
index 5708dfac7e..c19aea3c15 100644
--- a/tests/support/cluster_util.tcl
+++ b/tests/support/cluster_util.tcl
@@ -345,6 +345,27 @@ proc are_hostnames_propagated {match_string} {
     return 1
 }
 
+# Check if cluster's announced IPs are consistent and match a pattern
+# Optionally, a list of clients can be supplied.
+proc are_cluster_announced_ips_propagated {match_string {clients {}}} {
+    for {set j 0} {$j < [llength $::servers]} {incr j} {
+        if {$clients eq {}} {
+            set client [srv [expr -1*$j] "client"]
+        } else {
+            set client [lindex $clients $j]
+        }
+        set cfg [$client cluster slots]
+        foreach node $cfg {
+            for {set i 2} {$i < [llength $node]} {incr i} {
+                if {! [string match $match_string [lindex [lindex $node $i] 0]] } {
+                    return 0
+                }
+            }
+        }
+    }
+    return 1
+}
+
 proc wait_node_marked_fail {ref_node_index instance_id_to_check} {
     wait_for_condition 1000 50 {
         [check_cluster_node_mark fail $ref_node_index $instance_id_to_check]
diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index b0750fcb8a..cc8a9ea64f 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -622,7 +622,7 @@ proc start_server {options {code undefined}} {
     # setup properties to be able to initialize a client object
     set port_param [expr $::tls ? {"tls-port"} : {"port"}]
     set host $::host
-    if {[dict exists $config bind]} { set host [dict get $config bind] }
+    if {[dict exists $config bind]} { set host [lindex [dict get $config bind] 0] }
     if {[dict exists $config $port_param]} { set port [dict get $config $port_param] }
 
     # setup config dict
diff --git a/tests/unit/cluster/announce-client-ip.tcl b/tests/unit/cluster/announce-client-ip.tcl
new file mode 100644
index 0000000000..f0aebd0807
--- /dev/null
+++ b/tests/unit/cluster/announce-client-ip.tcl
@@ -0,0 +1,149 @@
+# Small cluster. No need for failovers.
+start_cluster 2 2 {tags {external:skip cluster} overrides {cluster-replica-no-failover yes}} {
+
+    test "Set cluster announced IPv4 to invalid IP" {
+        catch {R 0 config set cluster-announce-client-ipv4 banana} e
+        assert_match "*Invalid IPv4 address*" $e
+    }
+
+    test "Set cluster announced IPv4 and check that it propagates" {
+        for {set j 0} {$j < [llength $::servers]} {incr j} {
+            set res [R $j config set cluster-announce-client-ipv4 "111.222.111.$j"]
+        }
+
+        # CLUSTER SLOTS
+        wait_for_condition 50 100 {
+            [are_cluster_announced_ips_propagated {111.222.111.*}]
+        } else {
+            fail "cluster-announce-client-ipv4 were not propagated"
+        }
+
+        # CLUSTER SHARDS
+        for {set j 0} {$j < [llength $::servers]} {incr j} {
+            foreach shard [R $j CLUSTER SHARDS] {
+                foreach node [dict get $shard "nodes"] {
+                    set ip [dict get $node "ip"]
+                    set endpoint [dict get $node "endpoint"]
+                    assert_match "111.222.111*" $ip
+                    assert_match "111.222.111*" $endpoint
+                }
+            }
+        }
+
+        # CLUSTER NODES
+        for {set j 0} {$j < [llength $::servers]} {incr j} {
+            set lines [split [R $j CLUSTER NODES] "\r\n"]
+            foreach l $lines {
+                set l [string trim $l]
+                if {$l eq {}} continue
+                assert_equal 1 [regexp {^[0-9a-f]+ 111\.222\.111\.[0-9]} $l]
+            }
+        }
+
+        # Redirects
+        catch {R 0 set foo foo} e
+        assert_match "MOVED * 111.222.111*:*" $e
+
+        # Now that everything is propagated, assert everyone agrees
+        wait_for_cluster_propagation
+    }
+
+    test "Clear announced client IPv4 and check that it propagates" {
+        for {set j 0} {$j < [llength $::servers]} {incr j} {
+            R $j config set cluster-announce-client-ipv4 ""
+        }
+
+        wait_for_condition 50 100 {
+            [are_cluster_announced_ips_propagated "127.0.0.1"] eq 1
+        } else {
+            fail "Cleared cluster-announce-client-ipv4 were not propagated"
+        }
+
+        # Redirect use the IP address
+        catch {R 0 set foo foo} e
+        assert_match "MOVED * 127.0.0.1:*" $e
+
+        # Now that everything is propagated, assert everyone agrees
+        wait_for_cluster_propagation
+    }
+}
+
+start_cluster 2 2 {tags {external:skip cluster ipv6} overrides {cluster-replica-no-failover yes bind {127.0.0.1 ::1}}} {
+    # Connecting to localhost as "::1" makes the clients use IPv6.
+    set clients {}
+    for {set j 0} {$j < [llength $::servers]} {incr j} {
+        set level [expr -1 * $j]
+        lappend clients [valkey ::1 [srv $level port] 0 $::tls]
+    }
+
+    test "Set cluster announced IPv6 to invalid IP" {
+        catch {R 0 config set cluster-announce-client-ipv6 banana} e
+        assert_match "*Invalid IPv6 address*" $e
+    }
+
+    test "Set cluster announced IPv6 and check that it propagates" {
+        for {set j 0} {$j < [llength $::servers]} {incr j} {
+            R $j config set cluster-announce-client-ipv6 "cafe:1234::$j"
+        }
+
+        # CLUSTER SLOTS
+        wait_for_condition 50 100 {
+            [are_cluster_announced_ips_propagated "cafe:1234::*" $clients] eq 1
+        } else {
+            fail "cluster-announce-client-ipv6 were not propagated"
+        }
+
+        # CLUSTER SHARDS
+        for {set j 0} {$j < [llength $::servers]} {incr j} {
+            foreach shard [[lindex $clients $j] CLUSTER SHARDS] {
+                foreach node [dict get $shard "nodes"] {
+                    set ip [dict get $node "ip"]
+                    set endpoint [dict get $node "endpoint"]
+                    assert_match "cafe:1234::*" $ip
+                    assert_match "cafe:1234::*" $endpoint
+                }
+            }
+        }
+
+        # CLUSTER NODES
+        for {set j 0} {$j < [llength $::servers]} {incr j} {
+            set lines [split [[lindex $clients $j] CLUSTER NODES] "\r\n"]
+            foreach l $lines {
+                set l [string trim $l]
+                if {$l eq {}} continue
+                assert_equal 1 [regexp {^[0-9a-f]+ cafe:1234::[0-9]} $l]
+            }
+        }
+
+        # Redirects
+        catch {[lindex $clients 0] set foo foo} e
+        assert_match "MOVED * cafe:1234::*:*" $e
+
+        # Now that everything is propagated, assert everyone agrees
+        wait_for_cluster_propagation
+    }
+
+    test "Clear announced client IPv6 and check that it propagates" {
+        for {set j 0} {$j < [llength $::servers]} {incr j} {
+            R $j config set cluster-announce-client-ipv6 ""
+        }
+
+        wait_for_condition 50 100 {
+            [are_cluster_announced_ips_propagated "127.0.0.1" $clients] eq 1
+        } else {
+            fail "Cleared cluster-announce-client-ipv6 were not propagated"
+        }
+
+        # Redirects
+        catch {[lindex $clients 0] set foo foo} e
+        assert_match "MOVED * 127.0.0.1:*" $e
+
+        # Now that everything is propagated, assert everyone agrees
+        wait_for_cluster_propagation
+    }
+
+    # Close clients
+    for {set j 0} {$j < [llength $::servers]} {incr j} {
+        [lindex $clients $j] close
+    }
+}
diff --git a/valkey.conf b/valkey.conf
index 8badf1487a..39820e30ec 100644
--- a/valkey.conf
+++ b/valkey.conf
@@ -1771,22 +1771,28 @@ aof-timestamp-enabled no
 #
 # In order to make a cluster work in such environments, a static
 # configuration where each node knows its public address is needed. The
-# following four options are used for this scope, and are:
+# following options are used for this purpose:
 #
 # * cluster-announce-ip
+# * cluster-announce-client-ipv4
+# * cluster-announce-client-ipv6
 # * cluster-announce-port
 # * cluster-announce-tls-port
 # * cluster-announce-bus-port
 #
-# Each instructs the node about its address, client ports (for connections
-# without and with TLS) and cluster message bus port. The information is then
-# published in the header of the bus packets so that other nodes will be able to
-# correctly map the address of the node publishing the information.
+# Each instructs the node about its address, possibly other addresses to expose
+# to clients, client ports (for connections without and with TLS) and cluster
+# message bus port. The information is then published in the bus packets so that
+# other nodes will be able to correctly map the address of the node publishing
+# the information.
 #
 # If tls-cluster is set to yes and cluster-announce-tls-port is omitted or set
 # to zero, then cluster-announce-port refers to the TLS port. Note also that
 # cluster-announce-tls-port has no effect if tls-cluster is set to no.
 #
+# If cluster-announce-client-ipv4 and cluster-announce-client-ipv6 are omitted,
+# then cluster-announce-ip is exposed to clients.
+#
 # If the above options are not used, the normal cluster auto-detection
 # will be used instead.
 #
@@ -1798,6 +1804,8 @@ aof-timestamp-enabled no
 # Example:
 #
 # cluster-announce-ip 10.1.1.5
+# cluster-announce-client-ipv4 123.123.123.5
+# cluster-announce-client-ipv6 2001:db8::8a2e:370:7334
 # cluster-announce-tls-port 6379
 # cluster-announce-port 0
 # cluster-announce-bus-port 6380

From 9948f07a01c90174074d4bf1f7db10768307e052 Mon Sep 17 00:00:00 2001
From: Madelyn Olson <madelyneolson@gmail.com>
Date: Thu, 11 Jul 2024 12:10:13 -0500
Subject: [PATCH 53/53] Temporarily skip blockwait aof test until it's fixed
 (#773)

See https://github.com/valkey-io/valkey/issues/770 for details about the
failure. The test is skipped for now to prevent the test failures.

Signed-off-by: Madelyn Olson <madelyneolson@gmail.com>
---
 tests/unit/scripting.tcl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl
index a9bf242904..b608d39195 100644
--- a/tests/unit/scripting.tcl
+++ b/tests/unit/scripting.tcl
@@ -293,9 +293,10 @@ start_server {tags {"scripting"}} {
         run_script {return redis.pcall('wait','1','0')} 0
     } {0}
 
+    # Temporarily tagged external:skip until the test is stabilized, see https://github.com/valkey-io/valkey/issues/770
     test {EVAL - Scripts do not block on waitaof} {
         run_script {redis.call('incr', 'x') return redis.pcall('waitaof','0','1','0')} 0
-    } {0 0}
+    } {0 0} {external:skip}
 
     test {EVAL - Scripts do not block on XREAD with BLOCK option} {
         r del s