From 76fc041685217ad7279c486be892c16f182a3454 Mon Sep 17 00:00:00 2001 From: Harkrishn Patro Date: Thu, 13 Jun 2024 15:58:03 -0700 Subject: [PATCH 1/5] represent cluster node flags with bitwise shift value (#642) While debugging a cluster bus issue, found the cluster node flags were represented in numbers. I generally find it easy when these are represented as bitwise shift operation. It improves readability a bit. Signed-off-by: Harkrishn Patro --- src/cluster_legacy.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 4cb715730f..fb80f45eec 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -41,17 +41,17 @@ typedef struct clusterLink { } clusterLink; /* Cluster node flags and macros. */ -#define CLUSTER_NODE_PRIMARY 1 /* The node is a primary */ -#define CLUSTER_NODE_REPLICA 2 /* The node is a replica */ -#define CLUSTER_NODE_PFAIL 4 /* Failure? Need acknowledge */ -#define CLUSTER_NODE_FAIL 8 /* The node is believed to be malfunctioning */ -#define CLUSTER_NODE_MYSELF 16 /* This node is myself */ -#define CLUSTER_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */ -#define CLUSTER_NODE_NOADDR 64 /* We don't know the address of this node */ -#define CLUSTER_NODE_MEET 128 /* Send a MEET message to this node */ -#define CLUSTER_NODE_MIGRATE_TO 256 /* Primary eligible for replica migration. */ -#define CLUSTER_NODE_NOFAILOVER 512 /* replica will not try to failover. */ -#define CLUSTER_NODE_EXTENSIONS_SUPPORTED 1024 /* This node supports extensions. */ +#define CLUSTER_NODE_PRIMARY (1 << 0) /* The node is a primary */ +#define CLUSTER_NODE_REPLICA (1 << 1) /* The node is a replica */ +#define CLUSTER_NODE_PFAIL (1 << 2) /* Failure? Need acknowledge */ +#define CLUSTER_NODE_FAIL (1 << 3) /* The node is believed to be malfunctioning */ +#define CLUSTER_NODE_MYSELF (1 << 4) /* This node is myself */ +#define CLUSTER_NODE_HANDSHAKE (1 << 5) /* We have still to exchange the first ping */ +#define CLUSTER_NODE_NOADDR (1 << 6) /* We don't know the address of this node */ +#define CLUSTER_NODE_MEET (1 << 7) /* Send a MEET message to this node */ +#define CLUSTER_NODE_MIGRATE_TO (1 << 8) /* Primary eligible for replica migration. */ +#define CLUSTER_NODE_NOFAILOVER (1 << 9) /* replica will not try to failover. */ +#define CLUSTER_NODE_EXTENSIONS_SUPPORTED (1 << 10) /* This node supports extensions. */ #define CLUSTER_NODE_NULL_NAME \ "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ "\000\000\000\000\000\000\000\000\000\000\000\000" From d309b9b23550cd64621bf62f42504bf23838ab15 Mon Sep 17 00:00:00 2001 From: Binbin Date: Fri, 14 Jun 2024 07:47:20 +0800 Subject: [PATCH 2/5] Make configs dir/dbfilename/cluster-config-file reject empty string (#636) Until now, these configuration items allowed typing empty strings, but empty strings behave strangely. Empty dir will fail in chdir with No such file or directory: ``` ./src/valkey-server --dir "" *** FATAL CONFIG FILE ERROR (Version 255.255.255) *** Reading the configuration file, at line 2 >>> 'dir ""' No such file or directory ``` Empty dbfilename will cause shutdown to fail since it will always fail in rdb save: ``` ./src/valkey-server --dbfilename "" * User requested shutdown... * Saving the final RDB snapshot before exiting. # Error moving temp DB file temp-19530.rdb on the final destination (in server root dir /xxx/xxx/valkey): No such file or directory # Error trying to save the DB, can't exit. # Errors trying to shut down the server. Check the logs for more information. ``` Empty cluster-config-file will fail in clusterLockConfig: ``` ./src/valkey-server --cluster-enabled yes --cluster-config-file "" Can't open in order to acquire a lock: No such file or directory ``` With this patch, now we will just reject it in config set like: ``` *** FATAL CONFIG FILE ERROR (Version 255.255.255) *** Reading the configuration file, at line 2 >>> 'xxx ""' xxx can't be empty ``` Signed-off-by: Binbin --- src/config.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/config.c b/src/config.c index a609a8f18d..83e2a51db1 100644 --- a/src/config.c +++ b/src/config.c @@ -2295,7 +2295,19 @@ static int isValidActiveDefrag(int val, const char **err) { return 1; } +static int isValidClusterConfigFile(char *val, const char **err) { + if (!strcmp(val, "")) { + *err = "cluster-config-file can't be empty"; + return 0; + } + return 1; +} + static int isValidDBfilename(char *val, const char **err) { + if (!strcmp(val, "")) { + *err = "dbfilename can't be empty"; + return 0; + } if (!pathIsBaseName(val)) { *err = "dbfilename can't be a path, just a filename"; return 0; @@ -2658,6 +2670,10 @@ static int setConfigDirOption(standardConfig *config, sds *argv, int argc, const *err = "wrong number of arguments"; return 0; } + if (!strcmp(argv[0], "")) { + *err = "dir can't be empty"; + return 0; + } if (chdir(argv[0]) == -1) { *err = strerror(errno); return 0; @@ -3061,7 +3077,7 @@ standardConfig static_configs[] = { createStringConfig("replica-announce-ip", "slave-announce-ip", MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.replica_announce_ip, NULL, NULL, NULL), createStringConfig("primaryuser", "masteruser", MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.primary_user, NULL, NULL, NULL), createStringConfig("cluster-announce-ip", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_ip, NULL, NULL, updateClusterIp), - createStringConfig("cluster-config-file", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.cluster_configfile, "nodes.conf", NULL, NULL), + createStringConfig("cluster-config-file", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.cluster_configfile, "nodes.conf", isValidClusterConfigFile, NULL), createStringConfig("cluster-announce-hostname", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_hostname, NULL, isValidAnnouncedHostname, updateClusterHostname), createStringConfig("cluster-announce-human-nodename", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_human_nodename, NULL, isValidAnnouncedNodename, updateClusterHumanNodename), createStringConfig("syslog-ident", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.syslog_ident, SERVER_NAME, NULL, NULL), From 5d9d41868df1a1e21de73f69e8b36feadf271fe7 Mon Sep 17 00:00:00 2001 From: Ping Xie Date: Thu, 13 Jun 2024 17:52:50 -0700 Subject: [PATCH 3/5] Replace `DEBUG RESTART` with `pause_server` and `resume_server` (#652) --- tests/unit/cluster/slot-migration.tcl | 33 ++++++++++----------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/tests/unit/cluster/slot-migration.tcl b/tests/unit/cluster/slot-migration.tcl index 2016edcbc1..3ff669db90 100644 --- a/tests/unit/cluster/slot-migration.tcl +++ b/tests/unit/cluster/slot-migration.tcl @@ -43,20 +43,11 @@ proc check_server_response {server_id} { } # restart a server and wait for it to come back online -proc restart_server_and_wait {server_id} { +proc fail_server {server_id} { set node_timeout [lindex [R 0 CONFIG GET cluster-node-timeout] 1] - set result [catch {R $server_id DEBUG RESTART [expr 3*$node_timeout]} err] - - # Check if the error is the expected "I/O error reading reply" - if {$result != 0 && $err ne "I/O error reading reply"} { - fail "Unexpected error restarting server $server_id: $err" - } - - wait_for_condition 100 100 { - [check_server_response $server_id] eq 1 - } else { - fail "Server $server_id didn't come back online in time" - } + pause_process [srv [expr -1*$server_id] pid] + after [expr 3*$node_timeout] + resume_process [srv [expr -1*$server_id] pid] } start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica-migration no cluster-node-timeout 1000} } { @@ -91,8 +82,8 @@ start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica } test "Migration target is auto-updated after failover in target shard" { - # Restart R1 to trigger an auto-failover to R4 - restart_server_and_wait 1 + # Trigger an auto-failover from R1 to R4 + fail_server 1 # Wait for R1 to become a replica wait_for_role 1 slave # Validate final states @@ -111,8 +102,8 @@ start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica } test "Migration source is auto-updated after failover in source shard" { - # Restart R0 to trigger an auto-failover to R3 - restart_server_and_wait 0 + # Trigger an auto-failover from R0 to R3 + fail_server 0 # Wait for R0 to become a replica wait_for_role 0 slave # Validate final states @@ -261,8 +252,8 @@ start_cluster 3 5 {tags {external:skip cluster} overrides {cluster-allow-replica test "Empty-shard migration target is auto-updated after faiover in target shard" { wait_for_role 6 master - # Restart R6 to trigger an auto-failover to R7 - restart_server_and_wait 6 + # Trigger an auto-failover from R6 to R7 + fail_server 6 # Wait for R6 to become a replica wait_for_role 6 slave # Validate final states @@ -282,8 +273,8 @@ start_cluster 3 5 {tags {external:skip cluster} overrides {cluster-allow-replica test "Empty-shard migration source is auto-updated after faiover in source shard" { wait_for_role 0 master - # Restart R0 to trigger an auto-failover to R3 - restart_server_and_wait 0 + # Trigger an auto-failover from R0 to R3 + fail_server 0 # Wait for R0 to become a replica wait_for_role 0 slave # Validate final states From 8a776c35090c287a309ed49514cc3051d9f39707 Mon Sep 17 00:00:00 2001 From: Ping Xie Date: Thu, 13 Jun 2024 23:43:36 -0700 Subject: [PATCH 4/5] Fix potential infinite loop in `clusterNodeGetPrimary` (#651) --- src/cluster_legacy.c | 44 +++++++++----------------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index cd3786fe05..f566cf5a35 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3128,39 +3128,7 @@ int clusterProcessPacket(clusterLink *link) { clusterUpdateSlotsConfigWith(sender_primary, senderConfigEpoch, hdr->myslots); /* Explicitly check for a replication loop before attempting the replication - * chain folding logic. - * - * In some rare case, slot config updates (via either PING/PONG or UPDATE) - * can be delivered out of order as illustrated below. - * - * 1. To keep the discussion simple, let's assume we have 2 shards, shard a - * and shard b. Let's also assume there are two slots in total with shard - * a owning slot 1 and shard b owning slot 2. - * 2. Shard a has two nodes: primary A and replica A*; shard b has primary - * B and replica B*. - * 3. A manual failover was initiated on A* and A* just wins the election. - * 4. A* announces to the world that it now owns slot 1 using PING messages. - * These PING messages are queued in the outgoing buffer to every other - * node in the cluster, namely, A, B, and B*. - * 5. Keep in mind that there is no ordering in the delivery of these PING - * messages. For the stale PING message to appear, we need the following - * events in the exact order as they are laid out. - * a. An old PING message before A* becomes the new primary is still queued - * in A*'s outgoing buffer to A. This later becomes the stale message, - * which says A* is a replica of A. It is followed by A*'s election - * winning announcement PING message. - * b. B or B* processes A's election winning announcement PING message - * and sets slots[1]=A*. - * c. A sends a PING message to B (or B*). Since A hasn't learnt that A* - * wins the election, it claims that it owns slot 1 but with a lower - * epoch than B has on slot 1. This leads to B sending an UPDATE to - * A directly saying A* is the new owner of slot 1 with a higher epoch. - * d. A receives the UPDATE from B and executes clusterUpdateSlotsConfigWith. - * A now realizes that it is a replica of A* hence setting myself->replicaof - * to A*. - * e. Finally, the pre-failover PING message queued up in A*'s outgoing - * buffer to A is delivered and processed, out of order though, to A. - * f. This stale PING message creates the replication loop */ + * chain folding logic. */ if (myself->replicaof && myself->replicaof->replicaof && myself->replicaof->replicaof != myself) { /* Safeguard against sub-replicas. A replica's primary can turn itself * into a replica if its last slot is removed. If no other node takes @@ -5853,8 +5821,14 @@ int clusterNodeIsReplica(clusterNode *node) { } clusterNode *clusterNodeGetPrimary(clusterNode *node) { - while (node->replicaof != NULL) node = node->replicaof; - return node; + clusterNode *primary = node; + while (primary->replicaof != NULL) { + primary = primary->replicaof; + if (primary == node) break; + } + /* Assert that a node's replicaof/primary chain does not form a cycle. */ + debugServerAssert(primary->replicaof == NULL); + return primary; } char *clusterNodeGetName(clusterNode *node) { From 4c6bf30f58335baa4564415a0a75011d23aa1714 Mon Sep 17 00:00:00 2001 From: Wenwen-Chen Date: Fri, 14 Jun 2024 18:56:59 +0800 Subject: [PATCH 5/5] Latency report: Rebranding and refine Dave dialog (#644) This patch try to correct the latency report. 1. Rename Redis to Valkey. 2. Remove redundant Dave dialog, and refine the output message. --------- Signed-off-by: Wenwen Chen Signed-off-by: hwware --- src/latency.c | 69 ++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/src/latency.c b/src/latency.c index 94840379bd..eacb5fbbc4 100644 --- a/src/latency.c +++ b/src/latency.c @@ -216,9 +216,8 @@ sds createLatencyReport(void) { * was never enabled so far. */ if (dictSize(server.latency_events) == 0 && server.latency_monitor_threshold == 0) { report = sdscat( - report, "I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Redis instance. You may " - "use \"CONFIG SET latency-monitor-threshold .\" in order to enable it. If we weren't " - "in a deep space mission I'd suggest to take a look at https://redis.io/topics/latency-monitor.\n"); + report, "I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Valkey instance. You may " + "use \"CONFIG SET latency-monitor-threshold .\" in order to enable it.\n"); return report; } @@ -237,8 +236,7 @@ sds createLatencyReport(void) { if (ts == NULL) continue; eventnum++; if (eventnum == 1) { - report = sdscat(report, "Dave, I have observed latency spikes in this Redis instance. You don't mind " - "talking about it, do you Dave?\n\n"); + report = sdscat(report, "Latency spikes are observed in this Valkey instance.\n\n"); } analyzeLatencyForEvent(event, &ls); @@ -358,18 +356,16 @@ sds createLatencyReport(void) { } if (eventnum == 0 && advices == 0) { - report = sdscat(report, "Dave, no latency spike was observed during the lifetime of this Redis instance, not " - "in the slightest bit. I honestly think you ought to sit down calmly, take a stress " - "pill, and think things over.\n"); + report = sdscat(report, "No latency spike was observed during the lifetime of this Valkey instance, not " + "in the slightest bit.\n"); } else if (eventnum > 0 && advices == 0) { - report = - sdscat(report, "\nWhile there are latency events logged, I'm not able to suggest any easy fix. Please use " - "the Redis community to get some help, providing this report in your help request.\n"); + report = sdscat(report, "\nThere are latency events logged that are not easy to fix. Please get some " + "help from Valkey community, providing this report in your help request.\n"); } else { /* Add all the suggestions accumulated so far. */ /* Better VM. */ - report = sdscat(report, "\nI have a few advices for you:\n\n"); + report = sdscat(report, "\nHere is some advice for you:\n\n"); if (advise_better_vm) { report = sdscat(report, "- If you are using a virtual machine, consider upgrading it with a faster one " "using a hypervisior that provides less latency during fork() calls. Xen is known " @@ -382,8 +378,8 @@ sds createLatencyReport(void) { report = sdscatprintf(report, "- There are latency issues with potentially slow commands you are using. Try to enable " - "the Slow Log Redis feature using the command 'CONFIG SET slowlog-log-slower-than %llu'. " - "If the Slow log is disabled Redis is not able to log slow commands execution for you.\n", + "the Slow Log Valkey feature using the command 'CONFIG SET slowlog-log-slower-than %llu'. " + "If the Slow log is disabled Valkey is not able to log slow commands execution for you.\n", (unsigned long long)server.latency_monitor_threshold * 1000); } @@ -398,20 +394,20 @@ sds createLatencyReport(void) { if (advise_slowlog_inspect) { report = sdscat(report, "- Check your Slow Log to understand what are the commands you are running which are too " - "slow to execute. Please check https://redis.io/commands/slowlog for more information.\n"); + "slow to execute. Please check https://valkey.io/commands/slowlog for more information.\n"); } /* Intrinsic latency. */ if (advise_scheduler) { report = sdscat( report, - "- The system is slow to execute Redis code paths not containing system calls. This usually means the " - "system does not provide Redis CPU time to run for long periods. You should try to:\n" + "- The system is slow to execute Valkey code paths not containing system calls. This usually means the " + "system does not provide Valkey CPU time to run for long periods. You should try to:\n" " 1) Lower the system load.\n" - " 2) Use a computer / VM just for Redis if you are running other software in the same system.\n" + " 2) Use a computer / VM just for Valkey if you are running other software in the same system.\n" " 3) Check if you have a \"noisy neighbour\" problem.\n" - " 4) Check with 'redis-cli --intrinsic-latency 100' what is the intrinsic latency in your system.\n" - " 5) Check if the problem is allocator-related by recompiling Redis with MALLOC=libc, if you are " + " 4) Check with 'valkey-cli --intrinsic-latency 100' what is the intrinsic latency in your system.\n" + " 5) Check if the problem is allocator-related by recompiling Valkey with MALLOC=libc, if you are " "using Jemalloc. However this may create fragmentation problems.\n"); } @@ -423,11 +419,11 @@ sds createLatencyReport(void) { } if (advise_ssd) { - report = - sdscat(report, "- SSD disks are able to reduce fsync latency, and total time needed for snapshotting " - "and AOF log rewriting (resulting in smaller memory usage). With extremely high write " - "load SSD disks can be a good option. However Redis should perform reasonably with high " - "load using normal disks. Use this advice as a last resort.\n"); + report = sdscat(report, + "- SSD disks are able to reduce fsync latency, and total time needed for snapshotting " + "and AOF log rewriting (resulting in smaller memory usage). With extremely high write " + "load SSD disks can be a good option. However Valkey should perform reasonably with high " + "load using normal disks. Use this advice as a last resort.\n"); } if (advise_data_writeback) { @@ -435,12 +431,12 @@ sds createLatencyReport(void) { sdscat(report, "- Mounting ext3/4 filesystems with data=writeback can provide a performance boost " "compared to data=ordered, however this mode of operation provides less guarantees, and " "sometimes it can happen that after a hard crash the AOF file will have a half-written " - "command at the end and will require to be repaired before Redis restarts.\n"); + "command at the end and will require to be repaired before Valkey restarts.\n"); } if (advise_disk_contention) { report = sdscat(report, "- Try to lower the disk contention. This is often caused by other disk intensive " - "processes running in the same computer (including other Redis instances).\n"); + "processes running in the same computer (including other Valkey instances).\n"); } if (advise_no_appendfsync) { @@ -464,7 +460,7 @@ sds createLatencyReport(void) { } if (advise_hz && server.hz < 100) { - report = sdscat(report, "- In order to make the Redis keys expiring process more incremental, try to set " + report = sdscat(report, "- In order to make the Valkey keys expiring process more incremental, try to set " "the 'hz' configuration parameter to 100 using 'CONFIG SET hz 100'.\n"); } @@ -477,19 +473,20 @@ sds createLatencyReport(void) { if (advise_mass_eviction) { report = sdscat(report, "- Sudden changes to the 'maxmemory' setting via 'CONFIG SET', or allocation of " - "large objects via sets or sorted sets intersections, STORE option of SORT, Redis " + "large objects via sets or sorted sets intersections, STORE option of SORT, Valkey " "Cluster large keys migrations (RESTORE command), may create sudden memory " "pressure forcing the server to block trying to evict keys. \n"); } if (advise_disable_thp) { - report = sdscat(report, "- I detected a non zero amount of anonymous huge pages used by your process. This " - "creates very serious latency events in different conditions, especially when " - "Redis is persisting on disk. To disable THP support use the command 'echo never > " - "/sys/kernel/mm/transparent_hugepage/enabled', make sure to also add it into " - "/etc/rc.local so that the command will be executed again after a reboot. Note " - "that even if you have already disabled THP, you still need to restart the Redis " - "process to get rid of the huge pages already created.\n"); + report = + sdscat(report, "- I detected a non zero amount of anonymous huge pages used by your process. This " + "creates very serious latency events in different conditions, especially when " + "Valkey is persisting on disk. To disable THP support use the command 'echo never > " + "/sys/kernel/mm/transparent_hugepage/enabled', make sure to also add it into " + "/etc/rc.local so that the command will be executed again after a reboot. Note " + "that even if you have already disabled THP, you still need to restart the Valkey " + "process to get rid of the huge pages already created.\n"); } }