Merge branch 'unstable' of https://github.com/adetunjii/valkey into f…

…ix/format-yaml
valkey-io · Jun 14, 2024 · bb6349b · bb6349b
2 parents f52b326 + 4c6bf30
commit bb6349b
Show file tree

Hide file tree

Showing 5 changed files with 82 additions and 104 deletions.
diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
@@ -3128,39 +3128,7 @@ int clusterProcessPacket(clusterLink *link) {
             clusterUpdateSlotsConfigWith(sender_primary, senderConfigEpoch, hdr->myslots);
 
         /* Explicitly check for a replication loop before attempting the replication
-         * chain folding logic.
-         *
-         * In some rare case, slot config updates (via either PING/PONG or UPDATE)
-         * can be delivered out of order as illustrated below.
-         *
-         * 1. To keep the discussion simple, let's assume we have 2 shards, shard a
-         *    and shard b. Let's also assume there are two slots in total with shard
-         *    a owning slot 1 and shard b owning slot 2.
-         * 2. Shard a has two nodes: primary A and replica A*; shard b has primary
-         *    B and replica B*.
-         * 3. A manual failover was initiated on A* and A* just wins the election.
-         * 4. A* announces to the world that it now owns slot 1 using PING messages.
-         *    These PING messages are queued in the outgoing buffer to every other
-         *    node in the cluster, namely, A, B, and B*.
-         * 5. Keep in mind that there is no ordering in the delivery of these PING
-         *    messages. For the stale PING message to appear, we need the following
-         *    events in the exact order as they are laid out.
-         * a. An old PING message before A* becomes the new primary is still queued
-         *    in A*'s outgoing buffer to A. This later becomes the stale message,
-         *    which says A* is a replica of A. It is followed by A*'s election
-         *    winning announcement PING message.
-         * b. B or B* processes A's election winning announcement PING message
-         *    and sets slots[1]=A*.
-         * c. A sends a PING message to B (or B*). Since A hasn't learnt that A*
-         *    wins the election, it claims that it owns slot 1 but with a lower
-         *    epoch than B has on slot 1. This leads to B sending an UPDATE to
-         *    A directly saying A* is the new owner of slot 1 with a higher epoch.
-         * d. A receives the UPDATE from B and executes clusterUpdateSlotsConfigWith.
-         *    A now realizes that it is a replica of A* hence setting myself->replicaof
-         *    to A*.
-         * e. Finally, the pre-failover PING message queued up in A*'s outgoing
-         *    buffer to A is delivered and processed, out of order though, to A.
-         * f. This stale PING message creates the replication loop */
+         * chain folding logic. */
         if (myself->replicaof && myself->replicaof->replicaof && myself->replicaof->replicaof != myself) {
             /* Safeguard against sub-replicas. A replica's primary can turn itself
              * into a replica if its last slot is removed. If no other node takes
@@ -5853,8 +5821,14 @@ int clusterNodeIsReplica(clusterNode *node) {
 }
 
 clusterNode *clusterNodeGetPrimary(clusterNode *node) {
-    while (node->replicaof != NULL) node = node->replicaof;
-    return node;
+    clusterNode *primary = node;
+    while (primary->replicaof != NULL) {
+        primary = primary->replicaof;
+        if (primary == node) break;
+    }
+    /* Assert that a node's replicaof/primary chain does not form a cycle. */
+    debugServerAssert(primary->replicaof == NULL);
+    return primary;
 }
 
 char *clusterNodeGetName(clusterNode *node) {

diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h
@@ -41,17 +41,17 @@ typedef struct clusterLink {
 } clusterLink;
 
 /* Cluster node flags and macros. */
-#define CLUSTER_NODE_PRIMARY 1                 /* The node is a primary */
-#define CLUSTER_NODE_REPLICA 2                 /* The node is a replica */
-#define CLUSTER_NODE_PFAIL 4                   /* Failure? Need acknowledge */
-#define CLUSTER_NODE_FAIL 8                    /* The node is believed to be malfunctioning */
-#define CLUSTER_NODE_MYSELF 16                 /* This node is myself */
-#define CLUSTER_NODE_HANDSHAKE 32              /* We have still to exchange the first ping */
-#define CLUSTER_NODE_NOADDR 64                 /* We don't know the address of this node */
-#define CLUSTER_NODE_MEET 128                  /* Send a MEET message to this node */
-#define CLUSTER_NODE_MIGRATE_TO 256            /* Primary eligible for replica migration. */
-#define CLUSTER_NODE_NOFAILOVER 512            /* replica will not try to failover. */
-#define CLUSTER_NODE_EXTENSIONS_SUPPORTED 1024 /* This node supports extensions. */
+#define CLUSTER_NODE_PRIMARY (1 << 0)               /* The node is a primary */
+#define CLUSTER_NODE_REPLICA (1 << 1)               /* The node is a replica */
+#define CLUSTER_NODE_PFAIL (1 << 2)                 /* Failure? Need acknowledge */
+#define CLUSTER_NODE_FAIL (1 << 3)                  /* The node is believed to be malfunctioning */
+#define CLUSTER_NODE_MYSELF (1 << 4)                /* This node is myself */
+#define CLUSTER_NODE_HANDSHAKE (1 << 5)             /* We have still to exchange the first ping */
+#define CLUSTER_NODE_NOADDR (1 << 6)                /* We don't know the address of this node */
+#define CLUSTER_NODE_MEET (1 << 7)                  /* Send a MEET message to this node */
+#define CLUSTER_NODE_MIGRATE_TO (1 << 8)            /* Primary eligible for replica migration. */
+#define CLUSTER_NODE_NOFAILOVER (1 << 9)            /* replica will not try to failover. */
+#define CLUSTER_NODE_EXTENSIONS_SUPPORTED (1 << 10) /* This node supports extensions. */
 #define CLUSTER_NODE_NULL_NAME                                                                                         \
     "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
     "\000\000\000\000\000\000\000\000\000\000\000\000"

diff --git a/src/config.c b/src/config.c
@@ -2295,7 +2295,19 @@ static int isValidActiveDefrag(int val, const char **err) {
     return 1;
 }
 
+static int isValidClusterConfigFile(char *val, const char **err) {
+    if (!strcmp(val, "")) {
+        *err = "cluster-config-file can't be empty";
+        return 0;
+    }
+    return 1;
+}
+
 static int isValidDBfilename(char *val, const char **err) {
+    if (!strcmp(val, "")) {
+        *err = "dbfilename can't be empty";
+        return 0;
+    }
     if (!pathIsBaseName(val)) {
         *err = "dbfilename can't be a path, just a filename";
         return 0;
@@ -2658,6 +2670,10 @@ static int setConfigDirOption(standardConfig *config, sds *argv, int argc, const
         *err = "wrong number of arguments";
         return 0;
     }
+    if (!strcmp(argv[0], "")) {
+        *err = "dir can't be empty";
+        return 0;
+    }
     if (chdir(argv[0]) == -1) {
         *err = strerror(errno);
         return 0;
@@ -3061,7 +3077,7 @@ standardConfig static_configs[] = {
     createStringConfig("replica-announce-ip", "slave-announce-ip", MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.replica_announce_ip, NULL, NULL, NULL),
     createStringConfig("primaryuser", "masteruser", MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.primary_user, NULL, NULL, NULL),
     createStringConfig("cluster-announce-ip", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_ip, NULL, NULL, updateClusterIp),
-    createStringConfig("cluster-config-file", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.cluster_configfile, "nodes.conf", NULL, NULL),
+    createStringConfig("cluster-config-file", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.cluster_configfile, "nodes.conf", isValidClusterConfigFile, NULL),
     createStringConfig("cluster-announce-hostname", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_hostname, NULL, isValidAnnouncedHostname, updateClusterHostname),
     createStringConfig("cluster-announce-human-nodename", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_human_nodename, NULL, isValidAnnouncedNodename, updateClusterHumanNodename),
     createStringConfig("syslog-ident", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.syslog_ident, SERVER_NAME, NULL, NULL),

diff --git a/src/latency.c b/src/latency.c
@@ -216,9 +216,8 @@ sds createLatencyReport(void) {
      * was never enabled so far. */
     if (dictSize(server.latency_events) == 0 && server.latency_monitor_threshold == 0) {
         report = sdscat(
-            report, "I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Redis instance. You may "
-                    "use \"CONFIG SET latency-monitor-threshold <milliseconds>.\" in order to enable it. If we weren't "
-                    "in a deep space mission I'd suggest to take a look at https://redis.io/topics/latency-monitor.\n");
+            report, "I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Valkey instance. You may "
+                    "use \"CONFIG SET latency-monitor-threshold <milliseconds>.\" in order to enable it.\n");
         return report;
     }
 
@@ -237,8 +236,7 @@ sds createLatencyReport(void) {
         if (ts == NULL) continue;
         eventnum++;
         if (eventnum == 1) {
-            report = sdscat(report, "Dave, I have observed latency spikes in this Redis instance. You don't mind "
-                                    "talking about it, do you Dave?\n\n");
+            report = sdscat(report, "Latency spikes are observed in this Valkey instance.\n\n");
         }
         analyzeLatencyForEvent(event, &ls);
 
@@ -358,18 +356,16 @@ sds createLatencyReport(void) {
     }
 
     if (eventnum == 0 && advices == 0) {
-        report = sdscat(report, "Dave, no latency spike was observed during the lifetime of this Redis instance, not "
-                                "in the slightest bit. I honestly think you ought to sit down calmly, take a stress "
-                                "pill, and think things over.\n");
+        report = sdscat(report, "No latency spike was observed during the lifetime of this Valkey instance, not "
+                                "in the slightest bit.\n");
     } else if (eventnum > 0 && advices == 0) {
-        report =
-            sdscat(report, "\nWhile there are latency events logged, I'm not able to suggest any easy fix. Please use "
-                           "the Redis community to get some help, providing this report in your help request.\n");
+        report = sdscat(report, "\nThere are latency events logged that are not easy to fix. Please get some "
+                                "help from Valkey community, providing this report in your help request.\n");
     } else {
         /* Add all the suggestions accumulated so far. */
 
         /* Better VM. */
-        report = sdscat(report, "\nI have a few advices for you:\n\n");
+        report = sdscat(report, "\nHere is some advice for you:\n\n");
         if (advise_better_vm) {
             report = sdscat(report, "- If you are using a virtual machine, consider upgrading it with a faster one "
                                     "using a hypervisior that provides less latency during fork() calls. Xen is known "
@@ -382,8 +378,8 @@ sds createLatencyReport(void) {
             report =
                 sdscatprintf(report,
                              "- There are latency issues with potentially slow commands you are using. Try to enable "
-                             "the Slow Log Redis feature using the command 'CONFIG SET slowlog-log-slower-than %llu'. "
-                             "If the Slow log is disabled Redis is not able to log slow commands execution for you.\n",
+                             "the Slow Log Valkey feature using the command 'CONFIG SET slowlog-log-slower-than %llu'. "
+                             "If the Slow log is disabled Valkey is not able to log slow commands execution for you.\n",
                              (unsigned long long)server.latency_monitor_threshold * 1000);
         }
 
@@ -398,20 +394,20 @@ sds createLatencyReport(void) {
         if (advise_slowlog_inspect) {
             report = sdscat(report,
                             "- Check your Slow Log to understand what are the commands you are running which are too "
-                            "slow to execute. Please check https://redis.io/commands/slowlog for more information.\n");
+                            "slow to execute. Please check https://valkey.io/commands/slowlog for more information.\n");
         }
 
         /* Intrinsic latency. */
         if (advise_scheduler) {
             report = sdscat(
                 report,
-                "- The system is slow to execute Redis code paths not containing system calls. This usually means the "
-                "system does not provide Redis CPU time to run for long periods. You should try to:\n"
+                "- The system is slow to execute Valkey code paths not containing system calls. This usually means the "
+                "system does not provide Valkey CPU time to run for long periods. You should try to:\n"
                 "  1) Lower the system load.\n"
-                "  2) Use a computer / VM just for Redis if you are running other software in the same system.\n"
+                "  2) Use a computer / VM just for Valkey if you are running other software in the same system.\n"
                 "  3) Check if you have a \"noisy neighbour\" problem.\n"
-                "  4) Check with 'redis-cli --intrinsic-latency 100' what is the intrinsic latency in your system.\n"
-                "  5) Check if the problem is allocator-related by recompiling Redis with MALLOC=libc, if you are "
+                "  4) Check with 'valkey-cli --intrinsic-latency 100' what is the intrinsic latency in your system.\n"
+                "  5) Check if the problem is allocator-related by recompiling Valkey with MALLOC=libc, if you are "
                 "using Jemalloc. However this may create fragmentation problems.\n");
         }
 
@@ -423,24 +419,24 @@ sds createLatencyReport(void) {
         }
 
         if (advise_ssd) {
-            report =
-                sdscat(report, "- SSD disks are able to reduce fsync latency, and total time needed for snapshotting "
-                               "and AOF log rewriting (resulting in smaller memory usage). With extremely high write "
-                               "load SSD disks can be a good option. However Redis should perform reasonably with high "
-                               "load using normal disks. Use this advice as a last resort.\n");
+            report = sdscat(report,
+                            "- SSD disks are able to reduce fsync latency, and total time needed for snapshotting "
+                            "and AOF log rewriting (resulting in smaller memory usage). With extremely high write "
+                            "load SSD disks can be a good option. However Valkey should perform reasonably with high "
+                            "load using normal disks. Use this advice as a last resort.\n");
         }
 
         if (advise_data_writeback) {
             report =
                 sdscat(report, "- Mounting ext3/4 filesystems with data=writeback can provide a performance boost "
                                "compared to data=ordered, however this mode of operation provides less guarantees, and "
                                "sometimes it can happen that after a hard crash the AOF file will have a half-written "
-                               "command at the end and will require to be repaired before Redis restarts.\n");
+                               "command at the end and will require to be repaired before Valkey restarts.\n");
         }
 
         if (advise_disk_contention) {
             report = sdscat(report, "- Try to lower the disk contention. This is often caused by other disk intensive "
-                                    "processes running in the same computer (including other Redis instances).\n");
+                                    "processes running in the same computer (including other Valkey instances).\n");
         }
 
         if (advise_no_appendfsync) {
@@ -464,7 +460,7 @@ sds createLatencyReport(void) {
         }
 
         if (advise_hz && server.hz < 100) {
-            report = sdscat(report, "- In order to make the Redis keys expiring process more incremental, try to set "
+            report = sdscat(report, "- In order to make the Valkey keys expiring process more incremental, try to set "
                                     "the 'hz' configuration parameter to 100 using 'CONFIG SET hz 100'.\n");
         }
 
@@ -477,19 +473,20 @@ sds createLatencyReport(void) {
 
         if (advise_mass_eviction) {
             report = sdscat(report, "- Sudden changes to the 'maxmemory' setting via 'CONFIG SET', or allocation of "
-                                    "large objects via sets or sorted sets intersections, STORE option of SORT, Redis "
+                                    "large objects via sets or sorted sets intersections, STORE option of SORT, Valkey "
                                     "Cluster large keys migrations (RESTORE command), may create sudden memory "
                                     "pressure forcing the server to block trying to evict keys. \n");
         }
 
         if (advise_disable_thp) {
-            report = sdscat(report, "- I detected a non zero amount of anonymous huge pages used by your process. This "
-                                    "creates very serious latency events in different conditions, especially when "
-                                    "Redis is persisting on disk. To disable THP support use the command 'echo never > "
-                                    "/sys/kernel/mm/transparent_hugepage/enabled', make sure to also add it into "
-                                    "/etc/rc.local so that the command will be executed again after a reboot. Note "
-                                    "that even if you have already disabled THP, you still need to restart the Redis "
-                                    "process to get rid of the huge pages already created.\n");
+            report =
+                sdscat(report, "- I detected a non zero amount of anonymous huge pages used by your process. This "
+                               "creates very serious latency events in different conditions, especially when "
+                               "Valkey is persisting on disk. To disable THP support use the command 'echo never > "
+                               "/sys/kernel/mm/transparent_hugepage/enabled', make sure to also add it into "
+                               "/etc/rc.local so that the command will be executed again after a reboot. Note "
+                               "that even if you have already disabled THP, you still need to restart the Valkey "
+                               "process to get rid of the huge pages already created.\n");
         }
     }