valkey-io · hpatro · Jul 1, 2024 · Jul 17, 2024 · Sep 10, 2024
diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
@@ -726,10 +726,7 @@ int clusterLoadConfig(char *filename) {
     return C_OK;
 
 fmterr:
-    serverLog(LL_WARNING, "Unrecoverable error: corrupted cluster config file \"%s\".", line);
-    zfree(line);
-    if (fp) fclose(fp);
-    exit(1);
+    serverPanic("Unrecoverable error: corrupted cluster config file \"%s\".", line);
 }
 
 /* Cluster node configuration is exactly the same as CLUSTER NODES output.
@@ -973,6 +970,27 @@ static void updateAnnouncedClientIpV6(clusterNode *node, char *value) {
 }
 
 static void updateShardId(clusterNode *node, const char *shard_id) {
+    /* Ensure replica shard IDs match their primary's to maintain cluster consistency.
+     *
+     * Shard ID updates must prioritize the primary, then propagate to replicas.
+     * This is critical due to the eventual consistency of shard IDs during cluster
+     * expansion. New replicas might replicate from a primary before fully
+     * synchronizing shard IDs with the rest of the cluster.
+     *
+     * Without this enforcement, a temporary inconsistency can arise where a
+     * replica's shard ID diverges from its primary's. This inconsistency is
+     * persisted in the primary's nodes.conf file. While this divergence will
+     * eventually resolve, if the primary crashes beforehand, it will enter a
+     * crash-restart loop due to the mismatch in its nodes.conf. */
+    if (shard_id && nodeIsReplica(node) &&
+        memcmp(clusterNodeGetPrimary(node)->shard_id, shard_id, CLUSTER_NAMELEN) != 0) {
+        serverLog(
+            LL_NOTICE,
+            "Shard id %.40s update request for node id %.40s diverges from existing primary shard id %.40s, rejecting!",
+            shard_id, node->name, clusterNodeGetPrimary(node)->shard_id);
+        return;
+    }
+
     if (shard_id && memcmp(node->shard_id, shard_id, CLUSTER_NAMELEN) != 0) {
         clusterRemoveNodeFromShard(node);
         memcpy(node->shard_id, shard_id, CLUSTER_NAMELEN);

diff --git a/tests/support/cluster.tcl b/tests/support/cluster.tcl
@@ -365,3 +365,9 @@ proc ::valkey_cluster::get_slot_from_keys {keys} {
     }
     return $slot
 }
+
+# Check if the server responds with "PONG"
+proc check_server_response {server_id} {
+    # Send a PING command and check if the response is "PONG"
+    return [expr {[catch {R $server_id PING} result] == 0 && $result eq "PONG"}]
+}
diff --git a/tests/unit/cluster/shardid-propagation.tcl b/tests/unit/cluster/shardid-propagation.tcl
@@ -0,0 +1,59 @@
+set old_singledb $::singledb
+set ::singledb 1
+
+tags {tls:skip external:skip cluster} {
+    set base_conf [list cluster-enabled yes save ""] 
+    start_multiple_servers 2 [list overrides $base_conf] {
+        test "Cluster nodes are reachable" {
+            for {set id 0} {$id < [llength $::servers]} {incr id} {
+                # Every node should be reachable.
+                wait_for_condition 1000 50 {
+                    ([catch {R $id ping} ping_reply] == 0) &&
+                    ($ping_reply eq {PONG})
+                } else {
+                    catch {R $id ping} err
+                    fail "Node #$id keeps replying '$err' to PING."
+                }
+            }
+        }
+
+        test "Before slots allocation, all nodes report cluster failure" {
+            wait_for_cluster_state fail
+        }
+
+        test "Cluster nodes haven't met each other" {
+            assert {[llength [get_cluster_nodes 1]] == 1}
+            assert {[llength [get_cluster_nodes 0]] == 1}
+        }
+
+        test "Allocate slots" {
+            cluster_allocate_slots 1 1
+        }
+
+        test "Restart of node in cluster mode doesn't cause nodes.conf corruption due to shard id mismatch" {
+            set primary_id [R 0 CLUSTER MYID]
+            R 1 CLUSTER MEET 127.0.0.1 [srv 0 port]
+            set pattern *$primary_id*
+            wait_for_condition 100 50 {
+                [string match $pattern [R 1 CLUSTER NODES]]
+            } else {
+                fail "Meet took longer"
+            }
+            R 1 CLUSTER REPLICATE $primary_id
+            wait_replica_online [srv 0 client]
+            set result [catch {R 0 DEBUG RESTART 0} err]
+
+            if {$result != 0 && $err ne "I/O error reading reply"} {
+                fail "Unexpected error restarting server: $err"
+            }
+
+             wait_for_condition 100 100 {
+                [check_server_response 0] eq 1
+            } else {
+                fail "Server didn't come back online in time"
+            }
+        }
+    }
+}
+
+set ::singledb $old_singledb
diff --git a/tests/unit/cluster/slot-migration.tcl b/tests/unit/cluster/slot-migration.tcl
@@ -36,12 +36,6 @@ proc wait_for_slot_state {srv_idx pattern} {
     }
 }
 
-# Check if the server responds with "PONG"
-proc check_server_response {server_id} {
-    # Send a PING command and check if the response is "PONG"
-    return [expr {[catch {R $server_id PING} result] == 0 && $result eq "PONG"}]
-}
-
 # restart a server and wait for it to come back online
 proc fail_server {server_id} {
     set node_timeout [lindex [R 0 CONFIG GET cluster-node-timeout] 1]