From 138a7d9846c9c8216541a928c4494645731cb658 Mon Sep 17 00:00:00 2001 From: Ping Xie Date: Thu, 9 May 2024 18:12:55 -0700 Subject: [PATCH] Handle role change error in `cluster setslot` when migrating the last slot away with allow-replica-migration enabled (#466) --- src/cluster_legacy.c | 4 ++-- src/valkey-cli.c | 29 +++++++++++++++++++++++------ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index f1a8d87b74..a50d0f8459 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -6320,7 +6320,7 @@ int clusterParseSetSlotCommand(client *c, int *slot_out, clusterNode **node_out, return 1; } -void clusterSetSlotCommand(client *c) { +void clusterCommandSetSlot(client *c) { int slot; int timeout_ms; clusterNode *n; @@ -6575,7 +6575,7 @@ int clusterCommandSpecial(client *c) { /* SETSLOT 10 IMPORTING */ /* SETSLOT 10 STABLE */ /* SETSLOT 10 NODE */ - clusterSetSlotCommand(c); + clusterCommandSetSlot(c); } else if (!strcasecmp(c->argv[1]->ptr,"bumpepoch") && c->argc == 2) { /* CLUSTER BUMPEPOCH */ int retval = clusterBumpConfigEpochWithoutConsensus(); diff --git a/src/valkey-cli.c b/src/valkey-cli.c index 83ac973628..d762084734 100644 --- a/src/valkey-cli.c +++ b/src/valkey-cli.c @@ -5241,13 +5241,30 @@ static int clusterManagerMoveSlot(clusterManagerNode *source, /* Inform the source node. If the source node has just lost its last * slot and the target node has already informed the source node, the * source node has turned itself into a replica. This is not an error in - * this scenario so we ignore it. See issue #9223. */ + * this scenario so we ignore it. See issue #9223. + * + * Another acceptable error can arise now that the primary pre-replicates + * `cluster setslot` commands to replicas while blocking the client on the + * primary. This change enhances the reliability of `cluster setslot` in + * the face of primary failures. However, while our client is blocked on + * the primary awaiting replication, the primary might become a replica + * for the same reason as mentioned above, resulting in the client being + * unblocked with the role change error. */ success = clusterManagerSetSlot(source, target, slot, "node", err); - const char *acceptable = "ERR Please use SETSLOT only with masters."; - if (!success && err && !strncmp(*err, acceptable, strlen(acceptable))) { - zfree(*err); - *err = NULL; - } else if (!success && err) { + if (!success && err) { + const char *acceptable[] = { + "ERR Please use SETSLOT only with masters.", + "UNBLOCKED"}; + for (size_t i = 0; i < sizeof(acceptable)/sizeof(acceptable[0]); i++) { + if (!strncmp(*err, acceptable[i], strlen(acceptable[i]))) { + zfree(*err); + *err = NULL; + success = 1; + break; + } + } + } + if (!success && err) { return 0; }