diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 7679856d1d..91dbb26fab 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -358,10 +358,10 @@ jobs: run: sudo apt-get install tcl8.6 tclx - name: test if: true && !contains(github.event.inputs.skiptests, 'valkey') - run: ./runtest --config io-threads 4 --config io-threads-do-reads yes --accurate --verbose --tags network --dump-logs ${{github.event.inputs.test_args}} + run: ./runtest --config io-threads 2 --config events-per-io-thread 0 --accurate --verbose --tags network --dump-logs ${{github.event.inputs.test_args}} - name: cluster tests if: true && !contains(github.event.inputs.skiptests, 'cluster') - run: ./runtest-cluster --config io-threads 4 --config io-threads-do-reads yes ${{github.event.inputs.cluster_test_args}} + run: ./runtest-cluster --config io-threads 2 --config events-per-io-thread 0 ${{github.event.inputs.cluster_test_args}} test-ubuntu-reclaim-cache: runs-on: ubuntu-latest diff --git a/deps/Makefile b/deps/Makefile index 67b7d41026..f1e4bd6ce2 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -79,7 +79,7 @@ ifeq ($(uname_S),SunOS) LUA_CFLAGS= -D__C99FEATURES__=1 endif -LUA_CFLAGS+= -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL -DREDIS_STATIC='' -DLUA_USE_MKSTEMP $(CFLAGS) +LUA_CFLAGS+= -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL -DLUA_USE_MKSTEMP $(CFLAGS) LUA_LDFLAGS+= $(LDFLAGS) ifeq ($(LUA_DEBUG),yes) LUA_CFLAGS+= -O0 -g -DLUA_USE_APICHECK diff --git a/src/Makefile b/src/Makefile index 302ad06b84..4e8c34b253 100644 --- a/src/Makefile +++ b/src/Makefile @@ -35,7 +35,7 @@ DEPENDENCY_TARGETS=hiredis linenoise lua hdr_histogram fpconv NODEPS:=clean distclean # Default settings -STD=-pedantic -DSERVER_STATIC='' +STD=-pedantic # Use -Wno-c11-extensions on clang, either where explicitly used or on # platforms we can assume it's being used. 
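A recurring pattern in the acl.c, aof.c and blocked.c hunks that follow is the replacement of bitmask flag manipulation (c->flags & CLIENT_MULTI, c->flags |= CLIENT_BLOCKED) with named one-bit fields (c->flag.multi, c->flag.blocked). Below is a minimal sketch of that pattern, assuming a C11 anonymous union; only the accessor shape (raw_flag and flag.<name>) is taken from the diff, while the struct name and exact field layout are illustrative assumptions, not the actual Valkey definitions.

#include <stdint.h>

/* Illustrative layout only: a handful of the flags this diff touches. */
typedef struct clientSketch {
    union {
        uint64_t raw_flag; /* clears every flag at once, as createAOFClient() now does */
        struct {
            uint64_t authenticated : 1;
            uint64_t multi : 1;
            uint64_t blocked : 1;
            uint64_t unblocked : 1;
            uint64_t pending_command : 1;
            uint64_t deny_blocking : 1;
            uint64_t close_after_command : 1;
            uint64_t close_after_reply : 1;
            uint64_t readonly : 1;
            uint64_t asking : 1;
        } flag;
    };
} clientSketch;

/* How the call sites read after the refactor (old bitmask form in comments). */
static void flagUsageSketch(clientSketch *c) {
    c->raw_flag = 0;           /* old: c->flags = 0 */
    c->flag.deny_blocking = 1; /* old: c->flags = CLIENT_DENY_BLOCKING */
    if (!c->flag.blocked) {    /* old: !(c->flags & CLIENT_BLOCKED) */
        c->flag.pending_command = 0;
    }
}

The practical effect visible throughout the diff is that flag tests and assignments no longer need CLIENT_* mask constants or explicit &, |, ~ operations at each call site.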
@@ -401,7 +401,7 @@ endif ENGINE_NAME=valkey SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX) ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX) -ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o +ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX) ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o ENGINE_BENCHMARK_NAME=$(ENGINE_NAME)-benchmark$(PROG_SUFFIX) diff --git a/src/acl.c b/src/acl.c index bda449e8d2..51aa567165 100644 --- a/src/acl.c +++ b/src/acl.c @@ -506,11 +506,11 @@ void ACLFreeUserAndKillClients(user *u) { * more defensive to set the default user and put * it in non authenticated mode. */ c->user = DefaultUser; - c->flags &= ~CLIENT_AUTHENTICATED; + c->flag.authenticated = 0; /* We will write replies to this client later, so we can't * close it directly even if async. */ if (c == server.current_client) { - c->flags |= CLIENT_CLOSE_AFTER_COMMAND; + c->flag.close_after_command = 1; } else { freeClientAsync(c); } @@ -1494,13 +1494,13 @@ void addAuthErrReply(client *c, robj *err) { * The return value is AUTH_OK on success (valid username / password pair) & AUTH_ERR otherwise. 
*/ int checkPasswordBasedAuth(client *c, robj *username, robj *password) { if (ACLCheckUserCredentials(username, password) == C_OK) { - c->flags |= CLIENT_AUTHENTICATED; + c->flag.authenticated = 1; c->user = ACLGetUserByName(username->ptr, sdslen(username->ptr)); moduleNotifyUserChanged(c); return AUTH_OK; } else { - addACLLogEntry(c, ACL_DENIED_AUTH, (c->flags & CLIENT_MULTI) ? ACL_LOG_CTX_MULTI : ACL_LOG_CTX_TOPLEVEL, 0, - username->ptr, NULL); + addACLLogEntry(c, ACL_DENIED_AUTH, (c->flag.multi) ? ACL_LOG_CTX_MULTI : ACL_LOG_CTX_TOPLEVEL, 0, username->ptr, + NULL); return AUTH_ERR; } } diff --git a/src/ae.c b/src/ae.c index 62031cbeea..b6a1ce0b10 100644 --- a/src/ae.c +++ b/src/ae.c @@ -183,7 +183,9 @@ void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask) { * is removed. */ if (mask & AE_WRITABLE) mask |= AE_BARRIER; - aeApiDelEvent(eventLoop, fd, mask); + /* Only remove attached events */ + mask = mask & fe->mask; + fe->mask = fe->mask & (~mask); if (fd == eventLoop->maxfd && fe->mask == AE_NONE) { /* Update the max fd */ @@ -193,6 +195,15 @@ void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask) { if (eventLoop->events[j].mask != AE_NONE) break; eventLoop->maxfd = j; } + + /* Check whether there are events to be removed. + * Note: user may remove the AE_BARRIER without + * touching the actual events. */ + if (mask & (AE_READABLE | AE_WRITABLE)) { + /* Must be invoked after the eventLoop mask is modified, + * which is required by evport and epoll */ + aeApiDelEvent(eventLoop, fd, mask); + } } void *aeGetFileClientData(aeEventLoop *eventLoop, int fd) { @@ -392,7 +403,7 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags) { } /* After sleep callback. */ - if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP) eventLoop->aftersleep(eventLoop); + if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP) eventLoop->aftersleep(eventLoop, numevents); for (j = 0; j < numevents; j++) { int fd = eventLoop->fired[j].fd; @@ -489,6 +500,6 @@ void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep eventLoop->beforesleep = beforesleep; } -void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep) { +void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeAfterSleepProc *aftersleep) { eventLoop->aftersleep = aftersleep; } diff --git a/src/ae.h b/src/ae.h index a6dcbce50d..3b1c96a01d 100644 --- a/src/ae.h +++ b/src/ae.h @@ -68,6 +68,7 @@ typedef void aeFileProc(struct aeEventLoop *eventLoop, int fd, void *clientData, typedef int aeTimeProc(struct aeEventLoop *eventLoop, long long id, void *clientData); typedef void aeEventFinalizerProc(struct aeEventLoop *eventLoop, void *clientData); typedef void aeBeforeSleepProc(struct aeEventLoop *eventLoop); +typedef void aeAfterSleepProc(struct aeEventLoop *eventLoop, int numevents); /* File event structure */ typedef struct aeFileEvent { @@ -107,7 +108,7 @@ typedef struct aeEventLoop { int stop; void *apidata; /* This is used for polling API specific data */ aeBeforeSleepProc *beforesleep; - aeBeforeSleepProc *aftersleep; + aeAfterSleepProc *aftersleep; int flags; } aeEventLoop; @@ -130,7 +131,7 @@ int aeWait(int fd, int mask, long long milliseconds); void aeMain(aeEventLoop *eventLoop); char *aeGetApiName(void); void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep); -void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep); +void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeAfterSleepProc *aftersleep); int 
aeGetSetSize(aeEventLoop *eventLoop); int aeResizeSetSize(aeEventLoop *eventLoop, int setsize); void aeSetDontWait(aeEventLoop *eventLoop, int noWait); diff --git a/src/ae_epoll.c b/src/ae_epoll.c index 78820b99bf..c8b4ac743f 100644 --- a/src/ae_epoll.c +++ b/src/ae_epoll.c @@ -87,10 +87,12 @@ static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { return 0; } -static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int delmask) { +static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) { aeApiState *state = eventLoop->apidata; struct epoll_event ee = {0}; /* avoid valgrind warning */ - int mask = eventLoop->events[fd].mask & (~delmask); + + /* We rely on the fact that our caller has already updated the mask in the eventLoop. */ + mask = eventLoop->events[fd].mask; ee.events = 0; if (mask & AE_READABLE) ee.events |= EPOLLIN; diff --git a/src/ae_kqueue.c b/src/ae_kqueue.c index 3cb6fbae4a..4159f25744 100644 --- a/src/ae_kqueue.c +++ b/src/ae_kqueue.c @@ -101,31 +101,24 @@ static void aeApiFree(aeEventLoop *eventLoop) { static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { aeApiState *state = eventLoop->apidata; - struct kevent ke; + struct kevent evs[2]; + int nch = 0; - if (mask & AE_READABLE) { - EV_SET(&ke, fd, EVFILT_READ, EV_ADD, 0, 0, NULL); - if (kevent(state->kqfd, &ke, 1, NULL, 0, NULL) == -1) return -1; - } - if (mask & AE_WRITABLE) { - EV_SET(&ke, fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL); - if (kevent(state->kqfd, &ke, 1, NULL, 0, NULL) == -1) return -1; - } - return 0; + if (mask & AE_READABLE) EV_SET(evs + nch++, fd, EVFILT_READ, EV_ADD, 0, 0, NULL); + if (mask & AE_WRITABLE) EV_SET(evs + nch++, fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL); + + return kevent(state->kqfd, evs, nch, NULL, 0, NULL); } static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) { aeApiState *state = eventLoop->apidata; - struct kevent ke; + struct kevent evs[2]; + int nch = 0; - if (mask & AE_READABLE) { - EV_SET(&ke, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); - kevent(state->kqfd, &ke, 1, NULL, 0, NULL); - } - if (mask & AE_WRITABLE) { - EV_SET(&ke, fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL); - kevent(state->kqfd, &ke, 1, NULL, 0, NULL); - } + if (mask & AE_READABLE) EV_SET(evs + nch++, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); + if (mask & AE_WRITABLE) EV_SET(evs + nch++, fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL); + + kevent(state->kqfd, evs, nch, NULL, 0, NULL); } static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) { diff --git a/src/aof.c b/src/aof.c index ac9ffd5fcb..1a47d9c688 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1364,7 +1364,8 @@ struct client *createAOFClient(void) { * background processing there is a chance that the * command execution order will be violated. */ - c->flags = CLIENT_DENY_BLOCKING; + c->raw_flag = 0; + c->flag.deny_blocking = 1; /* We set the fake client as a replica waiting for the synchronization * so that the server will not try to send replies to this client. 
*/ @@ -1536,7 +1537,7 @@ int loadSingleAppendOnlyFile(char *filename) { /* Run the command in the context of a fake client */ fakeClient->cmd = fakeClient->lastcmd = cmd; - if (fakeClient->flags & CLIENT_MULTI && fakeClient->cmd->proc != execCommand) { + if (fakeClient->flag.multi && fakeClient->cmd->proc != execCommand) { /* Note: we don't have to attempt calling evalGetCommandFlags, * since this is AOF, the checks in processCommand are not made * anyway.*/ @@ -1549,7 +1550,7 @@ int loadSingleAppendOnlyFile(char *filename) { serverAssert(fakeClient->bufpos == 0 && listLength(fakeClient->reply) == 0); /* The fake client should never get blocked */ - serverAssert((fakeClient->flags & CLIENT_BLOCKED) == 0); + serverAssert(fakeClient->flag.blocked == 0); /* Clean up. Command code may have changed argv/argc so we use the * argv/argc of the client instead of the local variables. */ @@ -1562,7 +1563,7 @@ int loadSingleAppendOnlyFile(char *filename) { * If the client is in the middle of a MULTI/EXEC, handle it as it was * a short read, even if technically the protocol is correct: we want * to remove the unprocessed tail and continue. */ - if (fakeClient->flags & CLIENT_MULTI) { + if (fakeClient->flag.multi) { serverLog(LL_WARNING, "Revert incomplete MULTI/EXEC transaction in AOF file %s", filename); valid_up_to = valid_before_multi; goto uxeof; diff --git a/src/blocked.c b/src/blocked.c index 6d8d4fbc7c..a1d5306dad 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -86,12 +86,12 @@ void initClientBlockingState(client *c) { * flag is set client query buffer is not longer processed, but accumulated, * and will be processed when the client is unblocked. */ void blockClient(client *c, int btype) { - /* Master client should never be blocked unless pause or module */ - serverAssert(!(c->flags & CLIENT_PRIMARY && btype != BLOCKED_MODULE && btype != BLOCKED_POSTPONE)); + /* Primary client should never be blocked unless pause or module */ + serverAssert(!(c->flag.primary && btype != BLOCKED_MODULE && btype != BLOCKED_POSTPONE)); - c->flags |= CLIENT_BLOCKED; + c->flag.blocked = 1; c->bstate.btype = btype; - if (!(c->flags & CLIENT_MODULE)) + if (!c->flag.module) server.blocked_clients++; /* We count blocked client stats on regular clients and not on module clients */ server.blocked_clients_by_type[btype]++; addClientToTimeoutTable(c); @@ -130,10 +130,10 @@ void processUnblockedClients(void) { serverAssert(ln != NULL); c = ln->value; listDelNode(server.unblocked_clients, ln); - c->flags &= ~CLIENT_UNBLOCKED; + c->flag.unblocked = 0; - if (c->flags & CLIENT_MODULE) { - if (!(c->flags & CLIENT_BLOCKED)) { + if (c->flag.module) { + if (!c->flag.blocked) { moduleCallCommandUnblockedHandler(c); } continue; @@ -143,10 +143,10 @@ void processUnblockedClients(void) { * is blocked again. Actually processInputBuffer() checks that the * client is not blocked before to proceed, but things may change and * the code is conceptually more correct this way. */ - if (!(c->flags & CLIENT_BLOCKED)) { + if (!c->flag.blocked) { /* If we have a queued command, execute it now. */ if (processPendingCommandAndInputBuffer(c) == C_ERR) { - c = NULL; + continue; } } beforeNextClient(c); @@ -172,8 +172,8 @@ void processUnblockedClients(void) { void queueClientForReprocessing(client *c) { /* The client may already be into the unblocked list because of a previous * blocking operation, don't add back it into the list multiple times. 
*/ - if (!(c->flags & CLIENT_UNBLOCKED)) { - c->flags |= CLIENT_UNBLOCKED; + if (!c->flag.unblocked) { + c->flag.unblocked = 1; listAddNodeTail(server.unblocked_clients, c); } } @@ -199,7 +199,7 @@ void unblockClient(client *c, int queue_for_reprocessing) { /* Reset the client for a new query, unless the client has pending command to process * or in case a shutdown operation was canceled and we are still in the processCommand sequence */ - if (!(c->flags & CLIENT_PENDING_COMMAND) && c->bstate.btype != BLOCKED_SHUTDOWN) { + if (!c->flag.pending_command && c->bstate.btype != BLOCKED_SHUTDOWN) { freeClientOriginalArgv(c); /* Clients that are not blocked on keys are not reprocessed so we must * call reqresAppendResponse here (for clients blocked on key, @@ -210,11 +210,11 @@ void unblockClient(client *c, int queue_for_reprocessing) { } /* We count blocked client stats on regular clients and not on module clients */ - if (!(c->flags & CLIENT_MODULE)) server.blocked_clients--; + if (!c->flag.module) server.blocked_clients--; server.blocked_clients_by_type[c->bstate.btype]--; /* Clear the flags, and put the client in the unblocked list so that * we'll process new commands in its query buffer ASAP. */ - c->flags &= ~CLIENT_BLOCKED; + c->flag.blocked = 0; c->bstate.btype = BLOCKED_NONE; c->bstate.unblock_on_nokey = 0; removeClientFromTimeoutTable(c); @@ -256,7 +256,7 @@ void replyToClientsBlockedOnShutdown(void) { listRewind(server.clients, &li); while ((ln = listNext(&li))) { client *c = listNodeValue(ln); - if (c->flags & CLIENT_BLOCKED && c->bstate.btype == BLOCKED_SHUTDOWN) { + if (c->flag.blocked && c->bstate.btype == BLOCKED_SHUTDOWN) { addReplyError(c, "Errors trying to SHUTDOWN. Check logs."); unblockClient(c, 1); } @@ -265,8 +265,8 @@ void replyToClientsBlockedOnShutdown(void) { /* Mass-unblock clients because something changed in the instance that makes * blocking no longer safe. For example clients blocked in list operations - * in an instance which turns from master to replica is unsafe, so this function - * is called when a master turns into a replica. + * in an instance which turns from primary to replica is unsafe, so this function + * is called when a primary turns into a replica. * * The semantics is to send an -UNBLOCKED error to the client, disconnecting * it at the same time. */ @@ -278,7 +278,7 @@ void disconnectAllBlockedClients(void) { while ((ln = listNext(&li))) { client *c = listNodeValue(ln); - if (c->flags & CLIENT_BLOCKED) { + if (c->flag.blocked) { /* POSTPONEd clients are an exception, when they'll be unblocked, the * command processing will start from scratch, and the command will * be either executed or rejected. (unlike LIST blocked clients for @@ -287,7 +287,7 @@ void disconnectAllBlockedClients(void) { unblockClientOnError(c, "-UNBLOCKED force unblock from blocking operation, " "instance state changed (master -> replica?)"); - c->flags |= CLIENT_CLOSE_AFTER_REPLY; + c->flag.close_after_reply = 1; } } } @@ -368,7 +368,7 @@ void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeo list *l; int j; - if (!(c->flags & CLIENT_REPROCESSING_COMMAND)) { + if (!c->flag.reprocessing_command) { /* If the client is re-processing the command, we do not set the timeout * because we need to retain the client's original timeout. */ c->bstate.timeout = timeout; @@ -411,7 +411,7 @@ void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeo /* Currently we assume key blocking will require reprocessing the command. 
* However in case of modules, they have a different way to handle the reprocessing * which does not require setting the pending command flag */ - if (btype != BLOCKED_MODULE) c->flags |= CLIENT_PENDING_COMMAND; + if (btype != BLOCKED_MODULE) c->flag.pending_command = 1; blockClient(c, btype); } @@ -605,7 +605,7 @@ void blockPostponeClient(client *c) { listAddNodeTail(server.postponed_clients, c); c->postponed_list_node = listLast(server.postponed_clients); /* Mark this client to execute its command */ - c->flags |= CLIENT_PENDING_COMMAND; + c->flag.pending_command = 1; } /* Block client due to shutdown command */ @@ -633,8 +633,8 @@ static void unblockClientOnKey(client *c, robj *key) { unblockClient(c, 0); /* In case this client was blocked on keys during command * we need to re process the command again */ - if (c->flags & CLIENT_PENDING_COMMAND) { - c->flags &= ~CLIENT_PENDING_COMMAND; + if (c->flag.pending_command) { + c->flag.pending_command = 0; /* We want the command processing and the unblock handler (see RM_Call 'K' option) * to run atomically, this is why we must enter the execution unit here before * running the command, and exit the execution unit after calling the unblock handler (if exists). @@ -644,8 +644,8 @@ static void unblockClientOnKey(client *c, robj *key) { server.current_client = c; enterExecutionUnit(1, 0); processCommandAndResetClient(c); - if (!(c->flags & CLIENT_BLOCKED)) { - if (c->flags & CLIENT_MODULE) { + if (!c->flag.blocked) { + if (c->flag.module) { moduleCallCommandUnblockedHandler(c); } else { queueClientForReprocessing(c); @@ -690,7 +690,7 @@ void unblockClientOnTimeout(client *c) { if (c->bstate.btype == BLOCKED_MODULE && isModuleClientUnblocked(c)) return; replyToBlockedClientTimedOut(c); - if (c->flags & CLIENT_PENDING_COMMAND) c->flags &= ~CLIENT_PENDING_COMMAND; + if (c->flag.pending_command) c->flag.pending_command = 0; unblockClient(c, 1); } @@ -699,7 +699,7 @@ void unblockClientOnTimeout(client *c) { void unblockClientOnError(client *c, const char *err_str) { if (err_str) addReplyError(c, err_str); updateStatsOnUnblock(c, 0, 0, 1); - if (c->flags & CLIENT_PENDING_COMMAND) c->flags &= ~CLIENT_PENDING_COMMAND; + if (c->flag.pending_command) c->flag.pending_command = 0; unblockClient(c, 1); } diff --git a/src/cluster.c b/src/cluster.c index 00f3c2d889..dd643af988 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -747,7 +747,7 @@ int verifyClusterNodeId(const char *name, int length) { } int isValidAuxChar(int c) { - return isalnum(c) || (strchr("!#$%&()*+:;<>?@[]^{|}~", c) == NULL); + return isalnum(c) || (strchr("!#$%&()*+.:;<>?@[]^{|}~", c) == NULL); } int isValidAuxString(char *s, unsigned int length) { @@ -813,12 +813,12 @@ void clusterCommandHelp(client *c) { " Return the node's shard id.", "NODES", " Return cluster configuration seen by node. Output format:", - " ...", + " ...", "REPLICAS ", " Return replicas.", "SLOTS", " Return information about slots range mappings. Each range is made of:", - " start, end, master and replicas IP addresses, ports and ids", + " start, end, primary and replicas IP addresses, ports and ids", "SHARDS", " Return information about slot range mappings and the nodes associated with them.", NULL}; @@ -985,7 +985,7 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int if (cmd->proc == execCommand) { /* If CLIENT_MULTI flag is not set EXEC is just going to return an * error. 
*/ - if (!(c->flags & CLIENT_MULTI)) return myself; + if (!c->flag.multi) return myself; ms = &c->mstate; } else { /* In order to have a single codepath create a fake Multi State @@ -1048,7 +1048,7 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int * can safely serve the request, otherwise we return a TRYAGAIN * error). To do so we set the importing/migrating state and * increment a counter for every missing key. */ - if (clusterNodeIsPrimary(myself) || c->flags & CLIENT_READONLY) { + if (clusterNodeIsPrimary(myself) || c->flag.readonly) { if (n == clusterNodeGetPrimary(myself) && getMigratingSlotDest(slot) != NULL) { migrating_slot = 1; } else if (getImportingSlotSource(slot) != NULL) { @@ -1143,7 +1143,7 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int * request as "ASKING", we can serve the request. However if the request * involves multiple keys and we don't have them all, the only option is * to send a TRYAGAIN error. */ - if (importing_slot && (c->flags & CLIENT_ASKING || cmd_flags & CMD_ASKING)) { + if (importing_slot && (c->flag.asking || cmd_flags & CMD_ASKING)) { if (multiple_keys && missing_keys) { if (error_code) *error_code = CLUSTER_REDIR_UNSTABLE; return NULL; @@ -1157,7 +1157,7 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int * is serving, we can reply without redirection. */ int is_write_command = (cmd_flags & CMD_WRITE) || (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE)); - if (((c->flags & CLIENT_READONLY) || pubsubshard_included) && !is_write_command && clusterNodeIsReplica(myself) && + if ((c->flag.readonly || pubsubshard_included) && !is_write_command && clusterNodeIsReplica(myself) && clusterNodeGetPrimary(myself) == n) { return myself; } @@ -1194,7 +1194,7 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co int port = clusterNodeClientPort(n, shouldReturnTlsInfo()); addReplyErrorSds(c, sdscatprintf(sdsempty(), "-%s %d %s:%d", (error_code == CLUSTER_REDIR_ASK) ? "ASK" : "MOVED", - hashslot, clusterNodePreferredEndpoint(n), port)); + hashslot, clusterNodePreferredEndpoint(n, c), port)); } else { serverPanic("getNodeByQuery() unknown error."); } @@ -1213,11 +1213,15 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co * returns 1. Otherwise 0 is returned and no operation is performed. */ int clusterRedirectBlockedClientIfNeeded(client *c) { clusterNode *myself = getMyClusterNode(); - if (c->flags & CLIENT_BLOCKED && (c->bstate.btype == BLOCKED_LIST || c->bstate.btype == BLOCKED_ZSET || - c->bstate.btype == BLOCKED_STREAM || c->bstate.btype == BLOCKED_MODULE)) { + if (c->flag.blocked && (c->bstate.btype == BLOCKED_LIST || c->bstate.btype == BLOCKED_ZSET || + c->bstate.btype == BLOCKED_STREAM || c->bstate.btype == BLOCKED_MODULE)) { dictEntry *de; dictIterator *di; + /* If the client is blocked on module, but not on a specific key, + * don't unblock it. */ + if (c->bstate.btype == BLOCKED_MODULE && !moduleClientIsBlockedOnKeys(c)) return 0; + /* If the cluster is down, unblock the client with the right error. * If the cluster is configured to allow reads on cluster down, we * still want to emit this error since a write will be required @@ -1227,10 +1231,6 @@ int clusterRedirectBlockedClientIfNeeded(client *c) { return 1; } - /* If the client is blocked on module, but not on a specific key, - * don't unblock it (except for the CLUSTER_FAIL case above). 
*/ - if (c->bstate.btype == BLOCKED_MODULE && !moduleClientIsBlockedOnKeys(c)) return 0; - /* All keys must belong to the same slot, so check first key only. */ di = dictGetIterator(c->bstate.keys); if ((de = dictNext(di)) != NULL) { @@ -1240,7 +1240,7 @@ int clusterRedirectBlockedClientIfNeeded(client *c) { /* if the client is read-only and attempting to access key that our * replica can handle, allow it. */ - if ((c->flags & CLIENT_READONLY) && !(c->lastcmd->flags & CMD_WRITE) && clusterNodeIsReplica(myself) && + if (c->flag.readonly && !(c->lastcmd->flags & CMD_WRITE) && clusterNodeIsReplica(myself) && clusterNodeGetPrimary(myself) == node) { node = myself; } @@ -1267,7 +1267,7 @@ void addNodeToNodeReply(client *c, clusterNode *node) { char *hostname = clusterNodeHostname(node); addReplyArrayLen(c, 4); if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_IP) { - addReplyBulkCString(c, clusterNodeIp(node)); + addReplyBulkCString(c, clusterNodeIp(node, c)); } else if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_HOSTNAME) { if (hostname != NULL && hostname[0] != '\0') { addReplyBulkCString(c, hostname); @@ -1300,7 +1300,7 @@ void addNodeToNodeReply(client *c, clusterNode *node) { if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_IP) { addReplyBulkCString(c, "ip"); - addReplyBulkCString(c, clusterNodeIp(node)); + addReplyBulkCString(c, clusterNodeIp(node, c)); length--; } if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_HOSTNAME && hostname != NULL && @@ -1353,7 +1353,7 @@ void addNodeReplyForClusterSlot(client *c, clusterNode *node, int start_slot, in } void clearCachedClusterSlotsResponse(void) { - for (connTypeForCaching conn_type = CACHE_CONN_TCP; conn_type < CACHE_CONN_TYPE_MAX; conn_type++) { + for (int conn_type = 0; conn_type < CACHE_CONN_TYPE_MAX; conn_type++) { if (server.cached_cluster_slot_info[conn_type]) { sdsfree(server.cached_cluster_slot_info[conn_type]); server.cached_cluster_slot_info[conn_type] = NULL; @@ -1361,8 +1361,8 @@ void clearCachedClusterSlotsResponse(void) { } } -sds generateClusterSlotResponse(void) { - client *recording_client = createCachedResponseClient(); +sds generateClusterSlotResponse(int resp) { + client *recording_client = createCachedResponseClient(resp); clusterNode *n = NULL; int num_primaries = 0, start = -1; void *slot_replylen = addReplyDeferredLen(recording_client); @@ -1392,8 +1392,8 @@ sds generateClusterSlotResponse(void) { return cluster_slot_response; } -int verifyCachedClusterSlotsResponse(sds cached_response) { - sds generated_response = generateClusterSlotResponse(); +int verifyCachedClusterSlotsResponse(sds cached_response, int resp) { + sds generated_response = generateClusterSlotResponse(resp); int is_equal = !sdscmp(generated_response, cached_response); /* Here, we use LL_WARNING so this gets printed when debug assertions are enabled and the system is about to crash. */ if (!is_equal) @@ -1413,16 +1413,19 @@ void clusterCommandSlots(client *c) { * 3) node ID * ... 
continued until done */ - connTypeForCaching conn_type = connIsTLS(c->conn); + int conn_type = 0; + if (connIsTLS(c->conn)) conn_type |= CACHE_CONN_TYPE_TLS; + if (isClientConnIpV6(c)) conn_type |= CACHE_CONN_TYPE_IPv6; + if (c->resp == 3) conn_type |= CACHE_CONN_TYPE_RESP3; if (detectAndUpdateCachedNodeHealth()) clearCachedClusterSlotsResponse(); sds cached_reply = server.cached_cluster_slot_info[conn_type]; if (!cached_reply) { - cached_reply = generateClusterSlotResponse(); + cached_reply = generateClusterSlotResponse(c->resp); server.cached_cluster_slot_info[conn_type] = cached_reply; } else { - debugServerAssertWithInfo(c, NULL, verifyCachedClusterSlotsResponse(cached_reply) == 1); + debugServerAssertWithInfo(c, NULL, verifyCachedClusterSlotsResponse(cached_reply, c->resp) == 1); } addReplyProto(c, cached_reply, sdslen(cached_reply)); @@ -1441,7 +1444,7 @@ void askingCommand(client *c) { addReplyError(c, "This instance has cluster support disabled"); return; } - c->flags |= CLIENT_ASKING; + c->flag.asking = 1; addReply(c, shared.ok); } @@ -1449,20 +1452,12 @@ void askingCommand(client *c) { * In this mode replica will not redirect clients as long as clients access * with read-only commands to keys that are served by the replica's primary. */ void readonlyCommand(client *c) { - if (server.cluster_enabled == 0) { - addReplyError(c, "This instance has cluster support disabled"); - return; - } - c->flags |= CLIENT_READONLY; + c->flag.readonly = 1; addReply(c, shared.ok); } /* The READWRITE command just clears the READONLY command state. */ void readwriteCommand(client *c) { - if (server.cluster_enabled == 0) { - addReplyError(c, "This instance has cluster support disabled"); - return; - } - c->flags &= ~CLIENT_READONLY; + c->flag.readonly = 0; addReply(c, shared.ok); } diff --git a/src/cluster.h b/src/cluster.h index a1d617b695..6f93563650 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -48,6 +48,8 @@ int clusterSendModuleMessageToTarget(const char *target, void clusterUpdateMyselfFlags(void); void clusterUpdateMyselfIp(void); +void clusterUpdateMyselfClientIpV4(void); +void clusterUpdateMyselfClientIpV6(void); void clusterUpdateMyselfHostname(void); void clusterUpdateMyselfAnnouncedPorts(void); void clusterUpdateMyselfHumanNodename(void); @@ -85,7 +87,7 @@ int handleDebugClusterCommand(client *c); int clusterNodePending(clusterNode *node); int clusterNodeIsPrimary(clusterNode *n); char **getClusterNodesList(size_t *numnodes); -char *clusterNodeIp(clusterNode *node); +char *clusterNodeIp(clusterNode *node, client *c); int clusterNodeIsReplica(clusterNode *node); clusterNode *clusterNodeGetPrimary(clusterNode *node); char *clusterNodeGetName(clusterNode *node); @@ -100,13 +102,15 @@ clusterNode *getImportingSlotSource(int slot); clusterNode *getNodeBySlot(int slot); int clusterNodeClientPort(clusterNode *n, int use_tls); char *clusterNodeHostname(clusterNode *node); -const char *clusterNodePreferredEndpoint(clusterNode *n); +const char *clusterNodePreferredEndpoint(clusterNode *n, client *c); long long clusterNodeReplOffset(clusterNode *node); clusterNode *clusterLookupNode(const char *name, int length); int detectAndUpdateCachedNodeHealth(void); -client *createCachedResponseClient(void); +client *createCachedResponseClient(int resp); void deleteCachedResponseClient(client *recording_client); void clearCachedClusterSlotsResponse(void); +unsigned int countKeysInSlot(unsigned int hashslot); +int getSlotOrReply(client *c, robj *o); /* functions with shared implementations */ int 
clusterNodeIsMyself(clusterNode *n); diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 9a9d485bd7..7928c680f5 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -98,16 +98,22 @@ unsigned int delKeysInSlot(unsigned int hashslot); void clusterAddNodeToShard(const char *shard_id, clusterNode *node); list *clusterLookupNodeListByShardId(const char *shard_id); void clusterRemoveNodeFromShard(clusterNode *node); -int auxShardIdSetter(clusterNode *n, void *value, int length); +int auxShardIdSetter(clusterNode *n, void *value, size_t length); sds auxShardIdGetter(clusterNode *n, sds s); int auxShardIdPresent(clusterNode *n); -int auxHumanNodenameSetter(clusterNode *n, void *value, int length); +int auxHumanNodenameSetter(clusterNode *n, void *value, size_t length); sds auxHumanNodenameGetter(clusterNode *n, sds s); int auxHumanNodenamePresent(clusterNode *n); -int auxTcpPortSetter(clusterNode *n, void *value, int length); +int auxAnnounceClientIpV4Setter(clusterNode *n, void *value, size_t length); +sds auxAnnounceClientIpV4Getter(clusterNode *n, sds s); +int auxAnnounceClientIpV4Present(clusterNode *n); +int auxAnnounceClientIpV6Setter(clusterNode *n, void *value, size_t length); +sds auxAnnounceClientIpV6Getter(clusterNode *n, sds s); +int auxAnnounceClientIpV6Present(clusterNode *n); +int auxTcpPortSetter(clusterNode *n, void *value, size_t length); sds auxTcpPortGetter(clusterNode *n, sds s); int auxTcpPortPresent(clusterNode *n); -int auxTlsPortSetter(clusterNode *n, void *value, int length); +int auxTlsPortSetter(clusterNode *n, void *value, size_t length); sds auxTlsPortGetter(clusterNode *n, sds s); int auxTlsPortPresent(clusterNode *n); static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen); @@ -116,6 +122,12 @@ int verifyClusterNodeId(const char *name, int length); sds clusterEncodeOpenSlotsAuxField(int rdbflags); int clusterDecodeOpenSlotsAuxField(int rdbflags, sds s); +/* Only primaries that own slots have voting rights. + * Returns 1 if the node has voting rights, otherwise returns 0. */ +static inline int clusterNodeIsVotingPrimary(clusterNode *n) { + return (n->flags & CLUSTER_NODE_PRIMARY) && n->numslots; +} + int getNodeDefaultClientPort(clusterNode *n) { return server.tls_cluster ? 
n->tls_port : n->tcp_port; } @@ -187,7 +199,7 @@ dictType clusterSdsToListType = { /* Aux field setter function prototype * return C_OK when the update is successful; C_ERR otherwise */ -typedef int(aux_value_setter)(clusterNode *n, void *value, int length); +typedef int(aux_value_setter)(clusterNode *n, void *value, size_t length); /* Aux field getter function prototype * return an sds that is a concatenation of the input sds string and * the aux value */ @@ -208,7 +220,9 @@ typedef enum { af_human_nodename, af_tcp_port, af_tls_port, - af_count, + af_announce_client_ipv4, + af_announce_client_ipv6, + af_count, /* must be the last field */ } auxFieldIndex; /* Note that @@ -220,9 +234,11 @@ auxFieldHandler auxFieldHandlers[] = { {"nodename", auxHumanNodenameSetter, auxHumanNodenameGetter, auxHumanNodenamePresent}, {"tcp-port", auxTcpPortSetter, auxTcpPortGetter, auxTcpPortPresent}, {"tls-port", auxTlsPortSetter, auxTlsPortGetter, auxTlsPortPresent}, + {"client-ipv4", auxAnnounceClientIpV4Setter, auxAnnounceClientIpV4Getter, auxAnnounceClientIpV4Present}, + {"client-ipv6", auxAnnounceClientIpV6Setter, auxAnnounceClientIpV6Getter, auxAnnounceClientIpV6Present}, }; -int auxShardIdSetter(clusterNode *n, void *value, int length) { +int auxShardIdSetter(clusterNode *n, void *value, size_t length) { if (verifyClusterNodeId(value, length) == C_ERR) { return C_ERR; } @@ -246,19 +262,12 @@ int auxShardIdPresent(clusterNode *n) { return strlen(n->shard_id); } -int auxHumanNodenameSetter(clusterNode *n, void *value, int length) { - if (n && !strncmp(value, n->human_nodename, length)) { - return C_OK; - } else if (!n && (length == 0)) { +int auxHumanNodenameSetter(clusterNode *n, void *value, size_t length) { + if (sdslen(n->human_nodename) == length && !strncmp(value, n->human_nodename, length)) { return C_OK; } - if (n) { - n->human_nodename = sdscpylen(n->human_nodename, value, length); - } else if (sdslen(n->human_nodename) != 0) { - sdsclear(n->human_nodename); - } else { - return C_ERR; - } + + n->human_nodename = sdscpylen(n->human_nodename, value, length); return C_OK; } @@ -270,7 +279,59 @@ int auxHumanNodenamePresent(clusterNode *n) { return sdslen(n->human_nodename); } -int auxTcpPortSetter(clusterNode *n, void *value, int length) { +int auxAnnounceClientIpV4Setter(clusterNode *n, void *value, size_t length) { + if (sdslen(n->announce_client_ipv4) == length && !strncmp(value, n->announce_client_ipv4, length)) { + /* Unchanged value */ + return C_OK; + } + + if (length != 0) { + /* Validate IPv4 address */ + struct sockaddr_in sa; + if (inet_pton(AF_INET, (const char *)value, &(sa.sin_addr)) == 0) { + return C_ERR; + } + } + + n->announce_client_ipv4 = sdscpylen(n->announce_client_ipv4, value, length); + return C_OK; +} + +sds auxAnnounceClientIpV4Getter(clusterNode *n, sds s) { + return sdscatprintf(s, "%s", n->announce_client_ipv4); +} + +int auxAnnounceClientIpV4Present(clusterNode *n) { + return sdslen(n->announce_client_ipv4) != 0; +} + +int auxAnnounceClientIpV6Setter(clusterNode *n, void *value, size_t length) { + if (sdslen(n->announce_client_ipv6) == length && !strncmp(value, n->announce_client_ipv6, length)) { + /* Unchanged value */ + return C_OK; + } + + if (length != 0) { + /* Validate IPv6 address */ + struct sockaddr_in6 sa; + if (inet_pton(AF_INET6, (const char *)value, &(sa.sin6_addr)) == 0) { + return C_ERR; + } + } + + n->announce_client_ipv6 = sdscpylen(n->announce_client_ipv6, value, length); + return C_OK; +} + +sds auxAnnounceClientIpV6Getter(clusterNode *n, sds s) { + 
return sdscatprintf(s, "%s", n->announce_client_ipv6); +} + +int auxAnnounceClientIpV6Present(clusterNode *n) { + return sdslen(n->announce_client_ipv6) != 0; +} + +int auxTcpPortSetter(clusterNode *n, void *value, size_t length) { if (length > 5 || length < 1) { return C_ERR; } @@ -289,7 +350,7 @@ int auxTcpPortPresent(clusterNode *n) { return n->tcp_port >= 0 && n->tcp_port < 65536; } -int auxTlsPortSetter(clusterNode *n, void *value, int length) { +int auxTlsPortSetter(clusterNode *n, void *value, size_t length) { if (length > 5 || length < 1) { return C_ERR; } @@ -533,9 +594,9 @@ int clusterLoadConfig(char *filename) { serverAssert(server.cluster->myself == NULL); myself = server.cluster->myself = n; n->flags |= CLUSTER_NODE_MYSELF; - } else if (!strcasecmp(s, "master")) { + } else if (!strcasecmp(s, "master") || !strcasecmp(s, "primary")) { n->flags |= CLUSTER_NODE_PRIMARY; - } else if (!strcasecmp(s, "slave")) { + } else if (!strcasecmp(s, "slave") || !strcasecmp(s, "replica")) { n->flags |= CLUSTER_NODE_REPLICA; } else if (!strcasecmp(s, "fail?")) { n->flags |= CLUSTER_NODE_PFAIL; @@ -578,6 +639,7 @@ int clusterLoadConfig(char *filename) { memcmp(primary->shard_id, n->shard_id, CLUSTER_NAMELEN) != 0) { /* If the primary has been added to a shard, make sure this * node has the same persisted shard id as the primary. */ + sdsfreesplitres(argv, argc); goto fmterr; } n->replicaof = primary; @@ -885,38 +947,37 @@ void clusterUpdateMyselfIp(void) { } } -/* Update the hostname for the specified node with the provided C string. */ -static void updateAnnouncedHostname(clusterNode *node, char *new) { - /* Previous and new hostname are the same, no need to update. */ - if (new && !strcmp(new, node->hostname)) { +static void updateSdsExtensionField(char **field, const char *value) { + if (value != NULL && !strcmp(value, *field)) { return; - } else if (!new && (sdslen(node->hostname) == 0)) { + } else if (value == NULL && sdslen(*field) == 0) { return; } - if (new) { - node->hostname = sdscpy(node->hostname, new); - } else if (sdslen(node->hostname) != 0) { - sdsclear(node->hostname); + if (value != NULL) { + *field = sdscpy(*field, value); + } else { + sdsclear(*field); } clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); } -static void updateAnnouncedHumanNodename(clusterNode *node, char *new) { - if (new && !strcmp(new, node->human_nodename)) { - return; - } else if (!new && (sdslen(node->human_nodename) == 0)) { - return; - } +/* Update the hostname for the specified node with the provided C string. 
*/ +static void updateAnnouncedHostname(clusterNode *node, char *value) { + updateSdsExtensionField(&node->hostname, value); +} - if (new) { - node->human_nodename = sdscpy(node->human_nodename, new); - } else if (sdslen(node->human_nodename) != 0) { - sdsclear(node->human_nodename); - } - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); +static void updateAnnouncedHumanNodename(clusterNode *node, char *value) { + updateSdsExtensionField(&node->human_nodename, value); +} + +static void updateAnnouncedClientIpV4(clusterNode *node, char *value) { + updateSdsExtensionField(&node->announce_client_ipv4, value); } +static void updateAnnouncedClientIpV6(clusterNode *node, char *value) { + updateSdsExtensionField(&node->announce_client_ipv6, value); +} static void updateShardId(clusterNode *node, const char *shard_id) { if (shard_id && memcmp(node->shard_id, shard_id, CLUSTER_NAMELEN) != 0) { @@ -956,6 +1017,16 @@ void clusterUpdateMyselfHumanNodename(void) { updateAnnouncedHumanNodename(myself, server.cluster_announce_human_nodename); } +void clusterUpdateMyselfClientIpV4(void) { + if (!myself) return; + updateAnnouncedClientIpV4(myself, server.cluster_announce_client_ipv4); +} + +void clusterUpdateMyselfClientIpV6(void) { + if (!myself) return; + updateAnnouncedClientIpV6(myself, server.cluster_announce_client_ipv6); +} + void clusterInit(void) { int saveconf = 0; @@ -1035,12 +1106,14 @@ void clusterInit(void) { server.cluster->mf_end = 0; server.cluster->mf_replica = NULL; - for (connTypeForCaching conn_type = CACHE_CONN_TCP; conn_type < CACHE_CONN_TYPE_MAX; conn_type++) { + for (int conn_type = 0; conn_type < CACHE_CONN_TYPE_MAX; conn_type++) { server.cached_cluster_slot_info[conn_type] = NULL; } resetManualFailover(); clusterUpdateMyselfFlags(); clusterUpdateMyselfIp(); + clusterUpdateMyselfClientIpV4(); + clusterUpdateMyselfClientIpV6(); clusterUpdateMyselfHostname(); clusterUpdateMyselfHumanNodename(); } @@ -1188,6 +1261,7 @@ clusterLink *createClusterLink(clusterNode *node) { * This function will just make sure that the original node associated * with this link will have the 'link' field set to NULL. */ void freeClusterLink(clusterLink *link) { + serverAssert(link != NULL); if (link->conn) { connClose(link->conn); link->conn = NULL; @@ -1343,6 +1417,8 @@ clusterNode *createClusterNode(char *nodename, int flags) { node->link = NULL; node->inbound_link = NULL; memset(node->ip, 0, sizeof(node->ip)); + node->announce_client_ipv4 = sdsempty(); + node->announce_client_ipv6 = sdsempty(); node->hostname = sdsempty(); node->human_nodename = sdsempty(); node->tcp_port = 0; @@ -1514,6 +1590,8 @@ void freeClusterNode(clusterNode *n) { sdsfree(nodename); sdsfree(n->hostname); sdsfree(n->human_nodename); + sdsfree(n->announce_client_ipv4); + sdsfree(n->announce_client_ipv6); /* Release links and associated data structures. */ if (n->link) freeClusterLink(n->link); @@ -1764,10 +1842,8 @@ void clusterHandleConfigEpochCollision(clusterNode *sender) { server.cluster->currentEpoch++; myself->configEpoch = server.cluster->currentEpoch; clusterSaveConfigOrDie(1); - serverLog(LL_VERBOSE, - "WARNING: configEpoch collision with node %.40s (%s)." - " configEpoch set to %llu", - sender->name, sender->human_nodename, (unsigned long long)myself->configEpoch); + serverLog(LL_NOTICE, "configEpoch collision with node %.40s (%s). 
configEpoch set to %llu", sender->name, + sender->human_nodename, (unsigned long long)myself->configEpoch); } /* ----------------------------------------------------------------------------- @@ -1876,8 +1952,8 @@ void markNodeAsFailingIfNeeded(clusterNode *node) { if (nodeFailed(node)) return; /* Already FAILing. */ failures = clusterNodeFailureReportsCount(node); - /* Also count myself as a voter if I'm a primary. */ - if (clusterNodeIsPrimary(myself)) failures++; + /* Also count myself as a voter if I'm a voting primary. */ + if (clusterNodeIsVotingPrimary(myself)) failures++; if (failures < needed_quorum) return; /* No weak agreement from primaries. */ serverLog(LL_NOTICE, "Marking node %.40s (%s) as failing (quorum reached).", node->name, node->human_nodename); @@ -1904,11 +1980,11 @@ void clearNodeFailureIfNeeded(clusterNode *node) { serverAssert(nodeFailed(node)); - /* For replicas we always clear the FAIL flag if we can contact the - * node again. */ - if (nodeIsReplica(node) || node->numslots == 0) { - serverLog(LL_NOTICE, "Clear FAIL state for node %.40s (%s):%s is reachable again.", node->name, - node->human_nodename, nodeIsReplica(node) ? "replica" : "master without slots"); + /* For replicas or primaries without slots, that is, nodes without voting + * right, we always clear the FAIL flag if we can contact the node again. */ + if (!clusterNodeIsVotingPrimary(node)) { + serverLog(LL_NOTICE, "Clear FAIL state for node %.40s (%s): %s is reachable again.", node->name, + node->human_nodename, nodeIsReplica(node) ? "replica" : "primary without slots"); node->flags &= ~CLUSTER_NODE_FAIL; clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_SAVE_CONFIG); } @@ -1917,7 +1993,7 @@ void clearNodeFailureIfNeeded(clusterNode *node) { * 1) The FAIL state is old enough. * 2) It is yet serving slots from our point of view (not failed over). * Apparently no one is going to fix these slots, clear the FAIL flag. */ - if (clusterNodeIsPrimary(node) && node->numslots > 0 && + if (clusterNodeIsVotingPrimary(node) && (now - node->fail_time) > (server.cluster_node_timeout * CLUSTER_FAIL_UNDO_TIME_MULT)) { serverLog( LL_NOTICE, @@ -2099,17 +2175,17 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { /* Ignore gossips about self. */ if (node && node != myself) { /* We already know this node. - Handle failure reports, only when the sender is a primary. */ - if (sender && clusterNodeIsPrimary(sender)) { + Handle failure reports, only when the sender is a voting primary. 
*/ + if (sender && clusterNodeIsVotingPrimary(sender)) { if (flags & (CLUSTER_NODE_FAIL | CLUSTER_NODE_PFAIL)) { if (clusterNodeAddFailureReport(node, sender)) { - serverLog(LL_VERBOSE, "Node %.40s (%s) reported node %.40s (%s) as not reachable.", - sender->name, sender->human_nodename, node->name, node->human_nodename); + serverLog(LL_NOTICE, "Node %.40s (%s) reported node %.40s (%s) as not reachable.", sender->name, + sender->human_nodename, node->name, node->human_nodename); } markNodeAsFailingIfNeeded(node); } else { if (clusterNodeDelFailureReport(node, sender)) { - serverLog(LL_VERBOSE, "Node %.40s (%s) reported node %.40s (%s) is back online.", sender->name, + serverLog(LL_NOTICE, "Node %.40s (%s) reported node %.40s (%s) is back online.", sender->name, sender->human_nodename, node->name, node->human_nodename); } } @@ -2556,45 +2632,49 @@ static clusterMsgPingExt *getNextPingExt(clusterMsgPingExt *ext) { } /* All PING extensions must be 8-byte aligned */ -uint32_t getAlignedPingExtSize(uint32_t dataSize) { +static uint32_t getAlignedPingExtSize(uint32_t dataSize) { return sizeof(clusterMsgPingExt) + EIGHT_BYTE_ALIGN(dataSize); } -uint32_t getHostnamePingExtSize(void) { - if (sdslen(myself->hostname) == 0) { - return 0; - } - return getAlignedPingExtSize(sdslen(myself->hostname) + 1); -} - -uint32_t getHumanNodenamePingExtSize(void) { - if (sdslen(myself->human_nodename) == 0) { - return 0; - } - return getAlignedPingExtSize(sdslen(myself->human_nodename) + 1); -} - -uint32_t getShardIdPingExtSize(void) { +static uint32_t getShardIdPingExtSize(void) { return getAlignedPingExtSize(sizeof(clusterMsgPingExtShardId)); } -uint32_t getForgottenNodeExtSize(void) { +static uint32_t getForgottenNodeExtSize(void) { return getAlignedPingExtSize(sizeof(clusterMsgPingExtForgottenNode)); } -void *preparePingExt(clusterMsgPingExt *ext, uint16_t type, uint32_t length) { +static void *preparePingExt(clusterMsgPingExt *ext, uint16_t type, uint32_t length) { ext->type = htons(type); ext->length = htonl(length); return &ext->ext[0]; } +/* If value is nonempty and cursor_ptr points to a non-NULL cursor, writes a + * ping extension at the cursor, advances the cursor, increments totlen and + * returns 1. If value is nonempty and cursor_ptr points to NULL, just computes + * the size, increments totlen and returns 1. If value is empty, returns 0. */ +static uint32_t +writeSdsPingExtIfNonempty(uint32_t *totlen_ptr, clusterMsgPingExt **cursor_ptr, clusterMsgPingtypes type, sds value) { + size_t len = sdslen(value); + if (len == 0) return 0; + size_t size = getAlignedPingExtSize(len + 1); + if (*cursor_ptr != NULL) { + void *ext = preparePingExt(*cursor_ptr, type, size); + memcpy(ext, value, len); + *cursor_ptr = getNextPingExt(*cursor_ptr); + } + *totlen_ptr += size; + return 1; +} + /* 1. If a NULL hdr is provided, compute the extension size; - * 2. If a non-NULL hdr is provided, write the hostname ping - * extension at the start of the cursor. This function + * 2. If a non-NULL hdr is provided, write the ping + * extensions at the start of the cursor. This function * will update the cursor to point to the end of the * written extension and will return the amount of bytes * written. 
*/ -uint32_t writePingExt(clusterMsg *hdr, int gossipcount) { +static uint32_t writePingExtensions(clusterMsg *hdr, int gossipcount) { uint16_t extensions = 0; uint32_t totlen = 0; clusterMsgPingExt *cursor = NULL; @@ -2603,36 +2683,14 @@ uint32_t writePingExt(clusterMsg *hdr, int gossipcount) { cursor = getInitialPingExt(hdr, gossipcount); } - /* hostname is optional */ - if (sdslen(myself->hostname) != 0) { - if (cursor != NULL) { - /* Populate hostname */ - clusterMsgPingExtHostname *ext = - preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_HOSTNAME, getHostnamePingExtSize()); - memcpy(ext->hostname, myself->hostname, sdslen(myself->hostname)); - - /* Move the write cursor */ - cursor = getNextPingExt(cursor); - } - - totlen += getHostnamePingExtSize(); - extensions++; - } - - if (sdslen(myself->human_nodename) != 0) { - if (cursor != NULL) { - /* Populate human_nodename */ - clusterMsgPingExtHumanNodename *ext = - preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME, getHumanNodenamePingExtSize()); - memcpy(ext->human_nodename, myself->human_nodename, sdslen(myself->human_nodename)); - - /* Move the write cursor */ - cursor = getNextPingExt(cursor); - } - - totlen += getHumanNodenamePingExtSize(); - extensions++; - } + /* Write simple optional SDS ping extensions. */ + extensions += writeSdsPingExtIfNonempty(&totlen, &cursor, CLUSTERMSG_EXT_TYPE_HOSTNAME, myself->hostname); + extensions += + writeSdsPingExtIfNonempty(&totlen, &cursor, CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME, myself->human_nodename); + extensions += + writeSdsPingExtIfNonempty(&totlen, &cursor, CLUSTERMSG_EXT_TYPE_CLIENT_IPV4, myself->announce_client_ipv4); + extensions += + writeSdsPingExtIfNonempty(&totlen, &cursor, CLUSTERMSG_EXT_TYPE_CLIENT_IPV6, myself->announce_client_ipv6); /* Gossip forgotten nodes */ if (dictSize(server.cluster->nodes_black_list) > 0) { @@ -2682,6 +2740,8 @@ void clusterProcessPingExtensions(clusterMsg *hdr, clusterLink *link) { clusterNode *sender = link->node ? link->node : clusterLookupNode(hdr->sender, CLUSTER_NAMELEN); char *ext_hostname = NULL; char *ext_humannodename = NULL; + char *ext_clientipv4 = NULL; + char *ext_clientipv6 = NULL; char *ext_shardid = NULL; uint16_t extensions = ntohs(hdr->extensions); /* Loop through all the extensions and process them */ @@ -2695,6 +2755,14 @@ void clusterProcessPingExtensions(clusterMsg *hdr, clusterLink *link) { clusterMsgPingExtHumanNodename *humannodename_ext = (clusterMsgPingExtHumanNodename *)&(ext->ext[0].human_nodename); ext_humannodename = humannodename_ext->human_nodename; + } else if (type == CLUSTERMSG_EXT_TYPE_CLIENT_IPV4) { + clusterMsgPingExtClientIpV4 *clientipv4_ext = + (clusterMsgPingExtClientIpV4 *)&(ext->ext[0].announce_client_ipv4); + ext_clientipv4 = clientipv4_ext->announce_client_ipv4; + } else if (type == CLUSTERMSG_EXT_TYPE_CLIENT_IPV6) { + clusterMsgPingExtClientIpV6 *clientipv6_ext = + (clusterMsgPingExtClientIpV6 *)&(ext->ext[0].announce_client_ipv6); + ext_clientipv6 = clientipv6_ext->announce_client_ipv6; } else if (type == CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE) { clusterMsgPingExtForgottenNode *forgotten_node_ext = &(ext->ext[0].forgotten_node); clusterNode *n = clusterLookupNode(forgotten_node_ext->name, CLUSTER_NAMELEN); @@ -2723,6 +2791,8 @@ void clusterProcessPingExtensions(clusterMsg *hdr, clusterLink *link) { * set it now. 
*/ updateAnnouncedHostname(sender, ext_hostname); updateAnnouncedHumanNodename(sender, ext_humannodename); + updateAnnouncedClientIpV4(sender, ext_clientipv4); + updateAnnouncedClientIpV6(sender, ext_clientipv6); /* If the node did not send us a shard-id extension, it means the sender * does not support it (old version), node->shard_id is randomly generated. * A cluster-wide consensus for the node's shard_id is not necessary. @@ -2919,7 +2989,16 @@ int clusterIsValidPacket(clusterLink *link) { * received from the wrong sender ID). */ int clusterProcessPacket(clusterLink *link) { /* Validate that the packet is well-formed */ - if (!clusterIsValidPacket(link)) return 1; + if (!clusterIsValidPacket(link)) { + clusterMsg *hdr = (clusterMsg *)link->rcvbuf; + uint16_t type = ntohs(hdr->type); + if (server.debug_cluster_close_link_on_packet_drop && type == server.cluster_drop_packet_filter) { + freeClusterLink(link); + serverLog(LL_WARNING, "Closing link for matching packet type %hu", type); + return 0; + } + return 1; + } clusterMsg *hdr = (clusterMsg *)link->rcvbuf; uint16_t type = ntohs(hdr->type); @@ -3025,6 +3104,13 @@ int clusterProcessPacket(clusterLink *link) { if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG || type == CLUSTERMSG_TYPE_MEET) { serverLog(LL_DEBUG, "%s packet received: %.40s", clusterGetMessageTypeString(type), link->node ? link->node->name : "NULL"); + + if (sender && (sender->flags & CLUSTER_NODE_MEET)) { + /* Once we get a response for MEET from the sender, we can stop sending more MEET. */ + sender->flags &= ~CLUSTER_NODE_MEET; + serverLog(LL_NOTICE, "Successfully completed handshake with %.40s (%s)", sender->name, + sender->human_nodename); + } if (!link->inbound) { if (nodeInHandshake(link->node)) { /* If we already have this node, try to change the @@ -3054,7 +3140,7 @@ int clusterProcessPacket(clusterLink *link) { /* If the reply has a non matching node ID we * disconnect this node and set it as not having an associated * address. */ - serverLog(LL_DEBUG, + serverLog(LL_NOTICE, "PONG contains mismatching sender ID. About node %.40s (%s) in shard %.40s added %d ms ago, " "having flags %d", link->node->name, link->node->human_nodename, link->node->shard_id, @@ -3317,8 +3403,7 @@ int clusterProcessPacket(clusterLink *link) { /* We consider this vote only if the sender is a primary serving * a non zero number of slots, and its currentEpoch is greater or * equal to epoch where this node started the election. */ - if (clusterNodeIsPrimary(sender) && sender->numslots > 0 && - senderCurrentEpoch >= server.cluster->failover_auth_epoch) { + if (clusterNodeIsVotingPrimary(sender) && senderCurrentEpoch >= server.cluster->failover_auth_epoch) { server.cluster->failover_auth_count++; /* Maybe we reached a quorum here, set a flag to make sure * we check ASAP. */ @@ -3459,12 +3544,17 @@ void clusterLinkConnectHandler(connection *conn) { * replaced by the clusterSendPing() call. */ node->ping_sent = old_ping_sent; } - /* We can clear the flag after the first packet is sent. - * If we'll never receive a PONG, we'll never send new packets - * to this node. Instead after the PONG is received and we - * are no longer in meet/handshake status, we want to send - * normal PING packets. */ - node->flags &= ~CLUSTER_NODE_MEET; + /* NOTE: Assume the current node is A and is asked to MEET another node B. + * Once A sends MEET to B, it cannot clear the MEET flag for B until it + * gets a response from B. 
If the MEET packet is not accepted by B due to + * link failure, A must continue sending MEET. If A doesn't continue sending + * MEET, A will know about B, but B will never add A. Every node always + * responds to PINGs from unknown nodes with a PONG, so A will know about B + * and continue sending PINGs. But B won't add A until it sees a MEET (or it + * gets to know about A from a trusted third node C). In this case, clearing + * the MEET flag here leads to asymmetry in the cluster membership. So, we + * clear the MEET flag in clusterProcessPacket. + */ serverLog(LL_DEBUG, "Connecting with Node %.40s at %s:%d", node->name, node->ip, node->cport); } @@ -3774,7 +3864,7 @@ void clusterSendPing(clusterLink *link, int type) { estlen = sizeof(clusterMsg) - sizeof(union clusterMsgData); estlen += (sizeof(clusterMsgDataGossip) * (wanted + pfail_wanted)); if (link->node && nodeSupportsExtensions(link->node)) { - estlen += writePingExt(NULL, 0); + estlen += writePingExtensions(NULL, 0); } /* Note: clusterBuildMessageHdr() expects the buffer to be always at least * sizeof(clusterMsg) or more. */ @@ -3845,7 +3935,7 @@ void clusterSendPing(clusterLink *link, int type) { uint32_t totlen = 0; if (link->node && nodeSupportsExtensions(link->node)) { - totlen += writePingExt(hdr, gossipcount); + totlen += writePingExtensions(hdr, gossipcount); } else { serverLog(LL_DEBUG, "Unable to send extensions data, however setting ext data flag to true"); hdr->mflags[0] |= CLUSTERMSG_FLAG0_EXT_DATA; @@ -4172,9 +4262,9 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) { /* IF we are not a primary serving at least 1 slot, we don't have the * right to vote, as the cluster size is the number - * of primariies serving at least one slot, and quorum is the cluster + * of primaries serving at least one slot, and quorum is the cluster * size + 1 */ - if (nodeIsReplica(myself) || myself->numslots == 0) return; + if (!clusterNodeIsVotingPrimary(myself)) return; /* Request epoch must be >= our currentEpoch. * Note that it is impossible for it to actually be greater since @@ -4252,7 +4342,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) { } /* This function returns the "rank" of this instance, a replica, in the context - * of its primar-replicas ring. The rank of the replica is given by the number of + * of its primary-replicas ring. The rank of the replica is given by the number of * other replicas for the same primary that have a better replication offset * compared to the local one (better means, greater, so they claim more data). * @@ -4323,7 +4413,7 @@ void clusterLogCantFailover(int reason) { switch (reason) { case CLUSTER_CANT_FAILOVER_DATA_AGE: - msg = "Disconnected from master for longer than allowed. " + msg = "Disconnected from primary for longer than allowed. 
" "Please check the 'cluster-replica-validity-factor' configuration " "option."; break; @@ -4940,10 +5030,10 @@ void clusterCron(void) { if (!(node->flags & (CLUSTER_NODE_PFAIL | CLUSTER_NODE_FAIL))) { node->flags |= CLUSTER_NODE_PFAIL; update_state = 1; - if (clusterNodeIsPrimary(myself) && server.cluster->size == 1) { + if (server.cluster->size == 1 && clusterNodeIsVotingPrimary(myself)) { markNodeAsFailingIfNeeded(node); } else { - serverLog(LL_DEBUG, "*** NODE %.40s possibly failing", node->name); + serverLog(LL_NOTICE, "NODE %.40s (%s) possibly failing.", node->name, node->human_nodename); } } } @@ -5210,7 +5300,7 @@ void clusterUpdateState(void) { while ((de = dictNext(di)) != NULL) { clusterNode *node = dictGetVal(de); - if (clusterNodeIsPrimary(node) && node->numslots) { + if (clusterNodeIsVotingPrimary(node)) { server.cluster->size++; if ((node->flags & (CLUSTER_NODE_FAIL | CLUSTER_NODE_PFAIL)) == 0) reachable_primaries++; } @@ -5413,15 +5503,19 @@ sds representSlotInfo(sds ci, uint16_t *slot_info_pairs, int slot_info_pairs_cou /* Generate a csv-alike representation of the specified cluster node. * See clusterGenNodesDescription() top comment for more information. * + * If a client is provided, we're creating a reply to the CLUSTER NODES command. + * If client is NULL, we are creating the content of nodes.conf. + * * The function returns the string representation as an SDS string. */ sds clusterGenNodeDescription(client *c, clusterNode *node, int tls_primary) { int j, start; sds ci; int port = clusterNodeClientPort(node, tls_primary); + char *ip = clusterNodeIp(node, c); /* Node coordinates */ ci = sdscatlen(sdsempty(), node->name, CLUSTER_NAMELEN); - ci = sdscatfmt(ci, " %s:%i@%i", node->ip, port, node->cport); + ci = sdscatfmt(ci, " %s:%i@%i", ip, port, node->cport); if (sdslen(node->hostname) != 0) { ci = sdscatfmt(ci, ",%s", node->hostname); } @@ -5738,11 +5832,11 @@ void addNodeDetailsToShardReply(client *c, clusterNode *node) { } addReplyBulkCString(c, "ip"); - addReplyBulkCString(c, node->ip); + addReplyBulkCString(c, clusterNodeIp(node, c)); reply_count++; addReplyBulkCString(c, "endpoint"); - addReplyBulkCString(c, clusterNodePreferredEndpoint(node)); + addReplyBulkCString(c, clusterNodePreferredEndpoint(node, c)); reply_count++; if (sdslen(node->hostname) != 0) { @@ -5986,6 +6080,10 @@ int handleDebugClusterCommand(client *c) { addReplyErrorFormat(c, "Unknown node %s", (char *)c->argv[4]->ptr); return 1; } + if (n == server.cluster->myself) { + addReplyErrorFormat(c, "Cannot free cluster link(s) to myself"); + return 1; + } /* Terminate the link based on the direction or all. */ if (!strcasecmp(c->argv[3]->ptr, "from")) { @@ -6007,7 +6105,16 @@ int clusterNodePending(clusterNode *node) { return node->flags & (CLUSTER_NODE_NOADDR | CLUSTER_NODE_HANDSHAKE); } -char *clusterNodeIp(clusterNode *node) { +/* Returns the IP of the node as seen by the given client, or by the cluster node if c is NULL. 
*/ +char *clusterNodeIp(clusterNode *node, client *c) { + if (c == NULL) { + return node->ip; + } + if (isClientConnIpV6(c)) { + if (sdslen(node->announce_client_ipv6) != 0) return node->announce_client_ipv6; + } else { + if (sdslen(node->announce_client_ipv4) != 0) return node->announce_client_ipv4; + } return node->ip; } @@ -6068,7 +6175,7 @@ int clusterParseSetSlotCommand(client *c, int *slot_out, clusterNode **node_out, int optarg_pos = 0; /* Allow primaries to replicate "CLUSTER SETSLOT" */ - if (!(c->flags & CLIENT_PRIMARY) && nodeIsReplica(myself)) { + if (!c->flag.primary && nodeIsReplica(myself)) { addReplyError(c, "Please use SETSLOT only with masters."); return 0; } @@ -6186,30 +6293,49 @@ void clusterCommandSetSlot(client *c) { * 3. Upon replication completion, primary B executes `SETSLOT n NODE B` and * returns success to client C. * 4. The following steps can happen in parallel: - * a. Client C issues `SETSLOT n NODE B` against parimary A. + * a. Client C issues `SETSLOT n NODE B` against primary A. * b. Primary B gossips its new slot ownership to the cluster (including A, A', etc.). * * This ensures that all replicas have the latest topology information, enabling * a reliable slot ownership transfer even if the primary node went down during * the process. */ - if (nodeIsPrimary(myself) && myself->num_replicas != 0 && (c->flags & CLIENT_REPLICATION_DONE) == 0) { - forceCommandPropagation(c, PROPAGATE_REPL); - /* We are a primary and this is the first time we see this `SETSLOT` - * command. Force-replicate the command to all of our replicas - * first and only on success will we handle the command. - * Note that - * 1. All replicas are expected to ack the replication within the given timeout - * 2. The repl offset target is set to the primary's current repl offset + 1. - * There is no concern of partial replication because replicas always - * ack the repl offset at the command boundary. */ - blockClientForReplicaAck(c, timeout_ms, server.primary_repl_offset + 1, myself->num_replicas, 0); - /* Mark client as pending command for execution after replication to replicas. */ - c->flags |= CLIENT_PENDING_COMMAND; - replicationRequestAckFromReplicas(); - return; + if (nodeIsPrimary(myself) && myself->num_replicas != 0 && !c->flag.replication_done) { + /* Iterate through the list of replicas to check if there are any running + * a version older than 8.0.0. Replicas with versions older than 8.0.0 do + * not support the CLUSTER SETSLOT command on replicas. If such a replica + * is found, we should skip the replication and fall back to the old + * non-replicated behavior.*/ + listIter li; + listNode *ln; + int legacy_replica_found = 0; + listRewind(server.replicas, &li); + while ((ln = listNext(&li))) { + client *r = ln->value; + if (r->replica_version < 0x80000 /* 8.0.0 */) { + legacy_replica_found++; + break; + } + } + + if (!legacy_replica_found) { + forceCommandPropagation(c, PROPAGATE_REPL); + /* We are a primary and this is the first time we see this `SETSLOT` + * command. Force-replicate the command to all of our replicas + * first and only on success will we handle the command. + * Note that + * 1. All replicas are expected to ack the replication within the given timeout + * 2. The repl offset target is set to the primary's current repl offset + 1. + * There is no concern of partial replication because replicas always + * ack the repl offset at the command boundary. 
*/ + blockClientForReplicaAck(c, timeout_ms, server.primary_repl_offset + 1, myself->num_replicas, 0); + /* Mark client as pending command for execution after replication to replicas. */ + c->flag.pending_command = 1; + replicationRequestAckFromReplicas(); + return; + } } - /* Slot states have been updated on the replicas (if any). + /* Slot states have been updated on the compatible replicas (if any). * Now exuecte the command on the primary. */ if (!strcasecmp(c->argv[3]->ptr, "migrating")) { serverLog(LL_NOTICE, "Migrating slot %d to node %.40s (%s)", slot, n->name, n->human_nodename); @@ -6653,10 +6779,10 @@ long long clusterNodeReplOffset(clusterNode *node) { return node->repl_offset; } -const char *clusterNodePreferredEndpoint(clusterNode *n) { +const char *clusterNodePreferredEndpoint(clusterNode *n, client *c) { char *hostname = clusterNodeHostname(n); switch (server.cluster_preferred_endpoint_type) { - case CLUSTER_ENDPOINT_TYPE_IP: return clusterNodeIp(n); + case CLUSTER_ENDPOINT_TYPE_IP: return clusterNodeIp(n, c); case CLUSTER_ENDPOINT_TYPE_HOSTNAME: return (hostname != NULL && hostname[0] != '\0') ? hostname : "?"; case CLUSTER_ENDPOINT_TYPE_UNKNOWN_ENDPOINT: return ""; } diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index e962cc94f2..eb1c139a30 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -152,6 +152,8 @@ typedef enum { CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME, CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE, CLUSTERMSG_EXT_TYPE_SHARDID, + CLUSTERMSG_EXT_TYPE_CLIENT_IPV4, + CLUSTERMSG_EXT_TYPE_CLIENT_IPV6, } clusterMsgPingtypes; /* Helper function for making sure extensions are eight byte aligned. */ @@ -176,6 +178,14 @@ typedef struct { char shard_id[CLUSTER_NAMELEN]; /* The shard_id, 40 bytes fixed. */ } clusterMsgPingExtShardId; +typedef struct { + char announce_client_ipv4[1]; /* Announced client IPv4, ends with \0. */ +} clusterMsgPingExtClientIpV4; + +typedef struct { + char announce_client_ipv6[1]; /* Announced client IPv6, ends with \0. */ +} clusterMsgPingExtClientIpV6; + typedef struct { uint32_t length; /* Total length of this extension message (including this header) */ uint16_t type; /* Type of this extension message (see clusterMsgPingtypes) */ @@ -185,6 +195,8 @@ typedef struct { clusterMsgPingExtHumanNodename human_nodename; clusterMsgPingExtForgottenNode forgotten_node; clusterMsgPingExtShardId shard_id; + clusterMsgPingExtClientIpV4 announce_client_ipv4; + clusterMsgPingExtClientIpV6 announce_client_ipv6; } ext[]; /* Actual extension information, formatted so that the data is 8 * byte aligned, regardless of its content. */ } clusterMsgPingExt; @@ -326,22 +338,24 @@ struct _clusterNode { uint16_t *slot_info_pairs; /* Slots info represented as (start/end) pair (consecutive index). */ int slot_info_pairs_count; /* Used number of slots in slot_info_pairs */ int numslots; /* Number of slots handled by this node */ - int num_replicas; /* Number of replica nodes, if this is a primar */ + int num_replicas; /* Number of replica nodes, if this is a primary */ clusterNode **replicas; /* pointers to replica nodes */ clusterNode *replicaof; /* pointer to the primary node. Note that it may be NULL even if the node is a replica - if we don't have the parimary node in our + if we don't have the primary node in our tables. 
*/ unsigned long long last_in_ping_gossip; /* The number of the last carried in the ping gossip section */ mstime_t ping_sent; /* Unix time we sent latest ping */ mstime_t pong_received; /* Unix time we received the pong */ mstime_t data_received; /* Unix time we received any data */ mstime_t fail_time; /* Unix time when FAIL flag was set */ - mstime_t voted_time; /* Last time we voted for a replica of this parimary */ + mstime_t voted_time; /* Last time we voted for a replica of this primary */ mstime_t repl_offset_time; /* Unix time we received offset for this node */ mstime_t orphaned_time; /* Starting time of orphaned primary condition */ long long repl_offset; /* Last known repl offset for this node. */ char ip[NET_IP_STR_LEN]; /* Latest known IP address of this node */ + sds announce_client_ipv4; /* IPv4 for clients only. */ + sds announce_client_ipv6; /* IPv6 for clients only. */ sds hostname; /* The known hostname for this node */ sds human_nodename; /* The known human readable nodename for this node */ int tcp_port; /* Latest known clients TCP port. */ diff --git a/src/cluster_slot_stats.c b/src/cluster_slot_stats.c new file mode 100644 index 0000000000..a2a6bfdd01 --- /dev/null +++ b/src/cluster_slot_stats.c @@ -0,0 +1,190 @@ +/* + * Copyright Valkey Contributors. + * All rights reserved. + * SPDX-License-Identifier: BSD 3-Clause + */ + +#include "server.h" +#include "cluster.h" + +#define UNASSIGNED_SLOT 0 + +typedef enum { + KEY_COUNT, + INVALID, +} slotStatTypes; + +/* ----------------------------------------------------------------------------- + * CLUSTER SLOT-STATS command + * -------------------------------------------------------------------------- */ + +/* Struct used to temporarily hold slot statistics for sorting. */ +typedef struct { + int slot; + uint64_t stat; +} slotStatForSort; + +static int doesSlotBelongToMyShard(int slot) { + clusterNode *myself = getMyClusterNode(); + clusterNode *primary = clusterNodeGetPrimary(myself); + + return clusterNodeCoversSlot(primary, slot); +} + +static int markSlotsAssignedToMyShard(unsigned char *assigned_slots, int start_slot, int end_slot) { + int assigned_slots_count = 0; + for (int slot = start_slot; slot <= end_slot; slot++) { + if (doesSlotBelongToMyShard(slot)) { + assigned_slots[slot]++; + assigned_slots_count++; + } + } + return assigned_slots_count; +} + +static uint64_t getSlotStat(int slot, int stat_type) { + serverAssert(stat_type != INVALID); + uint64_t slot_stat = 0; + if (stat_type == KEY_COUNT) { + slot_stat = countKeysInSlot(slot); + } + return slot_stat; +} + +/* Compare by stat in ascending order. If stat is the same, compare by slot in ascending order. */ +static int slotStatForSortAscCmp(const void *a, const void *b) { + slotStatForSort entry_a = *((slotStatForSort *)a); + slotStatForSort entry_b = *((slotStatForSort *)b); + if (entry_a.stat == entry_b.stat) { + return entry_a.slot - entry_b.slot; + } + return entry_a.stat - entry_b.stat; +} + +/* Compare by stat in descending order. If stat is the same, compare by slot in ascending order. 
*/ +static int slotStatForSortDescCmp(const void *a, const void *b) { + slotStatForSort entry_a = *((slotStatForSort *)a); + slotStatForSort entry_b = *((slotStatForSort *)b); + if (entry_b.stat == entry_a.stat) { + return entry_a.slot - entry_b.slot; + } + return entry_b.stat - entry_a.stat; +} + +static void collectAndSortSlotStats(slotStatForSort slot_stats[], int order_by, int desc) { + int i = 0; + + for (int slot = 0; slot < CLUSTER_SLOTS; slot++) { + if (doesSlotBelongToMyShard(slot)) { + slot_stats[i].slot = slot; + slot_stats[i].stat = getSlotStat(slot, order_by); + i++; + } + } + qsort(slot_stats, i, sizeof(slotStatForSort), (desc) ? slotStatForSortDescCmp : slotStatForSortAscCmp); +} + +static void addReplySlotStat(client *c, int slot) { + addReplyArrayLen(c, 2); /* Array of size 2, where 0th index represents (int) slot, + * and 1st index represents (map) usage statistics. */ + addReplyLongLong(c, slot); + addReplyMapLen(c, 1); /* Nested map representing slot usage statistics. */ + addReplyBulkCString(c, "key-count"); + addReplyLongLong(c, countKeysInSlot(slot)); +} + +/* Adds reply for the SLOTSRANGE variant. + * Response is ordered in ascending slot number. */ +static void addReplySlotsRange(client *c, unsigned char *assigned_slots, int startslot, int endslot, int len) { + addReplyArrayLen(c, len); /* Top level RESP reply format is defined as an array, due to ordering invariance. */ + + for (int slot = startslot; slot <= endslot; slot++) { + if (assigned_slots[slot]) addReplySlotStat(c, slot); + } +} + +static void addReplySortedSlotStats(client *c, slotStatForSort slot_stats[], long limit) { + int num_slots_assigned = getMyShardSlotCount(); + int len = min(limit, num_slots_assigned); + addReplyArrayLen(c, len); /* Top level RESP reply format is defined as an array, due to ordering invariance. */ + + for (int i = 0; i < len; i++) { + addReplySlotStat(c, slot_stats[i].slot); + } +} + +/* Adds reply for the ORDERBY variant. + * Response is ordered based on the sort result. */ +static void addReplyOrderBy(client *c, int order_by, long limit, int desc) { + slotStatForSort slot_stats[CLUSTER_SLOTS]; + collectAndSortSlotStats(slot_stats, order_by, desc); + addReplySortedSlotStats(c, slot_stats, limit); +} + +void clusterSlotStatsCommand(client *c) { + if (server.cluster_enabled == 0) { + addReplyError(c, "This instance has cluster support disabled"); + return; + } + + /* Parse additional arguments. */ + if (c->argc == 5 && !strcasecmp(c->argv[2]->ptr, "slotsrange")) { + /* CLUSTER SLOT-STATS SLOTSRANGE start-slot end-slot */ + int startslot, endslot; + if ((startslot = getSlotOrReply(c, c->argv[3])) == C_ERR || + (endslot = getSlotOrReply(c, c->argv[4])) == C_ERR) { + return; + } + if (startslot > endslot) { + addReplyErrorFormat(c, "Start slot number %d is greater than end slot number %d", startslot, endslot); + return; + } + /* Initialize slot assignment array. */ + unsigned char assigned_slots[CLUSTER_SLOTS] = {UNASSIGNED_SLOT}; + int assigned_slots_count = markSlotsAssignedToMyShard(assigned_slots, startslot, endslot); + addReplySlotsRange(c, assigned_slots, startslot, endslot, assigned_slots_count); + + } else if (c->argc >= 4 && !strcasecmp(c->argv[2]->ptr, "orderby")) { + /* CLUSTER SLOT-STATS ORDERBY metric [LIMIT limit] [ASC | DESC] */ + int desc = 1, order_by = INVALID; + if (!strcasecmp(c->argv[3]->ptr, "key-count")) { + order_by = KEY_COUNT; + } else { + addReplyError(c, "Unrecognized sort metric for ORDER BY. 
The supported metrics are: key-count."); + return; + } + int i = 4; /* Next argument index, following ORDERBY */ + int limit_counter = 0, asc_desc_counter = 0; + long limit = CLUSTER_SLOTS; + while (i < c->argc) { + int moreargs = c->argc > i + 1; + if (!strcasecmp(c->argv[i]->ptr, "limit") && moreargs) { + if (getRangeLongFromObjectOrReply( + c, c->argv[i + 1], 1, CLUSTER_SLOTS, &limit, + "Limit has to lie in between 1 and 16384 (maximum number of slots).") != C_OK) { + return; + } + i++; + limit_counter++; + } else if (!strcasecmp(c->argv[i]->ptr, "asc")) { + desc = 0; + asc_desc_counter++; + } else if (!strcasecmp(c->argv[i]->ptr, "desc")) { + desc = 1; + asc_desc_counter++; + } else { + addReplyErrorObject(c, shared.syntaxerr); + return; + } + if (limit_counter > 1 || asc_desc_counter > 1) { + addReplyError(c, "Multiple filters of the same type are disallowed."); + return; + } + i++; + } + addReplyOrderBy(c, order_by, limit, desc); + + } else { + addReplySubcommandSyntaxError(c); + } +} diff --git a/src/commands.def b/src/commands.def index 06cdb4b87e..4559c0aefe 100644 --- a/src/commands.def +++ b/src/commands.def @@ -930,6 +930,56 @@ struct COMMAND_ARG CLUSTER_SLAVES_Args[] = { {MAKE_ARG("node-id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/********** CLUSTER SLOT_STATS ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* CLUSTER SLOT_STATS history */ +#define CLUSTER_SLOT_STATS_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* CLUSTER SLOT_STATS tips */ +const char *CLUSTER_SLOT_STATS_Tips[] = { +"nondeterministic_output", +"request_policy:all_shards", +}; +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* CLUSTER SLOT_STATS key specs */ +#define CLUSTER_SLOT_STATS_Keyspecs NULL +#endif + +/* CLUSTER SLOT_STATS filter slotsrange argument table */ +struct COMMAND_ARG CLUSTER_SLOT_STATS_filter_slotsrange_Subargs[] = { +{MAKE_ARG("start-slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("end-slot",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* CLUSTER SLOT_STATS filter orderby order argument table */ +struct COMMAND_ARG CLUSTER_SLOT_STATS_filter_orderby_order_Subargs[] = { +{MAKE_ARG("asc",ARG_TYPE_PURE_TOKEN,-1,"ASC",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("desc",ARG_TYPE_PURE_TOKEN,-1,"DESC",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* CLUSTER SLOT_STATS filter orderby argument table */ +struct COMMAND_ARG CLUSTER_SLOT_STATS_filter_orderby_Subargs[] = { +{MAKE_ARG("metric",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("limit",ARG_TYPE_INTEGER,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, +{MAKE_ARG("order",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=CLUSTER_SLOT_STATS_filter_orderby_order_Subargs}, +}; + +/* CLUSTER SLOT_STATS filter argument table */ +struct COMMAND_ARG CLUSTER_SLOT_STATS_filter_Subargs[] = { +{MAKE_ARG("slotsrange",ARG_TYPE_BLOCK,-1,"SLOTSRANGE",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLUSTER_SLOT_STATS_filter_slotsrange_Subargs}, +{MAKE_ARG("orderby",ARG_TYPE_BLOCK,-1,"ORDERBY",NULL,NULL,CMD_ARG_NONE,3,NULL),.subargs=CLUSTER_SLOT_STATS_filter_orderby_Subargs}, +}; + +/* CLUSTER SLOT_STATS argument table */ +struct COMMAND_ARG CLUSTER_SLOT_STATS_Args[] = { +{MAKE_ARG("filter",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLUSTER_SLOT_STATS_filter_Subargs}, +}; + /********** CLUSTER SLOTS ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -961,7 +1011,7 @@ struct COMMAND_STRUCT CLUSTER_Subcommands[] = { 
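The ORDERBY comparators in cluster_slot_stats.c above sort by the requested metric and break ties by ascending slot number. The following standalone sketch (plain C, no server dependencies) shows the resulting order for a few made-up slots, using the same ordering rule as slotStatForSortDescCmp.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct {
        int slot;
        uint64_t stat;
    } slotStatForSort;

    /* Descending by stat; equal stats fall back to ascending slot number. */
    static int descCmp(const void *a, const void *b) {
        const slotStatForSort *ea = a, *eb = b;
        if (ea->stat == eb->stat) return ea->slot - eb->slot;
        return (eb->stat > ea->stat) ? 1 : -1;
    }

    int main(void) {
        slotStatForSort stats[] = {{100, 7}, {42, 7}, {3, 1}, {7, 12}};
        qsort(stats, 4, sizeof(stats[0]), descCmp);
        /* Prints slots 7, 42, 100, 3: highest key-count first, ties by slot. */
        for (int i = 0; i < 4; i++)
            printf("slot %d: %llu keys\n", stats[i].slot, (unsigned long long)stats[i].stat);
        return 0;
    }
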
{MAKE_CMD("countkeysinslot","Returns the number of keys in a hash slot.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_COUNTKEYSINSLOT_History,0,CLUSTER_COUNTKEYSINSLOT_Tips,0,clusterCommand,3,CMD_STALE,0,CLUSTER_COUNTKEYSINSLOT_Keyspecs,0,NULL,1),.args=CLUSTER_COUNTKEYSINSLOT_Args}, {MAKE_CMD("delslots","Sets hash slots as unbound for a node.","O(N) where N is the total number of hash slot arguments","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_DELSLOTS_History,0,CLUSTER_DELSLOTS_Tips,0,clusterCommand,-3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_DELSLOTS_Keyspecs,0,NULL,1),.args=CLUSTER_DELSLOTS_Args}, {MAKE_CMD("delslotsrange","Sets hash slot ranges as unbound for a node.","O(N) where N is the total number of the slots between the start slot and end slot arguments.","7.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_DELSLOTSRANGE_History,0,CLUSTER_DELSLOTSRANGE_Tips,0,clusterCommand,-4,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_DELSLOTSRANGE_Keyspecs,0,NULL,1),.args=CLUSTER_DELSLOTSRANGE_Args}, -{MAKE_CMD("failover","Forces a replica to perform a manual failover of its master.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_FAILOVER_History,0,CLUSTER_FAILOVER_Tips,0,clusterCommand,-2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_FAILOVER_Keyspecs,0,NULL,1),.args=CLUSTER_FAILOVER_Args}, +{MAKE_CMD("failover","Forces a replica to perform a manual failover of its primary.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_FAILOVER_History,0,CLUSTER_FAILOVER_Tips,0,clusterCommand,-2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_FAILOVER_Keyspecs,0,NULL,1),.args=CLUSTER_FAILOVER_Args}, {MAKE_CMD("flushslots","Deletes all slots information from a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_FLUSHSLOTS_History,0,CLUSTER_FLUSHSLOTS_Tips,0,clusterCommand,2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_FLUSHSLOTS_Keyspecs,0,NULL,0)}, {MAKE_CMD("forget","Removes a node from the nodes table.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_FORGET_History,0,CLUSTER_FORGET_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_FORGET_Keyspecs,0,NULL,1),.args=CLUSTER_FORGET_Args}, {MAKE_CMD("getkeysinslot","Returns the key names in a hash slot.","O(N) where N is the number of requested keys","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_GETKEYSINSLOT_History,0,CLUSTER_GETKEYSINSLOT_Tips,1,clusterCommand,4,CMD_STALE,0,CLUSTER_GETKEYSINSLOT_Keyspecs,0,NULL,2),.args=CLUSTER_GETKEYSINSLOT_Args}, @@ -973,14 +1023,15 @@ struct COMMAND_STRUCT CLUSTER_Subcommands[] = { {MAKE_CMD("myid","Returns the ID of a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_MYID_History,0,CLUSTER_MYID_Tips,0,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_MYID_Keyspecs,0,NULL,0)}, {MAKE_CMD("myshardid","Returns the shard ID of a node.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_MYSHARDID_History,0,CLUSTER_MYSHARDID_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_MYSHARDID_Keyspecs,0,NULL,0)}, {MAKE_CMD("nodes","Returns the cluster configuration for a node.","O(N) where N is the total number of Cluster 
nodes","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_NODES_History,0,CLUSTER_NODES_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_NODES_Keyspecs,0,NULL,0)}, -{MAKE_CMD("replicas","Lists the replica nodes of a master node.","O(N) where N is the number of replicas.","5.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICAS_History,0,CLUSTER_REPLICAS_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICAS_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICAS_Args}, -{MAKE_CMD("replicate","Configure a node as replica of a master node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICATE_History,0,CLUSTER_REPLICATE_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICATE_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICATE_Args}, +{MAKE_CMD("replicas","Lists the replica nodes of a primary node.","O(N) where N is the number of replicas.","5.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICAS_History,0,CLUSTER_REPLICAS_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICAS_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICAS_Args}, +{MAKE_CMD("replicate","Configure a node as replica of a primary node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICATE_History,0,CLUSTER_REPLICATE_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICATE_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICATE_Args}, {MAKE_CMD("reset","Resets a node.","O(N) where N is the number of known nodes. The command may execute a FLUSHALL as a side effect.","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_RESET_History,0,CLUSTER_RESET_Tips,0,clusterCommand,-2,CMD_ADMIN|CMD_STALE|CMD_NOSCRIPT,0,CLUSTER_RESET_Keyspecs,0,NULL,1),.args=CLUSTER_RESET_Args}, {MAKE_CMD("saveconfig","Forces a node to save the cluster configuration to disk.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SAVECONFIG_History,0,CLUSTER_SAVECONFIG_Tips,0,clusterCommand,2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_SAVECONFIG_Keyspecs,0,NULL,0)}, {MAKE_CMD("set-config-epoch","Sets the configuration epoch for a new node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SET_CONFIG_EPOCH_History,0,CLUSTER_SET_CONFIG_EPOCH_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_SET_CONFIG_EPOCH_Keyspecs,0,NULL,1),.args=CLUSTER_SET_CONFIG_EPOCH_Args}, {MAKE_CMD("setslot","Binds a hash slot to a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SETSLOT_History,1,CLUSTER_SETSLOT_Tips,0,clusterCommand,-4,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE|CMD_MAY_REPLICATE,0,CLUSTER_SETSLOT_Keyspecs,0,NULL,3),.args=CLUSTER_SETSLOT_Args}, {MAKE_CMD("shards","Returns the mapping of cluster slots to shards.","O(N) where N is the total number of cluster nodes","7.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SHARDS_History,0,CLUSTER_SHARDS_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_SHARDS_Keyspecs,0,NULL,0)}, -{MAKE_CMD("slaves","Lists the replica nodes of a master node.","O(N) where N is the number of replicas.","3.0.0",CMD_DOC_DEPRECATED,"`CLUSTER REPLICAS`","5.0.0","cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLAVES_History,0,CLUSTER_SLAVES_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_SLAVES_Keyspecs,0,NULL,1),.args=CLUSTER_SLAVES_Args}, +{MAKE_CMD("slaves","Lists the replica nodes of a primary node.","O(N) where N is 
the number of replicas.","3.0.0",CMD_DOC_DEPRECATED,"`CLUSTER REPLICAS`","5.0.0","cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLAVES_History,0,CLUSTER_SLAVES_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_SLAVES_Keyspecs,0,NULL,1),.args=CLUSTER_SLAVES_Args}, +{MAKE_CMD("slot-stats","Return an array of slot usage statistics for slots assigned to the current node.","O(N) where N is the total number of slots based on arguments. O(N*log(N)) with ORDERBY subcommand.","8.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLOT_STATS_History,0,CLUSTER_SLOT_STATS_Tips,2,clusterSlotStatsCommand,-4,CMD_STALE|CMD_LOADING,0,CLUSTER_SLOT_STATS_Keyspecs,0,NULL,1),.args=CLUSTER_SLOT_STATS_Args}, {MAKE_CMD("slots","Returns the mapping of cluster slots to nodes.","O(N) where N is the total number of Cluster nodes","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLOTS_History,2,CLUSTER_SLOTS_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_SLOTS_Keyspecs,0,NULL,0)}, {0} }; @@ -1089,6 +1140,28 @@ struct COMMAND_ARG CLIENT_CACHING_Args[] = { {MAKE_ARG("mode",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLIENT_CACHING_mode_Subargs}, }; +/********** CLIENT CAPA ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* CLIENT CAPA history */ +#define CLIENT_CAPA_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* CLIENT CAPA tips */ +#define CLIENT_CAPA_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* CLIENT CAPA key specs */ +#define CLIENT_CAPA_Keyspecs NULL +#endif + +/* CLIENT CAPA argument table */ +struct COMMAND_ARG CLIENT_CAPA_Args[] = { +{MAKE_ARG("capability",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + /********** CLIENT GETNAME ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -1187,6 +1260,7 @@ commandHistory CLIENT_KILL_History[] = { {"5.0.0","Replaced `slave` `TYPE` with `replica`. `slave` still supported for backward compatibility."}, {"6.2.0","`LADDR` option."}, {"8.0.0","`MAXAGE` option."}, +{"8.0.0","Replaced `master` `TYPE` with `primary`. 
`master` still supported for backward compatibility."}, }; #endif @@ -1204,6 +1278,7 @@ commandHistory CLIENT_KILL_History[] = { struct COMMAND_ARG CLIENT_KILL_filter_new_format_client_type_Subargs[] = { {MAKE_ARG("normal",ARG_TYPE_PURE_TOKEN,-1,"NORMAL",NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("master",ARG_TYPE_PURE_TOKEN,-1,"MASTER",NULL,"3.2.0",CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("primary",ARG_TYPE_PURE_TOKEN,-1,"PRIMARY",NULL,"8.0.0",CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("slave",ARG_TYPE_PURE_TOKEN,-1,"SLAVE",NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("replica",ARG_TYPE_PURE_TOKEN,-1,"REPLICA",NULL,"5.0.0",CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("pubsub",ARG_TYPE_PURE_TOKEN,-1,"PUBSUB",NULL,NULL,CMD_ARG_NONE,0,NULL)}, @@ -1218,7 +1293,7 @@ struct COMMAND_ARG CLIENT_KILL_filter_new_format_skipme_Subargs[] = { /* CLIENT KILL filter new_format argument table */ struct COMMAND_ARG CLIENT_KILL_filter_new_format_Subargs[] = { {MAKE_ARG("client-id",ARG_TYPE_INTEGER,-1,"ID",NULL,"2.8.12",CMD_ARG_OPTIONAL,0,NULL)}, -{MAKE_ARG("client-type",ARG_TYPE_ONEOF,-1,"TYPE",NULL,"2.8.12",CMD_ARG_OPTIONAL,5,NULL),.subargs=CLIENT_KILL_filter_new_format_client_type_Subargs}, +{MAKE_ARG("client-type",ARG_TYPE_ONEOF,-1,"TYPE",NULL,"2.8.12",CMD_ARG_OPTIONAL,6,NULL),.subargs=CLIENT_KILL_filter_new_format_client_type_Subargs}, {MAKE_ARG("username",ARG_TYPE_STRING,-1,"USER",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, {MAKE_ARG("addr",ARG_TYPE_STRING,-1,"ADDR",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL),.display_text="ip:port"}, {MAKE_ARG("laddr",ARG_TYPE_STRING,-1,"LADDR",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL),.display_text="ip:port"}, @@ -1248,6 +1323,7 @@ commandHistory CLIENT_LIST_History[] = { {"6.2.0","Added `argv-mem`, `tot-mem`, `laddr` and `redir` fields and the optional `ID` filter."}, {"7.0.0","Added `resp`, `multi-mem`, `rbs` and `rbp` fields."}, {"7.0.3","Added `ssub` field."}, +{"8.0.0","Replaced `master` `TYPE` with `primary`. 
`master` still supported for backward compatibility."}, }; #endif @@ -1549,13 +1625,14 @@ struct COMMAND_ARG CLIENT_UNBLOCK_Args[] = { /* CLIENT command table */ struct COMMAND_STRUCT CLIENT_Subcommands[] = { {MAKE_CMD("caching","Instructs the server whether to track the keys in the next request.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_CACHING_History,0,CLIENT_CACHING_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_CACHING_Keyspecs,0,NULL,1),.args=CLIENT_CACHING_Args}, +{MAKE_CMD("capa","A client claims its capability.","O(1)","8.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_CAPA_History,0,CLIENT_CAPA_Tips,0,clientCommand,-3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,CLIENT_CAPA_Keyspecs,0,NULL,1),.args=CLIENT_CAPA_Args}, {MAKE_CMD("getname","Returns the name of the connection.","O(1)","2.6.9",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_GETNAME_History,0,CLIENT_GETNAME_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_GETNAME_Keyspecs,0,NULL,0)}, {MAKE_CMD("getredir","Returns the client ID to which the connection's tracking notifications are redirected.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_GETREDIR_History,0,CLIENT_GETREDIR_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_GETREDIR_Keyspecs,0,NULL,0)}, {MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_HELP_History,0,CLIENT_HELP_Tips,0,clientCommand,2,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_HELP_Keyspecs,0,NULL,0)}, {MAKE_CMD("id","Returns the unique client ID of the connection.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_ID_History,0,CLIENT_ID_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_ID_Keyspecs,0,NULL,0)}, {MAKE_CMD("info","Returns information about the connection.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_INFO_History,0,CLIENT_INFO_Tips,1,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_INFO_Keyspecs,0,NULL,0)}, -{MAKE_CMD("kill","Terminates open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_KILL_History,6,CLIENT_KILL_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_KILL_Keyspecs,0,NULL,1),.args=CLIENT_KILL_Args}, -{MAKE_CMD("list","Lists open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,6,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args}, +{MAKE_CMD("kill","Terminates open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_KILL_History,7,CLIENT_KILL_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_KILL_Keyspecs,0,NULL,1),.args=CLIENT_KILL_Args}, +{MAKE_CMD("list","Lists open 
connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,7,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args}, {MAKE_CMD("no-evict","Sets the client eviction mode of the connection.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_EVICT_History,0,CLIENT_NO_EVICT_Tips,0,clientCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_NO_EVICT_Keyspecs,0,NULL,1),.args=CLIENT_NO_EVICT_Args}, {MAKE_CMD("no-touch","Controls whether commands sent by the client affect the LRU/LFU of accessed keys.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_TOUCH_History,0,CLIENT_NO_TOUCH_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,CLIENT_NO_TOUCH_Keyspecs,0,NULL,1),.args=CLIENT_NO_TOUCH_Args}, {MAKE_CMD("pause","Suspends commands processing.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_PAUSE_History,1,CLIENT_PAUSE_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_PAUSE_Keyspecs,0,NULL,2),.args=CLIENT_PAUSE_Args}, @@ -5330,6 +5407,28 @@ struct COMMAND_ARG SCRIPT_LOAD_Args[] = { {MAKE_ARG("script",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/********** SCRIPT SHOW ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* SCRIPT SHOW history */ +#define SCRIPT_SHOW_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* SCRIPT SHOW tips */ +#define SCRIPT_SHOW_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* SCRIPT SHOW key specs */ +#define SCRIPT_SHOW_Keyspecs NULL +#endif + +/* SCRIPT SHOW argument table */ +struct COMMAND_ARG SCRIPT_SHOW_Args[] = { +{MAKE_ARG("sha1",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + /* SCRIPT command table */ struct COMMAND_STRUCT SCRIPT_Subcommands[] = { {MAKE_CMD("debug","Sets the debug mode of server-side Lua scripts.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_DEBUG_History,0,SCRIPT_DEBUG_Tips,0,scriptCommand,3,CMD_NOSCRIPT,ACL_CATEGORY_SCRIPTING,SCRIPT_DEBUG_Keyspecs,0,NULL,1),.args=SCRIPT_DEBUG_Args}, @@ -5338,6 +5437,7 @@ struct COMMAND_STRUCT SCRIPT_Subcommands[] = { {MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_HELP_History,0,SCRIPT_HELP_Tips,0,scriptCommand,2,CMD_LOADING|CMD_STALE,ACL_CATEGORY_SCRIPTING,SCRIPT_HELP_Keyspecs,0,NULL,0)}, {MAKE_CMD("kill","Terminates a server-side Lua script during execution.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_KILL_History,0,SCRIPT_KILL_Tips,2,scriptCommand,2,CMD_NOSCRIPT|CMD_ALLOW_BUSY,ACL_CATEGORY_SCRIPTING,SCRIPT_KILL_Keyspecs,0,NULL,0)}, {MAKE_CMD("load","Loads a server-side Lua script to the script cache.","O(N) with N being the length in bytes of the script body.","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_LOAD_History,0,SCRIPT_LOAD_Tips,2,scriptCommand,3,CMD_NOSCRIPT|CMD_STALE,ACL_CATEGORY_SCRIPTING,SCRIPT_LOAD_Keyspecs,0,NULL,1),.args=SCRIPT_LOAD_Args}, +{MAKE_CMD("show","Show server-side Lua script in the script 
cache.","O(1).","8.0.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,SCRIPT_SHOW_History,0,SCRIPT_SHOW_Tips,0,scriptCommand,3,CMD_NOSCRIPT,ACL_CATEGORY_SCRIPTING,SCRIPT_SHOW_Keyspecs,0,NULL,1),.args=SCRIPT_SHOW_Args}, {0} }; @@ -5377,7 +5477,7 @@ struct COMMAND_STRUCT SCRIPT_Subcommands[] = { /* SENTINEL CKQUORUM argument table */ struct COMMAND_ARG SENTINEL_CKQUORUM_Args[] = { -{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; /********** SENTINEL CONFIG ********************/ @@ -5463,7 +5563,7 @@ struct COMMAND_ARG SENTINEL_DEBUG_Args[] = { /* SENTINEL FAILOVER argument table */ struct COMMAND_ARG SENTINEL_FAILOVER_Args[] = { -{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; /********** SENTINEL FLUSHCONFIG ********************/ @@ -5502,7 +5602,7 @@ struct COMMAND_ARG SENTINEL_FAILOVER_Args[] = { /* SENTINEL GET_MASTER_ADDR_BY_NAME argument table */ struct COMMAND_ARG SENTINEL_GET_MASTER_ADDR_BY_NAME_Args[] = { -{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; /********** SENTINEL HELP ********************/ @@ -5588,7 +5688,7 @@ struct COMMAND_ARG SENTINEL_IS_MASTER_DOWN_BY_ADDR_Args[] = { /* SENTINEL MASTER argument table */ struct COMMAND_ARG SENTINEL_MASTER_Args[] = { -{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; /********** SENTINEL MASTERS ********************/ @@ -5686,7 +5786,7 @@ struct COMMAND_ARG SENTINEL_MONITOR_Args[] = { /* SENTINEL REMOVE argument table */ struct COMMAND_ARG SENTINEL_REMOVE_Args[] = { -{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; /********** SENTINEL REPLICAS ********************/ @@ -5708,7 +5808,7 @@ struct COMMAND_ARG SENTINEL_REMOVE_Args[] = { /* SENTINEL REPLICAS argument table */ struct COMMAND_ARG SENTINEL_REPLICAS_Args[] = { -{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; /********** SENTINEL RESET ********************/ @@ -5752,7 +5852,7 @@ struct COMMAND_ARG SENTINEL_RESET_Args[] = { /* SENTINEL SENTINELS argument table */ struct COMMAND_ARG SENTINEL_SENTINELS_Args[] = { -{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; /********** SENTINEL SET ********************/ @@ -5780,7 +5880,7 @@ struct COMMAND_ARG SENTINEL_SET_data_Subargs[] = { /* SENTINEL SET argument table */ struct COMMAND_ARG SENTINEL_SET_Args[] = { -{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=SENTINEL_SET_data_Subargs}, }; @@ -5832,7 +5932,7 @@ struct COMMAND_ARG SENTINEL_SIMULATE_FAILURE_Args[] = { /* SENTINEL SLAVES argument table */ struct COMMAND_ARG SENTINEL_SLAVES_Args[] = { 
-{MAKE_ARG("master-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("primary-name",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; /* SENTINEL command table */ @@ -5842,20 +5942,20 @@ struct COMMAND_STRUCT SENTINEL_Subcommands[] = { {MAKE_CMD("debug","Lists or updates the current configurable parameters of Sentinel.","O(N) where N is the number of configurable parameters","7.0.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_DEBUG_History,0,SENTINEL_DEBUG_Tips,0,sentinelCommand,-2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_DEBUG_Keyspecs,0,NULL,1),.args=SENTINEL_DEBUG_Args}, {MAKE_CMD("failover","Forces a Sentinel failover.",NULL,"2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_FAILOVER_History,0,SENTINEL_FAILOVER_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_FAILOVER_Keyspecs,0,NULL,1),.args=SENTINEL_FAILOVER_Args}, {MAKE_CMD("flushconfig","Rewrites the Sentinel configuration file.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_FLUSHCONFIG_History,0,SENTINEL_FLUSHCONFIG_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_FLUSHCONFIG_Keyspecs,0,NULL,0)}, -{MAKE_CMD("get-master-addr-by-name","Returns the port and address of a master instance.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_GET_MASTER_ADDR_BY_NAME_History,0,SENTINEL_GET_MASTER_ADDR_BY_NAME_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_GET_MASTER_ADDR_BY_NAME_Keyspecs,0,NULL,1),.args=SENTINEL_GET_MASTER_ADDR_BY_NAME_Args}, +{MAKE_CMD("get-master-addr-by-name","Returns the port and address of a primary instance.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_GET_MASTER_ADDR_BY_NAME_History,0,SENTINEL_GET_MASTER_ADDR_BY_NAME_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_GET_MASTER_ADDR_BY_NAME_Keyspecs,0,NULL,1),.args=SENTINEL_GET_MASTER_ADDR_BY_NAME_Args}, {MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_HELP_History,0,SENTINEL_HELP_Tips,0,sentinelCommand,2,CMD_LOADING|CMD_STALE|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_HELP_Keyspecs,0,NULL,0)}, {MAKE_CMD("info-cache","Returns the cached `INFO` replies from the deployment's instances.","O(N) where N is the number of instances","3.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_INFO_CACHE_History,0,SENTINEL_INFO_CACHE_Tips,0,sentinelCommand,-3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_INFO_CACHE_Keyspecs,0,NULL,1),.args=SENTINEL_INFO_CACHE_Args}, -{MAKE_CMD("is-master-down-by-addr","Determines whether a master instance is down.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_IS_MASTER_DOWN_BY_ADDR_History,0,SENTINEL_IS_MASTER_DOWN_BY_ADDR_Tips,0,sentinelCommand,6,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_IS_MASTER_DOWN_BY_ADDR_Keyspecs,0,NULL,4),.args=SENTINEL_IS_MASTER_DOWN_BY_ADDR_Args}, -{MAKE_CMD("master","Returns the state of a master instance.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MASTER_History,0,SENTINEL_MASTER_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MASTER_Keyspecs,0,NULL,1),.args=SENTINEL_MASTER_Args}, -{MAKE_CMD("masters","Returns a list of monitored masters.","O(N) where N is the number of 
masters","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MASTERS_History,0,SENTINEL_MASTERS_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MASTERS_Keyspecs,0,NULL,0)}, +{MAKE_CMD("is-master-down-by-addr","Determines whether a primary instance is down.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_IS_MASTER_DOWN_BY_ADDR_History,0,SENTINEL_IS_MASTER_DOWN_BY_ADDR_Tips,0,sentinelCommand,6,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_IS_MASTER_DOWN_BY_ADDR_Keyspecs,0,NULL,4),.args=SENTINEL_IS_MASTER_DOWN_BY_ADDR_Args}, +{MAKE_CMD("master","Returns the state of a primary instance.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MASTER_History,0,SENTINEL_MASTER_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MASTER_Keyspecs,0,NULL,1),.args=SENTINEL_MASTER_Args}, +{MAKE_CMD("masters","Returns a list of monitored primaries.","O(N) where N is the number of primaries","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MASTERS_History,0,SENTINEL_MASTERS_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MASTERS_Keyspecs,0,NULL,0)}, {MAKE_CMD("monitor","Starts monitoring.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MONITOR_History,0,SENTINEL_MONITOR_Tips,0,sentinelCommand,6,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MONITOR_Keyspecs,0,NULL,4),.args=SENTINEL_MONITOR_Args}, {MAKE_CMD("myid","Returns the Sentinel instance ID.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_MYID_History,0,SENTINEL_MYID_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_MYID_Keyspecs,0,NULL,0)}, {MAKE_CMD("pending-scripts","Returns information about pending scripts for Sentinel.",NULL,"2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_PENDING_SCRIPTS_History,0,SENTINEL_PENDING_SCRIPTS_Tips,0,sentinelCommand,2,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_PENDING_SCRIPTS_Keyspecs,0,NULL,0)}, {MAKE_CMD("remove","Stops monitoring.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_REMOVE_History,0,SENTINEL_REMOVE_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_REMOVE_Keyspecs,0,NULL,1),.args=SENTINEL_REMOVE_Args}, {MAKE_CMD("replicas","Returns a list of the monitored replicas.","O(N) where N is the number of replicas","5.0.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_REPLICAS_History,0,SENTINEL_REPLICAS_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_REPLICAS_Keyspecs,0,NULL,1),.args=SENTINEL_REPLICAS_Args}, -{MAKE_CMD("reset","Resets masters by name matching a pattern.","O(N) where N is the number of monitored masters","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_RESET_History,0,SENTINEL_RESET_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_RESET_Keyspecs,0,NULL,1),.args=SENTINEL_RESET_Args}, +{MAKE_CMD("reset","Resets primaries by name matching a pattern.","O(N) where N is the number of monitored primaries","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_RESET_History,0,SENTINEL_RESET_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_RESET_Keyspecs,0,NULL,1),.args=SENTINEL_RESET_Args}, {MAKE_CMD("sentinels","Returns a list of Sentinel instances.","O(N) where N is 
the number of Sentinels","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SENTINELS_History,0,SENTINEL_SENTINELS_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SENTINELS_Keyspecs,0,NULL,1),.args=SENTINEL_SENTINELS_Args}, -{MAKE_CMD("set","Changes the configuration of a monitored master.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SET_History,0,SENTINEL_SET_Tips,0,sentinelCommand,-5,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SET_Keyspecs,0,NULL,2),.args=SENTINEL_SET_Args}, +{MAKE_CMD("set","Changes the configuration of a monitored primary.","O(1)","2.8.4",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SET_History,0,SENTINEL_SET_Tips,0,sentinelCommand,-5,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SET_Keyspecs,0,NULL,2),.args=SENTINEL_SET_Args}, {MAKE_CMD("simulate-failure","Simulates failover scenarios.",NULL,"3.2.0",CMD_DOC_NONE,NULL,NULL,"sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SIMULATE_FAILURE_History,0,SENTINEL_SIMULATE_FAILURE_Tips,0,sentinelCommand,-3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SIMULATE_FAILURE_Keyspecs,0,NULL,1),.args=SENTINEL_SIMULATE_FAILURE_Args}, {MAKE_CMD("slaves","Returns a list of the monitored replicas.","O(N) where N is the number of replicas.","2.8.0",CMD_DOC_DEPRECATED,"`SENTINEL REPLICAS`","5.0.0","sentinel",COMMAND_GROUP_SENTINEL,SENTINEL_SLAVES_History,0,SENTINEL_SLAVES_Tips,0,sentinelCommand,3,CMD_ADMIN|CMD_SENTINEL|CMD_ONLY_SENTINEL,0,SENTINEL_SLAVES_Keyspecs,0,NULL,1),.args=SENTINEL_SLAVES_Args}, {0} @@ -10677,8 +10777,8 @@ struct COMMAND_STRUCT serverCommandTable[] = { /* cluster */ {MAKE_CMD("asking","Signals that a cluster client is following an -ASK redirect.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,ASKING_History,0,ASKING_Tips,0,askingCommand,1,CMD_FAST,ACL_CATEGORY_CONNECTION,ASKING_Keyspecs,0,NULL,0)}, {MAKE_CMD("cluster","A container for Cluster commands.","Depends on subcommand.","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_History,0,CLUSTER_Tips,0,NULL,-2,0,0,CLUSTER_Keyspecs,0,NULL,0),.subcommands=CLUSTER_Subcommands}, -{MAKE_CMD("readonly","Enables read-only queries for a connection to a Cluster replica node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,READONLY_History,0,READONLY_Tips,0,readonlyCommand,1,CMD_FAST|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,READONLY_Keyspecs,0,NULL,0)}, -{MAKE_CMD("readwrite","Enables read-write queries for a connection to a Reids Cluster replica node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,READWRITE_History,0,READWRITE_Tips,0,readwriteCommand,1,CMD_FAST|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,READWRITE_Keyspecs,0,NULL,0)}, +{MAKE_CMD("readonly","Enables read-only queries for a connection to a Valkey replica node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,READONLY_History,0,READONLY_Tips,0,readonlyCommand,1,CMD_FAST|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,READONLY_Keyspecs,0,NULL,0)}, +{MAKE_CMD("readwrite","Enables read-write queries for a connection to a Valkey replica node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,READWRITE_History,0,READWRITE_Tips,0,readwriteCommand,1,CMD_FAST|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,READWRITE_Keyspecs,0,NULL,0)}, /* connection */ {MAKE_CMD("auth","Authenticates the connection.","O(N) where N is the number of passwords defined for the 
user","1.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,AUTH_History,1,AUTH_Tips,0,authCommand,-2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_NO_AUTH|CMD_SENTINEL|CMD_ALLOW_BUSY,ACL_CATEGORY_CONNECTION,AUTH_Keyspecs,0,NULL,2),.args=AUTH_Args}, {MAKE_CMD("client","A container for client connection commands.","Depends on subcommand.","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_History,0,CLIENT_Tips,0,NULL,-2,CMD_SENTINEL,0,CLIENT_Keyspecs,0,NULL,0),.subcommands=CLIENT_Subcommands}, @@ -10717,7 +10817,7 @@ struct COMMAND_STRUCT serverCommandTable[] = { {MAKE_CMD("type","Determines the type of value stored at a key.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,TYPE_History,0,TYPE_Tips,0,typeCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,TYPE_Keyspecs,1,NULL,1),.args=TYPE_Args}, {MAKE_CMD("unlink","Asynchronously deletes one or more keys.","O(1) for each key removed regardless of its size. Then the command does O(N) work in a different thread in order to reclaim memory, where N is the number of allocations the deleted objects where composed of.","4.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,UNLINK_History,0,UNLINK_Tips,2,unlinkCommand,-2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,UNLINK_Keyspecs,1,NULL,1),.args=UNLINK_Args}, {MAKE_CMD("wait","Blocks until the asynchronous replication of all preceding write commands sent by the connection is completed.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,WAIT_History,0,WAIT_Tips,2,waitCommand,3,CMD_BLOCKING,ACL_CATEGORY_CONNECTION,WAIT_Keyspecs,0,NULL,2),.args=WAIT_Args}, -{MAKE_CMD("waitaof","Blocks until all of the preceding write commands sent by the connection are written to the append-only file of the master and/or replicas.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,WAITAOF_History,0,WAITAOF_Tips,2,waitaofCommand,4,CMD_BLOCKING,ACL_CATEGORY_CONNECTION,WAITAOF_Keyspecs,0,NULL,3),.args=WAITAOF_Args}, +{MAKE_CMD("waitaof","Blocks until all of the preceding write commands sent by the connection are written to the append-only file of the primary and/or replicas.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,WAITAOF_History,0,WAITAOF_Tips,2,waitaofCommand,4,CMD_BLOCKING,ACL_CATEGORY_CONNECTION,WAITAOF_Keyspecs,0,NULL,3),.args=WAITAOF_Args}, /* geo */ {MAKE_CMD("geoadd","Adds one or more members to a geospatial index. 
The key is created if it doesn't exist.","O(log(N)) for each item added, where N is the number of elements in the sorted set.","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOADD_History,1,GEOADD_Tips,0,geoaddCommand,-5,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_GEO,GEOADD_Keyspecs,1,NULL,4),.args=GEOADD_Args}, {MAKE_CMD("geodist","Returns the distance between two members of a geospatial index.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEODIST_History,0,GEODIST_Tips,0,geodistCommand,-4,CMD_READONLY,ACL_CATEGORY_GEO,GEODIST_Keyspecs,1,NULL,4),.args=GEODIST_Args}, @@ -10816,12 +10916,12 @@ struct COMMAND_STRUCT serverCommandTable[] = { {MAKE_CMD("monitor","Listens for all requests received by the server in real-time.",NULL,"1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MONITOR_History,0,MONITOR_Tips,0,monitorCommand,1,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,MONITOR_Keyspecs,0,NULL,0)}, {MAKE_CMD("psync","An internal command used in replication.",NULL,"2.8.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,PSYNC_History,0,PSYNC_Tips,0,syncCommand,-3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NO_MULTI|CMD_NOSCRIPT,0,PSYNC_Keyspecs,0,NULL,2),.args=PSYNC_Args}, {MAKE_CMD("replconf","An internal command for configuring the replication stream.","O(1)","3.0.0",CMD_DOC_SYSCMD,NULL,NULL,"server",COMMAND_GROUP_SERVER,REPLCONF_History,0,REPLCONF_Tips,0,replconfCommand,-1,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_ALLOW_BUSY,0,REPLCONF_Keyspecs,0,NULL,0)}, -{MAKE_CMD("replicaof","Configures a server as replica of another, or promotes it to a master.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,REPLICAOF_History,0,REPLICAOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,REPLICAOF_Keyspecs,0,NULL,1),.args=REPLICAOF_Args}, +{MAKE_CMD("replicaof","Configures a server as replica of another, or promotes it to a primary.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,REPLICAOF_History,0,REPLICAOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,REPLICAOF_Keyspecs,0,NULL,1),.args=REPLICAOF_Args}, {MAKE_CMD("restore-asking","An internal command for migrating keys in a cluster.","O(1) to create the new key and additional O(N*M) to reconstruct the serialized value, where N is the number of objects composing the value and M their average size. For small string values the time complexity is thus O(1)+O(1*M) where M is small, so simply O(1). 
However for sorted set values the complexity is O(N*M*log(N)) because inserting values into sorted sets is O(log(N)).","3.0.0",CMD_DOC_SYSCMD,NULL,NULL,"server",COMMAND_GROUP_SERVER,RESTORE_ASKING_History,3,RESTORE_ASKING_Tips,0,restoreCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_ASKING,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,RESTORE_ASKING_Keyspecs,1,NULL,7),.args=RESTORE_ASKING_Args}, {MAKE_CMD("role","Returns the replication role.","O(1)","2.8.12",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ROLE_History,0,ROLE_Tips,0,roleCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_SENTINEL,ACL_CATEGORY_ADMIN|ACL_CATEGORY_DANGEROUS,ROLE_Keyspecs,0,NULL,0)}, {MAKE_CMD("save","Synchronously saves the database(s) to disk.","O(N) where N is the total number of keys in all databases","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SAVE_History,0,SAVE_Tips,0,saveCommand,1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_NO_MULTI,0,SAVE_Keyspecs,0,NULL,0)}, {MAKE_CMD("shutdown","Synchronously saves the database(s) to disk and shuts down the server.","O(N) when saving, where N is the total number of keys in all databases when saving data, otherwise O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SHUTDOWN_History,1,SHUTDOWN_Tips,0,shutdownCommand,-1,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_NO_MULTI|CMD_SENTINEL|CMD_ALLOW_BUSY,0,SHUTDOWN_Keyspecs,0,NULL,1),.args=SHUTDOWN_Args}, -{MAKE_CMD("slaveof","Sets a server as a replica of another, or promotes it to being a master.","O(1)","1.0.0",CMD_DOC_DEPRECATED,"`REPLICAOF`","5.0.0","server",COMMAND_GROUP_SERVER,SLAVEOF_History,0,SLAVEOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,SLAVEOF_Keyspecs,0,NULL,1),.args=SLAVEOF_Args}, +{MAKE_CMD("slaveof","Sets a server as a replica of another, or promotes it to being a primary.","O(1)","1.0.0",CMD_DOC_DEPRECATED,"`REPLICAOF`","5.0.0","server",COMMAND_GROUP_SERVER,SLAVEOF_History,0,SLAVEOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,SLAVEOF_Keyspecs,0,NULL,1),.args=SLAVEOF_Args}, {MAKE_CMD("slowlog","A container for slow log commands.","Depends on subcommand.","2.2.12",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SLOWLOG_History,0,SLOWLOG_Tips,0,NULL,-2,0,0,SLOWLOG_Keyspecs,0,NULL,0),.subcommands=SLOWLOG_Subcommands}, {MAKE_CMD("swapdb","Swaps two databases.","O(N) where N is the count of clients watching or blocking on keys from both databases.","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SWAPDB_History,0,SWAPDB_Tips,0,swapdbCommand,3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,SWAPDB_Keyspecs,0,NULL,2),.args=SWAPDB_Args}, {MAKE_CMD("sync","An internal command used in replication.",NULL,"1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SYNC_History,0,SYNC_Tips,0,syncCommand,1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NO_MULTI|CMD_NOSCRIPT,0,SYNC_Keyspecs,0,NULL,0)}, @@ -10922,8 +11022,8 @@ struct COMMAND_STRUCT serverCommandTable[] = { /* transactions */ {MAKE_CMD("discard","Discards a transaction.","O(N), when N is the number of queued commands","2.0.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,DISCARD_History,0,DISCARD_Tips,0,discardCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,DISCARD_Keyspecs,0,NULL,0)}, {MAKE_CMD("exec","Executes all commands in a transaction.","Depends on commands in the 
transaction","1.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,EXEC_History,0,EXEC_Tips,0,execCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SKIP_SLOWLOG,ACL_CATEGORY_TRANSACTION,EXEC_Keyspecs,0,NULL,0)}, -{MAKE_CMD("multi","Starts a transaction.","O(1)","1.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,MULTI_History,0,MULTI_Tips,0,multiCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,MULTI_Keyspecs,0,NULL,0)}, +{MAKE_CMD("multi","Starts a transaction.","O(1)","1.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,MULTI_History,0,MULTI_Tips,0,multiCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_NO_MULTI|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,MULTI_Keyspecs,0,NULL,0)}, {MAKE_CMD("unwatch","Forgets about watched keys of a transaction.","O(1)","2.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,UNWATCH_History,0,UNWATCH_Tips,0,unwatchCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,UNWATCH_Keyspecs,0,NULL,0)}, -{MAKE_CMD("watch","Monitors changes to keys to determine the execution of a transaction.","O(1) for every key.","2.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,WATCH_History,0,WATCH_Tips,0,watchCommand,-2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,WATCH_Keyspecs,1,NULL,1),.args=WATCH_Args}, +{MAKE_CMD("watch","Monitors changes to keys to determine the execution of a transaction.","O(1) for every key.","2.2.0",CMD_DOC_NONE,NULL,NULL,"transactions",COMMAND_GROUP_TRANSACTIONS,WATCH_History,0,WATCH_Tips,0,watchCommand,-2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_NO_MULTI|CMD_ALLOW_BUSY,ACL_CATEGORY_TRANSACTION,WATCH_Keyspecs,1,NULL,1),.args=WATCH_Args}, {0} }; diff --git a/src/commands/client-capa.json b/src/commands/client-capa.json new file mode 100644 index 0000000000..3c16cd44f9 --- /dev/null +++ b/src/commands/client-capa.json @@ -0,0 +1,29 @@ +{ + "CAPA": { + "summary": "A client claims its capability.", + "complexity": "O(1)", + "group": "connection", + "since": "8.0.0", + "arity": -3, + "container": "CLIENT", + "function": "clientCommand", + "command_flags": [ + "NOSCRIPT", + "LOADING", + "STALE" + ], + "acl_categories": [ + "CONNECTION" + ], + "reply_schema": { + "const": "OK" + }, + "arguments": [ + { + "multiple": "true", + "name": "capability", + "type": "string" + } + ] + } +} diff --git a/src/commands/client-kill.json b/src/commands/client-kill.json index 01079ad993..97fa932cd8 100644 --- a/src/commands/client-kill.json +++ b/src/commands/client-kill.json @@ -31,6 +31,10 @@ [ "8.0.0", "`MAXAGE` option." + ], + [ + "8.0.0", + "Replaced `master` `TYPE` with `primary`. `master` still supported for backward compatibility." ] ], "command_flags": [ @@ -84,6 +88,12 @@ "token": "master", "since": "3.2.0" }, + { + "name": "primary", + "type": "pure-token", + "token": "primary", + "since": "8.0.0" + }, { "name": "slave", "type": "pure-token", diff --git a/src/commands/client-list.json b/src/commands/client-list.json index f72ffaf40a..d9c0054e60 100644 --- a/src/commands/client-list.json +++ b/src/commands/client-list.json @@ -31,6 +31,10 @@ [ "7.0.3", "Added `ssub` field." + ], + [ + "8.0.0", + "Replaced `master` `TYPE` with `primary`. `master` still supported for backward compatibility." 
] ], "command_flags": [ diff --git a/src/commands/cluster-failover.json b/src/commands/cluster-failover.json index f58fd562a7..9b31e310eb 100644 --- a/src/commands/cluster-failover.json +++ b/src/commands/cluster-failover.json @@ -1,6 +1,6 @@ { "FAILOVER": { - "summary": "Forces a replica to perform a manual failover of its master.", + "summary": "Forces a replica to perform a manual failover of its primary.", "complexity": "O(1)", "group": "cluster", "since": "3.0.0", diff --git a/src/commands/cluster-replicas.json b/src/commands/cluster-replicas.json index 4e8bd4204c..2fb47afea4 100644 --- a/src/commands/cluster-replicas.json +++ b/src/commands/cluster-replicas.json @@ -1,6 +1,6 @@ { "REPLICAS": { - "summary": "Lists the replica nodes of a master node.", + "summary": "Lists the replica nodes of a primary node.", "complexity": "O(N) where N is the number of replicas.", "group": "cluster", "since": "5.0.0", @@ -21,7 +21,7 @@ } ], "reply_schema": { - "description": "A list of replica nodes replicating from the specified master node provided in the same format used by CLUSTER NODES.", + "description": "A list of replica nodes replicating from the specified primary node provided in the same format used by CLUSTER NODES.", "type": "array", "items": { "type": "string", diff --git a/src/commands/cluster-replicate.json b/src/commands/cluster-replicate.json index 060d4af190..857a8022b8 100644 --- a/src/commands/cluster-replicate.json +++ b/src/commands/cluster-replicate.json @@ -1,6 +1,6 @@ { "REPLICATE": { - "summary": "Configure a node as replica of a master node.", + "summary": "Configure a node as replica of a primary node.", "complexity": "O(1)", "group": "cluster", "since": "3.0.0", diff --git a/src/commands/cluster-slaves.json b/src/commands/cluster-slaves.json index db66a1c1db..7059e544bb 100644 --- a/src/commands/cluster-slaves.json +++ b/src/commands/cluster-slaves.json @@ -1,6 +1,6 @@ { "SLAVES": { - "summary": "Lists the replica nodes of a master node.", + "summary": "Lists the replica nodes of a primary node.", "complexity": "O(N) where N is the number of replicas.", "group": "cluster", "since": "3.0.0", @@ -26,7 +26,7 @@ } ], "reply_schema": { - "description": "A list of replica nodes replicating from the specified master node provided in the same format used by CLUSTER NODES.", + "description": "A list of replica nodes replicating from the specified primary node provided in the same format used by CLUSTER NODES.", "type": "array", "items": { "type": "string", diff --git a/src/commands/cluster-slot-stats.json b/src/commands/cluster-slot-stats.json new file mode 100644 index 0000000000..7dfcd415ec --- /dev/null +++ b/src/commands/cluster-slot-stats.json @@ -0,0 +1,102 @@ +{ + "SLOT-STATS": { + "summary": "Return an array of slot usage statistics for slots assigned to the current node.", + "complexity": "O(N) where N is the total number of slots based on arguments. 
O(N*log(N)) with ORDERBY subcommand.", + "group": "cluster", + "since": "8.0.0", + "arity": -4, + "container": "CLUSTER", + "function": "clusterSlotStatsCommand", + "command_flags": [ + "STALE", + "LOADING" + ], + "command_tips": [ + "NONDETERMINISTIC_OUTPUT", + "REQUEST_POLICY:ALL_SHARDS" + ], + "reply_schema": { + "type": "array", + "description": "Array of nested arrays, where the inner array element represents a slot and its respective usage statistics.", + "items": { + "type": "array", + "description": "Array of size 2, where 0th index represents (int) slot and 1st index represents (map) usage statistics.", + "minItems": 2, + "maxItems": 2, + "items": [ + { + "description": "Slot Number.", + "type": "integer" + }, + { + "type": "object", + "description": "Map of slot usage statistics.", + "additionalProperties": false, + "properties": { + "key-count": { + "type": "integer" + } + } + } + ] + } + }, + "arguments": [ + { + "name": "filter", + "type": "oneof", + "arguments": [ + { + "token": "SLOTSRANGE", + "name": "slotsrange", + "type": "block", + "arguments": [ + { + "name": "start-slot", + "type": "integer" + }, + { + "name": "end-slot", + "type": "integer" + } + ] + }, + { + "token": "ORDERBY", + "name": "orderby", + "type": "block", + "arguments": [ + { + "name": "metric", + "type": "string" + }, + { + "token": "LIMIT", + "name": "limit", + "type": "integer", + "optional": true + }, + { + "name": "order", + "type": "oneof", + "optional": true, + "arguments": [ + { + "name": "asc", + "type": "pure-token", + "token": "ASC" + }, + { + "name": "desc", + "type": "pure-token", + "token": "DESC" + } + ] + } + ] + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/cluster-slots.json b/src/commands/cluster-slots.json index ca48f371ea..5d00280f15 100644 --- a/src/commands/cluster-slots.json +++ b/src/commands/cluster-slots.json @@ -42,7 +42,7 @@ }, { "type": "array", - "description": "Master node for the slot range.", + "description": "Primary node for the slot range.", "minItems": 4, "maxItems": 4, "items": [ diff --git a/src/commands/multi.json b/src/commands/multi.json index 5f17a1da29..e66fff0174 100644 --- a/src/commands/multi.json +++ b/src/commands/multi.json @@ -11,6 +11,7 @@ "LOADING", "STALE", "FAST", + "NO_MULTI", "ALLOW_BUSY" ], "acl_categories": [ diff --git a/src/commands/readonly.json b/src/commands/readonly.json index 4478cfb797..8fe27c6d99 100644 --- a/src/commands/readonly.json +++ b/src/commands/readonly.json @@ -1,6 +1,6 @@ { "READONLY": { - "summary": "Enables read-only queries for a connection to a Cluster replica node.", + "summary": "Enables read-only queries for a connection to a Valkey replica node.", "complexity": "O(1)", "group": "cluster", "since": "3.0.0", diff --git a/src/commands/readwrite.json b/src/commands/readwrite.json index 440dd596b9..dd3762ff8c 100644 --- a/src/commands/readwrite.json +++ b/src/commands/readwrite.json @@ -1,6 +1,6 @@ { "READWRITE": { - "summary": "Enables read-write queries for a connection to a Reids Cluster replica node.", + "summary": "Enables read-write queries for a connection to a Valkey replica node.", "complexity": "O(1)", "group": "cluster", "since": "3.0.0", diff --git a/src/commands/replicaof.json b/src/commands/replicaof.json index 6ddedf2d68..cd5102171c 100644 --- a/src/commands/replicaof.json +++ b/src/commands/replicaof.json @@ -1,6 +1,6 @@ { "REPLICAOF": { - "summary": "Configures a server as replica of another, or promotes it to a master.", + "summary": "Configures a server as replica of another, 
or promotes it to a primary.", "complexity": "O(1)", "group": "server", "since": "5.0.0", diff --git a/src/commands/role.json b/src/commands/role.json index 1c3a4490ca..d31396faf6 100644 --- a/src/commands/role.json +++ b/src/commands/role.json @@ -28,7 +28,7 @@ "const": "master" }, { - "description": "Current replication master offset.", + "description": "Current replication primary offset.", "type": "integer" }, { @@ -65,18 +65,18 @@ "const": "slave" }, { - "description": "IP of master.", + "description": "IP of primary.", "type": "string" }, { - "description": "Port number of master.", + "description": "Port number of primary.", "type": "integer" }, { - "description": "State of the replication from the point of view of the master.", + "description": "State of the replication from the point of view of the primary.", "oneOf": [ { - "description": "The instance is in handshake with its master.", + "description": "The instance is in handshake with its primary.", "const": "handshake" }, { @@ -84,15 +84,15 @@ "const": "none" }, { - "description": "The instance needs to connect to its master.", + "description": "The instance needs to connect to its primary.", "const": "connect" }, { - "description": "The master-replica connection is in progress.", + "description": "The primary-replica connection is in progress.", "const": "connecting" }, { - "description": "The master and replica are trying to perform the synchronization.", + "description": "The primary and replica are trying to perform the synchronization.", "const": "sync" }, { @@ -106,7 +106,7 @@ ] }, { - "description": "The amount of data received from the replica so far in terms of master replication offset.", + "description": "The amount of data received from the replica so far in terms of primary replication offset.", "type": "integer" } ] @@ -120,7 +120,7 @@ "const": "sentinel" }, { - "description": "List of master names monitored by this sentinel instance.", + "description": "List of primary names monitored by this sentinel instance.", "type": "array", "items": { "type": "string" diff --git a/src/commands/script-show.json b/src/commands/script-show.json new file mode 100644 index 0000000000..f22fa29675 --- /dev/null +++ b/src/commands/script-show.json @@ -0,0 +1,27 @@ +{ + "SHOW": { + "summary": "Show server-side Lua script in the script cache.", + "complexity": "O(1).", + "group": "scripting", + "since": "8.0.0", + "arity": 3, + "container": "SCRIPT", + "function": "scriptCommand", + "command_flags": [ + "NOSCRIPT" + ], + "acl_categories": [ + "SCRIPTING" + ], + "arguments": [ + { + "name": "sha1", + "type": "string" + } + ], + "reply_schema": { + "description": "Lua script if sha1 hash exists in script cache.", + "type": "string" + } + } +} diff --git a/src/commands/sentinel-ckquorum.json b/src/commands/sentinel-ckquorum.json index e79132303f..7f4428c3fe 100644 --- a/src/commands/sentinel-ckquorum.json +++ b/src/commands/sentinel-ckquorum.json @@ -13,12 +13,12 @@ ], "reply_schema": { "type": "string", - "description": "Returns OK if the current Sentinel configuration is able to reach the quorum needed to failover a master, and the majority needed to authorize the failover.", + "description": "Returns OK if the current Sentinel configuration is able to reach the quorum needed to failover a primary, and the majority needed to authorize the failover.", "pattern": "OK" }, "arguments": [ { - "name": "master-name", + "name": "primary-name", "type": "string" } ] diff --git a/src/commands/sentinel-failover.json 
b/src/commands/sentinel-failover.json index 8a211990f2..8e7c3ea3e7 100644 --- a/src/commands/sentinel-failover.json +++ b/src/commands/sentinel-failover.json @@ -13,11 +13,11 @@ ], "reply_schema": { "const": "OK", - "description": "Force a fail over as if the master was not reachable, and without asking for agreement to other Sentinels." + "description": "Force a fail over as if the primary was not reachable, and without asking for agreement to other Sentinels." }, "arguments": [ { - "name": "master-name", + "name": "primary-name", "type": "string" } ] diff --git a/src/commands/sentinel-get-master-addr-by-name.json b/src/commands/sentinel-get-master-addr-by-name.json index 1bcbec5341..2d7fc50eda 100644 --- a/src/commands/sentinel-get-master-addr-by-name.json +++ b/src/commands/sentinel-get-master-addr-by-name.json @@ -1,6 +1,6 @@ { "GET-MASTER-ADDR-BY-NAME": { - "summary": "Returns the port and address of a master instance.", + "summary": "Returns the port and address of a primary instance.", "complexity": "O(1)", "group": "sentinel", "since": "2.8.4", @@ -30,7 +30,7 @@ }, "arguments": [ { - "name": "master-name", + "name": "primary-name", "type": "string" } ] diff --git a/src/commands/sentinel-info-cache.json b/src/commands/sentinel-info-cache.json index af89f182ea..44edcf35e3 100644 --- a/src/commands/sentinel-info-cache.json +++ b/src/commands/sentinel-info-cache.json @@ -14,7 +14,7 @@ ], "reply_schema": { "type": "array", - "description": "This is actually a map, the odd entries are a master name, and the even entries are the last cached INFO output from that master and all its replicas.", + "description": "This is actually a map, the odd entries are a primary name, and the even entries are the last cached INFO output from that primary and all its replicas.", "minItems": 0, "maxItems": 4294967295, "items": [ @@ -22,11 +22,11 @@ "oneOf": [ { "type": "string", - "description": "The master name." + "description": "The primary name." }, { "type": "array", - "description": "This is an array of pairs, the odd entries are the INFO age, and the even entries are the cached INFO string. The first pair belong to the master and the rest are its replicas.", + "description": "This is an array of pairs, the odd entries are the INFO age, and the even entries are the cached INFO string. The first pair belong to the primary and the rest are its replicas.", "minItems": 2, "maxItems": 2, "items": [ diff --git a/src/commands/sentinel-is-master-down-by-addr.json b/src/commands/sentinel-is-master-down-by-addr.json index fd7698014c..3ecf8723fb 100644 --- a/src/commands/sentinel-is-master-down-by-addr.json +++ b/src/commands/sentinel-is-master-down-by-addr.json @@ -1,6 +1,6 @@ { "IS-MASTER-DOWN-BY-ADDR": { - "summary": "Determines whether a master instance is down.", + "summary": "Determines whether a primary instance is down.", "complexity": "O(1)", "group": "sentinel", "since": "2.8.4", @@ -21,11 +21,11 @@ "oneOf": [ { "const": 0, - "description": "Master is up." + "description": "Primary is up." }, { "const": 1, - "description": "Master is down." + "description": "Primary is down." 
} ] }, diff --git a/src/commands/sentinel-master.json b/src/commands/sentinel-master.json index ff94617aeb..3af3227394 100644 --- a/src/commands/sentinel-master.json +++ b/src/commands/sentinel-master.json @@ -1,6 +1,6 @@ { "MASTER": { - "summary": "Returns the state of a master instance.", + "summary": "Returns the state of a primary instance.", "complexity": "O(1)", "group": "sentinel", "since": "2.8.4", @@ -14,14 +14,14 @@ ], "reply_schema": { "type": "object", - "description": "The state and info of the specified master.", + "description": "The state and info of the specified primary.", "additionalProperties": { "type": "string" } }, "arguments": [ { - "name": "master-name", + "name": "primary-name", "type": "string" } ] diff --git a/src/commands/sentinel-masters.json b/src/commands/sentinel-masters.json index 26992585a1..b6aa86d02a 100644 --- a/src/commands/sentinel-masters.json +++ b/src/commands/sentinel-masters.json @@ -1,7 +1,7 @@ { "MASTERS": { - "summary": "Returns a list of monitored masters.", - "complexity": "O(N) where N is the number of masters", + "summary": "Returns a list of monitored primaries.", + "complexity": "O(N) where N is the number of primaries", "group": "sentinel", "since": "2.8.4", "arity": 2, @@ -14,7 +14,7 @@ ], "reply_schema": { "type": "array", - "description": "List of monitored masters, and their state.", + "description": "List of monitored primaries, and their state.", "items": { "type": "object", "additionalProperties": { diff --git a/src/commands/sentinel-remove.json b/src/commands/sentinel-remove.json index 1fe084f42c..7d545c3715 100644 --- a/src/commands/sentinel-remove.json +++ b/src/commands/sentinel-remove.json @@ -17,7 +17,7 @@ }, "arguments": [ { - "name": "master-name", + "name": "primary-name", "type": "string" } ] diff --git a/src/commands/sentinel-replicas.json b/src/commands/sentinel-replicas.json index 32b04e994a..a81ed0ef00 100644 --- a/src/commands/sentinel-replicas.json +++ b/src/commands/sentinel-replicas.json @@ -14,7 +14,7 @@ ], "reply_schema": { "type": "array", - "description": "List of replicas for this master, and their state.", + "description": "List of replicas for this primary, and their state.", "items": { "type": "object", "additionalProperties": { @@ -24,7 +24,7 @@ }, "arguments": [ { - "name": "master-name", + "name": "primary-name", "type": "string" } ] diff --git a/src/commands/sentinel-reset.json b/src/commands/sentinel-reset.json index 5d2a63f3d5..35153609cb 100644 --- a/src/commands/sentinel-reset.json +++ b/src/commands/sentinel-reset.json @@ -1,7 +1,7 @@ { "RESET": { - "summary": "Resets masters by name matching a pattern.", - "complexity": "O(N) where N is the number of monitored masters", + "summary": "Resets primaries by name matching a pattern.", + "complexity": "O(N) where N is the number of monitored primaries", "group": "sentinel", "since": "2.8.4", "arity": 3, @@ -14,7 +14,7 @@ ], "reply_schema": { "type": "integer", - "description": "The number of masters that were reset." + "description": "The number of primaries that were reset." 
}, "arguments": [ { diff --git a/src/commands/sentinel-sentinels.json b/src/commands/sentinel-sentinels.json index fdaa5cb992..dae12c5a9b 100644 --- a/src/commands/sentinel-sentinels.json +++ b/src/commands/sentinel-sentinels.json @@ -24,7 +24,7 @@ }, "arguments": [ { - "name": "master-name", + "name": "primary-name", "type": "string" } ] diff --git a/src/commands/sentinel-set.json b/src/commands/sentinel-set.json index abca33b89a..43523e6d6b 100644 --- a/src/commands/sentinel-set.json +++ b/src/commands/sentinel-set.json @@ -1,6 +1,6 @@ { "SET": { - "summary": "Changes the configuration of a monitored master.", + "summary": "Changes the configuration of a monitored primary.", "complexity": "O(1)", "group": "sentinel", "since": "2.8.4", @@ -17,7 +17,7 @@ }, "arguments": [ { - "name": "master-name", + "name": "primary-name", "type": "string" }, { diff --git a/src/commands/sentinel-slaves.json b/src/commands/sentinel-slaves.json index c1fec41bb2..9792270982 100644 --- a/src/commands/sentinel-slaves.json +++ b/src/commands/sentinel-slaves.json @@ -29,7 +29,7 @@ }, "arguments": [ { - "name": "master-name", + "name": "primary-name", "type": "string" } ] diff --git a/src/commands/slaveof.json b/src/commands/slaveof.json index ca30982887..509bdfbee3 100644 --- a/src/commands/slaveof.json +++ b/src/commands/slaveof.json @@ -1,6 +1,6 @@ { "SLAVEOF": { - "summary": "Sets a server as a replica of another, or promotes it to being a master.", + "summary": "Sets a server as a replica of another, or promotes it to being a primary.", "complexity": "O(1)", "group": "server", "since": "1.0.0", diff --git a/src/commands/waitaof.json b/src/commands/waitaof.json index 19b514c274..d664000b5f 100644 --- a/src/commands/waitaof.json +++ b/src/commands/waitaof.json @@ -1,6 +1,6 @@ { "WAITAOF": { - "summary": "Blocks until all of the preceding write commands sent by the connection are written to the append-only file of the master and/or replicas.", + "summary": "Blocks until all of the preceding write commands sent by the connection are written to the append-only file of the primary and/or replicas.", "complexity": "O(1)", "group": "generic", "since": "7.2.0", diff --git a/src/commands/watch.json b/src/commands/watch.json index 9faab2b917..588cea72b4 100644 --- a/src/commands/watch.json +++ b/src/commands/watch.json @@ -11,6 +11,7 @@ "LOADING", "STALE", "FAST", + "NO_MULTI", "ALLOW_BUSY" ], "acl_categories": [ diff --git a/src/config.c b/src/config.c index 83e2a51db1..3c82f9ee7a 100644 --- a/src/config.c +++ b/src/config.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -590,6 +591,9 @@ void loadServerConfigFromString(char *config) { if (server.config_hz < CONFIG_MIN_HZ) server.config_hz = CONFIG_MIN_HZ; if (server.config_hz > CONFIG_MAX_HZ) server.config_hz = CONFIG_MAX_HZ; + /* To ensure backward compatibility when io_threads_num is according to the previous maximum of 128. 
*/ + if (server.io_threads_num > IO_THREADS_MAX_NUM) server.io_threads_num = IO_THREADS_MAX_NUM; + sdsfreesplitres(lines, totlines); reading_config_file = 0; return; @@ -2051,6 +2055,7 @@ static void numericConfigInit(standardConfig *config) { static int numericBoundaryCheck(standardConfig *config, long long ll, const char **err) { if (config->data.numeric.numeric_type == NUMERIC_TYPE_ULONG_LONG || + config->data.numeric.numeric_type == NUMERIC_TYPE_ULONG || config->data.numeric.numeric_type == NUMERIC_TYPE_UINT || config->data.numeric.numeric_type == NUMERIC_TYPE_SIZE_T) { /* Boundary check for unsigned types */ @@ -2378,6 +2383,24 @@ static int isValidAnnouncedHostname(char *val, const char **err) { return 1; } +static int isValidIpV4(char *val, const char **err) { + struct sockaddr_in sa; + if (val[0] != '\0' && inet_pton(AF_INET, val, &(sa.sin_addr)) == 0) { + *err = "Invalid IPv4 address"; + return 0; + } + return 1; +} + +static int isValidIpV6(char *val, const char **err) { + struct sockaddr_in6 sa; + if (val[0] != '\0' && inet_pton(AF_INET6, val, &(sa.sin6_addr)) == 0) { + *err = "Invalid IPv6 address"; + return 0; + } + return 1; +} + /* Validate specified string is a valid proc-title-template */ static int isValidProcTitleTemplate(char *val, const char **err) { if (!validateProcTitleTemplate(val)) { @@ -2619,6 +2642,18 @@ static int updateClusterIp(const char **err) { return 1; } +int updateClusterClientIpV4(const char **err) { + UNUSED(err); + clusterUpdateMyselfClientIpV4(); + return 1; +} + +int updateClusterClientIpV6(const char **err) { + UNUSED(err); + clusterUpdateMyselfClientIpV6(); + return 1; +} + int updateClusterHostname(const char **err) { UNUSED(err); clusterUpdateMyselfHostname(); @@ -2891,7 +2926,7 @@ static int setConfigReplicaOfOption(standardConfig *config, sds *argv, int argc, char *ptr; server.primary_port = strtol(argv[1], &ptr, 10); if (server.primary_port < 0 || server.primary_port > 65535 || *ptr != '\0') { - *err = "Invalid master port"; + *err = "Invalid primary port"; return 0; } server.primary_host = sdsnew(argv[0]); @@ -3023,7 +3058,7 @@ standardConfig static_configs[] = { /* Bool configs */ createBoolConfig("rdbchecksum", NULL, IMMUTABLE_CONFIG, server.rdb_checksum, 1, NULL, NULL), createBoolConfig("daemonize", NULL, IMMUTABLE_CONFIG, server.daemonize, 0, NULL, NULL), - createBoolConfig("io-threads-do-reads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, server.io_threads_do_reads, 0, NULL, NULL), /* Read + parse from threads? 
*/ + createBoolConfig("io-threads-do-reads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, server.io_threads_do_reads, 1, NULL, NULL), /* Read + parse from threads */ createBoolConfig("always-show-logo", NULL, IMMUTABLE_CONFIG, server.always_show_logo, 0, NULL, NULL), createBoolConfig("protected-mode", NULL, MODIFIABLE_CONFIG, server.protected_mode, 1, NULL, NULL), createBoolConfig("rdbcompression", NULL, MODIFIABLE_CONFIG, server.rdb_compression, 1, NULL, NULL), @@ -3077,6 +3112,8 @@ standardConfig static_configs[] = { createStringConfig("replica-announce-ip", "slave-announce-ip", MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.replica_announce_ip, NULL, NULL, NULL), createStringConfig("primaryuser", "masteruser", MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.primary_user, NULL, NULL, NULL), createStringConfig("cluster-announce-ip", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_ip, NULL, NULL, updateClusterIp), + createStringConfig("cluster-announce-client-ipv4", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_client_ipv4, NULL, isValidIpV4, updateClusterClientIpV4), + createStringConfig("cluster-announce-client-ipv6", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_client_ipv6, NULL, isValidIpV6, updateClusterClientIpV6), createStringConfig("cluster-config-file", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.cluster_configfile, "nodes.conf", isValidClusterConfigFile, NULL), createStringConfig("cluster-announce-hostname", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_hostname, NULL, isValidAnnouncedHostname, updateClusterHostname), createStringConfig("cluster-announce-human-nodename", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_human_nodename, NULL, isValidAnnouncedNodename, updateClusterHumanNodename), @@ -3100,6 +3137,7 @@ standardConfig static_configs[] = { /* SDS Configs */ createSDSConfig("primaryauth", "masterauth", MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.primary_auth, NULL, NULL, NULL), createSDSConfig("requirepass", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.requirepass, NULL, NULL, updateRequirePass), + createSDSConfig("availability-zone", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.availability_zone, "", NULL, NULL), /* Enum Configs */ createEnumConfig("supervised", NULL, IMMUTABLE_CONFIG, supervised_mode_enum, server.supervised_mode, SUPERVISED_NONE, NULL, NULL), @@ -3123,6 +3161,7 @@ standardConfig static_configs[] = { createIntConfig("databases", NULL, IMMUTABLE_CONFIG, 1, INT_MAX, server.dbnum, 16, INTEGER_CONFIG, NULL, NULL), createIntConfig("port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.port, 6379, INTEGER_CONFIG, NULL, updatePort), /* TCP port. */ createIntConfig("io-threads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, 1, 128, server.io_threads_num, 1, INTEGER_CONFIG, NULL, NULL), /* Single threaded by default */ + createIntConfig("events-per-io-thread", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.events_per_io_thread, 2, INTEGER_CONFIG, NULL, NULL), createIntConfig("auto-aof-rewrite-percentage", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.aof_rewrite_perc, 100, INTEGER_CONFIG, NULL, NULL), createIntConfig("cluster-replica-validity-factor", "cluster-slave-validity-factor", MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_replica_validity_factor, 10, INTEGER_CONFIG, NULL, NULL), /* replica max data age factor. 
*/ createIntConfig("list-max-listpack-size", "list-max-ziplist-size", MODIFIABLE_CONFIG, INT_MIN, INT_MAX, server.list_max_listpack_size, -2, INTEGER_CONFIG, NULL, NULL), diff --git a/src/config.h b/src/config.h index e5adb785aa..201e421976 100644 --- a/src/config.h +++ b/src/config.h @@ -264,6 +264,15 @@ void setproctitle(const char *fmt, ...); #error "Undefined or invalid BYTE_ORDER" #endif +/* Cache line alignment */ +#ifndef CACHE_LINE_SIZE +#if defined(__aarch64__) && defined(__APPLE__) +#define CACHE_LINE_SIZE 128 +#else +#define CACHE_LINE_SIZE 64 +#endif /* __aarch64__ && __APPLE__ */ +#endif /* CACHE_LINE_SIZE */ + #if (__i386 || __amd64 || __powerpc__) && __GNUC__ #define GNUC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) #if defined(__clang__) @@ -329,4 +338,14 @@ void setcpuaffinity(const char *cpulist); #define HAVE_FADVISE #endif +#define IO_THREADS_MAX_NUM 16 + +#ifndef CACHE_LINE_SIZE +#if defined(__aarch64__) && defined(__APPLE__) +#define CACHE_LINE_SIZE 128 +#else +#define CACHE_LINE_SIZE 64 +#endif +#endif + #endif diff --git a/src/connection.h b/src/connection.h index 3de581b417..d59f7bc7fc 100644 --- a/src/connection.h +++ b/src/connection.h @@ -62,8 +62,6 @@ typedef enum { #define CONN_TYPE_TLS "tls" #define CONN_TYPE_MAX 8 /* 8 is enough to be extendable */ -typedef enum connTypeForCaching { CACHE_CONN_TCP, CACHE_CONN_TLS, CACHE_CONN_TYPE_MAX } connTypeForCaching; - typedef void (*ConnectionCallbackFunc)(struct connection *conn); typedef struct ConnectionType { @@ -112,6 +110,12 @@ typedef struct ConnectionType { int (*has_pending_data)(void); int (*process_pending_data)(void); + /* Postpone update state - with IO threads & TLS we don't want the IO threads to update the event loop events - let + * the main-thread do it */ + void (*postpone_update_state)(struct connection *conn, int); + /* Called by the main-thread */ + void (*update_state)(struct connection *conn); + /* TLS specified methods */ sds (*get_peer_cert)(struct connection *conn); } ConnectionType; @@ -456,4 +460,16 @@ static inline int connIsTLS(connection *conn) { return conn && conn->type == connectionTypeTls(); } +static inline void connUpdateState(connection *conn) { + if (conn->type->update_state) { + conn->type->update_state(conn); + } +} + +static inline void connSetPostponeUpdateState(connection *conn, int on) { + if (conn->type->postpone_update_state) { + conn->type->postpone_update_state(conn, on); + } +} + #endif /* __REDIS_CONNECTION_H */ diff --git a/src/db.c b/src/db.c index c879b2ffb5..5a6562a1e2 100644 --- a/src/db.c +++ b/src/db.c @@ -118,7 +118,7 @@ robj *lookupKey(serverDb *db, robj *key, int flags) { /* Update the access time for the ageing algorithm. * Don't do it if we have a saving child, as this will trigger * a copy on write madness. */ - if (server.current_client && server.current_client->flags & CLIENT_NO_TOUCH && + if (server.current_client && server.current_client->flag.no_touch && server.current_client->cmd->proc != touchCommand) flags |= LOOKUP_NOTOUCH; if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH)) { @@ -190,7 +190,11 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) { return o; } -/* Add the key to the DB. It's up to the caller to increment the reference +/* Add the key to the DB. + * + * In this case a copy of `key` is copied in kvstore, the caller must ensure the `key` is properly freed. + * + * It's up to the caller to increment the reference * counter of the value if needed. 
* * If the update_if_existing argument is false, the program is aborted @@ -204,7 +208,6 @@ static void dbAddInternal(serverDb *db, robj *key, robj *val, int update_if_exis return; } serverAssertWithInfo(NULL, key, de != NULL); - kvstoreDictSetKey(db->keys, slot, de, sdsdup(key->ptr)); initObjectLRUOrLFU(val); kvstoreDictSetVal(db->keys, slot, de, val); signalKeyAsReady(db, key, val->type); @@ -231,8 +234,7 @@ int getKeySlot(sds key) { * It only gets set during the execution of command under `call` method. Other flows requesting * the key slot would fallback to calculateKeySlot. */ - if (server.current_client && server.current_client->slot >= 0 && - server.current_client->flags & CLIENT_EXECUTING_COMMAND) { + if (server.current_client && server.current_client->slot >= 0 && server.current_client->flag.executing_command) { debugServerAssertWithInfo(server.current_client, NULL, calculateKeySlot(key) == server.current_client->slot); return server.current_client->slot; } @@ -241,15 +243,16 @@ int getKeySlot(sds key) { /* This is a special version of dbAdd() that is used only when loading * keys from the RDB file: the key is passed as an SDS string that is - * retained by the function (and not freed by the caller). + * copied by the function and freed by the caller. * * Moreover this function will not abort if the key is already busy, to * give more control to the caller, nor will signal the key as ready * since it is not useful in this context. * - * The function returns 1 if the key was added to the database, taking - * ownership of the SDS string, otherwise 0 is returned, and is up to the - * caller to free the SDS string. */ + * The function returns 1 if the key was added to the database, otherwise 0 is returned. + * + * In this case a copy of `key` is copied in kvstore, the caller must ensure the `key` is properly freed. + */ int dbAddRDBLoad(serverDb *db, sds key, robj *val) { int slot = getKeySlot(key); dictEntry *de = kvstoreDictAddRaw(db->keys, slot, key, NULL); @@ -822,7 +825,7 @@ void keysCommand(client *c) { numkeys++; } } - if (c->flags & CLIENT_CLOSE_ASAP) break; + if (c->flag.close_asap) break; } if (kvs_di) kvstoreReleaseDictIterator(kvs_di); if (kvs_it) kvstoreIteratorRelease(kvs_it); @@ -1238,7 +1241,7 @@ void shutdownCommand(client *c) { return; } - if (!(flags & SHUTDOWN_NOW) && c->flags & CLIENT_DENY_BLOCKING) { + if (!(flags & SHUTDOWN_NOW) && c->flag.deny_blocking) { addReplyError(c, "SHUTDOWN without NOW or ABORT isn't allowed for DENY BLOCKING client"); return; } @@ -1667,7 +1670,7 @@ void setExpire(client *c, serverDb *db, robj *key, long long when) { } int writable_replica = server.primary_host && server.repl_replica_ro == 0; - if (c && writable_replica && !(c->flags & CLIENT_PRIMARY)) rememberReplicaKeyWithExpire(db, key); + if (c && writable_replica && !c->flag.primary) rememberReplicaKeyWithExpire(db, key); } /* Return the expire time of the specified key, or -1 if no expire @@ -1796,7 +1799,7 @@ keyStatus expireIfNeeded(serverDb *db, robj *key, int flags) { * When replicating commands from the primary, keys are never considered * expired. 
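The reworded dbAddRDBLoad() comment above changes the ownership contract: the kvstore now stores its own copy of the key, so the loader no longer hands off the sds it decoded. A minimal sketch of the caller-side pattern this implies, assuming an sds key decoded from the RDB stream; this is illustrative only, not the actual rdb.c loading code:

#include "server.h" /* serverDb, robj, sds, dbAddRDBLoad(), decrRefCount(), sdsfree() */

/* Illustrative caller of dbAddRDBLoad() under the new ownership rules:
 * the key is copied into the kvstore, so the caller frees its own copy
 * whether or not the add succeeded. */
static void loadOneKey(serverDb *db, sds key, robj *val) {
    if (dbAddRDBLoad(db, key, val)) {
        /* Key added; the db now holds its own copy of the key string. */
    } else {
        /* Key already existed; drop the value we prepared for it. */
        decrRefCount(val);
    }
    sdsfree(key); /* Always freed by the caller under the new contract. */
}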
*/ if (server.primary_host != NULL) { - if (server.current_client && (server.current_client->flags & CLIENT_PRIMARY)) return KEY_VALID; + if (server.current_client && (server.current_client->flag.primary)) return KEY_VALID; if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return KEY_EXPIRED; } diff --git a/src/debug.c b/src/debug.c index 6394e3f0f4..9501b8a658 100644 --- a/src/debug.c +++ b/src/debug.c @@ -37,6 +37,7 @@ #include "fpconv_dtoa.h" #include "cluster.h" #include "threads_mngr.h" +#include "io_threads.h" #include #include @@ -429,6 +430,9 @@ void debugCommand(client *c) { " Show low level info about `key` and associated value.", "DROP-CLUSTER-PACKET-FILTER ", " Drop all packets that match the filtered type. Set to -1 allow all packets.", + "CLOSE-CLUSTER-LINK-ON-PACKET-DROP <0|1>", + " This is valid only when DROP-CLUSTER-PACKET-FILTER is set to a valid packet type.", + " When set to 1, the cluster link is closed after dropping a packet based on the filter.", "OOM", " Crash the server simulating an out-of-memory error.", "PANIC", @@ -593,6 +597,9 @@ void debugCommand(client *c) { if (getLongFromObjectOrReply(c, c->argv[2], &packet_type, NULL) != C_OK) return; server.cluster_drop_packet_filter = packet_type; addReply(c, shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr, "close-cluster-link-on-packet-drop") && c->argc == 3) { + server.debug_cluster_close_link_on_packet_drop = atoi(c->argv[2]->ptr); + addReply(c, shared.ok); } else if (!strcasecmp(c->argv[1]->ptr, "object") && c->argc == 3) { dictEntry *de; robj *val; @@ -798,12 +805,12 @@ void debugCommand(client *c) { addReplyError(c, "RESP2 is not supported by this command"); return; } - uint64_t old_flags = c->flags; - c->flags |= CLIENT_PUSHING; + struct ClientFlags old_flags = c->flag; + c->flag.pushing = 1; addReplyPushLen(c, 2); addReplyBulkCString(c, "server-cpu-usage"); addReplyLongLong(c, 42); - if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING; + if (!old_flags.pushing) c->flag.pushing = 0; /* Push replies are not synchronous replies, so we emit also a * normal reply in order for blocking clients just discarding the * push reply, to actually consume the reply and continue. */ @@ -858,7 +865,7 @@ void debugCommand(client *c) { sds sizes = sdsempty(); sizes = sdscatprintf(sizes, "bits:%d ", (sizeof(void *) == 8) ? 
64 : 32); sizes = sdscatprintf(sizes, "robj:%d ", (int)sizeof(robj)); - sizes = sdscatprintf(sizes, "dictentry:%d ", (int)dictEntryMemUsage()); + sizes = sdscatprintf(sizes, "dictentry:%d ", (int)dictEntryMemUsage(NULL)); sizes = sdscatprintf(sizes, "sdshdr5:%d ", (int)sizeof(struct sdshdr5)); sizes = sdscatprintf(sizes, "sdshdr8:%d ", (int)sizeof(struct sdshdr8)); sizes = sdscatprintf(sizes, "sdshdr16:%d ", (int)sizeof(struct sdshdr16)); @@ -1020,7 +1027,7 @@ void _serverAssertPrintClientInfo(const client *c) { bugReportStart(); serverLog(LL_WARNING, "=== ASSERTION FAILED CLIENT CONTEXT ==="); - serverLog(LL_WARNING, "client->flags = %llu", (unsigned long long)c->flags); + serverLog(LL_WARNING, "client->flags = %llu", (unsigned long long)c->raw_flag); serverLog(LL_WARNING, "client->conn = %s", connGetInfo(c->conn, conninfo, sizeof(conninfo))); serverLog(LL_WARNING, "client->argc = %d", c->argc); for (j = 0; j < c->argc; j++) { @@ -2153,6 +2160,7 @@ void removeSigSegvHandlers(void) { } void printCrashReport(void) { + server.crashed = 1; /* Log INFO and CLIENT LIST */ logServerInfo(); diff --git a/src/defrag.c b/src/defrag.c index 2de1c061e8..5a54875864 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -41,6 +41,7 @@ typedef struct defragCtx { void *privdata; int slot; + void *aux; } defragCtx; typedef struct defragPubSubCtx { @@ -75,6 +76,36 @@ void *activeDefragAlloc(void *ptr) { return newptr; } +/* This method captures the expiry db dict entry which refers to data stored in keys db dict entry. */ +void defragEntryStartCbForKeys(void *ctx, void *oldptr) { + defragCtx *defragctx = (defragCtx *)ctx; + serverDb *db = defragctx->privdata; + sds oldsds = (sds)dictGetKey((dictEntry *)oldptr); + int slot = defragctx->slot; + if (kvstoreDictSize(db->expires, slot)) { + dictEntry *expire_de = kvstoreDictFind(db->expires, slot, oldsds); + defragctx->aux = expire_de; + } +} + +/* This method updates the key of expiry db dict entry. The key might be no longer valid + * as it could have been cleaned up during the defrag-realloc of the main dictionary. */ +void defragEntryFinishCbForKeys(void *ctx, void *newptr) { + defragCtx *defragctx = (defragCtx *)ctx; + dictEntry *expire_de = (dictEntry *)defragctx->aux; + /* Item doesn't have TTL associated to it. */ + if (!expire_de) return; + /* No reallocation happened. */ + if (!newptr) { + expire_de = NULL; + return; + } + serverDb *db = defragctx->privdata; + sds newsds = (sds)dictGetKey((dictEntry *)newptr); + int slot = defragctx->slot; + kvstoreDictSetKey(db->expires, slot, expire_de, newsds); +} + /*Defrag helper for sds strings * * returns NULL in case the allocation wasn't moved. @@ -650,25 +681,10 @@ void defragModule(serverDb *db, dictEntry *kde) { /* for each key we scan in the main dict, this function will attempt to defrag * all the various pointers it has. */ void defragKey(defragCtx *ctx, dictEntry *de) { - sds keysds = dictGetKey(de); - robj *newob, *ob; - unsigned char *newzl; - sds newsds; serverDb *db = ctx->privdata; int slot = ctx->slot; - /* Try to defrag the key name. */ - newsds = activeDefragSds(keysds); - if (newsds) { - kvstoreDictSetKey(db->keys, slot, de, newsds); - if (kvstoreDictSize(db->expires, slot)) { - /* We can't search in db->expires for that key after we've released - * the pointer it holds, since it won't be able to do the string - * compare, but we can find the entry using key hash and pointer. 
*/ - uint64_t hash = kvstoreGetHash(db->expires, newsds); - dictEntry *expire_de = kvstoreDictFindEntryByPtrAndHash(db->expires, slot, keysds, hash); - if (expire_de) kvstoreDictSetKey(db->expires, slot, expire_de, newsds); - } - } + robj *newob, *ob; + unsigned char *newzl; /* Try to defrag robj and / or string value. */ ob = dictGetVal(de); @@ -984,7 +1000,9 @@ void activeDefragCycle(void) { endtime = start + timelimit; latencyStartMonitor(latency); - dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc}; + dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc, + .defragEntryStartCb = defragEntryStartCbForKeys, + .defragEntryFinishCb = defragEntryFinishCbForKeys}; do { /* if we're not continuing a scan from the last call or loop, start a new one */ if (!defrag_stage && !defrag_cursor && (slot < 0)) { diff --git a/src/dict.c b/src/dict.c index bc92d49564..280f0b6abc 100644 --- a/src/dict.c +++ b/src/dict.c @@ -35,6 +35,7 @@ #include "fmacros.h" +#include #include #include #include @@ -48,6 +49,10 @@ #include "serverassert.h" #include "monotonic.h" +#ifndef static_assert +#define static_assert(expr, lit) _Static_assert(expr, lit) +#endif + #define UNUSED(V) ((void)V) /* Using dictSetResizeEnabled() we make possible to disable @@ -76,6 +81,33 @@ struct dictEntry { struct dictEntry *next; /* Next entry in the same hash bucket. */ }; +typedef struct { + union { + void *val; + uint64_t u64; + int64_t s64; + double d; + } v; + struct dictEntry *next; /* Next entry in the same hash bucket. */ + uint8_t key_header_size; /* offset into key_buf where the key is located at. */ + unsigned char key_buf[]; /* buffer with embedded key. */ +} embeddedDictEntry; + +/* Validation and helper for `embeddedDictEntry` */ + +static_assert(offsetof(embeddedDictEntry, v) == 0, "unexpected field offset"); +static_assert(offsetof(embeddedDictEntry, next) == sizeof(double), "unexpected field offset"); +static_assert(offsetof(embeddedDictEntry, key_header_size) == sizeof(double) + sizeof(void *), + "unexpected field offset"); +/* key_buf is located after a union with a double value `v.d`, a pointer `next` and uint8_t field `key_header_size` */ +static_assert(offsetof(embeddedDictEntry, key_buf) == sizeof(double) + sizeof(void *) + sizeof(uint8_t), + "unexpected field offset"); + +/* The minimum amount of bytes required for embedded dict entry. */ +static inline size_t compactSizeEmbeddedDictEntry(void) { + return offsetof(embeddedDictEntry, key_buf); +} + typedef struct { void *key; dictEntry *next; @@ -91,6 +123,19 @@ static dictEntry *dictGetNext(const dictEntry *de); static dictEntry **dictGetNextRef(dictEntry *de); static void dictSetNext(dictEntry *de, dictEntry *next); +/* -------------------------- Utility functions -------------------------------- */ + +/* Validates dict type members dependencies. 
*/ +static inline void validateDictType(dictType *type) { + if (type->embedded_entry) { + assert(type->embedKey); + assert(!type->keyDup); + assert(!type->keyDestructor); + } else { + assert(!type->embedKey); + } +} + /* -------------------------- hash functions -------------------------------- */ static uint8_t dict_hash_function_seed[16]; @@ -126,6 +171,8 @@ uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) { #define ENTRY_PTR_MASK 7 /* 111 */ #define ENTRY_PTR_NORMAL 0 /* 000 */ #define ENTRY_PTR_NO_VALUE 2 /* 010 */ +#define ENTRY_PTR_EMBEDDED 4 /* 100 */ +/* ENTRY_PTR_IS_KEY xx1 */ /* Returns 1 if the entry pointer is a pointer to a key, rather than to an * allocated entry. Returns 0 otherwise. */ @@ -145,12 +192,9 @@ static inline int entryIsNoValue(const dictEntry *de) { return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_NO_VALUE; } -/* Creates an entry without a value field. */ -static inline dictEntry *createEntryNoValue(void *key, dictEntry *next) { - dictEntryNoValue *entry = zmalloc(sizeof(*entry)); - entry->key = key; - entry->next = next; - return (dictEntry *)(void *)((uintptr_t)(void *)entry | ENTRY_PTR_NO_VALUE); + +static inline int entryIsEmbedded(const dictEntry *de) { + return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_EMBEDDED; } static inline dictEntry *encodeMaskedPtr(const void *ptr, unsigned int bits) { @@ -163,15 +207,40 @@ static inline void *decodeMaskedPtr(const dictEntry *de) { return (void *)((uintptr_t)(void *)de & ~ENTRY_PTR_MASK); } +/* Creates an entry without a value field. */ +static inline dictEntry *createEntryNoValue(void *key, dictEntry *next) { + dictEntryNoValue *entry = zmalloc(sizeof(*entry)); + entry->key = key; + entry->next = next; + return encodeMaskedPtr(entry, ENTRY_PTR_NO_VALUE); +} + +static inline dictEntry *createEmbeddedEntry(void *key, dictEntry *next, dictType *dt) { + size_t key_len = dt->embedKey(NULL, 0, key, NULL); + embeddedDictEntry *entry = zmalloc(compactSizeEmbeddedDictEntry() + key_len); + dt->embedKey(entry->key_buf, key_len, key, &entry->key_header_size); + entry->next = next; + return encodeMaskedPtr(entry, ENTRY_PTR_EMBEDDED); +} + +static inline void *getEmbeddedKey(const dictEntry *de) { + embeddedDictEntry *entry = (embeddedDictEntry *)decodeMaskedPtr(de); + return &entry->key_buf[entry->key_header_size]; +} + /* Decodes the pointer to an entry without value, when you know it is an entry * without value. Hint: Use entryIsNoValue to check. */ static inline dictEntryNoValue *decodeEntryNoValue(const dictEntry *de) { return decodeMaskedPtr(de); } +static inline embeddedDictEntry *decodeEmbeddedEntry(const dictEntry *de) { + return decodeMaskedPtr(de); +} + /* Returns 1 if the entry has a value field and 0 otherwise. */ static inline int entryHasValue(const dictEntry *de) { - return entryIsNormal(de); + return entryIsNormal(de) || entryIsEmbedded(de); } /* ----------------------------- API implementation ------------------------- */ @@ -185,6 +254,7 @@ static void _dictReset(dict *d, int htidx) { /* Create a new hash table */ dict *dictCreate(dictType *type) { + validateDictType(type); size_t metasize = type->dictMetadataBytes ? type->dictMetadataBytes(NULL) : 0; dict *d = zmalloc(sizeof(*d) + metasize); if (metasize > 0) { @@ -473,6 +543,10 @@ int dictAdd(dict *d, void *key, void *val) { * with the existing entry if existing is not NULL. * * If key was added, the hash entry is returned to be manipulated by the caller. 
+ * + * The dict handles `key` based on `dictType` during initialization: + * - If `dictType.embedded-entry` is 1, it clones the `key`. + * - Otherwise, it assumes ownership of the `key`. */ dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) { /* Get the position for the new key or NULL if the key already exists. */ @@ -511,6 +585,8 @@ dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) { /* Allocate an entry without value. */ entry = createEntryNoValue(key, *bucket); } + } else if (d->type->embedded_entry) { + entry = createEmbeddedEntry(key, *bucket, d->type); } else { /* Allocate the memory and store the new entry. * Insert the element in top, with the assumption that in a database @@ -658,6 +734,7 @@ void dictFreeUnlinkedEntry(dict *d, dictEntry *he) { if (he == NULL) return; dictFreeKey(d, he); dictFreeVal(d, he); + /* Clear the dictEntry */ if (!entryIsKey(he)) zfree(decodeMaskedPtr(he)); } @@ -804,7 +881,11 @@ void dictSetKey(dict *d, dictEntry *de, void *key) { void dictSetVal(dict *d, dictEntry *de, void *val) { UNUSED(d); assert(entryHasValue(de)); - de->v.val = val; + if (entryIsEmbedded(de)) { + decodeEmbeddedEntry(de)->v.val = val; + } else { + de->v.val = val; + } } void dictSetSignedIntegerVal(dictEntry *de, int64_t val) { @@ -840,11 +921,15 @@ double dictIncrDoubleVal(dictEntry *de, double val) { void *dictGetKey(const dictEntry *de) { if (entryIsKey(de)) return (void *)de; if (entryIsNoValue(de)) return decodeEntryNoValue(de)->key; + if (entryIsEmbedded(de)) return getEmbeddedKey(de); return de->key; } void *dictGetVal(const dictEntry *de) { assert(entryHasValue(de)); + if (entryIsEmbedded(de)) { + return decodeEmbeddedEntry(de)->v.val; + } return de->v.val; } @@ -874,6 +959,7 @@ double *dictGetDoubleValPtr(dictEntry *de) { static dictEntry *dictGetNext(const dictEntry *de) { if (entryIsKey(de)) return NULL; /* there's no next */ if (entryIsNoValue(de)) return decodeEntryNoValue(de)->next; + if (entryIsEmbedded(de)) return decodeEmbeddedEntry(de)->next; return de->next; } @@ -882,14 +968,16 @@ static dictEntry *dictGetNext(const dictEntry *de) { static dictEntry **dictGetNextRef(dictEntry *de) { if (entryIsKey(de)) return NULL; if (entryIsNoValue(de)) return &decodeEntryNoValue(de)->next; + if (entryIsEmbedded(de)) return &decodeEmbeddedEntry(de)->next; return &de->next; } static void dictSetNext(dictEntry *de, dictEntry *next) { assert(!entryIsKey(de)); if (entryIsNoValue(de)) { - dictEntryNoValue *entry = decodeEntryNoValue(de); - entry->next = next; + decodeEntryNoValue(de)->next = next; + } else if (entryIsEmbedded(de)) { + decodeEmbeddedEntry(de)->next = next; } else { de->next = next; } @@ -901,8 +989,20 @@ size_t dictMemUsage(const dict *d) { return dictSize(d) * sizeof(dictEntry) + dictBuckets(d) * sizeof(dictEntry *); } -size_t dictEntryMemUsage(void) { - return sizeof(dictEntry); +/* Returns the memory usage in bytes of dictEntry based on the type. if `de` is NULL, return the size of + * regular dict entry else return based on the type. 
*/ +size_t dictEntryMemUsage(dictEntry *de) { + if (de == NULL || entryIsNormal(de)) + return sizeof(dictEntry); + else if (entryIsKey(de)) + return 0; + else if (entryIsNoValue(de)) + return sizeof(dictEntryNoValue); + else if (entryIsEmbedded(de)) + return zmalloc_size(decodeEmbeddedEntry(de)); + else + assert("Entry type not supported"); + return 0; } /* A fingerprint is a 64 bit number that represents the state of the dictionary @@ -1172,7 +1272,7 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) { /* Reallocate the dictEntry, key and value allocations in a bucket using the * provided allocation functions in order to defrag them. */ -static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragfns) { +static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragfns, void *privdata) { dictDefragAllocFunction *defragalloc = defragfns->defragAlloc; dictDefragAllocFunction *defragkey = defragfns->defragKey; dictDefragAllocFunction *defragval = defragfns->defragVal; @@ -1190,6 +1290,17 @@ static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragf entry = newentry; } if (newkey) entry->key = newkey; + } else if (entryIsEmbedded(de)) { + defragfns->defragEntryStartCb(privdata, de); + embeddedDictEntry *entry = decodeEmbeddedEntry(de), *newentry; + if ((newentry = defragalloc(entry))) { + newde = encodeMaskedPtr(newentry, ENTRY_PTR_EMBEDDED); + entry = newentry; + defragfns->defragEntryFinishCb(privdata, newde); + } else { + defragfns->defragEntryFinishCb(privdata, NULL); + } + if (newval) entry->v.val = newval; } else { assert(entryIsNormal(de)); newde = defragalloc(de); @@ -1353,7 +1464,7 @@ dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctio /* Emit entries at cursor */ if (defragfns) { - dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns); + dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns, privdata); } de = d->ht_table[htidx0][v & m0]; while (de) { @@ -1386,7 +1497,7 @@ dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctio /* Emit entries at cursor */ if (defragfns) { - dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns); + dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns, privdata); } de = d->ht_table[htidx0][v & m0]; while (de) { @@ -1400,7 +1511,7 @@ dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctio do { /* Emit entries at cursor */ if (defragfns) { - dictDefragBucket(&d->ht_table[htidx1][v & m1], defragfns); + dictDefragBucket(&d->ht_table[htidx1][v & m1], defragfns, privdata); } de = d->ht_table[htidx1][v & m1]; while (de) { @@ -1573,29 +1684,6 @@ uint64_t dictGetHash(dict *d, const void *key) { return dictHashKey(d, key); } -/* Finds the dictEntry using pointer and pre-calculated hash. - * oldkey is a dead pointer and should not be accessed. - * the hash value should be provided using dictGetHash. - * no string / key comparison is performed. - * return value is a pointer to the dictEntry if found, or NULL if not found. 
-/* Finds the dictEntry using pointer and pre-calculated hash. - * oldkey is a dead pointer and should not be accessed. - * the hash value should be provided using dictGetHash. - * no string / key comparison is performed. - * return value is a pointer to the dictEntry if found, or NULL if not found. - */ -dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) { - dictEntry *he; - unsigned long idx, table; - - if (dictSize(d) == 0) return NULL; /* dict is empty */ - for (table = 0; table <= 1; table++) { - idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]); - if (table == 0 && (long)idx < d->rehashidx) continue; - he = d->ht_table[table][idx]; - while (he) { - if (oldptr == dictGetKey(he)) return he; - he = dictGetNext(he); - } - if (!dictIsRehashing(d)) return NULL; - } - return NULL; -} - /* Provides the old and new ht size for a given dictionary during rehashing. This method * should only be invoked during initialization/rehashing. */ void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size) { @@ -1718,7 +1806,6 @@ void dictGetStats(char *buf, size_t bufsize, dict *d, int full) { #ifdef SERVER_TEST #include "testhelp.h" -#define UNUSED(V) ((void)V) #define TEST(name) printf("test — %s\n", name); uint64_t hashCallback(const void *key) { diff --git a/src/dict.h b/src/dict.h index 723e5a54c2..a7c5c71826 100644 --- a/src/dict.h +++ b/src/dict.h @@ -66,6 +66,10 @@ typedef struct dictType { /* Allow a dict to carry extra caller-defined metadata. The * extra memory is initialized to 0 when a dict is allocated. */ size_t (*dictMetadataBytes)(dict *d); + /* Method for copying a given key into a buffer of buf_len. Also used for + * computing the length of the key + header when buf is NULL. */ + size_t (*embedKey)(unsigned char *buf, size_t buf_len, const void *key, unsigned char *header_size); + /* Data */ void *userdata; @@ -80,8 +84,9 @@ typedef struct dictType { * enables one more optimization: to store a key without an allocated * dictEntry. */ unsigned int keys_are_odd : 1; - /* TODO: Add a 'keys_are_even' flag and use a similar optimization if that - * flag is set. */ + /* If the embedded_entry flag is set, a copy of the key is created and embedded + * as part of the dict entry. */ + unsigned int embedded_entry : 1; } dictType; #define DICTHT_SIZE(exp) ((exp) == -1 ? 0 : (unsigned long)1 << (exp)) @@ -127,10 +132,13 @@ typedef struct dictStats { typedef void(dictScanFunction)(void *privdata, const dictEntry *de); typedef void *(dictDefragAllocFunction)(void *ptr); +typedef void(dictDefragEntryCb)(void *privdata, void *ptr); typedef struct { - dictDefragAllocFunction *defragAlloc; /* Used for entries etc. */ - dictDefragAllocFunction *defragKey; /* Defrag-realloc keys (optional) */ - dictDefragAllocFunction *defragVal; /* Defrag-realloc values (optional) */ + dictDefragAllocFunction *defragAlloc; /* Used for entries etc. */ + dictDefragAllocFunction *defragKey; /* Defrag-realloc keys (optional) */ + dictDefragAllocFunction *defragVal; /* Defrag-realloc values (optional) */ + dictDefragEntryCb *defragEntryStartCb; /* Callback invoked before defrag of a dictEntry starts. */ + dictDefragEntryCb *defragEntryFinishCb; /* Callback invoked after defrag of a dictEntry has been attempted.
*/ } dictDefragFunctions; /* This is the initial size of every hash table */ @@ -212,7 +220,7 @@ uint64_t dictGetUnsignedIntegerVal(const dictEntry *de); double dictGetDoubleVal(const dictEntry *de); double *dictGetDoubleValPtr(dictEntry *de); size_t dictMemUsage(const dict *d); -size_t dictEntryMemUsage(void); +size_t dictEntryMemUsage(dictEntry *de); dictIterator *dictGetIterator(dict *d); dictIterator *dictGetSafeIterator(dict *d); void dictInitIterator(dictIterator *iter, dict *d); @@ -236,7 +244,6 @@ unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *pri unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata); uint64_t dictGetHash(dict *d, const void *key); -dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash); void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size); size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full); diff --git a/src/eval.c b/src/eval.c index e747c233e8..2afbf445f5 100644 --- a/src/eval.c +++ b/src/eval.c @@ -258,10 +258,10 @@ void scriptingInit(int setup) { * by scriptingReset(). */ if (lctx.lua_client == NULL) { lctx.lua_client = createClient(NULL); - lctx.lua_client->flags |= CLIENT_SCRIPT; + lctx.lua_client->flag.script = 1; /* We do not want to allow blocking commands inside Lua */ - lctx.lua_client->flags |= CLIENT_DENY_BLOCKING; + lctx.lua_client->flag.deny_blocking = 1; } /* Lock the global table from any changes */ @@ -630,7 +630,7 @@ void evalCommand(client *c) { /* Explicitly feed monitor here so that lua commands appear after their * script command. */ replicationFeedMonitors(c, server.monitors, c->db->id, c->argv, c->argc); - if (!(c->flags & CLIENT_LUA_DEBUG)) + if (!c->flag.lua_debug) evalGenericCommand(c, 0); else evalGenericCommandWithDebugging(c, 0); @@ -652,7 +652,7 @@ void evalShaCommand(client *c) { addReplyErrorObject(c, shared.noscripterr); return; } - if (!(c->flags & CLIENT_LUA_DEBUG)) + if (!c->flag.lua_debug) evalGenericCommand(c, 1); else { addReplyError(c, "Please use EVAL instead of EVALSHA for debugging"); @@ -682,6 +682,8 @@ void scriptCommand(client *c) { " Kill the currently executing Lua script.", "LOAD