From b6e0f1998930a4a56175ef56f45d08095c62c14e Mon Sep 17 00:00:00 2001 From: Karthik Subbarao Date: Wed, 15 May 2024 08:43:36 +0000 Subject: [PATCH 01/15] Add support for limiting custom LUA errors when over 128 error messages while allowing non LUA errors to function as usual Signed-off-by: Karthik Subbarao --- src/networking.c | 28 +++++++++++++++++----------- src/script_lua.c | 2 +- src/server.c | 42 ------------------------------------------ src/server.h | 7 ++++++- tests/unit/info.tcl | 18 ++++++------------ 5 files changed, 30 insertions(+), 67 deletions(-) diff --git a/src/networking.c b/src/networking.c index 5aa02e8315..afb4ed9ca6 100644 --- a/src/networking.c +++ b/src/networking.c @@ -513,19 +513,25 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) { if (!(flags & ERR_REPLY_FLAG_NO_STATS_UPDATE)) { /* Increment the global error counter */ server.stat_total_error_replies++; - /* Increment the error stats - * If the string already starts with "-..." then the error prefix - * is provided by the caller ( we limit the search to 32 chars). Otherwise we use "-ERR". */ - if (s[0] != '-') { - incrementErrorCount("ERR", 3); + /* After the errors RAX reaches this limit, instead of tracking + * custom LUA errors, we track the error under `error_LUA` */ + if (flags & ERR_REPLY_FLAG_LUA && raxSize(server.errors) >= ERROR_STATS_LUA_LIMIT) { + incrementErrorCount("LUA_ERRORSTATS_DISABLED", 23); } else { - char *spaceloc = memchr(s, ' ', len < 32 ? len : 32); - if (spaceloc) { - const size_t errEndPos = (size_t)(spaceloc - s); - incrementErrorCount(s+1, errEndPos-1); - } else { - /* Fallback to ERR if we can't retrieve the error prefix */ + /* Increment the error stats + * If the string already starts with "-..." then the error prefix + * is provided by the caller ( we limit the search to 32 chars). Otherwise we use "-ERR". */ + if (s[0] != '-') { incrementErrorCount("ERR", 3); + } else { + char *spaceloc = memchr(s, ' ', len < 32 ? len : 32); + if (spaceloc) { + const size_t errEndPos = (size_t)(spaceloc - s); + incrementErrorCount(s+1, errEndPos-1); + } else { + /* Fallback to ERR if we can't retrieve the error prefix */ + incrementErrorCount("ERR", 3); + } } } } else { diff --git a/src/script_lua.c b/src/script_lua.c index 21bf6f6af9..501116ae4f 100644 --- a/src/script_lua.c +++ b/src/script_lua.c @@ -639,7 +639,7 @@ static void luaReplyToRedisReply(client *c, client* script_client, lua_State *lu errorInfo err_info = {0}; luaExtractErrorInformation(lua, &err_info); addReplyErrorFormatEx(c, - err_info.ignore_err_stats_update? ERR_REPLY_FLAG_NO_STATS_UPDATE: 0, + err_info.ignore_err_stats_update? ERR_REPLY_FLAG_NO_STATS_UPDATE | ERR_REPLY_FLAG_LUA: ERR_REPLY_FLAG_LUA, "-%s", err_info.msg); luaErrorInformationDiscard(&err_info); diff --git a/src/server.c b/src/server.c index fe54d9eb82..284e675e4a 100644 --- a/src/server.c +++ b/src/server.c @@ -2629,7 +2629,6 @@ void initServer(void) { server.main_thread_id = pthread_self(); server.current_client = NULL; server.errors = raxNew(); - server.errors_enabled = 1; server.execution_nesting = 0; server.clients = listCreate(); server.clients_index = raxNew(); @@ -3136,7 +3135,6 @@ void resetCommandTableStats(dict* commands) { void resetErrorTableStats(void) { freeErrorsRadixTreeAsync(server.errors); server.errors = raxNew(); - server.errors_enabled = 1; } /* ========================== OP Array API ============================ */ @@ -4233,49 +4231,9 @@ int processCommand(client *c) { /* ====================== Error lookup and execution ===================== */ -/* Users who abuse lua error_reply will generate a new error object on each - * error call, which can make server.errors get bigger and bigger. This will - * cause the server to block when calling INFO (we also return errorstats by - * default). To prevent the damage it can cause, when a misuse is detected, - * we will print the warning log and disable the errorstats to avoid adding - * more new errors. It can be re-enabled via CONFIG RESETSTAT. */ -#define ERROR_STATS_NUMBER 128 void incrementErrorCount(const char *fullerr, size_t namelen) { - /* errorstats is disabled, return ASAP. */ - if (!server.errors_enabled) return; - void *result; if (!raxFind(server.errors,(unsigned char*)fullerr,namelen,&result)) { - if (server.errors->numele >= ERROR_STATS_NUMBER) { - sds errors = sdsempty(); - raxIterator ri; - raxStart(&ri, server.errors); - raxSeek(&ri, "^", NULL, 0); - while (raxNext(&ri)) { - char *tmpsafe; - errors = sdscatlen(errors, getSafeInfoString((char *)ri.key, ri.key_len, &tmpsafe), ri.key_len); - errors = sdscatlen(errors, ", ", 2); - if (tmpsafe != NULL) zfree(tmpsafe); - } - sdsrange(errors, 0, -3); /* Remove final ", ". */ - raxStop(&ri); - - /* Print the warning log and the contents of server.errors to the log. */ - serverLog(LL_WARNING, - "Errorstats stopped adding new errors because the number of " - "errors reached the limit, may be misuse of lua error_reply, " - "please check INFO ERRORSTATS, this can be re-enabled via " - "CONFIG RESETSTAT."); - serverLog(LL_WARNING, "Current errors code list: %s", errors); - sdsfree(errors); - - /* Reset the errors and add a single element to indicate that it is disabled. */ - resetErrorTableStats(); - incrementErrorCount("ERRORSTATS_DISABLED", 19); - server.errors_enabled = 0; - return; - } - struct serverError *error = zmalloc(sizeof(*error)); error->count = 1; raxInsert(server.errors,(unsigned char*)fullerr,namelen,error,NULL); diff --git a/src/server.h b/src/server.h index 23bcda81d7..3222e33989 100644 --- a/src/server.h +++ b/src/server.h @@ -1568,7 +1568,6 @@ struct valkeyServer { dict *orig_commands; /* Command table before command renaming. */ aeEventLoop *el; rax *errors; /* Errors table */ - int errors_enabled; /* If true, errorstats is enabled, and we will add new errors. */ unsigned int lruclock; /* Clock for LRU eviction */ volatile sig_atomic_t shutdown_asap; /* Shutdown ordered by signal handler. */ mstime_t shutdown_mstime; /* Timestamp to limit graceful shutdown. */ @@ -2563,9 +2562,15 @@ int validateProcTitleTemplate(const char *template); int serverCommunicateSystemd(const char *sd_notify_msg); void serverSetCpuAffinity(const char *cpulist); +/* ERROR STATS constants */ +#define ERROR_STATS_LUA_LIMIT 128 /* After the errors RAX reaches this limit, instead of tracking + custom LUA errors, we track the error under `error_LUA`. */ + /* afterErrorReply flags */ #define ERR_REPLY_FLAG_NO_STATS_UPDATE (1ULL<<0) /* Indicating that we should not update error stats after sending error reply */ +#define ERR_REPLY_FLAG_LUA (1ULL<<1) /* Indicating that we are coming in from ScriptCall */ + /* networking.c -- Networking and Client related operations */ client *createClient(connection *conn); void freeClient(client *c); diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl index 17dc6a1861..dbe4d3606d 100644 --- a/tests/unit/info.tcl +++ b/tests/unit/info.tcl @@ -281,18 +281,12 @@ start_server {tags {"info" "external:skip"}} { r eval {return redis.error_reply(string.format('%s my error message', ARGV[1]))} 0 $j } } - - assert_equal [count_log_message 0 "Errorstats stopped adding new errors"] 1 - assert_equal [count_log_message 0 "Current errors code list"] 1 - assert_equal "count=1" [errorstat ERRORSTATS_DISABLED] - - # Since we currently have no metrics exposed for server.errors, we use lazyfree - # to verify that we only have 128 errors. - wait_for_condition 50 100 { - [s lazyfreed_objects] eq 128 - } else { - fail "errorstats resetstat lazyfree error" - } + # Validate that custom LUA errors are tracked in `LUA_ERRORSTATS_DISABLED` when errors + # has 128 entries. + assert_equal "count=972" [errorstat LUA_ERRORSTATS_DISABLED] + # Validate that non LUA errors continue to be tracked even when we have >=128 entries. + assert_error {ERR syntax error} {r set a b c d e f g} + assert_equal "count=1" [errorstat ERR] } test {stats: eventloop metrics} { From c86cb624202009d4a9f4a6eaeb407c6fd355f72d Mon Sep 17 00:00:00 2001 From: Karthik Subbarao Date: Wed, 15 May 2024 09:19:34 +0000 Subject: [PATCH 02/15] Minor clean up + update documentation Signed-off-by: Karthik Subbarao --- src/networking.c | 24 +++++++++++------------- src/server.h | 3 ++- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/networking.c b/src/networking.c index afb4ed9ca6..1c2747dc27 100644 --- a/src/networking.c +++ b/src/networking.c @@ -513,25 +513,23 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) { if (!(flags & ERR_REPLY_FLAG_NO_STATS_UPDATE)) { /* Increment the global error counter */ server.stat_total_error_replies++; + /* Increment the error stats */ /* After the errors RAX reaches this limit, instead of tracking * custom LUA errors, we track the error under `error_LUA` */ if (flags & ERR_REPLY_FLAG_LUA && raxSize(server.errors) >= ERROR_STATS_LUA_LIMIT) { incrementErrorCount("LUA_ERRORSTATS_DISABLED", 23); + /* If the string already starts with "-..." then the error prefix + * is provided by the caller ( we limit the search to 32 chars). Otherwise we use "-ERR". */ + } else if (s[0] != '-') { + incrementErrorCount("ERR", 3); } else { - /* Increment the error stats - * If the string already starts with "-..." then the error prefix - * is provided by the caller ( we limit the search to 32 chars). Otherwise we use "-ERR". */ - if (s[0] != '-') { - incrementErrorCount("ERR", 3); + char *spaceloc = memchr(s, ' ', len < 32 ? len : 32); + if (spaceloc) { + const size_t errEndPos = (size_t)(spaceloc - s); + incrementErrorCount(s+1, errEndPos-1); } else { - char *spaceloc = memchr(s, ' ', len < 32 ? len : 32); - if (spaceloc) { - const size_t errEndPos = (size_t)(spaceloc - s); - incrementErrorCount(s+1, errEndPos-1); - } else { - /* Fallback to ERR if we can't retrieve the error prefix */ - incrementErrorCount("ERR", 3); - } + /* Fallback to ERR if we can't retrieve the error prefix */ + incrementErrorCount("ERR", 3); } } } else { diff --git a/src/server.h b/src/server.h index 3222e33989..4ff209c0e9 100644 --- a/src/server.h +++ b/src/server.h @@ -2569,7 +2569,8 @@ void serverSetCpuAffinity(const char *cpulist); /* afterErrorReply flags */ #define ERR_REPLY_FLAG_NO_STATS_UPDATE (1ULL<<0) /* Indicating that we should not update error stats after sending error reply */ -#define ERR_REPLY_FLAG_LUA (1ULL<<1) /* Indicating that we are coming in from ScriptCall */ +#define ERR_REPLY_FLAG_LUA (1ULL<<1) /* Indicating that the error message is from LUA replying + to a client */ /* networking.c -- Networking and Client related operations */ client *createClient(connection *conn); From bc0eaef1522765d9117d27e4c60b1a8bbc4dd9b9 Mon Sep 17 00:00:00 2001 From: Karthik Subbarao Date: Thu, 16 May 2024 16:58:13 +0000 Subject: [PATCH 03/15] Rename LUA Error Stat overflow error message Signed-off-by: Karthik Subbarao --- src/networking.c | 2 +- src/server.h | 3 ++- tests/unit/info.tcl | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/networking.c b/src/networking.c index 1c2747dc27..784083a4f9 100644 --- a/src/networking.c +++ b/src/networking.c @@ -517,7 +517,7 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) { /* After the errors RAX reaches this limit, instead of tracking * custom LUA errors, we track the error under `error_LUA` */ if (flags & ERR_REPLY_FLAG_LUA && raxSize(server.errors) >= ERROR_STATS_LUA_LIMIT) { - incrementErrorCount("LUA_ERRORSTATS_DISABLED", 23); + incrementErrorCount(LUA_ERRORSTATS_OVERFLOW_ERR, strlen(LUA_ERRORSTATS_OVERFLOW_ERR)); /* If the string already starts with "-..." then the error prefix * is provided by the caller ( we limit the search to 32 chars). Otherwise we use "-ERR". */ } else if (s[0] != '-') { diff --git a/src/server.h b/src/server.h index 4ff209c0e9..b77ef69dfd 100644 --- a/src/server.h +++ b/src/server.h @@ -2564,7 +2564,8 @@ void serverSetCpuAffinity(const char *cpulist); /* ERROR STATS constants */ #define ERROR_STATS_LUA_LIMIT 128 /* After the errors RAX reaches this limit, instead of tracking - custom LUA errors, we track the error under `error_LUA`. */ + custom LUA errors, we track the error under `error_LUA_ERRORSTATS_OVERFLOW`. */ +#define LUA_ERRORSTATS_OVERFLOW_ERR "LUA_ERRORSTATS_OVERFLOW" /* afterErrorReply flags */ #define ERR_REPLY_FLAG_NO_STATS_UPDATE (1ULL<<0) /* Indicating that we should not update diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl index dbe4d3606d..6a7fbe9438 100644 --- a/tests/unit/info.tcl +++ b/tests/unit/info.tcl @@ -281,9 +281,9 @@ start_server {tags {"info" "external:skip"}} { r eval {return redis.error_reply(string.format('%s my error message', ARGV[1]))} 0 $j } } - # Validate that custom LUA errors are tracked in `LUA_ERRORSTATS_DISABLED` when errors + # Validate that custom LUA errors are tracked in `LUA_ERRORSTATS_OVERFLOW` when errors # has 128 entries. - assert_equal "count=972" [errorstat LUA_ERRORSTATS_DISABLED] + assert_equal "count=972" [errorstat LUA_ERRORSTATS_OVERFLOW] # Validate that non LUA errors continue to be tracked even when we have >=128 entries. assert_error {ERR syntax error} {r set a b c d e f g} assert_equal "count=1" [errorstat ERR] From 1c77f69b16f675cf15a5ce5d038d07a831862dfb Mon Sep 17 00:00:00 2001 From: Karthik Subbarao Date: Mon, 20 May 2024 15:36:57 +0000 Subject: [PATCH 04/15] Update documentation, minor refactor, additional test case Signed-off-by: Karthik Subbarao --- src/script_lua.c | 2 +- src/server.h | 2 +- tests/unit/info.tcl | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/script_lua.c b/src/script_lua.c index 501116ae4f..eff914779d 100644 --- a/src/script_lua.c +++ b/src/script_lua.c @@ -639,7 +639,7 @@ static void luaReplyToRedisReply(client *c, client* script_client, lua_State *lu errorInfo err_info = {0}; luaExtractErrorInformation(lua, &err_info); addReplyErrorFormatEx(c, - err_info.ignore_err_stats_update? ERR_REPLY_FLAG_NO_STATS_UPDATE | ERR_REPLY_FLAG_LUA: ERR_REPLY_FLAG_LUA, + ERR_REPLY_FLAG_LUA | (err_info.ignore_err_stats_update? ERR_REPLY_FLAG_NO_STATS_UPDATE: 0), "-%s", err_info.msg); luaErrorInformationDiscard(&err_info); diff --git a/src/server.h b/src/server.h index b77ef69dfd..b9c9a00b4d 100644 --- a/src/server.h +++ b/src/server.h @@ -2564,7 +2564,7 @@ void serverSetCpuAffinity(const char *cpulist); /* ERROR STATS constants */ #define ERROR_STATS_LUA_LIMIT 128 /* After the errors RAX reaches this limit, instead of tracking - custom LUA errors, we track the error under `error_LUA_ERRORSTATS_OVERFLOW`. */ + custom LUA errors, we track the error under the symbol below. */ #define LUA_ERRORSTATS_OVERFLOW_ERR "LUA_ERRORSTATS_OVERFLOW" /* afterErrorReply flags */ diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl index 6a7fbe9438..f72166ce32 100644 --- a/tests/unit/info.tcl +++ b/tests/unit/info.tcl @@ -278,7 +278,7 @@ start_server {tags {"info" "external:skip"}} { r config resetstat for {set j 1} {$j <= 1100} {incr j} { assert_error "$j my error message" { - r eval {return redis.error_reply(string.format('%s my error message', ARGV[1]))} 0 $j + r eval {return server.error_reply(string.format('%s my error message', ARGV[1]))} 0 $j } } # Validate that custom LUA errors are tracked in `LUA_ERRORSTATS_OVERFLOW` when errors @@ -287,6 +287,9 @@ start_server {tags {"info" "external:skip"}} { # Validate that non LUA errors continue to be tracked even when we have >=128 entries. assert_error {ERR syntax error} {r set a b c d e f g} assert_equal "count=1" [errorstat ERR] + r eval {return server.error_reply(string.format('My error message'))} 0 + r eval {return {err = 'My error message'}} 0 + assert_equal "count=974" [errorstat LUA_ERRORSTATS_OVERFLOW] } test {stats: eventloop metrics} { From 8206c4ae59d8763f4ed0af45cf7dc0dafc0857ec Mon Sep 17 00:00:00 2001 From: KarthikSubbarao Date: Tue, 21 May 2024 04:06:37 +0000 Subject: [PATCH 05/15] update tests Signed-off-by: KarthikSubbarao --- tests/unit/info.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl index f72166ce32..ab5dc06df3 100644 --- a/tests/unit/info.tcl +++ b/tests/unit/info.tcl @@ -287,8 +287,8 @@ start_server {tags {"info" "external:skip"}} { # Validate that non LUA errors continue to be tracked even when we have >=128 entries. assert_error {ERR syntax error} {r set a b c d e f g} assert_equal "count=1" [errorstat ERR] - r eval {return server.error_reply(string.format('My error message'))} 0 - r eval {return {err = 'My error message'}} 0 + assert_error "My error message" {r eval {return server.error_reply(string.format('My error message'))} 0} + assert_error "My error message" {r eval {return {err = 'My error message'}} 0} assert_equal "count=974" [errorstat LUA_ERRORSTATS_OVERFLOW] } From a03333b61351696a58b77671174fb6194e21a738 Mon Sep 17 00:00:00 2001 From: KarthikSubbarao Date: Tue, 21 May 2024 04:41:04 +0000 Subject: [PATCH 06/15] Add tests for Valkey Functions Signed-off-by: KarthikSubbarao --- tests/unit/info.tcl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl index ab5dc06df3..181eb1412f 100644 --- a/tests/unit/info.tcl +++ b/tests/unit/info.tcl @@ -287,9 +287,19 @@ start_server {tags {"info" "external:skip"}} { # Validate that non LUA errors continue to be tracked even when we have >=128 entries. assert_error {ERR syntax error} {r set a b c d e f g} assert_equal "count=1" [errorstat ERR] - assert_error "My error message" {r eval {return server.error_reply(string.format('My error message'))} 0} + # Test LUA error variants. + assert_error "My error message" {r eval {return server.error_reply('My error message')} 0} assert_error "My error message" {r eval {return {err = 'My error message'}} 0} assert_equal "count=974" [errorstat LUA_ERRORSTATS_OVERFLOW] + # Function calls that contain custom error messages should call be included in overflow counter + r FUNCTION LOAD replace [format "#!lua name=mylib\nserver.register_function('customerrorfn', function() return server.error_reply('My error message') end)"] + assert_error "My error message" {r fcall customerrorfn 0} + assert_equal "count=975" [errorstat LUA_ERRORSTATS_OVERFLOW] + # Function calls that contain non lua errors should continue to be tracked normally (in a separate counter). + r FUNCTION LOAD replace [format "#!lua name=mylib\nserver.register_function('invalidgetcmd', function() return server.call('get', 'x', 'x', 'x') end)"] + assert_error "ERR Wrong number of args*" {r fcall invalidgetcmd 0} + assert_equal "count=975" [errorstat LUA_ERRORSTATS_OVERFLOW] + assert_equal "count=2" [errorstat ERR] } test {stats: eventloop metrics} { From 6d324c89930539e4fa746440f33bf4f4440a7fce Mon Sep 17 00:00:00 2001 From: KarthikSubbarao Date: Fri, 21 Jun 2024 23:28:47 +0000 Subject: [PATCH 07/15] Fix typo from unstable merge Signed-off-by: KarthikSubbarao --- src/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index bc82f78f5f..dacb157d3d 100644 --- a/src/server.c +++ b/src/server.c @@ -4039,7 +4039,7 @@ int processCommand(client *c) { /* ====================== Error lookup and execution ===================== */ void incrementErrorCount(const char *fullerr, size_t namelen) { - void *result + void *result; if (!raxFind(server.errors,(unsigned char*)fullerr,namelen,&result)) { struct serverError *error = zmalloc(sizeof(*error)); error->count = 1; From d054ab15fff7acaa43c00982427718dc8220cca9 Mon Sep 17 00:00:00 2001 From: KarthikSubbarao Date: Mon, 1 Jul 2024 19:16:30 +0000 Subject: [PATCH 08/15] Address some of the format suggestions Signed-off-by: KarthikSubbarao --- src/networking.c | 4 ++-- src/script_lua.c | 7 +++---- src/server.c | 2 +- src/server.h | 4 ++-- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/networking.c b/src/networking.c index abbbb1b48f..0f21c63be1 100644 --- a/src/networking.c +++ b/src/networking.c @@ -548,9 +548,9 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) { * custom LUA errors, we track the error under `error_LUA` */ if (flags & ERR_REPLY_FLAG_LUA && raxSize(server.errors) >= ERROR_STATS_LUA_LIMIT) { incrementErrorCount(LUA_ERRORSTATS_OVERFLOW_ERR, strlen(LUA_ERRORSTATS_OVERFLOW_ERR)); - /* If the string already starts with "-..." then the error prefix - * is provided by the caller ( we limit the search to 32 chars). Otherwise we use "-ERR". */ } else if (s[0] != '-') { + /* If the string already starts with "-..." then the error prefix + * is provided by the caller ( we limit the search to 32 chars). Otherwise we use "-ERR". */ incrementErrorCount("ERR", 3); } else { char *spaceloc = memchr(s, ' ', len < 32 ? len : 32); diff --git a/src/script_lua.c b/src/script_lua.c index 7851cf26e8..b7467f3772 100644 --- a/src/script_lua.c +++ b/src/script_lua.c @@ -617,10 +617,9 @@ static void luaReplyToRedisReply(client *c, client *script_client, lua_State *lu 1); /* pop the error message, we will use luaExtractErrorInformation to get error information */ errorInfo err_info = {0}; luaExtractErrorInformation(lua, &err_info); - addReplyErrorFormatEx(c, - ERR_REPLY_FLAG_LUA | (err_info.ignore_err_stats_update? ERR_REPLY_FLAG_NO_STATS_UPDATE: 0), - "-%s", - err_info.msg); + addReplyErrorFormatEx( + c, ERR_REPLY_FLAG_LUA | (err_info.ignore_err_stats_update ? ERR_REPLY_FLAG_NO_STATS_UPDATE : 0), "-%s", + err_info.msg); luaErrorInformationDiscard(&err_info); lua_pop(lua, 1); /* pop the result table */ return; diff --git a/src/server.c b/src/server.c index 18c15399b0..df16b09e39 100644 --- a/src/server.c +++ b/src/server.c @@ -4048,7 +4048,7 @@ int processCommand(client *c) { void incrementErrorCount(const char *fullerr, size_t namelen) { void *result; - if (!raxFind(server.errors,(unsigned char*)fullerr,namelen,&result)) { + if (!raxFind(server.errors, (unsigned char *)fullerr, namelen, &result)) { struct serverError *error = zmalloc(sizeof(*error)); error->count = 1; raxInsert(server.errors, (unsigned char *)fullerr, namelen, error, NULL); diff --git a/src/server.h b/src/server.h index 24656641c4..330b54cb7a 100644 --- a/src/server.h +++ b/src/server.h @@ -1579,8 +1579,8 @@ struct valkeyServer { dict *commands; /* Command table */ dict *orig_commands; /* Command table before command renaming. */ aeEventLoop *el; - rax *errors; /* Errors table */ - unsigned int lruclock; /* Clock for LRU eviction */ + rax *errors; /* Errors table */ + unsigned int lruclock; /* Clock for LRU eviction */ volatile sig_atomic_t shutdown_asap; /* Shutdown ordered by signal handler. */ mstime_t shutdown_mstime; /* Timestamp to limit graceful shutdown. */ int last_sig_received; /* Indicates the last SIGNAL received, if any (e.g., SIGINT or SIGTERM). */ From 05e946fc92dc5d034e5982bcb88f752d214e0f49 Mon Sep 17 00:00:00 2001 From: KarthikSubbarao Date: Mon, 8 Jul 2024 16:52:08 +0000 Subject: [PATCH 09/15] clang-format-check suggestions Signed-off-by: KarthikSubbarao --- src/server.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/server.h b/src/server.h index 330b54cb7a..3faa906e9e 100644 --- a/src/server.h +++ b/src/server.h @@ -2595,15 +2595,18 @@ void serverSetCpuAffinity(const char *cpulist); void dictVanillaFree(dict *d, void *val); /* ERROR STATS constants */ -#define ERROR_STATS_LUA_LIMIT 128 /* After the errors RAX reaches this limit, instead of tracking - custom LUA errors, we track the error under the symbol below. */ +#define ERROR_STATS_LUA_LIMIT \ + 128 /* After the errors RAX reaches this limit, instead of tracking \ + custom LUA errors, we track the error under the symbol below. */ #define LUA_ERRORSTATS_OVERFLOW_ERR "LUA_ERRORSTATS_OVERFLOW" /* afterErrorReply flags */ -#define ERR_REPLY_FLAG_NO_STATS_UPDATE (1ULL<<0) /* Indicating that we should not update - error stats after sending error reply */ -#define ERR_REPLY_FLAG_LUA (1ULL<<1) /* Indicating that the error message is from LUA replying - to a client */ +#define ERR_REPLY_FLAG_NO_STATS_UPDATE \ + (1ULL << 0) /* Indicating that we should not update \ + error stats after sending error reply */ +#define ERR_REPLY_FLAG_LUA \ + (1ULL << 1) /* Indicating that the error message is from LUA replying \ + to a client */ /* networking.c -- Networking and Client related operations */ client *createClient(connection *conn); From 23f598d056dc35cf67dd35494bb04d6861cb4c11 Mon Sep 17 00:00:00 2001 From: KarthikSubbarao Date: Tue, 9 Jul 2024 21:14:09 +0000 Subject: [PATCH 10/15] Revert "clang-format-check suggestions" This reverts commit 05e946fc92dc5d034e5982bcb88f752d214e0f49. Signed-off-by: KarthikSubbarao --- src/server.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/server.h b/src/server.h index 3faa906e9e..330b54cb7a 100644 --- a/src/server.h +++ b/src/server.h @@ -2595,18 +2595,15 @@ void serverSetCpuAffinity(const char *cpulist); void dictVanillaFree(dict *d, void *val); /* ERROR STATS constants */ -#define ERROR_STATS_LUA_LIMIT \ - 128 /* After the errors RAX reaches this limit, instead of tracking \ - custom LUA errors, we track the error under the symbol below. */ +#define ERROR_STATS_LUA_LIMIT 128 /* After the errors RAX reaches this limit, instead of tracking + custom LUA errors, we track the error under the symbol below. */ #define LUA_ERRORSTATS_OVERFLOW_ERR "LUA_ERRORSTATS_OVERFLOW" /* afterErrorReply flags */ -#define ERR_REPLY_FLAG_NO_STATS_UPDATE \ - (1ULL << 0) /* Indicating that we should not update \ - error stats after sending error reply */ -#define ERR_REPLY_FLAG_LUA \ - (1ULL << 1) /* Indicating that the error message is from LUA replying \ - to a client */ +#define ERR_REPLY_FLAG_NO_STATS_UPDATE (1ULL<<0) /* Indicating that we should not update + error stats after sending error reply */ +#define ERR_REPLY_FLAG_LUA (1ULL<<1) /* Indicating that the error message is from LUA replying + to a client */ /* networking.c -- Networking and Client related operations */ client *createClient(connection *conn); From fe6e57423dcfdee5853dfc785d25191f485e484a Mon Sep 17 00:00:00 2001 From: KarthikSubbarao Date: Tue, 9 Jul 2024 22:35:38 +0000 Subject: [PATCH 11/15] Allow custom errors already tracked to be incremented when past limit + update overflow error prefix name Signed-off-by: KarthikSubbarao --- src/networking.c | 28 +++++++++++++++------------- src/script_lua.c | 2 +- src/server.h | 11 +++++------ tests/unit/info.tcl | 16 +++++++++++----- 4 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/networking.c b/src/networking.c index 0f21c63be1..e51adc75ce 100644 --- a/src/networking.c +++ b/src/networking.c @@ -544,24 +544,26 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) { /* Increment the global error counter */ server.stat_total_error_replies++; /* Increment the error stats */ - /* After the errors RAX reaches this limit, instead of tracking - * custom LUA errors, we track the error under `error_LUA` */ - if (flags & ERR_REPLY_FLAG_LUA && raxSize(server.errors) >= ERROR_STATS_LUA_LIMIT) { - incrementErrorCount(LUA_ERRORSTATS_OVERFLOW_ERR, strlen(LUA_ERRORSTATS_OVERFLOW_ERR)); - } else if (s[0] != '-') { - /* If the string already starts with "-..." then the error prefix - * is provided by the caller ( we limit the search to 32 chars). Otherwise we use "-ERR". */ - incrementErrorCount("ERR", 3); - } else { + /* If the string already starts with "-..." then the error prefix + * is provided by the caller (we limit the search to 32 chars). Otherwise we use "-ERR". */ + char *err_prefix = "ERR"; + size_t prefix_len = 3; + if (s[0] == '-') { char *spaceloc = memchr(s, ' ', len < 32 ? len : 32); + /* If we cannot retrieve the error prefix, use the default: "ERR". */ if (spaceloc) { const size_t errEndPos = (size_t)(spaceloc - s); - incrementErrorCount(s + 1, errEndPos - 1); - } else { - /* Fallback to ERR if we can't retrieve the error prefix */ - incrementErrorCount("ERR", 3); + err_prefix = (char *)s + 1; + prefix_len = errEndPos - 1; } } + /* After the errors RAX reaches its limit, instead of tracking + * custom errors (e.g. LUA), we track the error under `errorstat_ERRORSTATS_OVERFLOW` */ + if (flags & ERR_REPLY_FLAG_CUSTOM && !raxFind(server.errors, (unsigned char *)err_prefix, prefix_len, NULL) && raxSize(server.errors) >= ERROR_STATS_LIMIT) { + err_prefix = ERRORSTATS_OVERFLOW_ERR; + prefix_len = strlen(ERRORSTATS_OVERFLOW_ERR); + } + incrementErrorCount(err_prefix, prefix_len); } else { /* stat_total_error_replies will not be updated, which means that * the cmd stats will not be updated as well, we still want this command diff --git a/src/script_lua.c b/src/script_lua.c index b7467f3772..e9480d89d0 100644 --- a/src/script_lua.c +++ b/src/script_lua.c @@ -618,7 +618,7 @@ static void luaReplyToRedisReply(client *c, client *script_client, lua_State *lu errorInfo err_info = {0}; luaExtractErrorInformation(lua, &err_info); addReplyErrorFormatEx( - c, ERR_REPLY_FLAG_LUA | (err_info.ignore_err_stats_update ? ERR_REPLY_FLAG_NO_STATS_UPDATE : 0), "-%s", + c, ERR_REPLY_FLAG_CUSTOM | (err_info.ignore_err_stats_update ? ERR_REPLY_FLAG_NO_STATS_UPDATE : 0), "-%s", err_info.msg); luaErrorInformationDiscard(&err_info); lua_pop(lua, 1); /* pop the result table */ diff --git a/src/server.h b/src/server.h index 330b54cb7a..917e1fc73c 100644 --- a/src/server.h +++ b/src/server.h @@ -2595,15 +2595,14 @@ void serverSetCpuAffinity(const char *cpulist); void dictVanillaFree(dict *d, void *val); /* ERROR STATS constants */ -#define ERROR_STATS_LUA_LIMIT 128 /* After the errors RAX reaches this limit, instead of tracking - custom LUA errors, we track the error under the symbol below. */ -#define LUA_ERRORSTATS_OVERFLOW_ERR "LUA_ERRORSTATS_OVERFLOW" +#define ERROR_STATS_LIMIT 128 /* Once the errors RAX reaches this limit, instead of tracking custom \ + errors (e.g. LUA), we track the error under the prefix below. */ +#define ERRORSTATS_OVERFLOW_ERR "ERRORSTATS_OVERFLOW" /* afterErrorReply flags */ -#define ERR_REPLY_FLAG_NO_STATS_UPDATE (1ULL<<0) /* Indicating that we should not update +#define ERR_REPLY_FLAG_NO_STATS_UPDATE (1ULL<<0) /* Indicating that we should not update \ error stats after sending error reply */ -#define ERR_REPLY_FLAG_LUA (1ULL<<1) /* Indicating that the error message is from LUA replying - to a client */ +#define ERR_REPLY_FLAG_CUSTOM (1ULL<<1) /* Indicates the error message is custom (e.g. from LUA). */ /* networking.c -- Networking and Client related operations */ client *createClient(connection *conn); diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl index 181eb1412f..d2ca6edc37 100644 --- a/tests/unit/info.tcl +++ b/tests/unit/info.tcl @@ -281,24 +281,30 @@ start_server {tags {"info" "external:skip"}} { r eval {return server.error_reply(string.format('%s my error message', ARGV[1]))} 0 $j } } - # Validate that custom LUA errors are tracked in `LUA_ERRORSTATS_OVERFLOW` when errors + # Validate that custom LUA errors are tracked in `ERRORSTATS_OVERFLOW` when errors # has 128 entries. - assert_equal "count=972" [errorstat LUA_ERRORSTATS_OVERFLOW] + assert_equal "count=972" [errorstat ERRORSTATS_OVERFLOW] # Validate that non LUA errors continue to be tracked even when we have >=128 entries. assert_error {ERR syntax error} {r set a b c d e f g} assert_equal "count=1" [errorstat ERR] + # Validate that custom errors that were already tracked continue to increment when past 128 entries. + assert_equal "count=1" [errorstat 1] + assert_error "1 my error message" { + r eval {return server.error_reply(string.format('1 my error message', ARGV[1]))} 0 + } + assert_equal "count=2" [errorstat 1] # Test LUA error variants. assert_error "My error message" {r eval {return server.error_reply('My error message')} 0} assert_error "My error message" {r eval {return {err = 'My error message'}} 0} - assert_equal "count=974" [errorstat LUA_ERRORSTATS_OVERFLOW] + assert_equal "count=974" [errorstat ERRORSTATS_OVERFLOW] # Function calls that contain custom error messages should call be included in overflow counter r FUNCTION LOAD replace [format "#!lua name=mylib\nserver.register_function('customerrorfn', function() return server.error_reply('My error message') end)"] assert_error "My error message" {r fcall customerrorfn 0} - assert_equal "count=975" [errorstat LUA_ERRORSTATS_OVERFLOW] + assert_equal "count=975" [errorstat ERRORSTATS_OVERFLOW] # Function calls that contain non lua errors should continue to be tracked normally (in a separate counter). r FUNCTION LOAD replace [format "#!lua name=mylib\nserver.register_function('invalidgetcmd', function() return server.call('get', 'x', 'x', 'x') end)"] assert_error "ERR Wrong number of args*" {r fcall invalidgetcmd 0} - assert_equal "count=975" [errorstat LUA_ERRORSTATS_OVERFLOW] + assert_equal "count=975" [errorstat ERRORSTATS_OVERFLOW] assert_equal "count=2" [errorstat ERR] } From d84992871ecd5be65b103ec88e7040d01d4dadd1 Mon Sep 17 00:00:00 2001 From: KarthikSubbarao Date: Tue, 9 Jul 2024 23:17:05 +0000 Subject: [PATCH 12/15] fmt changes Signed-off-by: KarthikSubbarao --- src/networking.c | 7 ++++--- src/script_lua.c | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/networking.c b/src/networking.c index e51adc75ce..9da66d6eaf 100644 --- a/src/networking.c +++ b/src/networking.c @@ -543,8 +543,8 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) { if (!(flags & ERR_REPLY_FLAG_NO_STATS_UPDATE)) { /* Increment the global error counter */ server.stat_total_error_replies++; - /* Increment the error stats */ - /* If the string already starts with "-..." then the error prefix + /* Increment the error stats + * If the string already starts with "-..." then the error prefix * is provided by the caller (we limit the search to 32 chars). Otherwise we use "-ERR". */ char *err_prefix = "ERR"; size_t prefix_len = 3; @@ -559,7 +559,8 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) { } /* After the errors RAX reaches its limit, instead of tracking * custom errors (e.g. LUA), we track the error under `errorstat_ERRORSTATS_OVERFLOW` */ - if (flags & ERR_REPLY_FLAG_CUSTOM && !raxFind(server.errors, (unsigned char *)err_prefix, prefix_len, NULL) && raxSize(server.errors) >= ERROR_STATS_LIMIT) { + if (flags & ERR_REPLY_FLAG_CUSTOM && !raxFind(server.errors, (unsigned char *)err_prefix, prefix_len, NULL) && + raxSize(server.errors) >= ERROR_STATS_LIMIT) { err_prefix = ERRORSTATS_OVERFLOW_ERR; prefix_len = strlen(ERRORSTATS_OVERFLOW_ERR); } diff --git a/src/script_lua.c b/src/script_lua.c index e9480d89d0..19f712fa3a 100644 --- a/src/script_lua.c +++ b/src/script_lua.c @@ -618,8 +618,8 @@ static void luaReplyToRedisReply(client *c, client *script_client, lua_State *lu errorInfo err_info = {0}; luaExtractErrorInformation(lua, &err_info); addReplyErrorFormatEx( - c, ERR_REPLY_FLAG_CUSTOM | (err_info.ignore_err_stats_update ? ERR_REPLY_FLAG_NO_STATS_UPDATE : 0), "-%s", - err_info.msg); + c, ERR_REPLY_FLAG_CUSTOM | (err_info.ignore_err_stats_update ? ERR_REPLY_FLAG_NO_STATS_UPDATE : 0), + "-%s", err_info.msg); luaErrorInformationDiscard(&err_info); lua_pop(lua, 1); /* pop the result table */ return; From 54ce6db9d0299db925ee37e7c6e80ad0eba13f73 Mon Sep 17 00:00:00 2001 From: KarthikSubbarao Date: Wed, 10 Jul 2024 02:35:24 +0000 Subject: [PATCH 13/15] Reorder check Signed-off-by: KarthikSubbarao --- src/networking.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 9da66d6eaf..c9104eb616 100644 --- a/src/networking.c +++ b/src/networking.c @@ -559,8 +559,8 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) { } /* After the errors RAX reaches its limit, instead of tracking * custom errors (e.g. LUA), we track the error under `errorstat_ERRORSTATS_OVERFLOW` */ - if (flags & ERR_REPLY_FLAG_CUSTOM && !raxFind(server.errors, (unsigned char *)err_prefix, prefix_len, NULL) && - raxSize(server.errors) >= ERROR_STATS_LIMIT) { + if (flags & ERR_REPLY_FLAG_CUSTOM && raxSize(server.errors) >= ERROR_STATS_LIMIT && + !raxFind(server.errors, (unsigned char *)err_prefix, prefix_len, NULL)) { err_prefix = ERRORSTATS_OVERFLOW_ERR; prefix_len = strlen(ERRORSTATS_OVERFLOW_ERR); } From 85d003e15be0fdd84d1a37006d2712817b7dc5b5 Mon Sep 17 00:00:00 2001 From: KarthikSubbarao Date: Wed, 10 Jul 2024 16:08:32 +0000 Subject: [PATCH 14/15] fmt changes Signed-off-by: KarthikSubbarao --- src/networking.c | 2 +- src/server.h | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/networking.c b/src/networking.c index c9104eb616..a0d0a5f21c 100644 --- a/src/networking.c +++ b/src/networking.c @@ -559,7 +559,7 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) { } /* After the errors RAX reaches its limit, instead of tracking * custom errors (e.g. LUA), we track the error under `errorstat_ERRORSTATS_OVERFLOW` */ - if (flags & ERR_REPLY_FLAG_CUSTOM && raxSize(server.errors) >= ERROR_STATS_LIMIT && + if (flags & ERR_REPLY_FLAG_CUSTOM && raxSize(server.errors) >= ERRORSTATS_LIMIT && !raxFind(server.errors, (unsigned char *)err_prefix, prefix_len, NULL)) { err_prefix = ERRORSTATS_OVERFLOW_ERR; prefix_len = strlen(ERRORSTATS_OVERFLOW_ERR); diff --git a/src/server.h b/src/server.h index 917e1fc73c..37dcfce5e3 100644 --- a/src/server.h +++ b/src/server.h @@ -2595,14 +2595,18 @@ void serverSetCpuAffinity(const char *cpulist); void dictVanillaFree(dict *d, void *val); /* ERROR STATS constants */ -#define ERROR_STATS_LIMIT 128 /* Once the errors RAX reaches this limit, instead of tracking custom \ - errors (e.g. LUA), we track the error under the prefix below. */ + +/* Once the errors RAX reaches this limit, instead of tracking custom + * errors (e.g. LUA), we track the error under the prefix below. */ +#define ERRORSTATS_LIMIT 128 #define ERRORSTATS_OVERFLOW_ERR "ERRORSTATS_OVERFLOW" /* afterErrorReply flags */ -#define ERR_REPLY_FLAG_NO_STATS_UPDATE (1ULL<<0) /* Indicating that we should not update \ - error stats after sending error reply */ -#define ERR_REPLY_FLAG_CUSTOM (1ULL<<1) /* Indicates the error message is custom (e.g. from LUA). */ + +/* Indicating that we should not update error stats after sending error reply */ +#define ERR_REPLY_FLAG_NO_STATS_UPDATE (1ULL << 0) +/* Indicates the error message is custom (e.g. from LUA). */ +#define ERR_REPLY_FLAG_CUSTOM (1ULL << 1) /* networking.c -- Networking and Client related operations */ client *createClient(connection *conn); From 08e97baeab1057e81dbad2c87f57947c66d47467 Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Thu, 11 Jul 2024 10:25:41 -0700 Subject: [PATCH 15/15] Update src/server.h Signed-off-by: Madelyn Olson --- src/server.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.h b/src/server.h index 37dcfce5e3..69c69d93ac 100644 --- a/src/server.h +++ b/src/server.h @@ -2603,7 +2603,7 @@ void dictVanillaFree(dict *d, void *val); /* afterErrorReply flags */ -/* Indicating that we should not update error stats after sending error reply */ +/* Indicating that we should not update error stats after sending error reply. */ #define ERR_REPLY_FLAG_NO_STATS_UPDATE (1ULL << 0) /* Indicates the error message is custom (e.g. from LUA). */ #define ERR_REPLY_FLAG_CUSTOM (1ULL << 1)