From 1e7c7ad5d25dcbef39b933728a2201b4c8e39171 Mon Sep 17 00:00:00 2001
From: Christian Kreibich
Date: Thu, 21 Nov 2024 17:23:42 -0800
Subject: [PATCH] Add util::nofile_cap_limits() to manage maximum number of
 file descriptors

The main purpose of this function is to disarm excessive memory use on
systems with very large limits for open file descriptors, where libkqueue
currently wastes gigabytes of memory by allocating state for every
possible file descriptor. This particularly triggers in Docker
environments until libkqueue gets fixed or containerd changes its
default-uncapped fd limit behavior.

The function caps the maximum to a default of 1M open fds. The user can
override or disable the behavior via the ZEEK_NOFILE_MAX environment
variable.

References:
https://github.com/mheily/libkqueue/issues/153
https://github.com/moby/moby/issues/38814
---
 src/util.cc | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/util.h  | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)

diff --git a/src/util.cc b/src/util.cc
index edc424fb4a2..2338e1adf28 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -2578,6 +2578,62 @@ TEST_CASE("util approx_equal") {
  */
 bool approx_equal(double a, double b, double tolerance) { return std::abs(a - b) < std::abs(tolerance); }
 
+NofileUpdates nofile_cap_limits() {
+    struct rlimit rl;
+
+    if ( getrlimit(RLIMIT_NOFILE, &rl) < 0 ) {
+        // We likely don't yet have a reporter when running this.
+        fprintf(stderr, "nofile_cap_limits(): getrlimit failed, %s\n", strerror(errno));
+        exit(1);
+    }
+
+    rlim_t orig_cur = rl.rlim_cur;
+    rlim_t orig_max = rl.rlim_max;
+    rlim_t safe_max = 1024 * 1024;
+
+    const char* nofile_max_str = getenv("ZEEK_NOFILE_MAX");
+
+    if ( nofile_max_str ) {
+        char* end = nullptr;
+        errno = 0;
+        unsigned long nofile_max = strtoul(nofile_max_str, &end, 10);
+
+        // An empty string is valid and disables capping (it parses as 0).
+        // Anything else must be consumed entirely by strtoul(). Since strtoul()
+        // accepts a leading '-' (negating the result) and merely saturates on
+        // overflow, reject both explicitly.
+        bool malformed = nofile_max_str[0] != '\0' && (end == nofile_max_str || end[0] != '\0');
+
+        if ( malformed || errno == ERANGE || strchr(nofile_max_str, '-') ) {
+            fprintf(stderr, "ZEEK_NOFILE_MAX must be a non-negative integer\n");
+            exit(1);
+        }
+
+        safe_max = nofile_max;
+    }
+
+    // A safe_max of 0 disables capping. Otherwise lower the hard limit only
+    // when it exceeds safe_max, and pull the soft limit down with it if needed.
+    if ( safe_max > 0 && safe_max < rl.rlim_max ) {
+        rl.rlim_max = safe_max;
+
+        if ( safe_max < rl.rlim_cur )
+            rl.rlim_cur = safe_max;
+
+        if ( setrlimit(RLIMIT_NOFILE, &rl) < 0 ) {
+            // rlim_t is not unsigned long on every platform, so convert
+            // explicitly to match the format specifiers.
+            fprintf(stderr, "nofile_cap_limits(): setrlimit to %llu/%llu failed, %s\n",
+                    static_cast<unsigned long long>(rl.rlim_cur), static_cast<unsigned long long>(rl.rlim_max),
+                    strerror(errno));
+            exit(1);
+        }
+    }
+
+    return {orig_cur, orig_max, rl.rlim_cur, rl.rlim_max, nofile_max_str != nullptr};
+}
+
+
 } // namespace zeek::util
 
 extern "C" void out_of_memory(const char* where) {
diff --git a/src/util.h b/src/util.h
index d1c8e5df48a..4e275ca8803 100644
--- a/src/util.h
+++ b/src/util.h
@@ -531,6 +531,41 @@ int memory_size_align(size_t offset, size_t size);
 // handed out by malloc.
 extern void get_memory_usage(uint64_t* total, uint64_t* malloced);
 
+// File descriptor limits.
+
+// Summarizes the outcome of nofile_cap_limits(): the soft ("cur") and hard
+// ("max") RLIMIT_NOFILE values before and after the adjustment.
+struct NofileUpdates {
+    uint64_t orig_cur = 0;
+    uint64_t orig_max = 0;
+    uint64_t new_cur = 0;
+    uint64_t new_max = 0;
+
+    // Whether the ZEEK_NOFILE_MAX env variable was set (even to an empty
+    // string), i.e. whether the user customized the fd limit adjustment:
+    bool user_configured = false;
+
+    // Predicate that indicates whether a limit adjustment occurred by default,
+    // without the user customizing via ZEEK_NOFILE_MAX.
+    bool did_default_adjustment() const { return ! user_configured && (orig_cur > new_cur || orig_max > new_max); }
+};
+
+// Checks for an unreasonably large maximum allowable number of open file
+// descriptors (as in "ulimit -n -H"), and caps that limit if excessive. The
+// default "sane" limit is 1M (1024*1024) fds. If the currently effective limit
+// (as in "ulimit -n") exceeds this limit, it too gets reduced.
+//
+// You can override the limit by setting the ZEEK_NOFILE_MAX environment
+// variable to the desired number. Setting it to an empty string or 0 disables
+// the capping mechanism.
+//
+// Exits Zeek with an error if ZEEK_NOFILE_MAX is not a non-negative integer,
+// or if this adjustment procedure fails.
+//
+// Returns a NofileUpdates struct summarizing the outcome.
+NofileUpdates nofile_cap_limits();
+
+
 // Class to be used as a third argument for STL maps to be able to use
 // char*'s as keys. Otherwise the pointer values will be compared instead of
 // the actual string values.