From 543010844a65cd7e6e3f950f33c5dc2854c0988a Mon Sep 17 00:00:00 2001 From: M Kelly Date: Tue, 23 Jul 2024 12:35:54 -0400 Subject: [PATCH] HPCC-31990 Add timeout to DNS lookups for soapcalls using a threadpool 5 Signed-off-by: M Kelly --- system/jlib/jsocket.cpp | 52 +++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/system/jlib/jsocket.cpp b/system/jlib/jsocket.cpp index ffa9aeba3bf..73b286ae65b 100644 --- a/system/jlib/jsocket.cpp +++ b/system/jlib/jsocket.cpp @@ -24,6 +24,7 @@ look at loopback */ +#include #include #include #include @@ -474,7 +475,7 @@ extern jlib_decl bool queryKeepAlive(int &time, int &intvl, int &probes) return hasKeepAlive; } -static bool getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr); +static int getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr); static CriticalSection queryDNSCS; @@ -483,7 +484,7 @@ class CAddrInfoThreadArgs : public CInterface public: StringAttr name; unsigned netaddr[4] = { 0, 0, 0, 0 }; - std::atomic result { false }; + std::atomic retCode { EAI_SYSTEM }; CAddrInfoThreadArgs(const char *_name) : name(_name) { } }; @@ -502,7 +503,7 @@ class CAddrPoolThread : public CInterface, implements IPooledThread virtual void threadmain() override { - localAddrThreadInfo->result = getAddressInfo(localAddrThreadInfo->name.get(), localAddrThreadInfo->netaddr, false); + localAddrThreadInfo->retCode = getAddressInfo(localAddrThreadInfo->name.get(), localAddrThreadInfo->netaddr, false); } virtual bool stop() override @@ -3431,11 +3432,12 @@ static void RecursionSafeLogErr(int ret, int ref, const char *msg, unsigned line } } -bool getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr) +int getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr) { struct addrinfo hints; struct addrinfo *addrInfo = NULL; int retry=10; + int retCode; // each retry could take up to several seconds, depending on how DNS resolver is configured // should a few specific non-zero return codes break out early from retry loop (EAI_NONAME) ? @@ -3446,8 +3448,8 @@ bool getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr) if (IP4only || (!IP6preferred)) hints.ai_family = AF_INET; // hints.ai_flags = AI_CANONNAME | AI_ADDRCONFIG | AI_V4MAPPED - int ret = getaddrinfo(name, NULL, &hints, &addrInfo); - if (!ret) + retCode = getaddrinfo(name, NULL, &hints, &addrInfo); + if (0 == retCode) break; if (--retry > 0) Sleep((10-retry)*100); @@ -3455,10 +3457,10 @@ bool getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr) { // use gai_strerror(ret) to get meaningful error text ? if (okToLogErr) - RecursionSafeLogErr(ret, 1, "getaddrinfo failed", __LINE__, name); - return false; + RecursionSafeLogErr(retCode, 1, "getaddrinfo failed", __LINE__, name); + return retCode; } - if (ret == EAI_NONAME) + if (EAI_NONAME == retCode) { // try one more time, but why ? // MCK TODO: probably should only retry on EAI_AGAIN and possibly EAI_SYSTEM ... @@ -3512,7 +3514,10 @@ bool getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr) } } freeaddrinfo(addrInfo); - return best!=NULL; + if (best!=NULL) + return 0; + else + return EAI_NONAME; // or EAI_NODATA ? } static bool lookupHostAddress(const char *name, unsigned *netaddr, unsigned timeoutms=INFINITE) @@ -3563,7 +3568,7 @@ static bool lookupHostAddress(const char *name, unsigned *netaddr, unsigned time } #if defined(__linux__) || defined (__APPLE__) || defined(getaddrinfo) - bool ret = false; + int retCode = 0; if ( (timeoutms != INFINITE) && (useDNSTimeout()) ) // could addrInfoPool be NULL ? { // getaddrinfo_a() offers an async getaddrinfo method, but has some limitations and a possible mem leak @@ -3602,23 +3607,34 @@ static bool lookupHostAddress(const char *name, unsigned *netaddr, unsigned time dnstimeout.timedout(&remaining); if (addrInfoPool->join(thrdHandle, remaining)) { - if (addrThreadInfo->result) + if (0 == addrThreadInfo->retCode) { memcpy(netaddr, addrThreadInfo->netaddr, sizeof(addrThreadInfo->netaddr)); return true; } + else + { + // use gai_strerror(ret) to get meaningful error text ? + StringBuffer emsg; + emsg.appendf("getaddrinfo failed (thread)"); + RecursionSafeLogErr(addrThreadInfo->retCode.load(), 1, emsg.str(), __LINE__, name); + return false; + } } - // if join() returns false either call failed or thread still running, but its detached, + // if join() returns false thread still running, but its detached, // will terminate, release thread args and return to pool on its own ... StringBuffer emsg; - emsg.appendf("getaddrinfo failed or timed out (thread) (%d)", timeoutms); + emsg.appendf("getaddrinfo timed out (thread) (%d)", timeoutms); RecursionSafeLogErr(EAI_AGAIN, 1, emsg.str(), __LINE__, name); return false; } + + retCode = getAddressInfo(name, netaddr, true); + if (0 == retCode) + return true; else - ret = getAddressInfo(name, netaddr, true); - return ret; + return false; #endif } @@ -6613,8 +6629,8 @@ bool SocketEndpointArray::fromName(const char *name, unsigned defport) } #if defined(__linux__) || defined (__APPLE__) || defined(getaddrinfo) unsigned netaddr[4]; - bool ret = getAddressInfo(name, netaddr, true); - if (!ret) + int retCode = getAddressInfo(name, netaddr, true); + if (0 == retCode) { SocketEndpoint ep; ep.setNetAddress(sizeof(netaddr),netaddr);