Skip to content

Commit

Permalink
HPCC-31990 Add timeout to DNS lookups for soapcalls using a threadpool 5
Browse files Browse the repository at this point in the history
Signed-off-by: M Kelly <[email protected]>
  • Loading branch information
mckellyln committed Jul 23, 2024
1 parent 86d4a90 commit 5430108
Showing 1 changed file with 34 additions and 18 deletions.
52 changes: 34 additions & 18 deletions system/jlib/jsocket.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
look at loopback
*/

#include <netdb.h>
#include <string>
#include <unordered_set>
#include <functional>
Expand Down Expand Up @@ -474,7 +475,7 @@ extern jlib_decl bool queryKeepAlive(int &time, int &intvl, int &probes)
return hasKeepAlive;
}

static bool getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr);
static int getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr);

static CriticalSection queryDNSCS;

Expand All @@ -483,7 +484,7 @@ class CAddrInfoThreadArgs : public CInterface
public:
StringAttr name;
unsigned netaddr[4] = { 0, 0, 0, 0 };
std::atomic<bool> result { false };
std::atomic<int> retCode { EAI_SYSTEM };

CAddrInfoThreadArgs(const char *_name) : name(_name) { }
};
Expand All @@ -502,7 +503,7 @@ class CAddrPoolThread : public CInterface, implements IPooledThread

virtual void threadmain() override
{
localAddrThreadInfo->result = getAddressInfo(localAddrThreadInfo->name.get(), localAddrThreadInfo->netaddr, false);
localAddrThreadInfo->retCode = getAddressInfo(localAddrThreadInfo->name.get(), localAddrThreadInfo->netaddr, false);
}

virtual bool stop() override
Expand Down Expand Up @@ -3431,11 +3432,12 @@ static void RecursionSafeLogErr(int ret, int ref, const char *msg, unsigned line
}
}

bool getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr)
int getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr)
{
struct addrinfo hints;
struct addrinfo *addrInfo = NULL;
int retry=10;
int retCode;

// each retry could take up to several seconds, depending on how DNS resolver is configured
// should a few specific non-zero return codes break out early from retry loop (EAI_NONAME) ?
Expand All @@ -3446,19 +3448,19 @@ bool getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr)
if (IP4only || (!IP6preferred))
hints.ai_family = AF_INET;
// hints.ai_flags = AI_CANONNAME | AI_ADDRCONFIG | AI_V4MAPPED
int ret = getaddrinfo(name, NULL, &hints, &addrInfo);
if (!ret)
retCode = getaddrinfo(name, NULL, &hints, &addrInfo);
if (0 == retCode)
break;
if (--retry > 0)
Sleep((10-retry)*100);
else
{
// use gai_strerror(ret) to get meaningful error text ?
if (okToLogErr)
RecursionSafeLogErr(ret, 1, "getaddrinfo failed", __LINE__, name);
return false;
RecursionSafeLogErr(retCode, 1, "getaddrinfo failed", __LINE__, name);
return retCode;
}
if (ret == EAI_NONAME)
if (EAI_NONAME == retCode)
{
// try one more time, but why ?
// MCK TODO: probably should only retry on EAI_AGAIN and possibly EAI_SYSTEM ...
Expand Down Expand Up @@ -3512,7 +3514,10 @@ bool getAddressInfo(const char *name, unsigned *netaddr, bool okToLogErr)
}
}
freeaddrinfo(addrInfo);
return best!=NULL;
if (best!=NULL)
return 0;
else
return EAI_NONAME; // or EAI_NODATA ?
}

static bool lookupHostAddress(const char *name, unsigned *netaddr, unsigned timeoutms=INFINITE)
Expand Down Expand Up @@ -3563,7 +3568,7 @@ static bool lookupHostAddress(const char *name, unsigned *netaddr, unsigned time
}

#if defined(__linux__) || defined (__APPLE__) || defined(getaddrinfo)
bool ret = false;
int retCode = 0;
if ( (timeoutms != INFINITE) && (useDNSTimeout()) ) // could addrInfoPool be NULL ?
{
// getaddrinfo_a() offers an async getaddrinfo method, but has some limitations and a possible mem leak
Expand Down Expand Up @@ -3602,23 +3607,34 @@ static bool lookupHostAddress(const char *name, unsigned *netaddr, unsigned time
dnstimeout.timedout(&remaining);
if (addrInfoPool->join(thrdHandle, remaining))
{
if (addrThreadInfo->result)
if (0 == addrThreadInfo->retCode)
{
memcpy(netaddr, addrThreadInfo->netaddr, sizeof(addrThreadInfo->netaddr));
return true;
}
else
{
// use gai_strerror(ret) to get meaningful error text ?
StringBuffer emsg;
emsg.appendf("getaddrinfo failed (thread)");
RecursionSafeLogErr(addrThreadInfo->retCode.load(), 1, emsg.str(), __LINE__, name);
return false;
}
}
// if join() returns false either call failed or thread still running, but its detached,
// if join() returns false thread still running, but its detached,
// will terminate, release thread args and return to pool on its own ...

StringBuffer emsg;
emsg.appendf("getaddrinfo failed or timed out (thread) (%d)", timeoutms);
emsg.appendf("getaddrinfo timed out (thread) (%d)", timeoutms);
RecursionSafeLogErr(EAI_AGAIN, 1, emsg.str(), __LINE__, name);
return false;
}

retCode = getAddressInfo(name, netaddr, true);
if (0 == retCode)
return true;
else
ret = getAddressInfo(name, netaddr, true);
return ret;
return false;
#endif
}

Expand Down Expand Up @@ -6613,8 +6629,8 @@ bool SocketEndpointArray::fromName(const char *name, unsigned defport)
}
#if defined(__linux__) || defined (__APPLE__) || defined(getaddrinfo)
unsigned netaddr[4];
bool ret = getAddressInfo(name, netaddr, true);
if (!ret)
int retCode = getAddressInfo(name, netaddr, true);
if (0 == retCode)
{
SocketEndpoint ep;
ep.setNetAddress(sizeof(netaddr),netaddr);
Expand Down

0 comments on commit 5430108

Please sign in to comment.