From 5ff1fe3a70b9293ffdfd3b9900c73729ec49acfb Mon Sep 17 00:00:00 2001 From: M Kelly Date: Thu, 3 Oct 2024 14:45:28 -0400 Subject: [PATCH] HPCC-32770 Soapcall stats for DNS, connect, num failures Signed-off-by: M Kelly --- common/thorhelper/thorsoapcall.cpp | 12 ++++++++++++ roxie/ccd/ccdserver.cpp | 4 ++-- system/jlib/jstatcodes.h | 5 +++++ system/jlib/jstats.cpp | 5 +++++ thorlcr/thorutil/thormisc.cpp | 2 +- 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/common/thorhelper/thorsoapcall.cpp b/common/thorhelper/thorsoapcall.cpp index 8ed046fffdf..1b241b43ac0 100644 --- a/common/thorhelper/thorsoapcall.cpp +++ b/common/thorhelper/thorsoapcall.cpp @@ -2469,9 +2469,14 @@ class CWSCAsyncFor : implements IWSCAsyncFor, public CInterface, public CAsyncFo checkTimeLimitExceeded(&remainingMS); Url &connUrl = master->proxyUrlArray.empty() ? url : master->proxyUrlArray.item(0); + CCycleTimer dnsTimer; + // TODO: for DNS, do we use timeoutMS or remainingMS or remainingMS / maxRetries+1 or ? ep.set(connUrl.host.get(), connUrl.port, master->timeoutMS); + master->logctx.noteStatistic(StTimeSoapcallDNS, dnsTimer.elapsedNs()); + master->activitySpanScope->setSpanAttribute("SoapcallDNSTimeMs", dnsTimer.elapsedMs()); + if (ep.isNull()) throw MakeStringException(-1, "Failed to resolve host '%s'", nullText(connUrl.host.get())); @@ -2492,6 +2497,8 @@ class CWSCAsyncFor : implements IWSCAsyncFor, public CInterface, public CAsyncFo isReused = false; keepAlive = true; + CCycleTimer connTimer; + // TODO: for each connect attempt, do we use timeoutMS or remainingMS or remainingMS / maxRetries or ? socket.setown(blacklist->connect(ep, master->logctx, (unsigned)master->maxRetries, master->timeoutMS, master->roxieAbortMonitor, master->rowProvider)); @@ -2521,11 +2528,16 @@ class CWSCAsyncFor : implements IWSCAsyncFor, public CInterface, public CAsyncFo throw makeStringException(0, err.str()); #endif } + + master->logctx.noteStatistic(StTimeSoapcallConn, connTimer.elapsedNs()); + master->activitySpanScope->setSpanAttribute("SoapcallConnTimeMs", connTimer.elapsedMs()); } break; } catch (IException *e) { + master->logctx.noteStatistic(StNumSoapcallConnFailures, 1); + if (master->timeLimitExceeded) { master->activitySpanScope->recordError(SpanError("Time Limit Exceeded", e->errorCode(), true, true)); diff --git a/roxie/ccd/ccdserver.cpp b/roxie/ccd/ccdserver.cpp index b5d9ebbf133..51904f6430f 100644 --- a/roxie/ccd/ccdserver.cpp +++ b/roxie/ccd/ccdserver.cpp @@ -496,7 +496,7 @@ static const StatisticsMapping indexStatistics({StNumServerCacheHits, StNumIndex static const StatisticsMapping diskStatistics({StNumServerCacheHits, StNumDiskRowsRead, StNumDiskSeeks, StNumDiskAccepted, StNumDiskRejected, StSizeAgentReply, StTimeAgentWait, StTimeAgentQueue, StTimeAgentProcess, StTimeIBYTIDelay, StNumAckRetries, StSizeContinuationData, StNumContinuationRequests }, actStatistics); -static const StatisticsMapping soapStatistics({ StTimeSoapcall }, actStatistics); +static const StatisticsMapping soapStatistics({ StTimeSoapcall, StTimeSoapcallDNS, StTimeSoapcallConn, StNumSoapcallConnFailures }, actStatistics); static const StatisticsMapping groupStatistics({ StNumGroups, StNumGroupMax }, actStatistics); static const StatisticsMapping sortStatistics({ StTimeSortElapsed }, actStatistics); static const StatisticsMapping indexWriteStatistics({ StNumDuplicateKeys, StNumLeafCacheAdds, StNumNodeCacheAdds, StNumBlobCacheAdds }, actStatistics); @@ -518,7 +518,7 @@ extern const StatisticsMapping accumulatedStatistics({StWhenFirstRow, StTimeLoca StCycleBlobFetchCycles, StCycleLeafFetchCycles, StCycleNodeFetchCycles, StTimeBlobFetch, StTimeLeafFetch, StTimeNodeFetch, StNumNodeDiskFetches, StNumLeafDiskFetches, StNumBlobDiskFetches, StNumDiskRejected, StSizeAgentReply, StTimeAgentWait, - StTimeSoapcall, + StTimeSoapcall, StTimeSoapcallDNS, StTimeSoapcallConn, StNumSoapcallConnFailures, StNumGroups, StTimeSortElapsed, StNumDuplicateKeys, diff --git a/system/jlib/jstatcodes.h b/system/jlib/jstatcodes.h index aa133ad5ba2..ca47be0fb41 100644 --- a/system/jlib/jstatcodes.h +++ b/system/jlib/jstatcodes.h @@ -311,6 +311,11 @@ enum StatisticKind StNumMatchRightRowsMax, StNumMatchCandidates, StNumMatchCandidatesMax, + StTimeSoapcallDNS, // Time spent in DNS lookups for soapcalls + StTimeSoapcallConn, // Time spent in connect[+SSL_connect] for soapcalls + StCycleSoapcallDNSCycles, + StCycleSoapcallConnCycles, + StNumSoapcallConnFailures, StMax, //For any quantity there is potentially the following variants. diff --git a/system/jlib/jstats.cpp b/system/jlib/jstats.cpp index 6c768151019..f187f3d180f 100644 --- a/system/jlib/jstats.cpp +++ b/system/jlib/jstats.cpp @@ -983,6 +983,11 @@ static const constexpr StatisticMeta statsMetaData[StMax] = { { NUMSTAT(MatchRightRowsMax), "The largest number of right rows in a join group" }, { NUMSTAT(MatchCandidates), "The number of candidate combinations of left and right rows forming join groups" }, { NUMSTAT(MatchCandidatesMax), "The largest number of candidate combinations of left and right rows in a single group" }, + { TIMESTAT(SoapcallDNS), "The time taken for DNS lookup in SOAPCALL" }, + { TIMESTAT(SoapcallConn), "The time taken for connect[+SSL_connect] in SOAPCALL" }, + { CYCLESTAT(SoapcallDNS) }, + { CYCLESTAT(SoapcallConn) }, + { NUMSTAT(SoapcallConnFailures), "The number of SOAPCALL connect failures" }, }; static MapStringTo statisticNameMap(true); diff --git a/thorlcr/thorutil/thormisc.cpp b/thorlcr/thorutil/thormisc.cpp index da8a8b06144..226a8394a27 100644 --- a/thorlcr/thorutil/thormisc.cpp +++ b/thorlcr/thorutil/thormisc.cpp @@ -75,7 +75,7 @@ static Owned ClusterMPAllocator; // stat. mappings shared between master and slave activities const StatisticsMapping spillStatistics({StTimeSpillElapsed, StTimeSortElapsed, StNumSpills, StSizeSpillFile, StSizePeakTempDisk}); const StatisticsMapping executeStatistics({StTimeTotalExecute, StTimeLocalExecute, StTimeBlocked}); -const StatisticsMapping soapcallStatistics({StTimeSoapcall}); +const StatisticsMapping soapcallStatistics({StTimeSoapcall, StTimeSoapcallDNS, StTimeSoapcallConn, StNumSoapcallConnFailures}); const StatisticsMapping basicActivityStatistics({}, executeStatistics, spillStatistics); const StatisticsMapping groupActivityStatistics({StNumGroups, StNumGroupMax}, basicActivityStatistics); const StatisticsMapping indexReadFileStatistics({}, diskReadRemoteStatistics, jhtreeCacheStatistics);