Skip to content

Commit

Permalink
HPCC-30599 Fix file access costs for keyed join
Browse files Browse the repository at this point in the history
Signed-off-by: Shamser Ahmed <[email protected]>
  • Loading branch information
jakesmith authored and shamser committed Dec 7, 2023
1 parent c3a5532 commit 41fafa0
Show file tree
Hide file tree
Showing 11 changed files with 199 additions and 127 deletions.
4 changes: 4 additions & 0 deletions common/thorhelper/thorcommon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,10 @@ class CStatsContextLogger : public CSimpleInterfaceOf<IContextLogger>
previous.updateDelta(to, stats);
}
virtual const LogMsgJobInfo & queryJob() const override { return job; }
void reset()
{
stats.reset();
}
};

extern THORHELPER_API bool isActivitySink(ThorActivityKind kind);
Expand Down
7 changes: 5 additions & 2 deletions system/jhtree/jhtree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,10 +573,13 @@ class jhtree_decl CKeyLevelManager : implements IKeyManager, public CInterface
filter->describe(out);
}

virtual void mergeStats(CRuntimeStatisticCollection & stats) const
virtual void mergeStats(CRuntimeStatisticCollection & targetStats) const
{
// IO Stats comming from the keyCursor and jhtree cache stats coming from this class's stats
if (keyCursor)
keyCursor->mergeStats(stats);
keyCursor->mergeStats(targetStats); // merge IO stats
if (stats.ctx)
targetStats.merge(stats.ctx->queryStats()); // merge jhtree cache stats
}
};

Expand Down
4 changes: 2 additions & 2 deletions system/jlib/jstats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1322,8 +1322,8 @@ const StatisticsMapping noStatistics({});
const StatisticsMapping jhtreeCacheStatistics({ StNumIndexSeeks, StNumIndexScans, StNumPostFiltered, StNumIndexWildSeeks,
StNumNodeCacheAdds, StNumLeafCacheAdds, StNumBlobCacheAdds, StNumNodeCacheHits, StNumLeafCacheHits, StNumBlobCacheHits, StCycleNodeLoadCycles, StCycleLeafLoadCycles,
StCycleBlobLoadCycles, StCycleNodeReadCycles, StCycleLeafReadCycles, StCycleBlobReadCycles, StNumNodeDiskFetches, StNumLeafDiskFetches, StNumBlobDiskFetches,
StCycleNodeFetchCycles, StCycleLeafFetchCycles, StCycleBlobFetchCycles, StCycleIndexCacheBlockedCycles, StNumIndexMergeCompares, StNumIndexMerges, StNumIndexSkips, StNumIndexNullSkips,
StTimeLeafLoad, StTimeLeafRead, StTimeLeafFetch, StTimeIndexCacheBlocked, StTimeNodeFetch});
StCycleNodeFetchCycles, StCycleLeafFetchCycles, StCycleBlobFetchCycles, StCycleIndexCacheBlockedCycles, StNumIndexMergeCompares, StNumIndexMerges, StNumIndexSkips,
StNumIndexNullSkips, StTimeLeafLoad, StTimeLeafRead, StTimeLeafFetch, StTimeIndexCacheBlocked, StTimeNodeFetch, StTimeNodeLoad, StTimeNodeRead});

const StatisticsMapping allStatistics(StKindAll);
const StatisticsMapping heapStatistics({StNumAllocations, StNumAllocationScans});
Expand Down
101 changes: 47 additions & 54 deletions thorlcr/activities/indexread/thindexreadslave.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class CIndexReadSlaveBase : public CSlaveActivity
protected:
StringAttr logicalFilename;
IArrayOf<IPartDescriptor> partDescs;
bool isSuperFile = false;
IHThorIndexReadBaseArg *helper;
IHThorSourceLimitTransformExtra * limitTransformExtra;
Owned<IEngineRowAllocator> allocator;
Expand Down Expand Up @@ -78,8 +79,7 @@ class CIndexReadSlaveBase : public CSlaveActivity
Owned<IFileIO> lazyIFileIO;
mutable CriticalSection ioStatsCS;
unsigned fileTableStart = NotFound;
CStatsContextLogger contextLogger;
CStatsCtxLoggerDeltaUpdater statsUpdater;
std::vector<Owned<CStatsContextLogger>> contextLoggers;

class TransformCallback : implements IThorIndexCallback , public CSimpleInterface
{
Expand All @@ -98,7 +98,7 @@ class CIndexReadSlaveBase : public CSlaveActivity
if (!keyManager)
throw MakeActivityException(&activity, 0, "Callback attempting to read blob with no key manager - index being read remotely?");
needsBlobCleaning = true;
return (byte *) keyManager->loadBlob(id, dummy, &activity.contextLogger);
return (byte *) keyManager->loadBlob(id, dummy, nullptr);
}
void prepareManager(IKeyManager *_keyManager)
{
Expand Down Expand Up @@ -169,6 +169,7 @@ class CIndexReadSlaveBase : public CSlaveActivity
unsigned p = partNum;
while (p<partDescs.ordinality()) // will process all parts if localMerge
{
CStatsContextLogger * contextLogger = contextLoggers[p];
IPartDescriptor &part = partDescs.item(p++);
unsigned crc=0;
part.getCrc(crc);
Expand Down Expand Up @@ -296,15 +297,15 @@ class CIndexReadSlaveBase : public CSlaveActivity
part.queryOwner().getClusterLabel(0, planeName);
blockedSize = getBlockedFileIOSize(planeName);
}
lazyIFileIO.setown(queryThor().queryFileCache().lookupIFileIO(*this, logicalFilename, part, nullptr, indexReadActivityStatistics, blockedSize));
lazyIFileIO.setown(queryThor().queryFileCache().lookupIFileIO(*this, logicalFilename, part, nullptr, indexReadFileStatistics, blockedSize));

RemoteFilename rfn;
part.getFilename(0, rfn);
StringBuffer path;
rfn.getPath(path); // NB: use for tracing only, IDelayedFile uses IPartDescriptor and any copy

Owned<IKeyIndex> keyIndex = createKeyIndex(path, crc, *lazyIFileIO, (unsigned) -1, false);
Owned<IKeyManager> klManager = createLocalKeyManager(helper->queryDiskRecordSize()->queryRecordAccessor(true), keyIndex, &contextLogger, helper->hasNewSegmentMonitors(), false);
Owned<IKeyManager> klManager = createLocalKeyManager(helper->queryDiskRecordSize()->queryRecordAccessor(true), keyIndex, contextLogger, helper->hasNewSegmentMonitors(), false);
if (localMerge)
{
if (!keyIndexSet)
Expand All @@ -331,7 +332,8 @@ class CIndexReadSlaveBase : public CSlaveActivity
return createIndexLookup(keyManager);
}
}
keyMergerManager.setown(createKeyMerger(helper->queryDiskRecordSize()->queryRecordAccessor(true), keyIndexSet, seekGEOffset, &contextLogger, helper->hasNewSegmentMonitors(), false));
//Not tracking jhtree cache stats in KeyMerger at the moment. Future: something to consider
keyMergerManager.setown(createKeyMerger(helper->queryDiskRecordSize()->queryRecordAccessor(true), keyIndexSet, seekGEOffset, nullptr, helper->hasNewSegmentMonitors(), false));
const ITranslator *translator = translators.item(0);
if (translator)
keyMergerManager->setLayoutTranslator(&translator->queryTranslator());
Expand All @@ -348,40 +350,12 @@ class CIndexReadSlaveBase : public CSlaveActivity
else
return nullptr;
}
void mergeFileStats(IPartDescriptor *partDesc, IFileIO *partIO)
{
if (fileStats.size()>0)
{
ISuperFileDescriptor * superFDesc = partDesc->queryOwner().querySuperFileDescriptor();
if (superFDesc)
{
unsigned subfile, lnum;
if(superFDesc->mapSubPart(partDesc->queryPartIndex(), subfile, lnum))
mergeStats(*fileStats[fileTableStart+subfile], partIO);
}
else
mergeStats(*fileStats[fileTableStart], partIO);
}
}
void updateStats()
{
// NB: updateStats() should always be called whilst ioStatsCS is held.
if (lazyIFileIO)
{
mergeStats(inactiveStats, lazyIFileIO);
if (currentPart<partDescs.ordinality())
mergeFileStats(&partDescs.item(currentPart), lazyIFileIO);
}
}
void configureNextInput()
{
if (currentManager)
{
resetManager(currentManager);
currentManager = nullptr;

CriticalBlock b(ioStatsCS);
updateStats();
lazyIFileIO.clear();
}
IKeyManager *keyManager = nullptr;
Expand Down Expand Up @@ -433,7 +407,6 @@ class CIndexReadSlaveBase : public CSlaveActivity
if (eoi)
return nullptr;
dbgassertex(currentInput);
CStatsScopedThresholdDeltaUpdater scoped(statsUpdater);
const void *ret = nullptr;
while (true)
{
Expand Down Expand Up @@ -528,7 +501,7 @@ class CIndexReadSlaveBase : public CSlaveActivity
}
public:
CIndexReadSlaveBase(CGraphElementBase *container)
: CSlaveActivity(container, indexReadActivityStatistics), callback(*this), contextLogger(jhtreeCacheStatistics, thorJob), statsUpdater(jhtreeCacheStatistics, *this, contextLogger)
: CSlaveActivity(container, indexReadActivityStatistics), callback(*this)
{
helper = (IHThorIndexReadBaseArg *)container->queryHelper();
limitTransformExtra = nullptr;
Expand All @@ -555,7 +528,6 @@ class CIndexReadSlaveBase : public CSlaveActivity
break;
if (keyManager)
prepareManager(keyManager);
CStatsScopedThresholdDeltaUpdater scoped(statsUpdater);
if (hard) // checkCount checks hard key count only.
count += indexInput->checkCount(keyedLimit-count); // part max, is total limit [keyedLimit] minus total so far [count]
else
Expand Down Expand Up @@ -607,6 +579,7 @@ class CIndexReadSlaveBase : public CSlaveActivity
IPartDescriptor &part0 = partDescs.item(0);
IFileDescriptor &fileDesc = part0.queryOwner();
ISuperFileDescriptor *super = fileDesc.querySuperFileDescriptor();
isSuperFile = super != nullptr;

if ((0 == (helper->getFlags() & TIRusesblob)) && !localMerge)
{
Expand Down Expand Up @@ -684,7 +657,15 @@ class CIndexReadSlaveBase : public CSlaveActivity
}
}
data.read(fileTableStart);
setupSpace4FileStats(fileTableStart, reInit, super!=nullptr, super?super->querySubFiles():0, indexReadActivityStatistics);
setupSpace4FileStats(fileTableStart, reInit, isSuperFile, isSuperFile?super->querySubFiles():0, indexReadFileStatistics);
// One contextLoggers per part required:
// 1) superfile: multiple contextLoggers required to allow stats to be tracked at subfile level
// 2) non-superfile: although all stats doesn't need to be tracked at part level, having separate contextLoggers is needed to
// merge both io stats and jhtree stats. (Without multiple contextLoggers, when ForEach(keyManagers)mergeStats() is called,
// the same jhtree stats would be merged multiple times. And ForEach(keyManagers)->mergeStats needs to be called to ensure
// that io stats are merged)
for(unsigned i = 0; i < parts; ++i)
contextLoggers.push_back(new CStatsContextLogger(jhtreeCacheStatistics, thorJob));
}
}
// IThorDataLink
Expand Down Expand Up @@ -719,10 +700,35 @@ class CIndexReadSlaveBase : public CSlaveActivity
virtual void gatherActiveStats(CRuntimeStatisticCollection &activeStats) const
{
PARENT::gatherActiveStats(activeStats);
if (partDescs.ordinality())
{
CriticalBlock b(ioStatsCS);
if (lazyIFileIO)
mergeStats(activeStats, lazyIFileIO);
// reset required because within loop below, mergeStats() is used to build up stats for each file
for (auto & fileStatItem: fileStats)
fileStatItem->reset();
ISuperFileDescriptor *superFDesc = partDescs.item(0).queryOwner().querySuperFileDescriptor();
for (unsigned partNum=0; partNum<partDescs.ordinality(); partNum++)
{
if (!keyManagers.isItem(partNum))
continue;
IKeyManager & keyManager = keyManagers.item(partNum);
// If it is a superfile, track stats at subfile level fileStats[fileTableStart+subfile]
// if not, one set of stats for the whole file fileStats[fileTableStart]
unsigned subfile = 0;
if (isSuperFile) // if it's not superfile, subfile is always 0 (otherwise it is the subfile number)
{
unsigned lnum;
if(!superFDesc->mapSubPart(partDescs.item(partNum).queryPartIndex(), subfile, lnum))
continue; // should not happen
}
if (fileStats.size()>0)
keyManager.mergeStats(*fileStats[fileTableStart+subfile]);
else
keyManager.mergeStats(activeStats); // when just 1 file, merge into activeStats
}
// fileStats[] will be serialized separately so file level stats are tracked (see serializeStats() below)
// Also, merged into the activeStats for activity level stats
for (auto & fileStatItem: fileStats)
activeStats.merge(*fileStatItem);
}
activeStats.setStatistic(StNumRowsProcessed, progress);
}
Expand All @@ -733,15 +739,6 @@ class CIndexReadSlaveBase : public CSlaveActivity
for (auto &stats: fileStats)
stats->serialize(mb);
}
virtual void done() override
{
{
CriticalBlock b(ioStatsCS);
updateStats();
lazyIFileIO.clear();
}
PARENT::done();
}
};

class CIndexReadSlaveActivity : public CIndexReadSlaveBase
Expand Down Expand Up @@ -819,7 +816,6 @@ class CIndexReadSlaveActivity : public CIndexReadSlaveBase
helper->mapOutputToInput(tempBuilder, seek, numFields); // NOTE - weird interface to mapOutputToInput means that it STARTS writing at seekGEOffset...
rawSeek = (byte *)temp;
}
CStatsScopedThresholdDeltaUpdater scoped(statsUpdater);
if (!currentManager->lookupSkip(rawSeek, seekGEOffset, seekSize))
return NULL;
const byte *row = currentManager->queryKeyBuffer();
Expand Down Expand Up @@ -972,7 +968,6 @@ class CIndexReadSlaveActivity : public CIndexReadSlaveBase
// IRowStream
virtual void stop() override
{
CStatsScopedDeltaUpdater scoped(statsUpdater);
if (RCMAX != keyedLimit) // NB: will not be true if nextRow() has handled
{
keyedLimitCount = sendGetCount(keyedProcessed);
Expand Down Expand Up @@ -1142,7 +1137,6 @@ class CIndexGroupAggregateSlaveActivity : public CIndexReadSlaveBase, implements
if (keyManager)
prepareManager(keyManager);

CStatsScopedThresholdDeltaUpdater scoped(statsUpdater);
while (true)
{
const void *key = indexInput->nextKey();
Expand Down Expand Up @@ -1301,7 +1295,6 @@ class CIndexCountSlaveActivity : public CIndexReadSlaveBase
if (keyManager)
prepareManager(keyManager);

CStatsScopedThresholdDeltaUpdater scoped(statsUpdater);
while (true)
{
const void *key = indexInput->nextKey();
Expand Down
2 changes: 1 addition & 1 deletion thorlcr/activities/keyedjoin/thkeyedjoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ class CKeyedJoinMaster : public CMasterActivity
totalIndexParts = 0;

Owned<IDistributedFile> dataFile;
Owned<IDistributedFile> indexFile = lookupReadFile(indexFileName, AccessMode::readRandom, false, false, 0 != (helper->getJoinFlags() & JFindexoptional), true, indexReadActivityStatistics, &indexFileStatsTableEntry);
Owned<IDistributedFile> indexFile = lookupReadFile(indexFileName, AccessMode::readRandom, false, false, 0 != (helper->getJoinFlags() & JFindexoptional), true, indexReadFileStatistics, &indexFileStatsTableEntry);
if (indexFile)
{
if (!isFileKey(indexFile))
Expand Down
Loading

0 comments on commit 41fafa0

Please sign in to comment.