diff --git a/dali/base/dadfs.cpp b/dali/base/dadfs.cpp index ed2a599fb8b..1225565086a 100644 --- a/dali/base/dadfs.cpp +++ b/dali/base/dadfs.cpp @@ -221,6 +221,16 @@ extern da_decl double calcFileAccessCost(IDistributedFile *f, __int64 numDiskWri return calcFileAccessCost(clusterName, numDiskWrites, numDiskReads); } +extern da_decl cost_type calcDiskWriteCost(const StringArray & clusters, stat_type numDiskWrites) +{ + if (!numDiskWrites) + return 0; + cost_type writeCost = 0; + ForEachItemIn(idx, clusters) + writeCost += money2cost_type(calcFileAccessCost(clusters.item(idx), numDiskWrites, 0)); + return writeCost; +} + RemoteFilename &constructPartFilename(IGroup *grp,unsigned partno,unsigned partmax,const char *name,const char *partmask,const char *partdir,unsigned copy,ClusterPartDiskMapSpec &mspec,RemoteFilename &rfn) { partno--; diff --git a/dali/base/dadfs.hpp b/dali/base/dadfs.hpp index 0d1daef7881..fa11aa0fee0 100644 --- a/dali/base/dadfs.hpp +++ b/dali/base/dadfs.hpp @@ -892,6 +892,7 @@ extern da_decl bool checkLogicalName(const char *lfn,IUserDescriptor *user,bool extern da_decl double calcFileAtRestCost(const char * cluster, double sizeGB, double fileAgeDays); extern da_decl double calcFileAccessCost(const char * cluster, __int64 numDiskWrites, __int64 numDiskReads); extern da_decl double calcFileAccessCost(IDistributedFile *f, __int64 numDiskWrites, __int64 numDiskReads); +extern da_decl cost_type calcDiskWriteCost(const StringArray & clusters, stat_type numDiskWrites); constexpr bool defaultPrivilegedUser = true; constexpr bool defaultNonPrivilegedUser = false; diff --git a/thorlcr/activities/fetch/thfetch.cpp b/thorlcr/activities/fetch/thfetch.cpp index 78d11706d49..0fb3852fb87 100644 --- a/thorlcr/activities/fetch/thfetch.cpp +++ b/thorlcr/activities/fetch/thfetch.cpp @@ -111,9 +111,16 @@ class CFetchActivityMaster : public CMasterActivity for (unsigned i=0; ideserialize(node, mb); } + virtual void getActivityStats(IStatisticGatherer & stats) override + { + CMasterActivity::getActivityStats(stats); + diskAccessCost = calcFileReadCostStats(false); + if (diskAccessCost) + stats.addStatistic(StCostFileAccess, diskAccessCost); + } virtual void done() override { - updateFileReadCostStats(); + diskAccessCost = calcFileReadCostStats(true); CMasterActivity::done(); } }; diff --git a/thorlcr/activities/indexread/thindexread.cpp b/thorlcr/activities/indexread/thindexread.cpp index 625ae2eb937..de52029dd56 100644 --- a/thorlcr/activities/indexread/thindexread.cpp +++ b/thorlcr/activities/indexread/thindexread.cpp @@ -298,9 +298,16 @@ class CIndexReadBase : public CMasterActivity for (unsigned i=0; ideserialize(node, mb); } + virtual void getActivityStats(IStatisticGatherer & stats) override + { + CMasterActivity::getActivityStats(stats); + diskAccessCost = calcFileReadCostStats(false); + if (diskAccessCost) + stats.addStatistic(StCostFileAccess, diskAccessCost); + } virtual void done() override { - updateFileReadCostStats(); + diskAccessCost = calcFileReadCostStats(true); CMasterActivity::done(); } }; diff --git a/thorlcr/activities/indexwrite/thindexwrite.cpp b/thorlcr/activities/indexwrite/thindexwrite.cpp index 859a987041f..1a9f5894a87 100644 --- a/thorlcr/activities/indexwrite/thindexwrite.cpp +++ b/thorlcr/activities/indexwrite/thindexwrite.cpp @@ -293,6 +293,8 @@ class IndexWriteActivityMaster : public CMasterActivity props.setPropInt64("@totalCRC", totalCRC); } props.setPropInt("@formatCrc", helper->getFormatCrc()); + props.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), statsCollection.getStatisticSum(StNumDiskWrites)); + props.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), diskAccessCost); if (isLocal) { props.setPropBool("@local", true); @@ -310,7 +312,6 @@ class IndexWriteActivityMaster : public CMasterActivity bloom->setProp("@bloomProbability", pval.str()); } container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), 0, false, WUFileStandard, &clusters); - updateFileWriteCostStats(*fileDesc, props, statsCollection.getStatisticSum(StNumDiskWrites)); if (!dlfn.isExternal()) queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc); } @@ -418,6 +419,9 @@ class IndexWriteActivityMaster : public CMasterActivity { CMasterActivity::getActivityStats(stats); stats.addStatistic(StNumDuplicateKeys, cummulativeDuplicateKeyCount); + diskAccessCost = calcDiskWriteCost(clusters, statsCollection.getStatisticSum(StNumDiskWrites)); + if (diskAccessCost) + stats.addStatistic(StCostFileAccess, diskAccessCost); } }; diff --git a/thorlcr/activities/keyedjoin/thkeyedjoin.cpp b/thorlcr/activities/keyedjoin/thkeyedjoin.cpp index 8130a5d9d2f..59edb954946 100644 --- a/thorlcr/activities/keyedjoin/thkeyedjoin.cpp +++ b/thorlcr/activities/keyedjoin/thkeyedjoin.cpp @@ -556,9 +556,16 @@ class CKeyedJoinMaster : public CMasterActivity for (unsigned i=0; ideserialize(node, mb); } + virtual void getActivityStats(IStatisticGatherer & stats) override + { + CMasterActivity::getActivityStats(stats); + diskAccessCost = calcFileReadCostStats(false); + if (diskAccessCost) + stats.addStatistic(StCostFileAccess, diskAccessCost); + } virtual void done() override { - updateFileReadCostStats(); + diskAccessCost = calcFileReadCostStats(true); CMasterActivity::done(); } }; diff --git a/thorlcr/activities/thdiskbase.cpp b/thorlcr/activities/thdiskbase.cpp index 269d55be407..0feb326a37c 100644 --- a/thorlcr/activities/thdiskbase.cpp +++ b/thorlcr/activities/thdiskbase.cpp @@ -121,9 +121,17 @@ void CDiskReadMasterBase::serializeSlaveData(MemoryBuffer &dst, unsigned slave) CSlavePartMapping::serializeNullMap(dst); } +void CDiskReadMasterBase::getActivityStats(IStatisticGatherer & stats) +{ + CMasterActivity::getActivityStats(stats); + diskAccessCost = calcFileReadCostStats(false); + if (diskAccessCost) + stats.addStatistic(StCostFileAccess, diskAccessCost); +} + void CDiskReadMasterBase::done() { - updateFileReadCostStats(); + diskAccessCost = calcFileReadCostStats(true); CMasterActivity::done(); } @@ -284,7 +292,8 @@ void CWriteMasterBase::publish() } if (TDWrestricted & diskHelperBase->getFlags()) props.setPropBool("restricted", true ); - updateFileWriteCostStats(*fileDesc, props, statsCollection.getStatisticSum(StNumDiskWrites)); + props.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), statsCollection.getStatisticSum(StNumDiskWrites)); + props.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), diskAccessCost); container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters); if (!dlfn.isExternal()) { @@ -432,6 +441,14 @@ void CWriteMasterBase::done() } } +void CWriteMasterBase::getActivityStats(IStatisticGatherer & stats) +{ + CMasterActivity::getActivityStats(stats); + diskAccessCost = calcDiskWriteCost(clusters, statsCollection.getStatisticSum(StNumDiskWrites)); + if (diskAccessCost) + stats.addStatistic(StCostFileAccess, diskAccessCost); +} + void CWriteMasterBase::slaveDone(size32_t slaveIdx, MemoryBuffer &mb) { if (mb.length()) // if 0 implies aborted out from this slave. diff --git a/thorlcr/activities/thdiskbase.ipp b/thorlcr/activities/thdiskbase.ipp index d3dad2be550..2d44fd65c87 100644 --- a/thorlcr/activities/thdiskbase.ipp +++ b/thorlcr/activities/thdiskbase.ipp @@ -39,6 +39,7 @@ public: virtual void init() override; virtual void kill() override; virtual void serializeSlaveData(MemoryBuffer &dst, unsigned slave) override; + virtual void getActivityStats(IStatisticGatherer & stats) override; virtual void done() override; virtual void validateFile(IDistributedFile *file) { } virtual void deserializeStats(unsigned node, MemoryBuffer &mb) override; @@ -62,6 +63,7 @@ public: virtual void preStart(size32_t parentExtractSz, const byte *parentExtract); virtual void init(); virtual void serializeSlaveData(MemoryBuffer &dst, unsigned slave); + virtual void getActivityStats(IStatisticGatherer & stats) override; virtual void done(); virtual void slaveDone(size32_t slaveIdx, MemoryBuffer &mb); }; diff --git a/thorlcr/graph/thgraphmaster.cpp b/thorlcr/graph/thgraphmaster.cpp index f4dd1734768..835b1e8101a 100644 --- a/thorlcr/graph/thgraphmaster.cpp +++ b/thorlcr/graph/thgraphmaster.cpp @@ -611,8 +611,6 @@ void CMasterActivity::deserializeStats(unsigned node, MemoryBuffer &mb) void CMasterActivity::getActivityStats(IStatisticGatherer & stats) { statsCollection.getStats(stats); - if (diskAccessCost) - stats.addStatistic(StCostFileAccess, diskAccessCost); } void CMasterActivity::getEdgeStats(IStatisticGatherer & stats, unsigned idx) @@ -647,23 +645,20 @@ void CMasterActivity::done() } } -void CMasterActivity::updateFileReadCostStats() +// calcFileReadCostStats calculates and returns the read costs for all files read by the activity +// In addition, if updateFileProps==true, it updates the file attributes with @readCost and @numDiskReads +// Note: should be called once per activity with "updateFileProps==true" to avoid double counting +cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps) { // Returns updates numDiskReads & readCost in the file attributes and returns the readCost - auto updateReadCosts = [](bool useJhtreeCacheStats, IDistributedFile *file, CThorStatsCollection &stats) + auto updateReadCosts = [updateFileProps](bool useJhtreeCacheStats, IDistributedFile *file, CThorStatsCollection &stats) { StringBuffer clusterName; file->getClusterName(0, clusterName); IPropertyTree & fileAttr = file->queryAttributes(); - cost_type legacyReadCost = 0, curReadCost = 0; - // Legacy files will not have the readCost stored as an attribute - if (!hasReadWriteCostFields(&fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads))) - { - // Legacy file: calculate readCost using prev disk reads and new disk reads - stat_type prevDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0); - legacyReadCost = money2cost_type(calcFileAccessCost(clusterName, 0, prevDiskReads)); - } + cost_type curReadCost = 0; stat_type curDiskReads = stats.getStatisticSum(StNumDiskReads); + if(useJhtreeCacheStats) { stat_type numActualReads = stats.getStatisticSum(StNumNodeDiskFetches) @@ -673,11 +668,23 @@ void CMasterActivity::updateFileReadCostStats() } else curReadCost = money2cost_type(calcFileAccessCost(clusterName, 0, curDiskReads)); - file->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost); - file->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), curDiskReads); + + if (updateFileProps) + { + cost_type legacyReadCost = 0; + // Legacy files will not have the readCost stored as an attribute + if (!hasReadWriteCostFields(&fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads))) + { + // Legacy file: calculate readCost using prev disk reads and new disk reads + stat_type prevDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0); + legacyReadCost = money2cost_type(calcFileAccessCost(clusterName, 0, prevDiskReads)); + } + file->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost); + file->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), curDiskReads); + } return curReadCost; }; - + cost_type readCost = 0; if (fileStats.size()>0) { ThorActivityKind activityKind = container.getKind(); @@ -699,13 +706,13 @@ void CMasterActivity::updateFileReadCostStats() for (unsigned i=0; iquerySubFile(i, true); - diskAccessCost += updateReadCosts(useJhtreeCache, &subFile, *fileStats[fileIndex]); + readCost += updateReadCosts(useJhtreeCache, &subFile, *fileStats[fileIndex]); fileIndex++; } } else { - diskAccessCost += updateReadCosts(useJhtreeCache, file, *fileStats[fileIndex]); + readCost += updateReadCosts(useJhtreeCache, file, *fileStats[fileIndex]); fileIndex++; } } @@ -715,21 +722,7 @@ void CMasterActivity::updateFileReadCostStats() { IDistributedFile *file = queryReadFile(0); if (file) - diskAccessCost += updateReadCosts(true, file, statsCollection); - } -} - -void CMasterActivity::updateFileWriteCostStats(IFileDescriptor & fileDesc, IPropertyTree &props, stat_type numDiskWrites) -{ - if (numDiskWrites) - { - props.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), numDiskWrites); - assertex(fileDesc.numClusters()>=1); - StringBuffer clusterName; - fileDesc.getClusterGroupName(0, clusterName);// Note: calculating for 1st cluster. (Future: calc for >1 clusters) - cost_type writeCost = money2cost_type(calcFileAccessCost(clusterName, numDiskWrites, 0)); - props.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), writeCost); - diskAccessCost += writeCost; + readCost = updateReadCosts(true, file, statsCollection); } } diff --git a/thorlcr/graph/thgraphmaster.ipp b/thorlcr/graph/thgraphmaster.ipp index 448f7f0d141..ee836e059e6 100644 --- a/thorlcr/graph/thgraphmaster.ipp +++ b/thorlcr/graph/thgraphmaster.ipp @@ -253,8 +253,7 @@ protected: unsigned queryReadFileId(const char *lfnName); IDistributedFile *findReadFile(const char *lfnName); IDistributedFile *lookupReadFile(const char *lfnName, AccessMode mode, bool jobTemp, bool temp, bool opt, bool statsForMultipleFiles=false, const StatisticsMapping &statsMapping=diskReadRemoteStatistics, unsigned * fileStatsStartEntry=nullptr); - void updateFileReadCostStats(); - void updateFileWriteCostStats(IFileDescriptor & fileDesc, IPropertyTree &props, stat_type numDiskWrites); + cost_type calcFileReadCostStats(bool updateFileProps); virtual void process() { } public: IMPLEMENT_IINTERFACE_USING(CActivityBase)