Skip to content

Commit

Permalink
HPCC-32880 Ensure that legacy write costs are not lost after file rea…
Browse files Browse the repository at this point in the history
…d operation.

Signed-off-by: Shamser Ahmed <[email protected]>
  • Loading branch information
shamser committed Nov 12, 2024
1 parent 76bf516 commit ad901a7
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 38 deletions.
51 changes: 27 additions & 24 deletions dali/base/dadfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4990,43 +4990,46 @@ protected: friend class CDistributedFilePart;
double fileAgeDays = difftime(time(nullptr), dt.getSimple())/(24*60*60);
double sizeGB = getDiskSize(true, false) / ((double)1024 * 1024 * 1024);
const IPropertyTree *attrs = root->queryPropTree("Attr");
bool doLegacyAccessCostCalc = false;
bool doLegacyCostCalc = false;
__int64 numDiskWrites = 0, numDiskReads = 0;
cost_type readCost = 0, writeCost = 0;
if (attrs)
{
if (hasReadWriteCostFields(*attrs))
if (!attrs->hasProp(getDFUQResultFieldName(DFUQRFreadCost))&& !isFileKey(*attrs))
{
// Newer files have readCost and writeCost attributes
accessCost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost)) + attrs->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost));
numDiskReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads));
doLegacyCostCalc = true;
}
else
readCost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost));

if (!attrs->hasProp(getDFUQResultFieldName(DFUQRFwriteCost)))
{
// Costs need to be calculated from numDiskReads and numDiskWrites for legacy files
numDiskWrites = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites));
doLegacyAccessCostCalc = true;
// NB: Costs of index reading can not be reliably estimated based on 'numDiskReads'
if (!isFileKey(*attrs))
numDiskReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads));
doLegacyCostCalc = true;
}
else
writeCost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost));
}
accessCost = readCost + writeCost; // access cost excluding legacy costs (to be added below)
if (isEmptyString(cluster))
{
StringArray clusterNames;
unsigned countClusters = getClusterNames(clusterNames);
for (unsigned i = 0; i < countClusters; i++)
atRestCost += calcFileAtRestCost(clusterNames[i], sizeGB, fileAgeDays);
if (countClusters && doLegacyAccessCostCalc)
if (countClusters && doLegacyCostCalc)
{
// NB: numDiskReads/numDiskWrites are stored at the file level, not per cluster.
// So cannot calculate accessCost per cluster, assume cost is based on 1st.
accessCost = calcFileAccessCost(clusterNames[0], numDiskWrites, numDiskReads);
accessCost += calcFileAccessCost(clusterNames[0], numDiskWrites, numDiskReads);
}
}
else
{
atRestCost += calcFileAtRestCost(cluster, sizeGB, fileAgeDays);
if (doLegacyAccessCostCalc)
accessCost = calcFileAccessCost(cluster, numDiskWrites, numDiskReads);
if (doLegacyCostCalc)
accessCost += calcFileAccessCost(cluster, numDiskWrites, numDiskReads);
}
}
};
Expand Down Expand Up @@ -13457,21 +13460,21 @@ IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned nu
file->setPropInt64(getDFUQResultFieldName(DFUQRFatRestCost), atRestCost);

// Dyamically calc and set the access cost field and for legacy files set read/write cost fields
cost_type accessCost = 0;
if (hasReadWriteCostFields(*file))
cost_type readCost = file->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost));
if (!readCost)
{
accessCost = file->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost)) + file->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost));
readCost = getLegacyReadCost(*file, nodeGroup);
file->setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), readCost);
}
else // Calc access cost from numDiskRead & numDiskWrites for Legacy files
{
cost_type legacyReadCost = getLegacyReadCost(*file, nodeGroup);
file->setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost);

cost_type legacyWriteCost = getLegacyWriteCost(*file, nodeGroup);
file->setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), legacyWriteCost);

accessCost = legacyReadCost + legacyWriteCost;
cost_type writeCost = file->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost));
if (!writeCost)
{
cost_type writeCost = getLegacyWriteCost(*file, nodeGroup);
file->setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), writeCost);
}
cost_type accessCost = readCost + writeCost;

file->setPropInt64(getDFUQResultFieldName(DFUQRFaccessCost), accessCost);

// Dymically calc and set the total cost field
Expand Down
17 changes: 11 additions & 6 deletions dali/base/dadfs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -894,17 +894,17 @@ constexpr bool defaultPrivilegedUser = true;
constexpr bool defaultNonPrivilegedUser = false;

extern da_decl void configurePreferredPlanes();
inline bool hasReadWriteCostFields(const IPropertyTree & fileAttr)
{
return fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost)) || fileAttr.hasProp(getDFUQResultFieldName(DFUQRFwriteCost));
}

// Legacy read cost calculation
// Note: this only returns legacy read cost if DFUQRFreadCost has not been set.
// This means that once DFUQRFreadCost has been updated with legacy read cost,
// it will no longer return the legacy read cost.
template<typename Source>
inline cost_type getLegacyReadCost(const IPropertyTree & fileAttr, Source source)
{
// Legacy files do not have @readCost attribute, so calculate from numDiskRead
// NB: Costs of index reading can not be reliably estimated based on 'numDiskReads'
if (!hasReadWriteCostFields(fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads))
if (!fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost)) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads))
&& !isFileKey(fileAttr))
{
stat_type prevDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0);
Expand All @@ -913,11 +913,16 @@ inline cost_type getLegacyReadCost(const IPropertyTree & fileAttr, Source source
else
return 0;
}

// Legacy write cost calculation
// Note: this only returns legacy write cost if DFUQRFwriteCost has not been set.
// This means that once DFUQRFwriteCost has been updated with legacy write cost,
// it will no longer return the legacy write cost.
template<typename Source>
inline cost_type getLegacyWriteCost(const IPropertyTree & fileAttr, Source source)
{
// Legacy files do not have @writeCost attribute, so calculate from numDiskWrites
if (!hasReadWriteCostFields(fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites)))
if (!fileAttr.hasProp(getDFUQResultFieldName(DFUQRFwriteCost)) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites)))
{
stat_type prevDiskWrites = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0);
return calcFileAccessCost(source, prevDiskWrites, 0);
Expand Down
9 changes: 1 addition & 8 deletions thorlcr/graph/thgraphmaster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -672,14 +672,7 @@ cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps)

if (updateFileProps)
{
cost_type legacyReadCost = 0;
// Legacy files will not have the readCost stored as an attribute
if (!hasReadWriteCostFields(fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads)))
{
// Legacy file: calculate readCost using prev disk reads and new disk reads
stat_type prevDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0);
legacyReadCost = calcFileAccessCost(clusterName, 0, prevDiskReads);
}
cost_type legacyReadCost = getLegacyReadCost(fileAttr, clusterName.str());
file->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost);
file->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), curDiskReads);
}
Expand Down

0 comments on commit ad901a7

Please sign in to comment.