Skip to content

Commit

Permalink
HPCC-30911 Store read and write cost as file attributes
Browse files Browse the repository at this point in the history
Signed-off-by: Shamser Ahmed <[email protected]>
  • Loading branch information
shamser committed Dec 6, 2023
1 parent 3c870a7 commit 6fef377
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 71 deletions.
114 changes: 78 additions & 36 deletions dali/base/dadfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,41 +177,49 @@ static IPropertyTree *getEmptyAttr()
return createPTree("Attr");
}

extern da_decl void calcFileCost(const char * cluster, double sizeGB, double fileAgeDays, __int64 numDiskWrites, __int64 numDiskReads, double & atRestCost, double & accessCost)
static IPropertyTree *getCostPropTree(const char *cluster)
{
Owned<IPropertyTree> plane = getStoragePlane(cluster);
Owned<IPropertyTree> global;
IPropertyTree * costPT = nullptr;

if (plane && plane->hasProp("cost/@storageAtRest"))
{
costPT = plane->queryPropTree("cost");
return plane->getPropTree("cost");
}
else
{
global.setown(getGlobalConfig());
costPT = global->queryPropTree("cost");
return getGlobalConfigSP()->getPropTree("cost");
}
}

extern da_decl double calcFileAtRestCost(const char * cluster, double sizeGB, double fileAgeDays)
{
Owned<const IPropertyTree> costPT = getCostPropTree(cluster);

if (costPT==nullptr)
{
atRestCost = 0.0;
accessCost = 0.0;
return;
}
constexpr int accessPriceScalingFactor = 10000; // read/write pricing based on 10,000 operations
return 0.0;
double atRestPrice = costPT->getPropReal("@storageAtRest", 0.0);
double readPrice = costPT->getPropReal("@storageReads", 0.0);
double writePrice = costPT->getPropReal("@storageWrites", 0.0);
double storageCostDaily = atRestPrice * 12 / 365;
atRestCost = storageCostDaily * sizeGB * fileAgeDays;
accessCost = (readPrice * numDiskReads / accessPriceScalingFactor) + (writePrice * numDiskWrites / accessPriceScalingFactor);
return storageCostDaily * sizeGB * fileAgeDays;
}

extern da_decl double calcFileAccessCost(const char * cluster, __int64 numDiskWrites, __int64 numDiskReads)
{
double atRestCost, accessCost;
calcFileCost(cluster, 0, 0, numDiskWrites, numDiskReads, atRestCost, accessCost);
return accessCost;
Owned<const IPropertyTree> costPT = getCostPropTree(cluster);

if (costPT==nullptr)
return 0.0;
constexpr int accessPriceScalingFactor = 10000; // read/write pricing based on 10,000 operations
double readPrice = costPT->getPropReal("@storageReads", 0.0);
double writePrice = costPT->getPropReal("@storageWrites", 0.0);
return (readPrice * numDiskReads / accessPriceScalingFactor) + (writePrice * numDiskWrites / accessPriceScalingFactor);
}

extern da_decl double calcFileAccessCost(IDistributedFile *f, __int64 numDiskWrites, __int64 numDiskReads)
{
StringBuffer clusterName;
// Should really specify the cluster number too, but this is the best we can do for now
f->getClusterName(0, clusterName);
return calcFileAccessCost(clusterName, numDiskWrites, numDiskReads);
}

RemoteFilename &constructPartFilename(IGroup *grp,unsigned partno,unsigned partmax,const char *name,const char *partmask,const char *partdir,unsigned copy,ClusterPartDiskMapSpec &mspec,RemoteFilename &rfn)
Expand Down Expand Up @@ -4941,27 +4949,43 @@ protected: friend class CDistributedFilePart;
double fileAgeDays = difftime(time(nullptr), dt.getSimple())/(24*60*60);
double sizeGB = getDiskSize(true, false) / ((double)1024 * 1024 * 1024);
const IPropertyTree *attrs = root->queryPropTree("Attr");
bool doLegacyAccessCostCalc = false;
__int64 numDiskWrites = 0, numDiskReads = 0;
if (attrs)
{
numDiskWrites = attrs->getPropInt64("@numDiskWrites");
numDiskReads = attrs->getPropInt64("@numDiskReads");
if (hasReadWriteCostFields(*attrs))
{
// Newer files have readCost and writeCost attributes
accessCost = cost_type2money(attrs->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost)) + attrs->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost)));
}
else
{
// Costs need to be calculated from numDiskReads and numDiskWrites for legacy files
numDiskWrites = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites));
doLegacyAccessCostCalc = true;
// NB: Costs of index reading can not be reliably estimated based on 'numDiskReads'
if (!isFileKey(*attrs))
numDiskReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads));
}
}
if (isEmptyString(cluster))
{
StringArray clusterNames;
unsigned countClusters = getClusterNames(clusterNames);
for (unsigned i = 0; i < countClusters; i++)
atRestCost += calcFileAtRestCost(clusterNames[i], sizeGB, fileAgeDays);
if (countClusters && doLegacyAccessCostCalc)
{
double tmpAtRestcost, tmpAccessCost;
calcFileCost(clusterNames[i], sizeGB, fileAgeDays, numDiskWrites, numDiskReads, tmpAtRestcost, tmpAccessCost);
atRestCost += tmpAtRestcost;
accessCost += tmpAccessCost;
// NB: numDiskReads/numDiskWrites are stored at the file level, not per cluster.
// So cannot calculate accessCost per cluster, assume cost is based on 1st.
accessCost += calcFileAccessCost(clusterNames[0], numDiskWrites, numDiskReads);
}
}
else
{
calcFileCost(cluster, sizeGB, fileAgeDays, numDiskWrites, numDiskReads, atRestCost, accessCost);
atRestCost += calcFileAtRestCost(cluster, sizeGB, fileAgeDays);
if (doLegacyAccessCostCalc)
accessCost += calcFileAccessCost(cluster, numDiskWrites, numDiskReads);
}
}
};
Expand Down Expand Up @@ -13340,11 +13364,12 @@ IDFProtectedIterator *CDistributedFileDirectory::lookupProtectedFiles(const char
const char* DFUQResultFieldNames[] = { "@name", "@description", "@group", "@kind", "@modified", "@job", "@owner",
"@DFUSFrecordCount", "@recordCount", "@recordSize", "@DFUSFsize", "@size", "@workunit", "@DFUSFcluster", "@numsubfiles",
"@accessed", "@numparts", "@compressedSize", "@directory", "@partmask", "@superowners", "@persistent", "@protect", "@compressed",
"@cost", "@numDiskReads", "@numDiskWrites", "@atRestCost", "@accessCost", "@maxSkew", "@minSkew", "@maxSkewPart", "@minSkewPart" };
"@cost", "@numDiskReads", "@numDiskWrites", "@atRestCost", "@accessCost", "@maxSkew", "@minSkew", "@maxSkewPart", "@minSkewPart",
"@readCost", "@writeCost" };

extern da_decl const char* getDFUQResultFieldName(DFUQResultField feild)
extern da_decl const char* getDFUQResultFieldName(DFUQResultField field)
{
return DFUQResultFieldNames[feild];
return DFUQResultFieldNames[field];
}

IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned numFiles, DFUQResultField* localFilters, const char* localFilterBuf)
Expand Down Expand Up @@ -13411,6 +13436,7 @@ IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned nu

void setCost(IPropertyTree* file, const char *nodeGroup)
{
// Set the following dynamic fields: atRestCost, accessCost, cost and for legacy files: readCost, writeCost
StringBuffer str;
double fileAgeDays = 0.0;
if (file->getProp(getDFUQResultFieldName(DFUQRFtimemodified), str))
Expand All @@ -13425,13 +13451,29 @@ IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned nu
else
sizeDiskSize = file->getPropInt64(getDFUQResultFieldName(DFUQRForigsize), 0);
double sizeGB = sizeDiskSize / ((double)1024 * 1024 * 1024);
__int64 numDiskWrites = file->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0);
__int64 numDiskReads = file->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0);
double atRestCost, accessCost;
calcFileCost(nodeGroup, sizeGB, fileAgeDays, numDiskWrites, numDiskReads, atRestCost, accessCost);
file->setPropReal(getDFUQResultFieldName(DFUQRFcost), atRestCost+accessCost);
file->setPropReal(getDFUQResultFieldName(DFUQRFatRestCost), atRestCost);
file->setPropReal(getDFUQResultFieldName(DFUQRFaccessCost), accessCost);
cost_type atRestCost = money2cost_type(calcFileAtRestCost(nodeGroup, sizeGB, fileAgeDays));
file->setPropInt64(getDFUQResultFieldName(DFUQRFatRestCost), atRestCost);

// Dyamically calc and set the access cost field and for legacy files set read/write cost fields
cost_type accessCost = 0;
if (hasReadWriteCostFields(*file))
{
accessCost = file->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost)) + file->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost));
}
else // Calc access cost from numDiskRead & numDiskWrites for Legacy files
{
cost_type legacyReadCost = getLegacyReadCost(*file, nodeGroup);
file->setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost);

cost_type legacyWriteCost = getLegacyWriteCost(*file, nodeGroup);
file->setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), legacyWriteCost);

accessCost = legacyReadCost + legacyWriteCost;
}
file->setPropInt64(getDFUQResultFieldName(DFUQRFaccessCost), accessCost);

// Dymically calc and set the total cost field
file->setPropInt64(getDFUQResultFieldName(DFUQRFcost), atRestCost + accessCost);
}

IPropertyTree *deserializeFileAttr(MemoryBuffer &mb, StringArray& nodeGroupFilter)
Expand Down
44 changes: 39 additions & 5 deletions dali/base/dadfs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,15 +294,17 @@ enum DFUQResultField
DFUQRFminSkew = 30,
DFUQRFmaxSkewPart = 31,
DFUQRFminSkewPart = 32,
DFUQRFterm = 33,
DFUQRFreadCost = 33,
DFUQRFwriteCost = 34,
DFUQRFterm = 35, // must be last in list
DFUQRFreverse = 256,
DFUQRFnocase = 512,
DFUQRFnumeric = 1024,
DFUQRFfloat = 2048
};

extern da_decl const char* getDFUQFilterFieldName(DFUQFilterField feild);
extern da_decl const char* getDFUQResultFieldName(DFUQResultField feild);
extern da_decl const char* getDFUQFilterFieldName(DFUQFilterField field);
extern da_decl const char* getDFUQResultFieldName(DFUQResultField field);

/**
* File operations can be included in a transaction to ensure that multiple
Expand Down Expand Up @@ -861,7 +863,7 @@ extern da_decl GroupType translateGroupType(const char *groupType);

// Useful property query functions

inline bool isFileKey(IPropertyTree &pt) { const char *kind = pt.queryProp("@kind"); return kind&&strieq(kind,"key"); }
inline bool isFileKey(const IPropertyTree &pt) { const char *kind = pt.queryProp("@kind"); return kind&&strieq(kind,"key"); }
inline bool isFileKey(IDistributedFile *f) { return isFileKey(f->queryAttributes()); }
inline bool isFileKey(IFileDescriptor *f) { return isFileKey(f->queryProperties()); }

Expand All @@ -886,11 +888,43 @@ inline const char *queryFileKind(IFileDescriptor *f) { return queryFileKind(f->q
extern da_decl void ensureFileScope(const CDfsLogicalFileName &dlfn, unsigned timeoutms=INFINITE);

extern da_decl bool checkLogicalName(const char *lfn,IUserDescriptor *user,bool readreq,bool createreq,bool allowquery,const char *specialnotallowedmsg);
extern da_decl void calcFileCost(const char * cluster, double sizeGB, double fileAgeDays, __int64 numDiskWrites, __int64 numDiskReads, double & atRestCost, double & accessCost);

extern da_decl double calcFileAtRestCost(const char * cluster, double sizeGB, double fileAgeDays);
extern da_decl double calcFileAccessCost(const char * cluster, __int64 numDiskWrites, __int64 numDiskReads);
extern da_decl double calcFileAccessCost(IDistributedFile *f, __int64 numDiskWrites, __int64 numDiskReads);
constexpr bool defaultPrivilegedUser = true;
constexpr bool defaultNonPrivilegedUser = false;

extern da_decl void configurePreferredPlanes();
inline bool hasReadWriteCostFields(const IPropertyTree & fileAttr)
{
return fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost)) || fileAttr.hasProp(getDFUQResultFieldName(DFUQRFwriteCost));
}

template<typename Source>
inline cost_type getLegacyReadCost(const IPropertyTree & fileAttr, Source source)
{
// Legacy files do not have @readCost attribute, so calculate from numDiskRead
// NB: Costs of index reading can not be reliably estimated based on 'numDiskReads'
if (!hasReadWriteCostFields(fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads))
&& !isFileKey(fileAttr))
{
stat_type prevDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0);
return money2cost_type(calcFileAccessCost(source, 0, prevDiskReads));
}
else
return 0;
}
template<typename Source>
inline cost_type getLegacyWriteCost(const IPropertyTree & fileAttr, Source source)
{
// Legacy files do not have @writeCost attribute, so calculate from numDiskWrites
if (!hasReadWriteCostFields(fileAttr) && fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites)))
{
stat_type prevDiskWrites = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0);
return money2cost_type(calcFileAccessCost(source, prevDiskWrites, 0));
}
else
return 0;
}
#endif
13 changes: 11 additions & 2 deletions dali/ft/filecopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3592,7 +3592,10 @@ void FileSprayer::updateTargetProperties()

DistributedFilePropertyLock lock(distributedTarget);
IPropertyTree &curProps = lock.queryAttributes();
curProps.setPropInt64("@numDiskWrites", totalNumWrites);
cost_type writeCost = money2cost_type(calcFileAccessCost(distributedTarget, totalNumWrites, 0));
curProps.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), writeCost);
curProps.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), totalNumWrites);

if (calcCRC())
curProps.setPropInt(FAcrc, totalCRC.get());
curProps.setPropInt64(FAsize, totalLength);
Expand Down Expand Up @@ -3771,7 +3774,13 @@ void FileSprayer::updateTargetProperties()
if (distributedSource)
{
if (distributedSource->querySuperFile()==nullptr)
distributedSource->addAttrValue("@numDiskReads", totalNumReads);
{
IPropertyTree & fileAttr = distributedSource->queryAttributes();
cost_type legacyReadCost = getLegacyReadCost(fileAttr, distributedSource);
cost_type curReadCost = money2cost_type(calcFileAccessCost(distributedSource, 0, totalNumReads));
distributedSource->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost+curReadCost);
distributedSource->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), totalNumReads);
}
}
if (error)
throw error.getClear();
Expand Down
16 changes: 10 additions & 6 deletions ecl/hthor/hthor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ void CHThorDiskWriteActivity::publish()
if (helper.getFlags() & TDWrestricted)
properties.setPropBool("restricted", true);

properties.setPropInt64("@numDiskWrites", numDiskWrites);
properties.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), numDiskWrites);
StringBuffer lfn;
expandLogicalFilename(lfn, mangledHelperFileName.str(), agent.queryWorkUnit(), agent.queryResolveFilesLocally(), false);
CDfsLogicalFileName logicalName;
Expand All @@ -790,6 +790,7 @@ void CHThorDiskWriteActivity::publish()
StringBuffer clusterName;
file->getClusterName(0, clusterName);
diskAccessCost = money2cost_type(calcFileAccessCost(clusterName, numDiskWrites, 0));
properties.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), diskAccessCost);
}
file->attach(logicalName.get(), agent.queryCodeContext()->queryUserDescriptor());
agent.logFileAccess(file, "HThor", "CREATED", graph);
Expand Down Expand Up @@ -1366,7 +1367,7 @@ void CHThorIndexWriteActivity::execute()
properties.setProp("@workunit", agent.queryWorkUnit()->queryWuid());
properties.setProp("@job", agent.queryWorkUnit()->queryJobName());
properties.setPropInt64("@duplicateKeyCount",duplicateKeyCount);
properties.setPropInt64("@numDiskWrites", numDiskWrites);
properties.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), numDiskWrites);
properties.setPropInt64("@numLeafNodes", numLeafNodes);
properties.setPropInt64("@numBranchNodes", numBranchNodes);
properties.setPropInt64("@numBlobNodes", numBlobNodes);
Expand Down Expand Up @@ -1437,6 +1438,7 @@ void CHThorIndexWriteActivity::execute()
StringBuffer clusterName;
dfile->getClusterName(0, clusterName);
diskAccessCost = money2cost_type(calcFileAccessCost(clusterName, numDiskWrites, 0));
properties.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), diskAccessCost);
}
else
lfn = filename;
Expand Down Expand Up @@ -8539,10 +8541,12 @@ void CHThorDiskReadBaseActivity::closepart()
dFile = &(super->querySubFile(subfile, true));
}
}
dFile->addAttrValue("@numDiskReads", curDiskReads);
StringBuffer clusterName;
dFile->getClusterName(0, clusterName);
diskAccessCost = money2cost_type(calcFileAccessCost(clusterName, 0, curDiskReads));
IPropertyTree & fileAttr = dFile->queryAttributes();
cost_type legacyReadCost = getLegacyReadCost(fileAttr, dFile);
cost_type curReadCost = money2cost_type(calcFileAccessCost(dFile, 0, curDiskReads));

dFile->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost);
dFile->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), curDiskReads);
}
numDiskReads += curDiskReads;
}
Expand Down
11 changes: 8 additions & 3 deletions esp/services/ws_dfu/ws_dfuHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,16 @@ bool WsDFUHelpers::addToLogicalFileList(IPropertyTree& file, const char* nodeGro
if (version >= 1.61)
{
if (version < 1.62)
lFile->setCost(file.getPropReal(getDFUQResultFieldName(DFUQRFcost)));
{
cost_type cost = file.getPropInt64(getDFUQResultFieldName(DFUQRFcost));
lFile->setCost(cost_type2money(cost));
}
else
{
lFile->setAtRestCost(file.getPropReal(getDFUQResultFieldName(DFUQRFatRestCost)));
lFile->setAccessCost(file.getPropReal(getDFUQResultFieldName(DFUQRFaccessCost)));
cost_type atRestCost = file.getPropInt64(getDFUQResultFieldName(DFUQRFatRestCost));
lFile->setAtRestCost(cost_type2money(atRestCost));
cost_type accessCost = file .getPropInt64(getDFUQResultFieldName(DFUQRFaccessCost));
lFile->setAccessCost(cost_type2money(accessCost));
}
}
if ((version >= 1.63) && (file.hasProp(getDFUQResultFieldName(DFUQRFmaxSkew))))
Expand Down
Loading

0 comments on commit 6fef377

Please sign in to comment.