Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HPCC-30911 Store read and write cost as file attributes #18080

Merged
merged 1 commit into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 78 additions & 36 deletions dali/base/dadfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,41 +177,49 @@ static IPropertyTree *getEmptyAttr()
return createPTree("Attr");
}

extern da_decl void calcFileCost(const char * cluster, double sizeGB, double fileAgeDays, __int64 numDiskWrites, __int64 numDiskReads, double & atRestCost, double & accessCost)
static IPropertyTree *getCostPropTree(const char *cluster)
{
Owned<IPropertyTree> plane = getStoragePlane(cluster);
Owned<IPropertyTree> global;
IPropertyTree * costPT = nullptr;

if (plane && plane->hasProp("cost/@storageAtRest"))
{
costPT = plane->queryPropTree("cost");
return plane->getPropTree("cost");
}
else
{
global.setown(getGlobalConfig());
costPT = global->queryPropTree("cost");
return getGlobalConfigSP()->getPropTree("cost");
}
}

extern da_decl double calcFileAtRestCost(const char * cluster, double sizeGB, double fileAgeDays)
{
Owned<const IPropertyTree> costPT = getCostPropTree(cluster);

if (costPT==nullptr)
{
atRestCost = 0.0;
accessCost = 0.0;
return;
}
constexpr int accessPriceScalingFactor = 10000; // read/write pricing based on 10,000 operations
return 0.0;
double atRestPrice = costPT->getPropReal("@storageAtRest", 0.0);
double readPrice = costPT->getPropReal("@storageReads", 0.0);
double writePrice = costPT->getPropReal("@storageWrites", 0.0);
double storageCostDaily = atRestPrice * 12 / 365;
atRestCost = storageCostDaily * sizeGB * fileAgeDays;
accessCost = (readPrice * numDiskReads / accessPriceScalingFactor) + (writePrice * numDiskWrites / accessPriceScalingFactor);
return storageCostDaily * sizeGB * fileAgeDays;
}

extern da_decl double calcFileAccessCost(const char * cluster, __int64 numDiskWrites, __int64 numDiskReads)
{
double atRestCost, accessCost;
calcFileCost(cluster, 0, 0, numDiskWrites, numDiskReads, atRestCost, accessCost);
return accessCost;
Owned<const IPropertyTree> costPT = getCostPropTree(cluster);

if (costPT==nullptr)
return 0.0;
constexpr int accessPriceScalingFactor = 10000; // read/write pricing based on 10,000 operations
double readPrice = costPT->getPropReal("@storageReads", 0.0);
double writePrice = costPT->getPropReal("@storageWrites", 0.0);
return (readPrice * numDiskReads / accessPriceScalingFactor) + (writePrice * numDiskWrites / accessPriceScalingFactor);
jakesmith marked this conversation as resolved.
Show resolved Hide resolved
}

// Compute the access (read/write) cost for a distributed file, priced using
// the storage configuration of the file's first cluster.
// NB: ideally the cluster number would be specified by the caller; defaulting
// to cluster 0 is the best that can be done for now.
extern da_decl double calcFileAccessCost(IDistributedFile *f, __int64 numDiskWrites, __int64 numDiskReads)
{
    StringBuffer cluster;
    f->getClusterName(0, cluster);
    return calcFileAccessCost(cluster, numDiskWrites, numDiskReads);
}

RemoteFilename &constructPartFilename(IGroup *grp,unsigned partno,unsigned partmax,const char *name,const char *partmask,const char *partdir,unsigned copy,ClusterPartDiskMapSpec &mspec,RemoteFilename &rfn)
Expand Down Expand Up @@ -4941,27 +4949,43 @@ protected: friend class CDistributedFilePart;
double fileAgeDays = difftime(time(nullptr), dt.getSimple())/(24*60*60);
double sizeGB = getDiskSize(true, false) / ((double)1024 * 1024 * 1024);
const IPropertyTree *attrs = root->queryPropTree("Attr");
bool doLegacyAccessCostCalc = false;
__int64 numDiskWrites = 0, numDiskReads = 0;
if (attrs)
{
numDiskWrites = attrs->getPropInt64("@numDiskWrites");
numDiskReads = attrs->getPropInt64("@numDiskReads");
if (hasReadWriteCostFields(*attrs))
{
// Newer files have readCost and writeCost attributes
accessCost = cost_type2money(attrs->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost)) + attrs->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost)));
}
else
{
// Costs need to be calculated from numDiskReads and numDiskWrites for legacy files
numDiskWrites = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites));
doLegacyAccessCostCalc = true;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly only for files, not indexes if the index stats are currently wildly wrong.

// NB: Costs of index reading can not be reliably estimated based on 'numDiskReads'
if (!isFileKey(*attrs))
numDiskReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads));
}
}
if (isEmptyString(cluster))
{
StringArray clusterNames;
unsigned countClusters = getClusterNames(clusterNames);
for (unsigned i = 0; i < countClusters; i++)
atRestCost += calcFileAtRestCost(clusterNames[i], sizeGB, fileAgeDays);
if (countClusters && doLegacyAccessCostCalc)
{
double tmpAtRestcost, tmpAccessCost;
calcFileCost(clusterNames[i], sizeGB, fileAgeDays, numDiskWrites, numDiskReads, tmpAtRestcost, tmpAccessCost);
atRestCost += tmpAtRestcost;
accessCost += tmpAccessCost;
// NB: numDiskReads/numDiskWrites are stored at the file level, not per cluster.
// So cannot calculate accessCost per cluster, assume cost is based on 1st.
accessCost = calcFileAccessCost(clusterNames[0], numDiskWrites, numDiskReads);
}
}
else
{
calcFileCost(cluster, sizeGB, fileAgeDays, numDiskWrites, numDiskReads, atRestCost, accessCost);
atRestCost += calcFileAtRestCost(cluster, sizeGB, fileAgeDays);
if (doLegacyAccessCostCalc)
accessCost = calcFileAccessCost(cluster, numDiskWrites, numDiskReads);
}
}
};
Expand Down Expand Up @@ -13340,11 +13364,12 @@ IDFProtectedIterator *CDistributedFileDirectory::lookupProtectedFiles(const char
const char* DFUQResultFieldNames[] = { "@name", "@description", "@group", "@kind", "@modified", "@job", "@owner",
"@DFUSFrecordCount", "@recordCount", "@recordSize", "@DFUSFsize", "@size", "@workunit", "@DFUSFcluster", "@numsubfiles",
"@accessed", "@numparts", "@compressedSize", "@directory", "@partmask", "@superowners", "@persistent", "@protect", "@compressed",
"@cost", "@numDiskReads", "@numDiskWrites", "@atRestCost", "@accessCost", "@maxSkew", "@minSkew", "@maxSkewPart", "@minSkewPart" };
"@cost", "@numDiskReads", "@numDiskWrites", "@atRestCost", "@accessCost", "@maxSkew", "@minSkew", "@maxSkewPart", "@minSkewPart",
"@readCost", "@writeCost" };

extern da_decl const char* getDFUQResultFieldName(DFUQResultField feild)
extern da_decl const char* getDFUQResultFieldName(DFUQResultField field)
{
return DFUQResultFieldNames[feild];
return DFUQResultFieldNames[field];
}

IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned numFiles, DFUQResultField* localFilters, const char* localFilterBuf)
Expand Down Expand Up @@ -13411,6 +13436,7 @@ IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned nu

void setCost(IPropertyTree* file, const char *nodeGroup)
{
// Set the following dynamic fields: atRestCost, accessCost, cost and for legacy files: readCost, writeCost
StringBuffer str;
double fileAgeDays = 0.0;
if (file->getProp(getDFUQResultFieldName(DFUQRFtimemodified), str))
Expand All @@ -13425,13 +13451,29 @@ IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned nu
else
sizeDiskSize = file->getPropInt64(getDFUQResultFieldName(DFUQRForigsize), 0);
double sizeGB = sizeDiskSize / ((double)1024 * 1024 * 1024);
__int64 numDiskWrites = file->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0);
__int64 numDiskReads = file->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0);
double atRestCost, accessCost;
calcFileCost(nodeGroup, sizeGB, fileAgeDays, numDiskWrites, numDiskReads, atRestCost, accessCost);
file->setPropReal(getDFUQResultFieldName(DFUQRFcost), atRestCost+accessCost);
file->setPropReal(getDFUQResultFieldName(DFUQRFatRestCost), atRestCost);
file->setPropReal(getDFUQResultFieldName(DFUQRFaccessCost), accessCost);
cost_type atRestCost = money2cost_type(calcFileAtRestCost(nodeGroup, sizeGB, fileAgeDays));
file->setPropInt64(getDFUQResultFieldName(DFUQRFatRestCost), atRestCost);

// Dynamically calculate and set the access cost field and, for legacy files, set the read/write cost fields
cost_type accessCost = 0;
if (hasReadWriteCostFields(*file))
{
accessCost = file->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost)) + file->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost));
}
else // Calculate access cost from numDiskReads & numDiskWrites for legacy files
{
cost_type legacyReadCost = getLegacyReadCost(*file, nodeGroup);
file->setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost);

cost_type legacyWriteCost = getLegacyWriteCost(*file, nodeGroup);
file->setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), legacyWriteCost);

accessCost = legacyReadCost + legacyWriteCost;
}
file->setPropInt64(getDFUQResultFieldName(DFUQRFaccessCost), accessCost);

// Dynamically calculate and set the total cost field
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

trivial/typo; Dymically -> Dynamically

file->setPropInt64(getDFUQResultFieldName(DFUQRFcost), atRestCost + accessCost);
}

IPropertyTree *deserializeFileAttr(MemoryBuffer &mb, StringArray& nodeGroupFilter)
Expand Down
44 changes: 39 additions & 5 deletions dali/base/dadfs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,15 +294,17 @@ enum DFUQResultField
DFUQRFminSkew = 30,
DFUQRFmaxSkewPart = 31,
DFUQRFminSkewPart = 32,
DFUQRFterm = 33,
DFUQRFreadCost = 33,
DFUQRFwriteCost = 34,
DFUQRFterm = 35, // must be last in list
DFUQRFreverse = 256,
DFUQRFnocase = 512,
DFUQRFnumeric = 1024,
DFUQRFfloat = 2048
};

extern da_decl const char* getDFUQFilterFieldName(DFUQFilterField feild);
extern da_decl const char* getDFUQResultFieldName(DFUQResultField feild);
extern da_decl const char* getDFUQFilterFieldName(DFUQFilterField field);
extern da_decl const char* getDFUQResultFieldName(DFUQResultField field);

/**
* File operations can be included in a transaction to ensure that multiple
Expand Down Expand Up @@ -861,7 +863,7 @@ extern da_decl GroupType translateGroupType(const char *groupType);

// Useful property query functions

inline bool isFileKey(IPropertyTree &pt) { const char *kind = pt.queryProp("@kind"); return kind&&strieq(kind,"key"); }
// A file is a key (index) when its @kind attribute is "key" (case-insensitive)
inline bool isFileKey(const IPropertyTree &pt)
{
    const char *kind = pt.queryProp("@kind");
    return kind && strieq(kind, "key");
}
inline bool isFileKey(IDistributedFile *f) { return isFileKey(f->queryAttributes()); }
inline bool isFileKey(IFileDescriptor *f) { return isFileKey(f->queryProperties()); }

Expand All @@ -886,11 +888,43 @@ inline const char *queryFileKind(IFileDescriptor *f) { return queryFileKind(f->q
extern da_decl void ensureFileScope(const CDfsLogicalFileName &dlfn, unsigned timeoutms=INFINITE);

extern da_decl bool checkLogicalName(const char *lfn,IUserDescriptor *user,bool readreq,bool createreq,bool allowquery,const char *specialnotallowedmsg);
extern da_decl void calcFileCost(const char * cluster, double sizeGB, double fileAgeDays, __int64 numDiskWrites, __int64 numDiskReads, double & atRestCost, double & accessCost);

extern da_decl double calcFileAtRestCost(const char * cluster, double sizeGB, double fileAgeDays);
extern da_decl double calcFileAccessCost(const char * cluster, __int64 numDiskWrites, __int64 numDiskReads);
extern da_decl double calcFileAccessCost(IDistributedFile *f, __int64 numDiskWrites, __int64 numDiskReads);
constexpr bool defaultPrivilegedUser = true;
constexpr bool defaultNonPrivilegedUser = false;

extern da_decl void configurePreferredPlanes();
// True when the file attributes carry explicit cost fields (@readCost/@writeCost).
// Legacy files lack both, and their access cost must instead be derived from the
// numDiskReads/numDiskWrites counters (see getLegacyReadCost/getLegacyWriteCost).
inline bool hasReadWriteCostFields(const IPropertyTree & fileAttr)
{
    if (fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost)))
        return true;
    return fileAttr.hasProp(getDFUQResultFieldName(DFUQRFwriteCost));
}

// Estimate the read cost of a legacy file (one without @readCost/@writeCost)
// from its @numDiskReads counter; returns 0 for non-legacy files, files with
// no read counter, and keys.
// NB: costs of index reading can not be reliably estimated based on 'numDiskReads'.
template<typename Source>
inline cost_type getLegacyReadCost(const IPropertyTree & fileAttr, Source source)
{
    if (hasReadWriteCostFields(fileAttr))
        return 0; // not legacy: explicit cost fields are authoritative
    if (!fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads)))
        return 0; // no counter to estimate from
    if (isFileKey(fileAttr))
        return 0; // index read counts are unreliable for costing
    stat_type numReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0);
    return money2cost_type(calcFileAccessCost(source, 0, numReads));
}
// Estimate the write cost of a legacy file (one without @readCost/@writeCost)
// from its @numDiskWrites counter; returns 0 for non-legacy files and files
// with no write counter.
template<typename Source>
inline cost_type getLegacyWriteCost(const IPropertyTree & fileAttr, Source source)
{
    if (hasReadWriteCostFields(fileAttr))
        return 0; // not legacy: explicit cost fields are authoritative
    if (!fileAttr.hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites)))
        return 0; // no counter to estimate from
    stat_type numWrites = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0);
    return money2cost_type(calcFileAccessCost(source, numWrites, 0));
}
#endif
13 changes: 11 additions & 2 deletions dali/ft/filecopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3592,7 +3592,10 @@ void FileSprayer::updateTargetProperties()

DistributedFilePropertyLock lock(distributedTarget);
IPropertyTree &curProps = lock.queryAttributes();
curProps.setPropInt64("@numDiskWrites", totalNumWrites);
cost_type writeCost = money2cost_type(calcFileAccessCost(distributedTarget, totalNumWrites, 0));
curProps.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), writeCost);
jakesmith marked this conversation as resolved.
Show resolved Hide resolved
curProps.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), totalNumWrites);

if (calcCRC())
curProps.setPropInt(FAcrc, totalCRC.get());
curProps.setPropInt64(FAsize, totalLength);
Expand Down Expand Up @@ -3771,7 +3774,13 @@ void FileSprayer::updateTargetProperties()
if (distributedSource)
{
if (distributedSource->querySuperFile()==nullptr)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not new, but what if it is a super being read?
Also not new, but it's misleading that this code to update distributedSource is in a method called "updateTargetProperties". For clarity (now in this PR), I think it should be moved outside of it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it is super file, I believe it uses the aggregate of the cost from all the subfiles.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

per offline conversation - to be investigated.
What happens when 'distributedSource' is a superfile (is it even supported?). If it is, what happens (if anything currently) re. updating cost attributes on the subfiles read.
@shamser - I've opened a subtask for this (HPCC-30970), under HPCC-30965

distributedSource->addAttrValue("@numDiskReads", totalNumReads);
{
IPropertyTree & fileAttr = distributedSource->queryAttributes();
cost_type legacyReadCost = getLegacyReadCost(fileAttr, distributedSource);
cost_type curReadCost = money2cost_type(calcFileAccessCost(distributedSource, 0, totalNumReads));
distributedSource->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost+curReadCost);
distributedSource->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), totalNumReads);
jakesmith marked this conversation as resolved.
Show resolved Hide resolved
}
}
if (error)
throw error.getClear();
Expand Down
16 changes: 10 additions & 6 deletions ecl/hthor/hthor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ void CHThorDiskWriteActivity::publish()
if (helper.getFlags() & TDWrestricted)
properties.setPropBool("restricted", true);

properties.setPropInt64("@numDiskWrites", numDiskWrites);
properties.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), numDiskWrites);
StringBuffer lfn;
expandLogicalFilename(lfn, mangledHelperFileName.str(), agent.queryWorkUnit(), agent.queryResolveFilesLocally(), false);
CDfsLogicalFileName logicalName;
Expand All @@ -790,6 +790,7 @@ void CHThorDiskWriteActivity::publish()
StringBuffer clusterName;
file->getClusterName(0, clusterName);
diskAccessCost = money2cost_type(calcFileAccessCost(clusterName, numDiskWrites, 0));
properties.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), diskAccessCost);
}
file->attach(logicalName.get(), agent.queryCodeContext()->queryUserDescriptor());
agent.logFileAccess(file, "HThor", "CREATED", graph);
Expand Down Expand Up @@ -1366,7 +1367,7 @@ void CHThorIndexWriteActivity::execute()
properties.setProp("@workunit", agent.queryWorkUnit()->queryWuid());
properties.setProp("@job", agent.queryWorkUnit()->queryJobName());
properties.setPropInt64("@duplicateKeyCount",duplicateKeyCount);
properties.setPropInt64("@numDiskWrites", numDiskWrites);
properties.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), numDiskWrites);
properties.setPropInt64("@numLeafNodes", numLeafNodes);
properties.setPropInt64("@numBranchNodes", numBranchNodes);
properties.setPropInt64("@numBlobNodes", numBlobNodes);
Expand Down Expand Up @@ -1437,6 +1438,7 @@ void CHThorIndexWriteActivity::execute()
StringBuffer clusterName;
dfile->getClusterName(0, clusterName);
diskAccessCost = money2cost_type(calcFileAccessCost(clusterName, numDiskWrites, 0));
properties.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), diskAccessCost);
}
else
lfn = filename;
Expand Down Expand Up @@ -8539,10 +8541,12 @@ void CHThorDiskReadBaseActivity::closepart()
dFile = &(super->querySubFile(subfile, true));
}
}
dFile->addAttrValue("@numDiskReads", curDiskReads);
StringBuffer clusterName;
dFile->getClusterName(0, clusterName);
diskAccessCost = money2cost_type(calcFileAccessCost(clusterName, 0, curDiskReads));
IPropertyTree & fileAttr = dFile->queryAttributes();
cost_type legacyReadCost = getLegacyReadCost(fileAttr, dFile);
cost_type curReadCost = money2cost_type(calcFileAccessCost(dFile, 0, curDiskReads));

dFile->addAttrValue(getDFUQResultFieldName(DFUQRFreadCost), legacyReadCost + curReadCost);
dFile->addAttrValue(getDFUQResultFieldName(DFUQRFnumDiskReads), curDiskReads);
}
numDiskReads += curDiskReads;
}
Expand Down
11 changes: 8 additions & 3 deletions esp/services/ws_dfu/ws_dfuHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,16 @@ bool WsDFUHelpers::addToLogicalFileList(IPropertyTree& file, const char* nodeGro
if (version >= 1.61)
{
if (version < 1.62)
lFile->setCost(file.getPropReal(getDFUQResultFieldName(DFUQRFcost)));
{
cost_type cost = file.getPropInt64(getDFUQResultFieldName(DFUQRFcost));
lFile->setCost(cost_type2money(cost));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note to myself - there is no backward compatibility issue here, because DFUQRFcost has been dynamically calculated in this version, via getDFAttributesTreeIterator

}
else
{
lFile->setAtRestCost(file.getPropReal(getDFUQResultFieldName(DFUQRFatRestCost)));
lFile->setAccessCost(file.getPropReal(getDFUQResultFieldName(DFUQRFaccessCost)));
cost_type atRestCost = file.getPropInt64(getDFUQResultFieldName(DFUQRFatRestCost));
lFile->setAtRestCost(cost_type2money(atRestCost));
cost_type accessCost = file .getPropInt64(getDFUQResultFieldName(DFUQRFaccessCost));
lFile->setAccessCost(cost_type2money(accessCost));
}
}
if ((version >= 1.63) && (file.hasProp(getDFUQResultFieldName(DFUQRFmaxSkew))))
Expand Down
Loading
Loading