From a1892863b840efcd9d5d493577ba209e8631bd92 Mon Sep 17 00:00:00 2001 From: Terrence Asselin <terrence.asselin@lexisnexisrisk.com> Date: Tue, 24 Sep 2024 16:50:53 -0500 Subject: [PATCH] HPCC-30368 Add packagemap support for remoteStorage property Similar to how the daliip property is supported in the packagemap file, add support for a remoteStorage property that mirrors the `--remote-storage` CLI option. - Apply the same scoping rules as for daliip- values defined deeper in the hierarchy take precedence, and anything defined in the file takes precedence over a CLI option. - Disallow use of both daliip and remoteStorage in the same element. Signed-off-by: Terrence Asselin <terrence.asselin@lexisnexisrisk.com> --- common/pkgfiles/referencedfilelist.cpp | 139 +++++++++++++++++-------- common/pkgfiles/referencedfilelist.hpp | 2 +- 2 files changed, 99 insertions(+), 42 deletions(-) diff --git a/common/pkgfiles/referencedfilelist.cpp b/common/pkgfiles/referencedfilelist.cpp index 9a246966413..94e86efdc68 100644 --- a/common/pkgfiles/referencedfilelist.cpp +++ b/common/pkgfiles/referencedfilelist.cpp @@ -126,13 +126,43 @@ void splitDerivedDfsLocation(const char *address, StringBuffer &cluster, StringB prefix.append(basePrefix); } +void splitDerivedDfsLocationOrRemote(const char *address, StringBuffer &cluster, StringBuffer &ip, StringBuffer &prefix, + const char *defaultCluster, const char *baseCluster, const char *baseIP, const char *basePrefix, + const char *currentRemoteStorage, StringBuffer& effectiveRemoteStorage, const char *baseRemoteStorage) +{ + if (!isEmptyString(address) && !isEmptyString(currentRemoteStorage)) + throw makeStringExceptionV(-1, "Cannot specify both a dfs location (%s) and a remote storage location (%s)", address, currentRemoteStorage); + // Choose either a daliip (split from address) or a remote-storage location to + // propagate to the next level of parsing as the effective method of resolving a file + if (!isEmptyString(address)) + { + splitDfsLocation(address, cluster, ip, prefix, defaultCluster); + if (!ip.isEmpty()) + effectiveRemoteStorage.clear(); + return; + } + + if (!isEmptyString(currentRemoteStorage)) + { + effectiveRemoteStorage.append(currentRemoteStorage); + // Cluster and prefix aren't used if ip is empty, so they don't need to be cleared + ip.clear(); + return; + } + + ip.append(baseIP); + cluster.append(baseCluster); + prefix.append(basePrefix); + effectiveRemoteStorage.append(baseRemoteStorage); +} + class ReferencedFileList; class ReferencedFile : implements IReferencedFile, public CInterface { public: IMPLEMENT_IINTERFACE; - ReferencedFile(const char *lfn, const char *sourceIP, const char *srcCluster, const char *prefix, bool isSubFile, unsigned _flags, const char *_pkgid, bool noDfs, bool calcSize) + ReferencedFile(const char *lfn, const char *sourceIP, const char *srcCluster, const char *prefix, bool isSubFile, unsigned _flags, const char *_pkgid, bool noDfs, bool calcSize, const char *remoteStorageName) : pkgid(_pkgid), fileSize(0), numParts(0), flags(_flags), noDfsResolution(noDfs), calcFileSize(calcSize), trackSubFiles(false) { { @@ -142,6 +172,12 @@ class ReferencedFile : implements IReferencedFile, public CInterface } if (remoteStorage.length()) flags |= RefFileLFNRemote; + else if (!isEmptyString(remoteStorageName)) + { + // can be declared in packagemap at different scopes + remoteStorage.set(remoteStorageName); + flags |= RefFileLFNRemote; + } else if (daliip.length()) flags |= RefFileLFNForeign; else @@ -152,6 +188,9 @@ class ReferencedFile : implements IReferencedFile, public CInterface flags |= RefSubFile; } + ReferencedFile(const char *lfn, const char *sourceIP, const char *srcCluster, const char *prefix, bool isSubFile, unsigned _flags, const char *_pkgid, bool noDfs, bool calcSize) + : ReferencedFile(lfn, sourceIP, srcCluster, prefix, isSubFile, _flags, _pkgid, noDfs, calcSize, nullptr) { } + void reset() { flags &= ~(RefFileNotOnCluster | RefFileNotFound | RefFileResolvedForeign | RefFileResolvedRemote | RefFileCopyInfoFailed | RefFileCloned | RefFileNotOnSource); //these flags are calculated during resolve @@ -242,18 +281,18 @@ class ReferencedFileList : implements IReferencedFileList, public CInterface user.set(userDesc); } - void ensureFile(const char *ln, unsigned flags, const char *pkgid, bool noDfsResolution, const StringArray *subfileNames, const char *daliip=NULL, const char *srcCluster=NULL, const char *remotePrefix=NULL); + void ensureFile(const char *ln, unsigned flags, const char *pkgid, bool noDfsResolution, const StringArray *subfileNames, const char *daliip=nullptr, const char *srcCluster=nullptr, const char *remotePrefix=nullptr, const char* remoteStorage=nullptr); - virtual void addFile(const char *ln, const char *daliip=NULL, const char *srcCluster=NULL, const char *remotePrefix=NULL); + virtual void addFile(const char *ln, const char *daliip=nullptr, const char *srcCluster=nullptr, const char *remotePrefix=nullptr, const char *remoteStorageName=nullptr); virtual void addFiles(StringArray &files); virtual void addFilesFromWorkUnit(IConstWorkUnit *cw); virtual bool addFilesFromQuery(IConstWorkUnit *cw, const IHpccPackageMap *pm, const char *queryid); virtual bool addFilesFromQuery(IConstWorkUnit *cw, const IHpccPackage *pkg); virtual void addFilesFromPackageMap(IPropertyTree *pm); - void addFileFromSubFile(IPropertyTree &subFile, const char *_daliip, const char *srcCluster, const char *_remotePrefix); - void addFilesFromSuperFile(IPropertyTree &superFile, const char *_daliip, const char *srcCluster, const char *_remotePrefix); - void addFilesFromPackage(IPropertyTree &package, const char *_daliip, const char *srcCluster, const char *_remotePrefix); + void addFileFromSubFile(IPropertyTree &subFile, const char *_daliip, const char *srcCluster, const char *_remotePrefix, const char *_remoteStorageName); + void addFilesFromSuperFile(IPropertyTree &superFile, const char *_daliip, const char *srcCluster, const char *_remotePrefix, const char *_remoteStorageName); + void addFilesFromPackage(IPropertyTree &package, const char *_daliip, const char *srcCluster, const char *_remotePrefix, const char *_remoteStorageName); virtual IReferencedFileIterator *getFiles(); virtual void cloneFileInfo(StringBuffer &publisherWuid, const char *dstCluster, unsigned updateFlags, IDFUhelper *helper, bool cloneSuperInfo, bool cloneForeign, unsigned redundancy, unsigned channelsPerNode, int replicateOffset, const char *defReplicateFolder); @@ -883,12 +922,12 @@ class ReferencedFileIterator : implements IReferencedFileIterator, public CInter Owned<HashIterator> iter; }; -void ReferencedFileList::ensureFile(const char *ln, unsigned flags, const char *pkgid, bool noDfsResolution, const StringArray *subfileNames, const char *daliip, const char *srcCluster, const char *prefix) +void ReferencedFileList::ensureFile(const char *ln, unsigned flags, const char *pkgid, bool noDfsResolution, const StringArray *subfileNames, const char *daliip, const char *srcCluster, const char *prefix, const char *remoteStorageName) { if (!allowForeign && checkForeign(ln)) throw MakeStringException(-1, "Foreign file not allowed%s: %s", (flags & RefFileInPackage) ? " (declared in package)" : "", ln); - Owned<ReferencedFile> file = new ReferencedFile(ln, daliip, srcCluster, prefix, false, flags, pkgid, noDfsResolution, allowSizeCalc); + Owned<ReferencedFile> file = new ReferencedFile(ln, daliip, srcCluster, prefix, false, flags, pkgid, noDfsResolution, allowSizeCalc, remoteStorageName); if (!file->logicalName.length()) return; if (subfileNames) @@ -904,9 +943,9 @@ void ReferencedFileList::ensureFile(const char *ln, unsigned flags, const char * } } -void ReferencedFileList::addFile(const char *ln, const char *daliip, const char *srcCluster, const char *prefix) +void ReferencedFileList::addFile(const char *ln, const char *daliip, const char *srcCluster, const char *prefix, const char *remoteStorageName) { - ensureFile(ln, 0, NULL, false, nullptr, daliip, srcCluster, prefix); + ensureFile(ln, 0, NULL, false, nullptr, daliip, srcCluster, prefix, remoteStorageName); } void ReferencedFileList::addFiles(StringArray &files) @@ -915,37 +954,41 @@ void ReferencedFileList::addFiles(StringArray &files) addFile(files.item(i)); } -void ReferencedFileList::addFileFromSubFile(IPropertyTree &subFile, const char *ip, const char *cluster, const char *prefix) +void ReferencedFileList::addFileFromSubFile(IPropertyTree &subFile, const char *ip, const char *cluster, const char *prefix, const char *remoteStorageName) { - addFile(subFile.queryProp("@value"), ip, cluster, prefix); + addFile(subFile.queryProp("@value"), ip, cluster, prefix, remoteStorageName); } -void ReferencedFileList::addFilesFromSuperFile(IPropertyTree &superFile, const char *_ip, const char *_cluster, const char *_prefix) +void ReferencedFileList::addFilesFromSuperFile(IPropertyTree &superFile, const char *_ip, const char *_cluster, const char *_prefix, const char *ancestorRemoteStorage) { StringBuffer ip; StringBuffer cluster; StringBuffer prefix; - splitDerivedDfsLocation(superFile.queryProp("@daliip"), cluster, ip, prefix, NULL, _cluster, _ip, _prefix); + StringBuffer effectiveRemoteStorage; + splitDerivedDfsLocationOrRemote(superFile.queryProp("@daliip"), cluster, ip, prefix, nullptr, _cluster, _ip, _prefix, + superFile.queryProp("@remoteStorage"), effectiveRemoteStorage, ancestorRemoteStorage); if (superFile.hasProp("@sourceCluster")) cluster.set(superFile.queryProp("@sourceCluster")); Owned<IPropertyTreeIterator> subFiles = superFile.getElements("SubFile[@value]"); ForEach(*subFiles) - addFileFromSubFile(subFiles->query(), ip, cluster, prefix); + addFileFromSubFile(subFiles->query(), ip, cluster, prefix, effectiveRemoteStorage); } -void ReferencedFileList::addFilesFromPackage(IPropertyTree &package, const char *_ip, const char *_cluster, const char *_prefix) +void ReferencedFileList::addFilesFromPackage(IPropertyTree &package, const char *_ip, const char *_cluster, const char *_prefix, const char *ancestorRemoteStorage) { StringBuffer ip; StringBuffer cluster; StringBuffer prefix; - splitDerivedDfsLocation(package.queryProp("@daliip"), cluster, ip, prefix, NULL, _cluster, _ip, _prefix); + StringBuffer effectiveRemoteStorage; + splitDerivedDfsLocationOrRemote(package.queryProp("@daliip"), cluster, ip, prefix, nullptr, _cluster, _ip, _prefix, + package.queryProp("@remoteStorage"), effectiveRemoteStorage, ancestorRemoteStorage); if (package.hasProp("@sourceCluster")) cluster.set(package.queryProp("@sourceCluster")); Owned<IPropertyTreeIterator> supers = package.getElements("SuperFile"); ForEach(*supers) - addFilesFromSuperFile(supers->query(), ip, cluster, prefix); + addFilesFromSuperFile(supers->query(), ip, cluster, prefix, effectiveRemoteStorage); } void ReferencedFileList::addFilesFromPackageMap(IPropertyTree *pm) @@ -953,20 +996,22 @@ void ReferencedFileList::addFilesFromPackageMap(IPropertyTree *pm) StringBuffer ip; StringBuffer cluster; StringBuffer prefix; + StringBuffer remoteStorageName; const char *srcCluster = pm->queryProp("@sourceCluster"); - splitDerivedDfsLocation(pm->queryProp("@daliip"), cluster, ip, prefix, srcCluster, srcCluster, NULL, NULL); + splitDerivedDfsLocationOrRemote(pm->queryProp("@daliip"), cluster, ip, prefix, srcCluster, srcCluster, nullptr, nullptr, + pm->queryProp("@remoteStorage"), remoteStorageName, nullptr); Owned<IPropertyTreeIterator> packages = pm->getElements("Package"); ForEach(*packages) - addFilesFromPackage(packages->query(), ip, cluster, prefix); + addFilesFromPackage(packages->query(), ip, cluster, prefix, remoteStorageName); packages.setown(pm->getElements("Part/Package")); ForEach(*packages) - addFilesFromPackage(packages->query(), ip, cluster, prefix); + addFilesFromPackage(packages->query(), ip, cluster, prefix, remoteStorageName); } bool ReferencedFileList::addFilesFromQuery(IConstWorkUnit *cw, const IHpccPackage *pkg) { SummaryMap files; - if (cw->getSummary(SummaryType::ReadFile, files) && + if (cw->getSummary(SummaryType::ReadFile, files) && cw->getSummary(SummaryType::ReadIndex, files)) { for (const auto& [lName, summaryFlags] : files) @@ -1104,31 +1149,43 @@ void ReferencedFileList::resolveFiles(const StringArray &locations, const char * srcCluster.set(_srcCluster); remotePrefix.set(_remotePrefix); + ReferencedFileIterator files(this); + + // For use when expandSuperFiles=true if (useRemoteStorage) - { - DBGLOG("ReferencedFileList resolving remote storage files at %s", nullText(remoteLocation)); - if (!user) - user.setown(createUserDescriptor()); remoteStorage.set(remoteLocation); - ReferencedFileIterator files(this); - ForEach(files) - files.queryObject().resolveLocalOrRemote(locations, srcCluster, user, remoteStorage, remotePrefix, checkLocalFirst, expandSuperFiles ? &subfiles : NULL, trackSubFiles, resolveLFNForeign); - } - else + + ForEach(files) { - if (!isEmptyString(remoteLocation)) - DBGLOG("ReferencedFileList resolving remote dali files at %s", remoteLocation); + ReferencedFile &file = files.queryObject(); + if (file.daliip.isEmpty() && (!file.remoteStorage.isEmpty() || useRemoteStorage)) + { + if (!user) + user.setown(createUserDescriptor()); + + if (file.remoteStorage.isEmpty()) // Can be set at multiple levels in a packagemap + file.remoteStorage.set(remoteLocation); // Top-level remoteLocation has lowest precedence, used if nothing set in packagemap + + DBGLOG("ReferencedFileList resolving remote storage file at %s", nullText(file.remoteStorage)); + file.resolveLocalOrRemote(locations, srcCluster, user, file.remoteStorage, remotePrefix, checkLocalFirst, expandSuperFiles ? &subfiles : NULL, trackSubFiles, resolveLFNForeign); + } else - DBGLOG("ReferencedFileList resolving local files (no daliip)"); - remote.setown(!isEmptyString(remoteLocation) ? createINode(remoteLocation, 7070) : nullptr); + { + // The remoteLocation is a daliip when useRemoteStorage is false + const char *passedDaliip = !useRemoteStorage ? remoteLocation : nullptr; + if (!isEmptyString(passedDaliip) || !file.daliip.isEmpty()) + DBGLOG("ReferencedFileList resolving remote dali file at %s", isEmptyString(passedDaliip) ? nullText(file.daliip) : passedDaliip); + else + DBGLOG("ReferencedFileList resolving local file (no daliip or remote storage)"); + // Otherwise, passing nullptr for remote allows resolveLocalOrForeign to use ReferencedFile.daliip with + // the matching ReferencedFile.remotePrefix instead of the ReferencedFileList.remotePrefix passed in here. + remote.setown(!isEmptyString(passedDaliip) ? createINode(passedDaliip, 7070) : nullptr); + file.resolveLocalOrForeign(locations, srcCluster, user, remote, remotePrefix, checkLocalFirst, expandSuperFiles ? &subfiles : NULL, trackSubFiles, resolveLFNForeign); + } - ReferencedFileIterator files(this); - ForEach(files) - files.queryObject().resolveLocalOrForeign(locations, srcCluster, user, remote, remotePrefix, checkLocalFirst, expandSuperFiles ? &subfiles : NULL, trackSubFiles, resolveLFNForeign); + if (expandSuperFiles) + resolveSubFiles(subfiles, locations, checkLocalFirst, trackSubFiles, resolveLFNForeign); } - - if (expandSuperFiles) - resolveSubFiles(subfiles, locations, checkLocalFirst, trackSubFiles, resolveLFNForeign); } bool ReferencedFileList::filesNeedCopying(bool cloneForeign) diff --git a/common/pkgfiles/referencedfilelist.hpp b/common/pkgfiles/referencedfilelist.hpp index 3d20b40f7c3..41421b5a812 100644 --- a/common/pkgfiles/referencedfilelist.hpp +++ b/common/pkgfiles/referencedfilelist.hpp @@ -68,7 +68,7 @@ interface IReferencedFileList : extends IInterface virtual bool addFilesFromQuery(IConstWorkUnit *cw, const IHpccPackage *pkg)=0; virtual void addFilesFromPackageMap(IPropertyTree *pm)=0; - virtual void addFile(const char *ln, const char *daliip=NULL, const char *sourceProcessCluster=NULL, const char *remotePrefix=NULL)=0; + virtual void addFile(const char *ln, const char *daliip=nullptr, const char *sourceProcessCluster=nullptr, const char *remotePrefix=nullptr, const char *remoteStorageName=nullptr)=0; virtual void addFiles(StringArray &files)=0; virtual IReferencedFileIterator *getFiles()=0;