From 88f96bc1a45443b0ff9ba26f72b79fa54f625877 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Thu, 27 Apr 2023 19:45:05 -0700 Subject: [PATCH 01/16] Only swap DFS to blob when immediately relevant --- cmd/copy.go | 107 ++++++++----------------- cmd/copyEnumeratorInit.go | 39 +++++++-- cmd/removeEnumerator.go | 2 +- cmd/setPropertiesEnumerator.go | 2 +- cmd/sync.go | 40 ++++----- cmd/syncEnumerator.go | 4 +- cmd/syncProcessor.go | 7 ++ cmd/validators.go | 53 ++++-------- common/fe-ste-models.go | 61 ++++++++------ e2etest/declarativeResourceManagers.go | 4 + hnstest/main.go | 55 +++++++++++++ ste/mgr-JobPartMgr.go | 46 ++++++----- ste/mgr-JobPartTransferMgr.go | 5 ++ ste/s2sCopier-URLToBlob.go | 17 ++++ ste/sender-blobFolders.go | 13 +-- ste/sourceInfoProvider-Blob.go | 56 +++++++++---- ste/xfer.go | 4 +- 17 files changed, 310 insertions(+), 205 deletions(-) create mode 100644 hnstest/main.go diff --git a/cmd/copy.go b/cmd/copy.go index 6ea91a180..b24a2fa91 100644 --- a/cmd/copy.go +++ b/cmd/copy.go @@ -271,29 +271,6 @@ func (raw rawCopyCmdArgs) cook() (CookedCopyCmdArgs, error) { azcopyScanningLogger.CloseLog() }) - /* We support DFS by using blob end-point of the account. We replace dfs by blob in src and dst */ - if src, dst := InferArgumentLocation(raw.src), InferArgumentLocation(raw.dst); src == common.ELocation.BlobFS() || dst == common.ELocation.BlobFS() { - srcDfs := src == common.ELocation.BlobFS() && dst != common.ELocation.Local() - if srcDfs { - raw.src = strings.Replace(raw.src, ".dfs", ".blob", 1) - glcm.Info("Switching to use blob endpoint on source account.") - - } - - dstDfs := dst == common.ELocation.BlobFS() && src != common.ELocation.Local() - if dstDfs { - raw.dst = strings.Replace(raw.dst, ".dfs", ".blob", 1) - msg := fmt.Sprintf("Switching to use blob endpoint on destination account. There are some limitations when switching endpoints. " + - "Please refer to https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues#blob-storage-apis") - glcm.Info(msg) - if azcopyScanningLogger != nil { - azcopyScanningLogger.Log(pipeline.LogInfo, msg) - } - } - - cooked.isHNStoHNS = srcDfs && dstDfs - } - fromTo, err := ValidateFromTo(raw.src, raw.dst, raw.fromTo) // TODO: src/dst if err != nil { return cooked, err @@ -688,14 +665,11 @@ func (raw rawCopyCmdArgs) cook() (CookedCopyCmdArgs, error) { return cooked, err } cooked.preservePermissions = common.NewPreservePermissionsOption(isUserPersistingPermissions, raw.preserveOwner, cooked.FromTo) - if cooked.FromTo == common.EFromTo.BlobBlob() && cooked.preservePermissions.IsTruthy() { - cooked.isHNStoHNS = true // override HNS settings, since if a user is tx'ing blob->blob and copying permissions, it's DEFINITELY going to be HNS (since perms don't exist w/o HNS). - } // --as-subdir is OK on all sources and destinations, but additional verification has to be done down the line. (e.g. 
https://account.blob.core.windows.net is not a valid root) cooked.asSubdir = raw.asSubdir - cooked.IncludeDirectoryStubs = raw.includeDirectoryStubs || (cooked.isHNStoHNS && cooked.preservePermissions.IsTruthy()) + cooked.IncludeDirectoryStubs = raw.includeDirectoryStubs if err = crossValidateSymlinksAndPermissions(cooked.SymlinkHandling, cooked.preservePermissions.IsTruthy()); err != nil { return cooked, err @@ -954,18 +928,24 @@ func areBothLocationsSMBAware(fromTo common.FromTo) bool { } func areBothLocationsPOSIXAware(fromTo common.FromTo) bool { - // POSIX properties are stored in blob metadata-- They don't need a special persistence strategy for BlobBlob. - return runtime.GOOS == "linux" && ( - fromTo == common.EFromTo.BlobLocal() || - fromTo == common.EFromTo.LocalBlob()) || - fromTo == common.EFromTo.BlobBlob() + // POSIX properties are stored in blob metadata-- They don't need a special persistence strategy for S2S methods. + switch fromTo { + case common.EFromTo.BlobLocal(), common.EFromTo.LocalBlob(), common.EFromTo.BlobFSLocal(), common.EFromTo.LocalBlobFS(): + return runtime.GOOS == "linux" + case common.EFromTo.BlobBlob(), common.EFromTo.BlobFSBlobFS(), common.EFromTo.BlobFSBlob(), common.EFromTo.BlobBlobFS(): + return true + default: + return false + } } func validatePreserveSMBPropertyOption(toPreserve bool, fromTo common.FromTo, overwrite *common.OverwriteOption, flagName string) error { - if toPreserve && !(fromTo == common.EFromTo.LocalFile() || + if toPreserve && flagName == PreservePermissionsFlag && (fromTo == common.EFromTo.BlobBlob() || fromTo == common.EFromTo.BlobFSBlob() || fromTo == common.EFromTo.BlobBlobFS() || fromTo == common.EFromTo.BlobFSBlobFS()) { + // the user probably knows what they're doing if they're trying to persist permissions between blob-type endpoints. + return nil + } else if toPreserve && !(fromTo == common.EFromTo.LocalFile() || fromTo == common.EFromTo.FileLocal() || - fromTo == common.EFromTo.FileFile() || - fromTo == common.EFromTo.BlobBlob()) { + fromTo == common.EFromTo.FileFile()) { return fmt.Errorf("%s is set but the job is not between %s-aware resources", flagName, common.IffString(flagName == PreservePermissionsFlag, "permission", "SMB")) } @@ -990,9 +970,9 @@ func validatePreserveOwner(preserve bool, fromTo common.FromTo) error { func validateSymlinkHandlingMode(symlinkHandling common.SymlinkHandlingType, fromTo common.FromTo) error { if symlinkHandling.Preserve() { switch fromTo { - case common.EFromTo.LocalBlob(), common.EFromTo.BlobLocal(): + case common.EFromTo.LocalBlob(), common.EFromTo.BlobLocal(), common.EFromTo.BlobFSLocal(), common.EFromTo.LocalBlobFS(): return nil // Fine on all OSes that support symlink via the OS package. (Win, MacOS, and Linux do, and that's what we officially support.) - case common.EFromTo.BlobBlob(): + case common.EFromTo.BlobBlob(), common.EFromTo.BlobFSBlobFS(), common.EFromTo.BlobBlobFS(), common.EFromTo.BlobFSBlob(): return nil // Blob->Blob doesn't involve any local requirements default: return fmt.Errorf("flag --%s can only be used on Blob<->Blob or Local<->Blob", common.PreserveSymlinkFlagName) @@ -1109,7 +1089,6 @@ type CookedCopyCmdArgs struct { // from arguments Source common.ResourceString Destination common.ResourceString - isHNStoHNS bool // workaround to indicate that BlobBlob is actually HNS->HNS, since we shift to Blob instead of HNS. 
FromTo common.FromTo // new include/exclude only apply to file names @@ -1544,25 +1523,9 @@ func (cca *CookedCopyCmdArgs) processCopyJobPartOrders() (err error) { cca.StripTopDir = true } - // depending on the source and destination type, we process the cp command differently - // Create enumerator and do enumerating - switch cca.FromTo { - case common.EFromTo.LocalBlob(), - common.EFromTo.LocalBlobFS(), - common.EFromTo.LocalFile(), - common.EFromTo.BlobLocal(), - common.EFromTo.FileLocal(), - common.EFromTo.BlobFSLocal(), - common.EFromTo.BlobBlob(), - common.EFromTo.FileBlob(), - common.EFromTo.FileFile(), - common.EFromTo.BlobFile(), - common.EFromTo.S3Blob(), - common.EFromTo.GCPBlob(), - common.EFromTo.BenchmarkBlob(), - common.EFromTo.BenchmarkBlobFS(), - common.EFromTo.BenchmarkFile(): - + switch { + case cca.FromTo.IsUpload(), cca.FromTo.IsDownload(), cca.FromTo.IsS2S(): + // Execute a standard copy command var e *CopyEnumerator srcCredInfo, _ := cca.getSrcCredential(ctx, &jobPartOrder) @@ -1571,24 +1534,24 @@ func (cca *CookedCopyCmdArgs) processCopyJobPartOrders() (err error) { return fmt.Errorf("failed to initialize enumerator: %w", err) } err = e.enumerate() - case common.EFromTo.BlobTrash(), common.EFromTo.FileTrash(): - e, createErr := newRemoveEnumerator(cca) - if createErr != nil { - return fmt.Errorf("failed to initialize enumerator: %w", createErr) - } - - err = e.enumerate() - case common.EFromTo.BlobFSTrash(): - // TODO merge with BlobTrash case - err = removeBfsResources(cca) + case cca.FromTo.IsDelete(): + // Delete gets ran through copy, so handle delete + if cca.FromTo.From() == common.ELocation.BlobFS() { + // TODO merge with BlobTrash case + // Currently, Blob Delete in STE does not appropriately handle folders. In addition, dfs delete is free-ish. + err = removeBfsResources(cca) + } else { + e, createErr := newRemoveEnumerator(cca) + if createErr != nil { + return fmt.Errorf("failed to initialize enumerator: %w", createErr) + } - // TODO: Hide the File to Blob direction temporarily, as service support on-going. 
- // case common.EFromTo.FileBlob(): - // e := copyFileToNEnumerator(jobPartOrder) - // err = e.enumerate(cca) + err = e.enumerate() + } - case common.EFromTo.BlobNone(), common.EFromTo.BlobFSNone(), common.EFromTo.FileNone(): + case cca.FromTo.IsSetProperties(): + // Set properties as well e, createErr := setPropertiesEnumerator(cca) if createErr != nil { return fmt.Errorf("failed to initialize enumerator: %w", createErr) diff --git a/cmd/copyEnumeratorInit.go b/cmd/copyEnumeratorInit.go index 4af591b5a..8ee9b762a 100755 --- a/cmd/copyEnumeratorInit.go +++ b/cmd/copyEnumeratorInit.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "github.com/Azure/azure-storage-azcopy/v10/azbfs" "log" "net/url" "os" @@ -230,7 +231,7 @@ func (cca *CookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde // decide our folder transfer strategy var message string - jobPartOrder.Fpo, message = NewFolderPropertyOption(cca.FromTo, cca.Recursive, cca.StripTopDir, filters, cca.preserveSMBInfo, cca.preservePermissions.IsTruthy(), cca.preservePOSIXProperties, cca.isHNStoHNS, strings.EqualFold(cca.Destination.Value, common.Dev_Null), cca.IncludeDirectoryStubs) + jobPartOrder.Fpo, message = NewFolderPropertyOption(cca.FromTo, cca.Recursive, cca.StripTopDir, filters, cca.preserveSMBInfo, cca.preservePermissions.IsTruthy(), cca.preservePOSIXProperties, strings.EqualFold(cca.Destination.Value, common.Dev_Null), cca.IncludeDirectoryStubs) if !cca.dryrunMode { glcm.Info(message) } @@ -505,7 +506,6 @@ func (cca *CookedCopyCmdArgs) createDstContainer(containerName string, dstWithSA fsu := azfile.NewServiceURL(*dstURL, dstPipeline) shareURL := fsu.NewShareURL(containerName) _, err = shareURL.GetProperties(ctx) - if err == nil { return err } @@ -521,6 +521,33 @@ func (cca *CookedCopyCmdArgs) createDstContainer(containerName string, dstWithSA } else { return err } + case common.ELocation.BlobFS(): + // TODO: Implement blobfs container creation + accountRoot, err := GetAccountRoot(dstWithSAS, cca.FromTo.To()) + if err != nil { + return err + } + + dstURL, err := url.Parse(accountRoot) + if err != nil { + return err + } + + serviceURL := azbfs.NewServiceURL(*dstURL, dstPipeline) + fsURL := serviceURL.NewFileSystemURL(containerName) + _, err = fsURL.GetProperties(ctx) + if err == nil { + return err + } + + _, err = fsURL.Create(ctx) + if stgErr, ok := err.(azbfs.StorageError); ok { + if stgErr.ServiceCode() != azbfs.ServiceCodeFileSystemAlreadyExists { + return err + } + } else { + return err + } default: panic(fmt.Sprintf("cannot create a destination container at location %s.", cca.FromTo.To())) } @@ -664,25 +691,25 @@ func (cca *CookedCopyCmdArgs) MakeEscapedRelativePath(source bool, dstIsDir bool } // we assume that preserveSmbPermissions and preserveSmbInfo have already been validated, such that they are only true if both resource types support them -func NewFolderPropertyOption(fromTo common.FromTo, recursive, stripTopDir bool, filters []ObjectFilter, preserveSmbInfo, preserveSmbPermissions, preservePosixProperties, isDfsDfs, isDstNull, includeDirectoryStubs bool) (common.FolderPropertyOption, string) { +func NewFolderPropertyOption(fromTo common.FromTo, recursive, stripTopDir bool, filters []ObjectFilter, preserveSmbInfo, preservePermissions, preservePosixProperties, isDstNull, includeDirectoryStubs bool) (common.FolderPropertyOption, string) { getSuffix := func(willProcess bool) string { willProcessString := common.IffString(willProcess, "will be processed", "will not be processed") template := ". 
For the same reason, %s defined on folders %s" switch { - case preserveSmbPermissions && preserveSmbInfo: + case preservePermissions && preserveSmbInfo: return fmt.Sprintf(template, "properties and permissions", willProcessString) case preserveSmbInfo: return fmt.Sprintf(template, "properties", willProcessString) - case preserveSmbPermissions: + case preservePermissions: return fmt.Sprintf(template, "permissions", willProcessString) default: return "" // no preserve flags set, so we have nothing to say about them } } - bothFolderAware := (fromTo.AreBothFolderAware() || isDfsDfs || preservePosixProperties || includeDirectoryStubs) && !isDstNull + bothFolderAware := (fromTo.AreBothFolderAware() || preservePosixProperties || preservePermissions || includeDirectoryStubs) && !isDstNull isRemoveFromFolderAware := fromTo == common.EFromTo.FileTrash() if bothFolderAware || isRemoveFromFolderAware { if !recursive { diff --git a/cmd/removeEnumerator.go b/cmd/removeEnumerator.go index 3894186d4..b93665fc6 100755 --- a/cmd/removeEnumerator.go +++ b/cmd/removeEnumerator.go @@ -80,7 +80,7 @@ func newRemoveEnumerator(cca *CookedCopyCmdArgs) (enumerator *CopyEnumerator, er // (Must enumerate folders when deleting from a folder-aware location. Can't do folder deletion just based on file // deletion, because that would not handle folders that were empty at the start of the job). // isHNStoHNS is IGNORED here, because BlobFS locations don't take this route currently. - fpo, message := NewFolderPropertyOption(cca.FromTo, cca.Recursive, cca.StripTopDir, filters, false, false, false, false, false, cca.IncludeDirectoryStubs) + fpo, message := NewFolderPropertyOption(cca.FromTo, cca.Recursive, cca.StripTopDir, filters, false, false, false, false, cca.IncludeDirectoryStubs) // do not print Info message if in dry run mode if !cca.dryrunMode { glcm.Info(message) diff --git a/cmd/setPropertiesEnumerator.go b/cmd/setPropertiesEnumerator.go index 29058c878..e8d503a26 100755 --- a/cmd/setPropertiesEnumerator.go +++ b/cmd/setPropertiesEnumerator.go @@ -69,7 +69,7 @@ func setPropertiesEnumerator(cca *CookedCopyCmdArgs) (enumerator *CopyEnumerator filters = append(filters, excludePathFilters...) filters = append(filters, includeSoftDelete...) - fpo, message := NewFolderPropertyOption(cca.FromTo, cca.Recursive, cca.StripTopDir, filters, false, false, false, cca.isHNStoHNS, strings.EqualFold(cca.Destination.Value, common.Dev_Null), cca.IncludeDirectoryStubs) + fpo, message := NewFolderPropertyOption(cca.FromTo, cca.Recursive, cca.StripTopDir, filters, false, false, false, strings.EqualFold(cca.Destination.Value, common.Dev_Null), cca.IncludeDirectoryStubs) // do not print Info message if in dry run mode if !cca.dryrunMode { glcm.Info(message) diff --git a/cmd/sync.go b/cmd/sync.go index 32fa68b7f..3121ed195 100644 --- a/cmd/sync.go +++ b/cmd/sync.go @@ -138,25 +138,25 @@ func (raw *rawSyncCmdArgs) cook() (cookedSyncCmdArgs, error) { // TODO: if expand the set of source/dest combos supported by sync, update this method the declarative test framework: /* We support DFS by using blob end-point of the account. We replace dfs by blob in src and dst */ - srcHNS, dstHNS := false, false - if loc := InferArgumentLocation(raw.src); loc == common.ELocation.BlobFS() { - raw.src = strings.Replace(raw.src, ".dfs", ".blob", 1) - glcm.Info("Sync operates only on blob endpoint. 
Switching to use blob endpoint on source account.") - srcHNS = true - } - - if loc := InferArgumentLocation(raw.dst); loc == common.ELocation.BlobFS() { - raw.dst = strings.Replace(raw.dst, ".dfs", ".blob", 1) - msg := fmt.Sprintf("Sync operates only on blob endpoint. Switching to use blob endpoint on destination account. There are some limitations when switching endpoints. " + - "Please refer to https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues#blob-storage-apis") - glcm.Info(msg) - if azcopyScanningLogger != nil { - azcopyScanningLogger.Log(pipeline.LogInfo, msg) - } - dstHNS = true - } - - cooked.isHNSToHNS = srcHNS && dstHNS + //srcHNS, dstHNS := false, false + //if loc := InferArgumentLocation(raw.src); loc == common.ELocation.BlobFS() { + // raw.src = strings.Replace(raw.src, ".dfs", ".blob", 1) + // glcm.Info("Sync operates only on blob endpoint. Switching to use blob endpoint on source account.") + // srcHNS = true + //} + // + //if loc := InferArgumentLocation(raw.dst); loc == common.ELocation.BlobFS() { + // raw.dst = strings.Replace(raw.dst, ".dfs", ".blob", 1) + // msg := fmt.Sprintf("Sync operates only on blob endpoint. Switching to use blob endpoint on destination account. There are some limitations when switching endpoints. " + + // "Please refer to https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues#blob-storage-apis") + // glcm.Info(msg) + // if azcopyScanningLogger != nil { + // azcopyScanningLogger.Log(pipeline.LogInfo, msg) + // } + // dstHNS = true + //} + // + //cooked.isHNSToHNS = srcHNS && dstHNS var err error cooked.fromTo, err = ValidateFromTo(raw.src, raw.dst, raw.fromTo) @@ -173,7 +173,7 @@ func (raw *rawSyncCmdArgs) cook() (cookedSyncCmdArgs, error) { case common.EFromTo.BlobLocal(), common.EFromTo.FileLocal(): cooked.source, err = SplitResourceString(raw.src, cooked.fromTo.From()) common.PanicIfErr(err) - case common.EFromTo.BlobBlob(), common.EFromTo.FileFile(), common.EFromTo.BlobFile(), common.EFromTo.FileBlob(): + case common.EFromTo.BlobBlob(), common.EFromTo.FileFile(), common.EFromTo.BlobFile(), common.EFromTo.FileBlob(), common.EFromTo.BlobFSBlobFS(), common.EFromTo.BlobFSBlob(), common.EFromTo.BlobFSFile(), common.EFromTo.BlobBlobFS(), common.EFromTo.FileBlobFS(): cooked.destination, err = SplitResourceString(raw.dst, cooked.fromTo.To()) common.PanicIfErr(err) cooked.source, err = SplitResourceString(raw.src, cooked.fromTo.From()) diff --git a/cmd/syncEnumerator.go b/cmd/syncEnumerator.go index bc947d520..9f504a0e1 100644 --- a/cmd/syncEnumerator.go +++ b/cmd/syncEnumerator.go @@ -131,7 +131,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s } // decide our folder transfer strategy - fpo, folderMessage := NewFolderPropertyOption(cca.fromTo, cca.recursive, true, filters, cca.preserveSMBInfo, cca.preservePermissions.IsTruthy(), false, cca.isHNSToHNS, strings.EqualFold(cca.destination.Value, common.Dev_Null), false) // sync always acts like stripTopDir=true + fpo, folderMessage := NewFolderPropertyOption(cca.fromTo, cca.recursive, true, filters, cca.preserveSMBInfo, cca.preservePermissions.IsTruthy(), false, strings.EqualFold(cca.destination.Value, common.Dev_Null), false) // sync always acts like stripTopDir=true if !cca.dryrunMode { glcm.Info(folderMessage) } @@ -193,7 +193,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s // since only then can we know which local files definitely don't exist remotely var 
deleteScheduler objectProcessor switch cca.fromTo.To() { - case common.ELocation.Blob(), common.ELocation.File(): + case common.ELocation.Blob(), common.ELocation.File(), common.ELocation.BlobFS(): deleter, err := newSyncDeleteProcessor(cca) if err != nil { return err diff --git a/cmd/syncProcessor.go b/cmd/syncProcessor.go index 52222cdae..131992856 100644 --- a/cmd/syncProcessor.go +++ b/cmd/syncProcessor.go @@ -25,6 +25,7 @@ import ( "encoding/json" "fmt" "github.com/Azure/azure-pipeline-go/pipeline" + "github.com/Azure/azure-storage-azcopy/v10/azbfs" "github.com/Azure/azure-storage-azcopy/v10/common" "github.com/Azure/azure-storage-azcopy/v10/ste" "github.com/Azure/azure-storage-blob-go/azblob" @@ -314,6 +315,12 @@ func (b *remoteResourceDeleter) delete(object StoredObject) error { fileURL := azfile.NewFileURL(fileURLParts.URL(), b.p) _, err := fileURL.Delete(b.ctx) return err + case common.ELocation.BlobFS(): + bfsURLParts := azbfs.NewBfsURLParts(*b.rootURL) + bfsURLParts.DirectoryOrFilePath = path.Join(bfsURLParts.DirectoryOrFilePath, object.relativePath) + fileURL := azbfs.NewFileURL(bfsURLParts.URL(), b.p) + _, err := fileURL.Delete(b.ctx) + return err default: panic("not implemented, check your code") } diff --git a/cmd/validators.go b/cmd/validators.go index c37e53c22..c641f8497 100644 --- a/cmd/validators.go +++ b/cmd/validators.go @@ -102,41 +102,22 @@ func inferFromTo(src, dst string) common.FromTo { return common.EFromTo.Unknown() } - switch { - case srcLocation == common.ELocation.Local() && dstLocation == common.ELocation.Blob(): - return common.EFromTo.LocalBlob() - case srcLocation == common.ELocation.Blob() && dstLocation == common.ELocation.Local(): - return common.EFromTo.BlobLocal() - case srcLocation == common.ELocation.Local() && dstLocation == common.ELocation.File(): - return common.EFromTo.LocalFile() - case srcLocation == common.ELocation.File() && dstLocation == common.ELocation.Local(): - return common.EFromTo.FileLocal() - case srcLocation == common.ELocation.Pipe() && dstLocation == common.ELocation.Blob(): - return common.EFromTo.PipeBlob() - case srcLocation == common.ELocation.Blob() && dstLocation == common.ELocation.Pipe(): - return common.EFromTo.BlobPipe() - case srcLocation == common.ELocation.Local() && dstLocation == common.ELocation.BlobFS(): - return common.EFromTo.LocalBlobFS() - case srcLocation == common.ELocation.BlobFS() && dstLocation == common.ELocation.Local(): - return common.EFromTo.BlobFSLocal() - case srcLocation == common.ELocation.Blob() && dstLocation == common.ELocation.Blob(): - return common.EFromTo.BlobBlob() - case srcLocation == common.ELocation.File() && dstLocation == common.ELocation.Blob(): - return common.EFromTo.FileBlob() - case srcLocation == common.ELocation.Blob() && dstLocation == common.ELocation.File(): - return common.EFromTo.BlobFile() - case srcLocation == common.ELocation.File() && dstLocation == common.ELocation.File(): - return common.EFromTo.FileFile() - case srcLocation == common.ELocation.S3() && dstLocation == common.ELocation.Blob(): - return common.EFromTo.S3Blob() - case srcLocation == common.ELocation.Benchmark() && dstLocation == common.ELocation.Blob(): - return common.EFromTo.BenchmarkBlob() - case srcLocation == common.ELocation.Benchmark() && dstLocation == common.ELocation.File(): - return common.EFromTo.BenchmarkFile() - case srcLocation == common.ELocation.Benchmark() && dstLocation == common.ELocation.BlobFS(): - return common.EFromTo.BenchmarkBlobFS() - case srcLocation == 
common.ELocation.GCP() && dstLocation == common.ELocation.Blob(): - return common.EFromTo.GCPBlob() + out := common.EFromTo.Unknown() + intent := (common.FromTo(srcLocation) << 8) | common.FromTo(dstLocation) + enum.GetSymbols(reflect.TypeOf(common.EFromTo), func(enumSymbolName string, enumSymbolValue interface{}) (stop bool) { // find if our fromto is a valid option + fromTo := enumSymbolValue.(common.FromTo) + // none/unknown will never appear as valid outputs of the above functions + // If it's our intended fromto, we're good. + if fromTo == intent { + out = intent + return true + } + + return false + }) + + if out != common.EFromTo.Unknown() { + return out } glcm.Info("The parameters you supplied were " + @@ -146,7 +127,7 @@ func inferFromTo(src, dst string) common.FromTo { "automatically be found. Please check the parameters you supplied. If they are correct, please " + "specify an exact source and destination type using the --from-to switch. " + fromToHelpText) - return common.EFromTo.Unknown() + return out } var IPv4Regex = regexp.MustCompile(`\d+\.\d+\.\d+\.\d+`) // simple regex diff --git a/common/fe-ste-models.go b/common/fe-ste-models.go index 8d197c968..e423d1fc6 100644 --- a/common/fe-ste-models.go +++ b/common/fe-ste-models.go @@ -521,31 +521,34 @@ var EFromTo = FromTo(0) // represents the to location type FromTo uint16 -func (FromTo) Unknown() FromTo { return FromTo(0) } -func (FromTo) LocalBlob() FromTo { return FromTo(fromToValue(ELocation.Local(), ELocation.Blob())) } -func (FromTo) LocalFile() FromTo { return FromTo(fromToValue(ELocation.Local(), ELocation.File())) } -func (FromTo) BlobLocal() FromTo { return FromTo(fromToValue(ELocation.Blob(), ELocation.Local())) } -func (FromTo) FileLocal() FromTo { return FromTo(fromToValue(ELocation.File(), ELocation.Local())) } -func (FromTo) BlobPipe() FromTo { return FromTo(fromToValue(ELocation.Blob(), ELocation.Pipe())) } -func (FromTo) PipeBlob() FromTo { return FromTo(fromToValue(ELocation.Pipe(), ELocation.Blob())) } -func (FromTo) FilePipe() FromTo { return FromTo(fromToValue(ELocation.File(), ELocation.Pipe())) } -func (FromTo) PipeFile() FromTo { return FromTo(fromToValue(ELocation.Pipe(), ELocation.File())) } -func (FromTo) BlobTrash() FromTo { return FromTo(fromToValue(ELocation.Blob(), ELocation.Unknown())) } -func (FromTo) FileTrash() FromTo { return FromTo(fromToValue(ELocation.File(), ELocation.Unknown())) } -func (FromTo) BlobFSTrash() FromTo { - return FromTo(fromToValue(ELocation.BlobFS(), ELocation.Unknown())) -} -func (FromTo) LocalBlobFS() FromTo { return FromTo(fromToValue(ELocation.Local(), ELocation.BlobFS())) } -func (FromTo) BlobFSLocal() FromTo { return FromTo(fromToValue(ELocation.BlobFS(), ELocation.Local())) } -func (FromTo) BlobBlob() FromTo { return FromTo(fromToValue(ELocation.Blob(), ELocation.Blob())) } -func (FromTo) FileBlob() FromTo { return FromTo(fromToValue(ELocation.File(), ELocation.Blob())) } -func (FromTo) BlobFile() FromTo { return FromTo(fromToValue(ELocation.Blob(), ELocation.File())) } -func (FromTo) FileFile() FromTo { return FromTo(fromToValue(ELocation.File(), ELocation.File())) } -func (FromTo) S3Blob() FromTo { return FromTo(fromToValue(ELocation.S3(), ELocation.Blob())) } -func (FromTo) GCPBlob() FromTo { return FromTo(fromToValue(ELocation.GCP(), ELocation.Blob())) } -func (FromTo) BlobNone() FromTo { return fromToValue(ELocation.Blob(), ELocation.None()) } -func (FromTo) BlobFSNone() FromTo { return fromToValue(ELocation.BlobFS(), ELocation.None()) } -func (FromTo) 
FileNone() FromTo { return fromToValue(ELocation.File(), ELocation.None()) }
+func (FromTo) Unknown() FromTo { return FromTo(0) }
+func (FromTo) LocalBlob() FromTo { return fromToValue(ELocation.Local(), ELocation.Blob()) }
+func (FromTo) LocalFile() FromTo { return fromToValue(ELocation.Local(), ELocation.File()) }
+func (FromTo) BlobLocal() FromTo { return fromToValue(ELocation.Blob(), ELocation.Local()) }
+func (FromTo) FileLocal() FromTo { return fromToValue(ELocation.File(), ELocation.Local()) }
+func (FromTo) BlobPipe() FromTo { return fromToValue(ELocation.Blob(), ELocation.Pipe()) }
+func (FromTo) PipeBlob() FromTo { return fromToValue(ELocation.Pipe(), ELocation.Blob()) }
+func (FromTo) FilePipe() FromTo { return fromToValue(ELocation.File(), ELocation.Pipe()) }
+func (FromTo) PipeFile() FromTo { return fromToValue(ELocation.Pipe(), ELocation.File()) }
+func (FromTo) BlobTrash() FromTo { return fromToValue(ELocation.Blob(), ELocation.Unknown()) }
+func (FromTo) FileTrash() FromTo { return fromToValue(ELocation.File(), ELocation.Unknown()) }
+func (FromTo) BlobFSTrash() FromTo { return fromToValue(ELocation.BlobFS(), ELocation.Unknown()) }
+func (FromTo) LocalBlobFS() FromTo { return fromToValue(ELocation.Local(), ELocation.BlobFS()) }
+func (FromTo) BlobFSLocal() FromTo { return fromToValue(ELocation.BlobFS(), ELocation.Local()) }
+func (FromTo) BlobFSBlobFS() FromTo { return fromToValue(ELocation.BlobFS(), ELocation.BlobFS()) }
+func (FromTo) BlobFSBlob() FromTo { return fromToValue(ELocation.BlobFS(), ELocation.Blob()) }
+func (FromTo) BlobFSFile() FromTo { return fromToValue(ELocation.BlobFS(), ELocation.File()) }
+func (FromTo) BlobBlobFS() FromTo { return fromToValue(ELocation.Blob(), ELocation.BlobFS()) }
+func (FromTo) FileBlobFS() FromTo { return fromToValue(ELocation.File(), ELocation.BlobFS()) }
+func (FromTo) BlobBlob() FromTo { return fromToValue(ELocation.Blob(), ELocation.Blob()) }
+func (FromTo) FileBlob() FromTo { return fromToValue(ELocation.File(), ELocation.Blob()) }
+func (FromTo) BlobFile() FromTo { return fromToValue(ELocation.Blob(), ELocation.File()) }
+func (FromTo) FileFile() FromTo { return fromToValue(ELocation.File(), ELocation.File()) }
+func (FromTo) S3Blob() FromTo { return fromToValue(ELocation.S3(), ELocation.Blob()) }
+func (FromTo) GCPBlob() FromTo { return fromToValue(ELocation.GCP(), ELocation.Blob()) }
+func (FromTo) BlobNone() FromTo { return fromToValue(ELocation.Blob(), ELocation.None()) }
+func (FromTo) BlobFSNone() FromTo { return fromToValue(ELocation.BlobFS(), ELocation.None()) }
+func (FromTo) FileNone() FromTo { return fromToValue(ELocation.File(), ELocation.None()) }
// todo: to we really want these?
Starts to look like a bit of a combinatorial explosion func (FromTo) BenchmarkBlob() FromTo { @@ -602,6 +605,14 @@ func (ft *FromTo) IsUpload() bool { return ft.From().IsLocal() && ft.To().IsRemote() } +func (ft *FromTo) IsDelete() bool { + return ft.To() == ELocation.Unknown() +} + +func (ft *FromTo) IsSetProperties() bool { + return ft.To() == ELocation.None() +} + func (ft *FromTo) AreBothFolderAware() bool { return ft.From().IsFolderAware() && ft.To().IsFolderAware() } diff --git a/e2etest/declarativeResourceManagers.go b/e2etest/declarativeResourceManagers.go index 8224d50e1..6818688b8 100644 --- a/e2etest/declarativeResourceManagers.go +++ b/e2etest/declarativeResourceManagers.go @@ -299,6 +299,10 @@ func (r *resourceBlobContainer) getParam(stripTopDir bool, withSas bool, withFil uri = bURLParts.URL() } + if r.accountType == EAccountType.HierarchicalNamespaceEnabled() { + uri.Host = strings.ReplaceAll(uri.Host, "blob", "dfs") + } + return uri.String() } diff --git a/hnstest/main.go b/hnstest/main.go new file mode 100644 index 000000000..5c74f3ef9 --- /dev/null +++ b/hnstest/main.go @@ -0,0 +1,55 @@ +package main + +import ( + "context" + "fmt" + "github.com/Azure/azure-storage-azcopy/v10/azbfs" + "github.com/google/uuid" + "net/url" + "os" +) + +func main() { + acctKey, acctName := os.Getenv("ACCOUNT_KEY"), os.Getenv("ACCOUNT_NAME") + key := azbfs.NewSharedKeyCredential(acctName, acctKey) + p := azbfs.NewPipeline(key, azbfs.PipelineOptions{}) + serviceURL := azbfs.NewServiceURL(url.URL{ + Scheme: "https", + Host: fmt.Sprintf("%s.dfs.core.windows.net", acctName), + }, p) + + fsURL := serviceURL.NewFileSystemURL(uuid.NewString()) + defer fsURL.Delete(context.Background()) + + _, err := fsURL.Create(context.Background()) + if err != nil { + fmt.Println(err) + return + } + + fURL := fsURL.NewRootDirectoryURL().NewFileURL("asdf.txt") + + _, err = fURL.Create(context.Background(), azbfs.BlobFSHTTPHeaders{}, azbfs.BlobFSAccessControl{}) + if err != nil { + fmt.Println(err) + return + } + + _, err = fURL.SetAccessControl(context.Background(), azbfs.BlobFSAccessControl{ + Owner: "1234", + Group: "5213456", + Permissions: "r-xrw-r--", + }) + if err != nil { + fmt.Println(err) + return + } + + ctl, err := fURL.GetAccessControl(context.Background()) + if err != nil { + fmt.Println(err) + return + } + + fmt.Println(ctl.Owner, ctl.Group, ctl.Permissions) +} diff --git a/ste/mgr-JobPartMgr.go b/ste/mgr-JobPartMgr.go index 8b5a8f447..78bd18422 100644 --- a/ste/mgr-JobPartMgr.go +++ b/ste/mgr-JobPartMgr.go @@ -53,6 +53,7 @@ type IJobPartMgr interface { ChunkStatusLogger() common.ChunkStatusLogger common.ILogger SourceProviderPipeline() pipeline.Pipeline + SecondarySourceProviderPipeline() pipeline.Pipeline SourceCredential() pipeline.Factory getOverwritePrompter() *overwritePrompter getFolderCreationTracker() FolderCreationTracker @@ -563,8 +564,9 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context, sourceBlobToken azbl var statsAccForSip *PipelineNetworkStats = nil // we don't accumulate stats on the source info provider // Create source info provider's pipeline for S2S copy or download (in some cases). 
- if fromTo == common.EFromTo.BlobBlob() || fromTo == common.EFromTo.BlobFile() || fromTo == common.EFromTo.BlobLocal() { - var sourceCred azblob.Credential = azblob.NewAnonymousCredential() + // BlobFS and Blob will utilize the Blob source info provider, as they are the "same" resource, but provide different details on both endpoints + if (fromTo.IsS2S() || fromTo.IsDownload()) && (fromTo.From() == common.ELocation.Blob() || fromTo.From() == common.ELocation.BlobFS()) { + sourceCred := azblob.NewAnonymousCredential() jobState := jpm.jobMgr.getInMemoryTransitJobState() if fromTo.To() == common.ELocation.Blob() && jobState.S2SSourceCredentialType.IsAzureOAuth() { credOption := common.CredentialOpOptions{ @@ -593,8 +595,8 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context, sourceBlobToken azbl jpm.jobMgr.HttpClient(), statsAccForSip) - // Consider the ADLSG2->ADLSG2 ACLs case - if fromTo == common.EFromTo.BlobBlob() && jpm.Plan().PreservePermissions.IsTruthy() { + // Prepare to pull dfs properties if we're working with BlobFS + if fromTo.From() == common.ELocation.BlobFS() || jpm.Plan().PreservePermissions.IsTruthy() || jpm.Plan().PreservePOSIXProperties { credential := common.CreateBlobFSCredential(ctx, credInfo, credOption) jpm.secondarySourceProviderPipeline = NewBlobFSPipeline( credential, @@ -610,8 +612,8 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context, sourceBlobToken azbl statsAccForSip) } } - // Consider the file-local SDDL transfer case. - if fromTo == common.EFromTo.FileBlob() || fromTo == common.EFromTo.FileFile() || fromTo == common.EFromTo.FileLocal() { + // Set up a source pipeline for files if necessary + if (fromTo.IsS2S() || fromTo.IsDownload()) && (fromTo.From() == common.ELocation.File()) { jpm.sourceProviderPipeline = NewFilePipeline( azfile.NewAnonymousCredential(), azfile.PipelineOptions{ @@ -632,10 +634,12 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context, sourceBlobToken azbl statsAccForSip) } - // Create pipeline for data transfer. 
- switch fromTo { - case common.EFromTo.BlobTrash(), common.EFromTo.BlobLocal(), common.EFromTo.LocalBlob(), common.EFromTo.BenchmarkBlob(), - common.EFromTo.BlobBlob(), common.EFromTo.FileBlob(), common.EFromTo.S3Blob(), common.EFromTo.GCPBlob(), common.EFromTo.BlobNone(), common.EFromTo.BlobFSNone(): + switch { + case fromTo.IsS2S() && (fromTo.To() == common.ELocation.Blob() || fromTo.To() == common.ELocation.BlobFS()), // destination determines pipeline for S2S, blobfs uses blob for S2S + fromTo.IsUpload() && fromTo.To() == common.ELocation.Blob(), // destination determines pipeline for upload + fromTo.IsDownload() && fromTo.From() == common.ELocation.Blob(), // source determines pipeline for download + fromTo.IsSetProperties() && (fromTo.From() == common.ELocation.Blob() || fromTo.From() == common.ELocation.BlobFS()), // source determines pipeline for set properties, blobfs uses blob for set properties + fromTo.IsDelete() && fromTo.From() == common.ELocation.Blob(): // ditto for delete credential := common.CreateBlobCredential(ctx, credInfo, credOption) jpm.Log(pipeline.LogInfo, fmt.Sprintf("JobID=%v, credential type: %v", jpm.Plan().JobID, credInfo.CredentialType)) jpm.pipeline = NewBlobPipeline( @@ -651,8 +655,8 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context, sourceBlobToken azbl jpm.jobMgr.HttpClient(), jpm.jobMgr.PipelineNetworkStats()) - // Consider the ADLSG2->ADLSG2 ACLs case - if fromTo == common.EFromTo.BlobBlob() && jpm.Plan().PreservePermissions.IsTruthy() { + // If we need to write specifically to the gen2 endpoint, we should have this available. + if fromTo.To() == common.ELocation.BlobFS() || jpm.Plan().PreservePermissions.IsTruthy() || jpm.Plan().PreservePOSIXProperties { credential := common.CreateBlobFSCredential(ctx, credInfo, credOption) jpm.secondaryPipeline = NewBlobFSPipeline( credential, @@ -667,8 +671,8 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context, sourceBlobToken azbl jpm.jobMgr.HttpClient(), statsAccForSip) } - // Create pipeline for Azure BlobFS. - case common.EFromTo.BlobFSLocal(), common.EFromTo.LocalBlobFS(), common.EFromTo.BenchmarkBlobFS(): + case fromTo.IsUpload() && fromTo.To() == common.ELocation.BlobFS(), // Blobfs up/down use the dfs endpoint + fromTo.IsDownload() && fromTo.From() == common.ELocation.BlobFS(): credential := common.CreateBlobFSCredential(ctx, credInfo, credOption) jpm.Log(pipeline.LogInfo, fmt.Sprintf("JobID=%v, credential type: %v", jpm.Plan().JobID, credInfo.CredentialType)) @@ -684,9 +688,11 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context, sourceBlobToken azbl jpm.pacer, jpm.jobMgr.HttpClient(), jpm.jobMgr.PipelineNetworkStats()) - // Create pipeline for Azure File. 
- case common.EFromTo.FileTrash(), common.EFromTo.FileLocal(), common.EFromTo.LocalFile(), common.EFromTo.BenchmarkFile(), - common.EFromTo.FileFile(), common.EFromTo.BlobFile(), common.EFromTo.FileNone(): + case fromTo.IsS2S() && fromTo.To() == common.ELocation.File(), + fromTo.IsUpload() && fromTo.To() == common.ELocation.File(), + fromTo.IsDownload() && fromTo.From() == common.ELocation.File(), + fromTo.IsSetProperties() && fromTo.From() == common.ELocation.File(), + fromTo.IsDelete() && fromTo.From() == common.ELocation.File(): jpm.pipeline = NewFilePipeline( azfile.NewAnonymousCredential(), azfile.PipelineOptions{ @@ -705,8 +711,6 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context, sourceBlobToken azbl jpm.pacer, jpm.jobMgr.HttpClient(), jpm.jobMgr.PipelineNetworkStats()) - default: - panic(fmt.Errorf("Unrecognized from-to: %q", fromTo.String())) } } @@ -946,6 +950,10 @@ func (jpm *jobPartMgr) SourceProviderPipeline() pipeline.Pipeline { return jpm.sourceProviderPipeline } +func (jpm *jobPartMgr) SecondarySourceProviderPipeline() pipeline.Pipeline { + return jpm.secondarySourceProviderPipeline +} + func (jpm *jobPartMgr) SourceCredential() pipeline.Factory { return jpm.sourceCredential } diff --git a/ste/mgr-JobPartTransferMgr.go b/ste/mgr-JobPartTransferMgr.go index 1d4ae2028..73b89211d 100644 --- a/ste/mgr-JobPartTransferMgr.go +++ b/ste/mgr-JobPartTransferMgr.go @@ -60,6 +60,7 @@ type IJobPartTransferMgr interface { // TODO: added for debugging purpose. remove later ReleaseAConnection() SourceProviderPipeline() pipeline.Pipeline + SecondarySourceProviderPipeline() pipeline.Pipeline SourceCredential() pipeline.Factory FailActiveUpload(where string, err error) FailActiveDownload(where string, err error) @@ -967,6 +968,10 @@ func (jptm *jobPartTransferMgr) SourceProviderPipeline() pipeline.Pipeline { return jptm.jobPartMgr.SourceProviderPipeline() } +func (jptm *jobPartTransferMgr) SecondarySourceProviderPipeline() pipeline.Pipeline { + return jptm.jobPartMgr.SecondarySourceProviderPipeline() +} + func (jptm *jobPartTransferMgr) SourceCredential() pipeline.Factory { return jptm.jobPartMgr.SourceCredential() } diff --git a/ste/s2sCopier-URLToBlob.go b/ste/s2sCopier-URLToBlob.go index bb1fca4f7..b86e18c23 100644 --- a/ste/s2sCopier-URLToBlob.go +++ b/ste/s2sCopier-URLToBlob.go @@ -23,6 +23,7 @@ package ste import ( "fmt" "net/url" + "strings" "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-azcopy/v10/common" @@ -33,6 +34,22 @@ import ( func newURLToBlobCopier(jptm IJobPartTransferMgr, destination string, p pipeline.Pipeline, pacer pacer, sip ISourceInfoProvider) (sender, error) { srcInfoProvider := sip.(IRemoteSourceInfoProvider) // "downcast" to the type we know it really has + // If our destination is a dfs endpoint, make an attempt to cast it to the blob endpoint + // Like other dfs<->blob casts, dfs doesn't actually exist on stack/emu, so the only time this should get used is against the real service. 
+ fromTo := jptm.FromTo() + if fromTo.To() == common.ELocation.BlobFS() { + u, err := url.Parse(destination) + if err != nil { + return nil, err + } + + bURLParts := azblob.NewBlobURLParts(*u) + + bURLParts.Host = strings.Replace(bURLParts.Host, ".dfs", ".blob", 1) + newDest := bURLParts.URL() + destination = newDest.String() + } + var targetBlobType azblob.BlobType blobTypeOverride := jptm.BlobTypeOverride() // BlobTypeOverride is copy info specified by user diff --git a/ste/sender-blobFolders.go b/ste/sender-blobFolders.go index 2a2f53fba..044364913 100644 --- a/ste/sender-blobFolders.go +++ b/ste/sender-blobFolders.go @@ -110,10 +110,10 @@ func (b *blobFolderSender) overwriteDFSProperties() (string, error) { func (b *blobFolderSender) SetContainerACL() error { bURLParts := azblob.NewBlobURLParts(b.destination.URL()) - bURLParts.BlobName = "/" // Container-level ACLs NEED a / + bURLParts.ContainerName += "/" // Container-level ACLs NEED a / bURLParts.Host = strings.ReplaceAll(bURLParts.Host, ".blob", ".dfs") // todo: jank, and violates the principle of interfaces - fileURL := azbfs.NewFileSystemURL(bURLParts.URL(), b.jptm.(*jobPartTransferMgr).jobPartMgr.(*jobPartMgr).secondaryPipeline) + rootURL := azbfs.NewFileSystemURL(bURLParts.URL(), b.jptm.(*jobPartTransferMgr).jobPartMgr.(*jobPartMgr).secondaryPipeline) // We know for a fact our source is a "blob". acl, err := b.sip.(*blobSourceInfoProvider).AccessControl() @@ -122,7 +122,7 @@ func (b *blobFolderSender) SetContainerACL() error { return folderPropertiesSetInCreation{} // standard completion will detect failure } acl.Permissions = "" // Since we're sending the full ACL, Permissions is irrelevant. - _, err = fileURL.SetAccessControl(b.jptm.Context(), acl) + _, err = rootURL.SetAccessControl(b.jptm.Context(), acl) if err != nil { b.jptm.FailActiveSend("Putting ACLs", err) return folderPropertiesSetInCreation{} // standard completion will detect failure @@ -214,9 +214,10 @@ func (b *blobFolderSender) SetFolderProperties() error { } func (b *blobFolderSender) DirUrlToString() string { - url := b.destination.URL() - url.RawQuery = "" - return url.String() + uri, _ := url.Parse(b.jptm.Info().Destination) + uri.RawPath = "" + uri.RawQuery = "" + return uri.String() } // ===== Implement sender so that it can be returned in newBlobUploader. ===== diff --git a/ste/sourceInfoProvider-Blob.go b/ste/sourceInfoProvider-Blob.go index f0de67003..a3f6160f1 100644 --- a/ste/sourceInfoProvider-Blob.go +++ b/ste/sourceInfoProvider-Blob.go @@ -22,6 +22,7 @@ package ste import ( "io" + "net/url" "strings" "time" @@ -36,8 +37,42 @@ type blobSourceInfoProvider struct { defaultRemoteSourceInfoProvider } +func (p *blobSourceInfoProvider) IsDFSSource() bool { + fromTo := p.jptm.FromTo() + return fromTo.From() == common.ELocation.BlobFS() +} + +func (p *blobSourceInfoProvider) internalPresignedURL(useHNS bool) (*url.URL, error) { + uri, err := p.defaultRemoteSourceInfoProvider.PreSignedSourceURL() + if err != nil { + return nil, err + } + + // This will have no real effect on non-standard endpoints (e.g. emulator, stack), and *may* work, but probably won't. + // However, Stack/Emulator don't support HNS, so, this won't get use. + bURLParts := azblob.NewBlobURLParts(*uri) + if useHNS { + bURLParts.Host = strings.Replace(bURLParts.Host, ".blob", ".dfs", 1) + + if bURLParts.BlobName != "" { + bURLParts.BlobName = strings.TrimSuffix(bURLParts.BlobName, "/") // BlobFS doesn't handle folders correctly like this. 
+ } else { + bURLParts.ContainerName += "/" // container level perms MUST have a / + } + } else { + bURLParts.Host = strings.Replace(bURLParts.Host, ".dfs", ".blob", 1) + } + out := bURLParts.URL() + + return &out, nil +} + +func (p *blobSourceInfoProvider) PreSignedSourceURL() (*url.URL, error) { + return p.internalPresignedURL(false) // prefer to return the blob URL; data can be read from either endpoint. +} + func (p *blobSourceInfoProvider) ReadLink() (string, error) { - uri, err := p.PreSignedSourceURL() + uri, err := p.internalPresignedURL(false) if err != nil { return "", err } @@ -102,24 +137,14 @@ func newBlobSourceInfoProvider(jptm IJobPartTransferMgr) (ISourceInfoProvider, e return &blobSourceInfoProvider{defaultRemoteSourceInfoProvider: *base}, nil } -// AccessControl should ONLY get called when we know for a fact it is a blobFS->blobFS tranfser. -// It *assumes* that the source is actually a HNS account. func (p *blobSourceInfoProvider) AccessControl() (azbfs.BlobFSAccessControl, error) { - presignedURL, err := p.PreSignedSourceURL() + // We can only get access control via HNS, so we MUST switch here. + presignedURL, err := p.internalPresignedURL(true) if err != nil { return azbfs.BlobFSAccessControl{}, err } - bURLParts := azblob.NewBlobURLParts(*presignedURL) - bURLParts.Host = strings.ReplaceAll(bURLParts.Host, ".blob", ".dfs") - if bURLParts.BlobName != "" { - bURLParts.BlobName = strings.TrimSuffix(bURLParts.BlobName, "/") // BlobFS doesn't handle folders correctly like this. - } else { - bURLParts.BlobName = "/" // container level perms MUST have a / - } - - // todo: jank, and violates the principle of interfaces - fURL := azbfs.NewFileURL(bURLParts.URL(), p.jptm.(*jobPartTransferMgr).jobPartMgr.(*jobPartMgr).secondarySourceProviderPipeline) + fURL := azbfs.NewFileURL(*presignedURL, p.jptm.SecondarySourceProviderPipeline()) return fURL.GetAccessControl(p.jptm.Context()) } @@ -132,7 +157,8 @@ func (p *blobSourceInfoProvider) BlobType() azblob.BlobType { } func (p *blobSourceInfoProvider) GetFreshFileLastModifiedTime() (time.Time, error) { - presignedURL, err := p.PreSignedSourceURL() + // We can't set a custom LMT on HNS, so it doesn't make sense to swap here. 
+ presignedURL, err := p.internalPresignedURL(false) if err != nil { return time.Time{}, err } diff --git a/ste/xfer.go b/ste/xfer.go index d125c4191..a88fa90e3 100644 --- a/ste/xfer.go +++ b/ste/xfer.go @@ -109,7 +109,7 @@ func computeJobXfer(fromTo common.FromTo, blobType common.BlobType) newJobXfer { case common.ELocation.File(): return newURLToAzureFileCopier case common.ELocation.BlobFS(): - panic(blobFSNotS2S) + return newURLToBlobCopier default: panic("unexpected target location type") } @@ -139,7 +139,7 @@ func computeJobXfer(fromTo common.FromTo, blobType common.BlobType) newJobXfer { case common.ELocation.File(): return newFileSourceInfoProvider case common.ELocation.BlobFS(): - panic(blobFSNotS2S) + return newBlobSourceInfoProvider case common.ELocation.S3(): return newS3SourceInfoProvider case common.ELocation.GCP(): From 1057d265607dd7b8b7b7dd4cd9e222496fdbf045 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Thu, 27 Apr 2023 19:52:25 -0700 Subject: [PATCH 02/16] Add warning log --- ste/s2sCopier-URLToBlob.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ste/s2sCopier-URLToBlob.go b/ste/s2sCopier-URLToBlob.go index b86e18c23..a9bcd05e9 100644 --- a/ste/s2sCopier-URLToBlob.go +++ b/ste/s2sCopier-URLToBlob.go @@ -24,12 +24,15 @@ import ( "fmt" "net/url" "strings" + "sync" "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-azcopy/v10/common" "github.com/Azure/azure-storage-blob-go/azblob" ) +var LogBlobConversionOnce = &sync.Once{} + // Creates the right kind of URL to blob copier, based on the blob type of the source func newURLToBlobCopier(jptm IJobPartTransferMgr, destination string, p pipeline.Pipeline, pacer pacer, sip ISourceInfoProvider) (sender, error) { srcInfoProvider := sip.(IRemoteSourceInfoProvider) // "downcast" to the type we know it really has @@ -48,6 +51,11 @@ func newURLToBlobCopier(jptm IJobPartTransferMgr, destination string, p pipeline bURLParts.Host = strings.Replace(bURLParts.Host, ".dfs", ".blob", 1) newDest := bURLParts.URL() destination = newDest.String() + + LogBlobConversionOnce.Do(func() { + common.GetLifecycleMgr().Info("Switching to blob endpoint to write to destination account. There are some limitations when writing between blob/dfs endpoints. " + + "Please refer to https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues#blob-storage-apis") + }) } var targetBlobType azblob.BlobType From bdd1be5b5cb2adb8b24ce5971bfa753532e26677 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Thu, 27 Apr 2023 19:53:41 -0700 Subject: [PATCH 03/16] Drop commented-out code --- cmd/sync.go | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/cmd/sync.go b/cmd/sync.go index 3121ed195..ef4d5541c 100644 --- a/cmd/sync.go +++ b/cmd/sync.go @@ -137,27 +137,6 @@ func (raw *rawSyncCmdArgs) cook() (cookedSyncCmdArgs, error) { // consider making a map of valid source/dest combos and consolidating this to generic source/dest setups, akin to the lower if statement // TODO: if expand the set of source/dest combos supported by sync, update this method the declarative test framework: - /* We support DFS by using blob end-point of the account. We replace dfs by blob in src and dst */ - //srcHNS, dstHNS := false, false - //if loc := InferArgumentLocation(raw.src); loc == common.ELocation.BlobFS() { - // raw.src = strings.Replace(raw.src, ".dfs", ".blob", 1) - // glcm.Info("Sync operates only on blob endpoint. 
Switching to use blob endpoint on source account.") - // srcHNS = true - //} - // - //if loc := InferArgumentLocation(raw.dst); loc == common.ELocation.BlobFS() { - // raw.dst = strings.Replace(raw.dst, ".dfs", ".blob", 1) - // msg := fmt.Sprintf("Sync operates only on blob endpoint. Switching to use blob endpoint on destination account. There are some limitations when switching endpoints. " + - // "Please refer to https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues#blob-storage-apis") - // glcm.Info(msg) - // if azcopyScanningLogger != nil { - // azcopyScanningLogger.Log(pipeline.LogInfo, msg) - // } - // dstHNS = true - //} - // - //cooked.isHNSToHNS = srcHNS && dstHNS - var err error cooked.fromTo, err = ValidateFromTo(raw.src, raw.dst, raw.fromTo) if err != nil { From b08d848271b5935d1d445cc1d26b7044ec6ea2e5 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Mon, 1 May 2023 13:36:01 -0700 Subject: [PATCH 04/16] Support sync; fix OAuth S2S --- cmd/copy.go | 12 ++++++++---- cmd/credentialUtil.go | 25 +++++++++++++++---------- cmd/syncEnumerator.go | 19 ++++++++++--------- cmd/zc_enumerator.go | 28 ++++++++++++++++++++++------ common/fe-ste-models.go | 4 ++++ ste/mgr-JobPartMgr.go | 2 +- 6 files changed, 60 insertions(+), 30 deletions(-) diff --git a/cmd/copy.go b/cmd/copy.go index b24a2fa91..337ebca55 100644 --- a/cmd/copy.go +++ b/cmd/copy.go @@ -1383,15 +1383,16 @@ func (cca *CookedCopyCmdArgs) processRedirectionUpload(blobResource common.Resou // get source credential - if there is a token it will be used to get passed along our pipeline func (cca *CookedCopyCmdArgs) getSrcCredential(ctx context.Context, jpo *common.CopyJobPartOrderRequest) (common.CredentialInfo, error) { - srcCredInfo, isPublic, err := GetCredentialInfoForLocation(ctx, cca.FromTo.From(), cca.Source.Value, cca.Source.SAS, true, cca.CpkOptions) if err != nil { return srcCredInfo, err // If S2S and source takes OAuthToken as its cred type (OR) source takes anonymous as its cred type, but it's not public and there's no SAS } else if cca.FromTo.IsS2S() && - ((srcCredInfo.CredentialType == common.ECredentialType.OAuthToken() && cca.FromTo.To() != common.ELocation.Blob()) || // Blob can forward OAuth tokens + ((srcCredInfo.CredentialType == common.ECredentialType.OAuthToken() && !cca.FromTo.To().CanForwardOAuthTokens()) || // Blob can forward OAuth tokens; BlobFS inherits this. (srcCredInfo.CredentialType == common.ECredentialType.Anonymous() && !isPublic && cca.Source.SAS == "")) { - return srcCredInfo, errors.New("a SAS token (or S3 access key) is required as a part of the source in S2S transfers, unless the source is a public resource, or the destination is blob storage") + return srcCredInfo, errors.New("a SAS token (or S3 access key) is required as a part of the source in S2S transfers, unless the source is a public resource. 
Blob and BlobFS additionally support OAuth on both source and destination") + } else if cca.FromTo.IsS2S() && (srcCredInfo.CredentialType == common.ECredentialType.SharedKey() || jpo.CredentialInfo.CredentialType == common.ECredentialType.SharedKey()) { + return srcCredInfo, errors.New("shared key auth is not supported for S2S operations") } if cca.Source.SAS != "" && cca.FromTo.IsS2S() && jpo.CredentialInfo.CredentialType == common.ECredentialType.OAuthToken() { @@ -1527,7 +1528,10 @@ func (cca *CookedCopyCmdArgs) processCopyJobPartOrders() (err error) { case cca.FromTo.IsUpload(), cca.FromTo.IsDownload(), cca.FromTo.IsS2S(): // Execute a standard copy command var e *CopyEnumerator - srcCredInfo, _ := cca.getSrcCredential(ctx, &jobPartOrder) + srcCredInfo, err := cca.getSrcCredential(ctx, &jobPartOrder) + if err != nil { + return fmt.Errorf("failed to discern source credential type: %w", err) + } e, err = cca.initEnumerator(jobPartOrder, srcCredInfo, ctx) if err != nil { diff --git a/cmd/credentialUtil.go b/cmd/credentialUtil.go index 06948b92a..287096e9f 100644 --- a/cmd/credentialUtil.go +++ b/cmd/credentialUtil.go @@ -646,15 +646,7 @@ func getCredentialType(ctx context.Context, raw rawFromToInfo, cpkOptions common // ============================================================================================== // pipeline factory methods // ============================================================================================== -func createBlobPipeline(ctx context.Context, credInfo common.CredentialInfo, logLevel pipeline.LogLevel) (pipeline.Pipeline, error) { - // are we getting dest token? - credential := credInfo.SourceBlobToken - if credential == nil { - credential = common.CreateBlobCredential(ctx, credInfo, common.CredentialOpOptions{ - // LogInfo: glcm.Info, //Comment out for debugging - LogError: glcm.Info, - }) - } +func createBlobPipelineFromCred(credential azblob.Credential, logLevel pipeline.LogLevel) pipeline.Pipeline { logOption := pipeline.LogOptions{} if azcopyScanningLogger != nil { logOption = pipeline.LogOptions{ @@ -681,7 +673,20 @@ func createBlobPipeline(ctx context.Context, credInfo common.CredentialInfo, log nil, ste.NewAzcopyHTTPClient(frontEndMaxIdleConnectionsPerHost), nil, // we don't gather network stats on the credential pipeline - ), nil + ) +} + +func createBlobPipeline(ctx context.Context, credInfo common.CredentialInfo, logLevel pipeline.LogLevel) (pipeline.Pipeline, error) { + // are we getting dest token? 
+ credential := credInfo.SourceBlobToken + if credential == nil { + credential = common.CreateBlobCredential(ctx, credInfo, common.CredentialOpOptions{ + // LogInfo: glcm.Info, //Comment out for debugging + LogError: glcm.Info, + }) + } + + return createBlobPipelineFromCred(credential, logLevel), nil } const frontEndMaxIdleConnectionsPerHost = http.DefaultMaxIdleConnsPerHost diff --git a/cmd/syncEnumerator.go b/cmd/syncEnumerator.go index 9f504a0e1..c4a104f5f 100644 --- a/cmd/syncEnumerator.go +++ b/cmd/syncEnumerator.go @@ -39,20 +39,21 @@ import ( func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *syncEnumerator, err error) { - srcCredInfo, srcIsPublic, err := GetCredentialInfoForLocation(ctx, cca.fromTo.From(), cca.source.Value, cca.source.SAS, true, cca.cpkOptions) + srcCredInfo, _, err := GetCredentialInfoForLocation(ctx, cca.fromTo.From(), cca.source.Value, cca.source.SAS, true, cca.cpkOptions) if err != nil { return nil, err } - if cca.fromTo.IsS2S() { - if cca.fromTo.From() != common.ELocation.S3() && cca.fromTo.From() != common.ELocation.Blob() { // blob and S3 don't necessarily require SAS tokens (S3 w/ access key, blob w/ copysourceauthorization) - // Adding files here seems like an odd case, but since files can't be public - // the second half of this if statement does not hurt. - - if srcCredInfo.CredentialType != common.ECredentialType.Anonymous() && !srcIsPublic { - return nil, fmt.Errorf("the source of a %s->%s sync must either be public, or authorized with a SAS token", cca.fromTo.From(), cca.fromTo.To()) - } + if cca.fromTo.IsS2S() && srcCredInfo.CredentialType != common.ECredentialType.Anonymous() { + if srcCredInfo.CredentialType.IsAzureOAuth() && cca.fromTo.To().CanForwardOAuthTokens() { + // no-op, this is OK + } else if srcCredInfo.CredentialType == common.ECredentialType.GoogleAppCredentials() || srcCredInfo.CredentialType == common.ECredentialType.S3AccessKey() || srcCredInfo.CredentialType == common.ECredentialType.S3PublicBucket() { + // this too, is OK + } else if srcCredInfo.CredentialType == common.ECredentialType.Anonymous() { + // this is OK + } else { + return nil, fmt.Errorf("the source of a %s->%s sync must either be public, or authorized with a SAS token; blob destinations can forward OAuth", cca.fromTo.From(), cca.fromTo.To()) } } diff --git a/cmd/zc_enumerator.go b/cmd/zc_enumerator.go index 6c123889d..33f4727ef 100755 --- a/cmd/zc_enumerator.go +++ b/cmd/zc_enumerator.go @@ -35,7 +35,6 @@ import ( "github.com/Azure/azure-storage-blob-go/azblob" "github.com/Azure/azure-storage-file-go/azfile" - "github.com/Azure/azure-storage-azcopy/v10/azbfs" "github.com/Azure/azure-storage-azcopy/v10/common" ) @@ -486,18 +485,35 @@ func InitResourceTraverser(resource common.ResourceString, location common.Locat recommendHttpsIfNecessary(*resourceURL) - bfsURL := azbfs.NewBfsURLParts(*resourceURL) + // Convert BlobFS pipeline to blob-compatible pipeline + var credElement azblob.Credential + if credential.CredentialType == common.ECredentialType.SharedKey() { + // Convert the shared key credential to a blob credential & re-use it + credElement, err = azblob.NewSharedKeyCredential(glcm.GetEnvironmentVariable(common.EEnvironmentVariable.AccountName()), glcm.GetEnvironmentVariable(common.EEnvironmentVariable.AccountKey())) + } else { + // Get a standard blob credential, anything else is compatible + credElement = common.CreateBlobCredential(*ctx, *credential, common.CredentialOpOptions{ + LogError: glcm.Info, + }) + } + + blobPipeline := 
createBlobPipelineFromCred(credElement, logLevel) - if bfsURL.FileSystemName == "" || strings.Contains(bfsURL.FileSystemName, "*") { - // TODO service traverser + burl := azblob.NewBlobURLParts(*resourceURL) + burl.Host = strings.Replace(burl.Host, ".dfs", ".blob", 1) + blobResourceURL := burl.URL() + includeDirectoryStubs = true // DFS is supposed to feed folders in + if burl.ContainerName == "" || strings.Contains(burl.ContainerName, "*") { if !recursive { return nil, errors.New(accountTraversalInherentlyRecursiveError) } - output = newBlobFSAccountTraverser(resourceURL, *p, *ctx, incrementEnumerationCounter) + output = newBlobAccountTraverser(&blobResourceURL, blobPipeline, *ctx, includeDirectoryStubs, incrementEnumerationCounter, s2sPreserveBlobTags, cpkOptions, preservePermissions) + } else if listOfVersionIds != nil { + output = newBlobVersionsTraverser(&blobResourceURL, blobPipeline, *ctx, recursive, includeDirectoryStubs, incrementEnumerationCounter, listOfVersionIds, cpkOptions) } else { - output = newBlobFSTraverser(resourceURL, *p, *ctx, recursive, incrementEnumerationCounter) + output = newBlobTraverser(&blobResourceURL, blobPipeline, *ctx, recursive, includeDirectoryStubs, incrementEnumerationCounter, s2sPreserveBlobTags, cpkOptions, includeDeleted, includeSnapshot, includeVersion, preservePermissions) } case common.ELocation.S3(): resourceURL, err := resource.FullURL() diff --git a/common/fe-ste-models.go b/common/fe-ste-models.go index e423d1fc6..59509eb70 100644 --- a/common/fe-ste-models.go +++ b/common/fe-ste-models.go @@ -512,6 +512,10 @@ func (l Location) IsFolderAware() bool { } } +func (l Location) CanForwardOAuthTokens() bool { + return l == ELocation.Blob() || l == ELocation.BlobFS() +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// var EFromTo = FromTo(0) diff --git a/ste/mgr-JobPartMgr.go b/ste/mgr-JobPartMgr.go index 78bd18422..465d93fe3 100644 --- a/ste/mgr-JobPartMgr.go +++ b/ste/mgr-JobPartMgr.go @@ -568,7 +568,7 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context, sourceBlobToken azbl if (fromTo.IsS2S() || fromTo.IsDownload()) && (fromTo.From() == common.ELocation.Blob() || fromTo.From() == common.ELocation.BlobFS()) { sourceCred := azblob.NewAnonymousCredential() jobState := jpm.jobMgr.getInMemoryTransitJobState() - if fromTo.To() == common.ELocation.Blob() && jobState.S2SSourceCredentialType.IsAzureOAuth() { + if fromTo.To().CanForwardOAuthTokens() && jobState.S2SSourceCredentialType.IsAzureOAuth() { credOption := common.CredentialOpOptions{ LogInfo: func(str string) { jpm.Log(pipeline.LogInfo, str) }, LogError: func(str string) { jpm.Log(pipeline.LogError, str) }, From 663bfd1b66f21cbd73603107546bcfaf4921eba1 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Mon, 1 May 2023 14:21:45 -0700 Subject: [PATCH 05/16] Linter --- cmd/copy.go | 4 +- cmd/copyUtil.go | 19 -- cmd/zc_enumerator.go | 4 + cmd/zc_newobjectadapters.go | 11 -- cmd/zc_traverser_blobfs.go | 279 ----------------------------- cmd/zc_traverser_blobfs_account.go | 138 -------------- hnstest/main.go | 55 ------ ste/xfer.go | 2 - 8 files changed, 6 insertions(+), 506 deletions(-) delete mode 100644 cmd/zc_traverser_blobfs.go delete mode 100644 cmd/zc_traverser_blobfs_account.go delete mode 100644 hnstest/main.go diff --git a/cmd/copy.go b/cmd/copy.go index 337ebca55..7cd667e76 100644 --- a/cmd/copy.go +++ b/cmd/copy.go @@ -629,7 +629,6 @@ func (raw rawCopyCmdArgs) cook() (CookedCopyCmdArgs, error) { if err != nil { 
return cooked, err } - globalBlobFSMd5ValidationOption = cooked.md5ValidationOption // workaround, to avoid having to pass this all the way through the chain of methods in enumeration, just for one weird and (presumably) temporary workaround cooked.CheckLength = raw.CheckLength // length of devnull will be 0, thus this will always fail unless downloading an empty file @@ -1528,7 +1527,8 @@ func (cca *CookedCopyCmdArgs) processCopyJobPartOrders() (err error) { case cca.FromTo.IsUpload(), cca.FromTo.IsDownload(), cca.FromTo.IsS2S(): // Execute a standard copy command var e *CopyEnumerator - srcCredInfo, err := cca.getSrcCredential(ctx, &jobPartOrder) + var srcCredInfo common.CredentialInfo + srcCredInfo, err = cca.getSrcCredential(ctx, &jobPartOrder) if err != nil { return fmt.Errorf("failed to discern source credential type: %w", err) } diff --git a/cmd/copyUtil.go b/cmd/copyUtil.go index b5b7d3e98..5eab51b72 100644 --- a/cmd/copyUtil.go +++ b/cmd/copyUtil.go @@ -29,7 +29,6 @@ import ( "strings" "github.com/Azure/azure-pipeline-go/pipeline" - "github.com/Azure/azure-storage-azcopy/v10/azbfs" "github.com/Azure/azure-storage-azcopy/v10/common" "github.com/Azure/azure-storage-blob-go/azblob" "github.com/Azure/azure-storage-file-go/azfile" @@ -119,24 +118,6 @@ func (util copyHandlerUtil) ConstructCommandStringFromArgs() string { return s.String() } -func (util copyHandlerUtil) urlIsBFSFileSystemOrDirectory(ctx context.Context, url *url.URL, p pipeline.Pipeline) bool { - if util.urlIsContainerOrVirtualDirectory(url) { - - return true - } - // Need to get the resource properties and verify if it is a file or directory - dirURL := azbfs.NewDirectoryURL(*url, p) - isDir, err := dirURL.IsDirectory(ctx) - - if err != nil { - if jobsAdmin.JobsAdmin != nil { - jobsAdmin.JobsAdmin.LogToJobLog(fmt.Sprintf("Failed to check if destination is a folder or a file (ADLSg2). 
Assuming the destination is a file: %s", err), pipeline.LogWarning) - } - } - - return isDir -} - func (util copyHandlerUtil) urlIsAzureFileDirectory(ctx context.Context, url *url.URL, p pipeline.Pipeline) bool { // Azure file share case if util.urlIsContainerOrVirtualDirectory(url) { diff --git a/cmd/zc_enumerator.go b/cmd/zc_enumerator.go index 33f4727ef..3af62b607 100755 --- a/cmd/zc_enumerator.go +++ b/cmd/zc_enumerator.go @@ -490,6 +490,10 @@ func InitResourceTraverser(resource common.ResourceString, location common.Locat if credential.CredentialType == common.ECredentialType.SharedKey() { // Convert the shared key credential to a blob credential & re-use it credElement, err = azblob.NewSharedKeyCredential(glcm.GetEnvironmentVariable(common.EEnvironmentVariable.AccountName()), glcm.GetEnvironmentVariable(common.EEnvironmentVariable.AccountKey())) + + if err != nil { + return nil, err + } } else { // Get a standard blob credential, anything else is compatible credElement = common.CreateBlobCredential(*ctx, *credential, common.CredentialOpOptions{ diff --git a/cmd/zc_newobjectadapters.go b/cmd/zc_newobjectadapters.go index b3dfb13a1..a4587a3d0 100644 --- a/cmd/zc_newobjectadapters.go +++ b/cmd/zc_newobjectadapters.go @@ -81,17 +81,6 @@ func (e emptyPropertiesAdapter) LeaseStatus() azblob.LeaseStatusType { return azblob.LeaseStatusNone } -// md5OnlyAdapter is like emptyProperties adapter, except for the ContentMD5 -// method, for which it returns a real value -type md5OnlyAdapter struct { - emptyPropertiesAdapter - md5 []byte -} - -func (m md5OnlyAdapter) ContentMD5() []byte { - return m.md5 -} - // blobPropertiesResponseAdapter adapts a BlobGetPropertiesResponse to the blobPropsProvider interface type blobPropertiesResponseAdapter struct { *azblob.BlobGetPropertiesResponse diff --git a/cmd/zc_traverser_blobfs.go b/cmd/zc_traverser_blobfs.go deleted file mode 100644 index 566a08ceb..000000000 --- a/cmd/zc_traverser_blobfs.go +++ /dev/null @@ -1,279 +0,0 @@ -// Copyright © Microsoft -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
-package cmd - -import ( - "context" - "encoding/base64" - "fmt" - "net/url" - "strings" - "time" - - "github.com/Azure/azure-pipeline-go/pipeline" - - "github.com/Azure/azure-storage-azcopy/v10/azbfs" - "github.com/Azure/azure-storage-azcopy/v10/common" -) - -type blobFSTraverser struct { - rawURL *url.URL - p pipeline.Pipeline - ctx context.Context - recursive bool - - // Generic function to indicate that a new stored object has been enumerated - incrementEnumerationCounter enumerationCounterFunc -} - -func newBlobFSTraverser(rawURL *url.URL, p pipeline.Pipeline, ctx context.Context, recursive bool, incrementEnumerationCounter enumerationCounterFunc) (t *blobFSTraverser) { - t = &blobFSTraverser{ - rawURL: rawURL, - p: p, - ctx: ctx, - recursive: recursive, - incrementEnumerationCounter: incrementEnumerationCounter, - } - return -} - -func (t *blobFSTraverser) IsDirectory(bool) (bool, error) { - return copyHandlerUtil{}.urlIsBFSFileSystemOrDirectory(t.ctx, t.rawURL, t.p), nil // This gets all the fanciness done for us. -} - -func (t *blobFSTraverser) getPropertiesIfSingleFile() (*azbfs.PathGetPropertiesResponse, bool, error) { - pathURL := azbfs.NewFileURL(*t.rawURL, t.p) - pgr, err := pathURL.GetProperties(t.ctx) - - if err != nil { - return nil, false, err - } - - if pgr.XMsResourceType() == "directory" { - return pgr, false, nil - } - - return pgr, true, nil -} - -func (_ *blobFSTraverser) parseLMT(t string) time.Time { - out, err := time.Parse(time.RFC1123, t) - - if err != nil { - return time.Time{} - } - - return out -} - -func (t *blobFSTraverser) getFolderProps() (p contentPropsProvider, size int64) { - return noContentProps, 0 -} - -func (t *blobFSTraverser) Traverse(preprocessor objectMorpher, processor objectProcessor, filters []ObjectFilter) (err error) { - bfsURLParts := azbfs.NewBfsURLParts(*t.rawURL) - - pathProperties, isFile, _ := t.getPropertiesIfSingleFile() - if isFile { - if azcopyScanningLogger != nil { - azcopyScanningLogger.Log(pipeline.LogDebug, "Detected the root as a file.") - } - - storedObject := newStoredObject( - preprocessor, - getObjectNameOnly(bfsURLParts.DirectoryOrFilePath), - "", - common.EEntityType.File(), - t.parseLMT(pathProperties.LastModified()), - pathProperties.ContentLength(), - md5OnlyAdapter{md5: pathProperties.ContentMD5()}, // not supplying full props, since we can't below, and it would be inconsistent to do so here - noBlobProps, - noMetdata, // not supplying metadata, since we can't below and it would be inconsistent to do so here - bfsURLParts.FileSystemName, - ) - - if t.incrementEnumerationCounter != nil { - t.incrementEnumerationCounter(common.EEntityType.File()) - } - - err := processIfPassedFilters(filters, storedObject, processor) - _, err = getProcessingError(err) - return err - } - - // else, its not just one file - - // Include the root dir in the enumeration results - // Our rule is that enumerators of folder-aware sources must always include the root folder's properties - // So include it if its a directory (which exists), or the file system root. - contentProps, size := t.getFolderProps() - if pathProperties != nil || bfsURLParts.DirectoryOrFilePath == "" { - rootLmt := time.Time{} // if root is filesystem (no path) then we won't have any properties to get an LMT from. 
Also, we won't actually end up syncing the folder, since its not really a folder, so it's OK to use a zero-like time here - if pathProperties != nil { - rootLmt = t.parseLMT(pathProperties.LastModified()) - } - - storedObject := newStoredObject( - preprocessor, - "", - "", // it IS the root, so has no name within the root - common.EEntityType.Folder(), - rootLmt, - size, - contentProps, - noBlobProps, - noMetdata, - bfsURLParts.FileSystemName) - if t.incrementEnumerationCounter != nil { - t.incrementEnumerationCounter(common.EEntityType.Folder()) - } - err = processIfPassedFilters(filters, storedObject, processor) - _, err = getProcessingError(err) - if err != nil { - return err - } - } - - // enumerate everything inside the folder - dirUrl := azbfs.NewDirectoryURL(*t.rawURL, t.p) - marker := "" - searchPrefix := bfsURLParts.DirectoryOrFilePath - - if !strings.HasSuffix(searchPrefix, common.AZCOPY_PATH_SEPARATOR_STRING) { - searchPrefix += common.AZCOPY_PATH_SEPARATOR_STRING - } - - for { - dlr, err := dirUrl.ListDirectorySegment(t.ctx, &marker, t.recursive) - - if err != nil { - return fmt.Errorf("could not list files. Failed with error %s", err.Error()) - } - - for _, v := range dlr.Paths { - var entityType common.EntityType - lmt := v.LastModifiedTime() - if v.IsDirectory == nil || !*v.IsDirectory { - entityType = common.EEntityType.File() - contentProps = md5OnlyAdapter{md5: t.getContentMd5(t.ctx, dirUrl, v)} - size = *v.ContentLength - } else { - entityType = common.EEntityType.Folder() - contentProps, size = t.getFolderProps() - } - - // TODO: if we need to get full properties and metadata, then add call here to - // dirUrl.NewFileURL(StoredObject.relativePath).GetProperties(t.ctx) - // AND consider also supporting alternate mechanism to get the props in the backend - // using s2sGetPropertiesInBackend - storedObject := newStoredObject( - preprocessor, - getObjectNameOnly(*v.Name), - strings.TrimPrefix(*v.Name, searchPrefix), - entityType, - lmt, - size, - contentProps, - noBlobProps, - noMetdata, - bfsURLParts.FileSystemName, - ) - - if t.incrementEnumerationCounter != nil { - t.incrementEnumerationCounter(entityType) - } - - err := processIfPassedFilters(filters, storedObject, processor) - _, err = getProcessingError(err) - if err != nil { - return err - } - - } - - // if debug mode is on, note down the result, this is not going to be fast - if azcopyScanningLogger != nil && azcopyScanningLogger.ShouldLog(pipeline.LogDebug) { - tokenValue := "NONE" - if marker != "" { - tokenValue = marker - } - - var dirListBuilder strings.Builder - var fileListBuilder strings.Builder - - for _, v := range dlr.Paths { - if v.IsDirectory == nil || !*v.IsDirectory { - // it's a file - fmt.Fprintf(&fileListBuilder, " %s,", *v.Name) - } else { - // it's a directory - fmt.Fprintf(&dirListBuilder, " %s,", *v.Name) - } - } - - msg := fmt.Sprintf("Enumerating with token %s. 
Sub-dirs:%s Files:%s", - tokenValue, dirListBuilder.String(), fileListBuilder.String()) - azcopyScanningLogger.Log(pipeline.LogDebug, msg) - } - - marker = dlr.XMsContinuation() - if marker == "" { // do-while pattern - break - } - } - - return -} - -// globalBlobFSMd5ValidationOption is an ugly workaround, to tweak performance of another ugly workaround (namely getContentMd5, below) -var globalBlobFSMd5ValidationOption = common.EHashValidationOption.FailIfDifferentOrMissing() // default to strict, if not set - -// getContentMd5 compensates for the fact that ADLS Gen 2 currently does not return MD5s in the PathListResponse (even -// tho the property is there in the swagger and the generated API) -func (t *blobFSTraverser) getContentMd5(ctx context.Context, directoryURL azbfs.DirectoryURL, file azbfs.Path) []byte { - if globalBlobFSMd5ValidationOption == common.EHashValidationOption.NoCheck() { - return nil // not gonna check it, so don't need it - } - - var returnValueForError []byte = nil // If we get an error, we just act like there was no content MD5. If validation is set to fail on error, this will fail the transfer of this file later on (at the time of the MD5 check) - - // convert format of what we have, if we have something in the PathListResponse from Service - if file.ContentMD5Base64 != nil { - value, err := base64.StdEncoding.DecodeString(*file.ContentMD5Base64) - if err != nil { - return returnValueForError - } - return value - } - - // Fall back to making a new round trip to the server - // This is an interim measure, so that we can still validate MD5s even before they are being returned in the server's - // PathList response - // TODO: remove this in a future release, once we know that Service is always returning the MD5s in the PathListResponse. - // Why? Because otherwise, if there's a file with NO MD5, we'll make a round-trip here, but that's pointless if we KNOW that - // that Service is always returning them in the PathListResponse which we've already checked above. - // As at mid-Feb 2019, we don't KNOW that (in fact it's not returning them in the PathListResponse) so we need this code for now. - fileURL := directoryURL.FileSystemURL().NewDirectoryURL(*file.Name) - props, err := fileURL.GetProperties(ctx) - if err != nil { - return returnValueForError - } - return props.ContentMD5() -} diff --git a/cmd/zc_traverser_blobfs_account.go b/cmd/zc_traverser_blobfs_account.go deleted file mode 100644 index 9c13fb2e6..000000000 --- a/cmd/zc_traverser_blobfs_account.go +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright © Microsoft -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -package cmd - -import ( - "context" - "fmt" - "net/url" - - "github.com/Azure/azure-pipeline-go/pipeline" - - "github.com/Azure/azure-storage-azcopy/v10/azbfs" -) - -// We don't allow S2S from BlobFS, but what this gives us is the ability for users to download entire accounts at once. -// This is just added to create that feature parity. -// Enumerates an entire blobFS account, looking into each matching filesystem as it goes -type BlobFSAccountTraverser struct { - accountURL azbfs.ServiceURL - p pipeline.Pipeline - ctx context.Context - fileSystemPattern string - cachedFileSystems []string - - // a generic function to notify that a new stored object has been enumerated - incrementEnumerationCounter enumerationCounterFunc -} - -func (t *BlobFSAccountTraverser) IsDirectory(isSource bool) (bool, error) { - return true, nil // Returns true as account traversal is inherently folder-oriented and recursive. -} - -func (t *BlobFSAccountTraverser) listContainers() ([]string, error) { - // a nil list also returns 0 - if len(t.cachedFileSystems) == 0 { - marker := "" - fsList := make([]string, 0) - - for { - resp, err := t.accountURL.ListFilesystemsSegment(t.ctx, &marker) - - if err != nil { - return nil, err - } - - for _, v := range resp.Filesystems { - var fsName string - - if v.Name != nil { - fsName = *v.Name - } else { - // realistically this should never ever happen - // but on the off-chance that it does, should we panic? - WarnStdoutAndScanningLog("filesystem listing returned nil filesystem name") - continue - } - - // match against the filesystem name pattern if present - if t.fileSystemPattern != "" { - if ok, err := containerNameMatchesPattern(fsName, t.fileSystemPattern); err != nil { - return nil, err - } else if !ok { - // ignore any filesystems that don't match - continue - } - } - - fsList = append(fsList, fsName) - } - - marker = resp.XMsContinuation() - if marker == "" { - break - } - } - - t.cachedFileSystems = fsList - return fsList, nil - } else { - return t.cachedFileSystems, nil - } -} - -func (t *BlobFSAccountTraverser) Traverse(preprocessor objectMorpher, processor objectProcessor, filters []ObjectFilter) error { - // listContainers will return the cached filesystem list if filesystems have already been listed by this traverser. 
- fsList, err := t.listContainers() - - if err != nil { - return err - } - - for _, v := range fsList { - fileSystemURL := t.accountURL.NewFileSystemURL(v).URL() - fileSystemTraverser := newBlobFSTraverser(&fileSystemURL, t.p, t.ctx, true, t.incrementEnumerationCounter) - - preprocessorForThisChild := preprocessor.FollowedBy(newContainerDecorator(v)) - - err := fileSystemTraverser.Traverse(preprocessorForThisChild, processor, filters) - - if err != nil { - WarnStdoutAndScanningLog(fmt.Sprintf("failed to list files in filesystem %s: %s", v, err)) - continue - } - } - - return nil -} - -func newBlobFSAccountTraverser(rawURL *url.URL, p pipeline.Pipeline, ctx context.Context, incrementEnumerationCounter enumerationCounterFunc) (t *BlobFSAccountTraverser) { - bfsURLParts := azbfs.NewBfsURLParts(*rawURL) - fsPattern := bfsURLParts.FileSystemName - - if bfsURLParts.FileSystemName != "" { - bfsURLParts.FileSystemName = "" - } - - t = &BlobFSAccountTraverser{p: p, ctx: ctx, incrementEnumerationCounter: incrementEnumerationCounter, accountURL: azbfs.NewServiceURL(bfsURLParts.URL(), p), fileSystemPattern: fsPattern} - - return -} diff --git a/hnstest/main.go b/hnstest/main.go deleted file mode 100644 index 5c74f3ef9..000000000 --- a/hnstest/main.go +++ /dev/null @@ -1,55 +0,0 @@ -package main - -import ( - "context" - "fmt" - "github.com/Azure/azure-storage-azcopy/v10/azbfs" - "github.com/google/uuid" - "net/url" - "os" -) - -func main() { - acctKey, acctName := os.Getenv("ACCOUNT_KEY"), os.Getenv("ACCOUNT_NAME") - key := azbfs.NewSharedKeyCredential(acctName, acctKey) - p := azbfs.NewPipeline(key, azbfs.PipelineOptions{}) - serviceURL := azbfs.NewServiceURL(url.URL{ - Scheme: "https", - Host: fmt.Sprintf("%s.dfs.core.windows.net", acctName), - }, p) - - fsURL := serviceURL.NewFileSystemURL(uuid.NewString()) - defer fsURL.Delete(context.Background()) - - _, err := fsURL.Create(context.Background()) - if err != nil { - fmt.Println(err) - return - } - - fURL := fsURL.NewRootDirectoryURL().NewFileURL("asdf.txt") - - _, err = fURL.Create(context.Background(), azbfs.BlobFSHTTPHeaders{}, azbfs.BlobFSAccessControl{}) - if err != nil { - fmt.Println(err) - return - } - - _, err = fURL.SetAccessControl(context.Background(), azbfs.BlobFSAccessControl{ - Owner: "1234", - Group: "5213456", - Permissions: "r-xrw-r--", - }) - if err != nil { - fmt.Println(err) - return - } - - ctl, err := fURL.GetAccessControl(context.Background()) - if err != nil { - fmt.Println(err) - return - } - - fmt.Println(ctl.Owner, ctl.Group, ctl.Permissions) -} diff --git a/ste/xfer.go b/ste/xfer.go index a88fa90e3..cf8b919d6 100644 --- a/ste/xfer.go +++ b/ste/xfer.go @@ -81,8 +81,6 @@ func parameterizeSend(targetFunction newJobXferWithSenderFactory, sf senderFacto // the xfer factory is generated based on the type of source and destination func computeJobXfer(fromTo common.FromTo, blobType common.BlobType) newJobXfer { - const blobFSNotS2S = "blobFS not supported as S2S source" - //local helper functions getDownloader := func(sourceType common.Location) downloaderFactory { From aca473daac07f3cb43d221d321ac2fc1438b4eb8 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Mon, 1 May 2023 14:47:53 -0700 Subject: [PATCH 06/16] Remove blobfs traverser testing --- cmd/zt_generic_service_traverser_test.go | 114 ----------------------- cmd/zt_generic_traverser_test.go | 60 +----------- 2 files changed, 2 insertions(+), 172 deletions(-) diff --git a/cmd/zt_generic_service_traverser_test.go b/cmd/zt_generic_service_traverser_test.go index 
09c5427e2..e5be29dad 100644 --- a/cmd/zt_generic_service_traverser_test.go +++ b/cmd/zt_generic_service_traverser_test.go @@ -7,86 +7,9 @@ import ( "github.com/Azure/azure-storage-file-go/azfile" chk "gopkg.in/check.v1" - "github.com/Azure/azure-storage-azcopy/v10/azbfs" "github.com/Azure/azure-storage-azcopy/v10/common" ) -// Separated the ADLS tests from others as ADLS can't safely be tested on the same storage account -func (s *genericTraverserSuite) TestBlobFSServiceTraverserWithManyObjects(c *chk.C) { - bfssu := GetBFSSU() - bsu := getBSU() // Only used to clean up - - // BlobFS is tested on the same account, therefore this is safe to clean up this way - cleanBlobAccount(c, bsu) - - containerList := []string{ - generateName("suchcontainermanystorage", 63), - generateName("containertwoelectricboogaloo", 63), - generateName("funnymemereference", 63), - generateName("gettingmeta", 63), - } - - // convert containerList into a map for easy validation - cnames := map[string]bool{} - for _, v := range containerList { - cnames[v] = true - } - - objectList := []string{ - generateName("basedir", 63), - "allyourbase/" + generateName("arebelongtous", 63), - "sub1/sub2/" + generateName("", 63), - generateName("someobject", 63), - } - - objectData := "Hello world!" - - // Generate remote scenarios - scenarioHelper{}.generateFilesystemsAndFilesFromLists(c, bfssu, containerList, objectList, objectData) - - // deferred container cleanup - defer func() { - for _, v := range containerList { - // create container URLs - blobContainer := bsu.NewContainerURL(v) - _, _ = blobContainer.Delete(ctx, azblob.ContainerAccessConditions{}) - } - }() - - // Generate local files to ensure behavior conforms to other traversers - dstDirName := scenarioHelper{}.generateLocalDirectory(c) - scenarioHelper{}.generateLocalFilesFromList(c, dstDirName, objectList) - - // Create a local traversal - localTraverser := newLocalTraverser(context.TODO(), dstDirName, true, false, common.ESymlinkHandlingType.Follow(), common.ESyncHashType.None(), func(common.EntityType) {}, nil) - - // Invoke the traversal with an indexer so the results are indexed for easy validation - localIndexer := newObjectIndexer() - err := localTraverser.Traverse(noPreProccessor, localIndexer.store, nil) - c.Assert(err, chk.IsNil) - - // construct a blob account traverser - blobFSPipeline := azbfs.NewPipeline(azbfs.NewAnonymousCredential(), azbfs.PipelineOptions{}) - rawBSU := scenarioHelper{}.getRawAdlsServiceURLWithSAS(c).URL() - blobAccountTraverser := newBlobFSAccountTraverser(&rawBSU, blobFSPipeline, ctx, func(common.EntityType) {}) - - // invoke the blob account traversal with a dummy processor - blobDummyProcessor := dummyProcessor{} - err = blobAccountTraverser.Traverse(noPreProccessor, blobDummyProcessor.process, nil) - c.Assert(err, chk.IsNil) - - c.Assert(len(blobDummyProcessor.record), chk.Equals, len(localIndexer.indexMap)*len(containerList)) - - for _, storedObject := range blobDummyProcessor.record { - correspondingLocalFile, present := localIndexer.indexMap[storedObject.relativePath] - _, cnamePresent := cnames[storedObject.ContainerName] - - c.Assert(present, chk.Equals, true) - c.Assert(cnamePresent, chk.Equals, true) - c.Assert(correspondingLocalFile.name, chk.Equals, storedObject.name) - } -} - func (s *genericTraverserSuite) TestServiceTraverserWithManyObjects(c *chk.C) { bsu := getBSU() fsu := getFSU() @@ -259,7 +182,6 @@ func (s *genericTraverserSuite) TestServiceTraverserWithManyObjects(c *chk.C) { func (s *genericTraverserSuite) 
TestServiceTraverserWithWildcards(c *chk.C) { bsu := getBSU() fsu := getFSU() - bfssu := GetBFSSU() testS3 := false // Only test S3 if credentials are present. testGCP := false @@ -294,25 +216,12 @@ func (s *genericTraverserSuite) TestServiceTraverserWithWildcards(c *chk.C) { generateName("objectmatchtwo", 63), } - bfsContainerList := []string{ - generateName("bfsmatchobjectmatchone", 63), - generateName("bfsmatchobjectnomatchone", 63), - generateName("bfsmatchobjectnomatchtwo", 63), - generateName("bfsmatchobjectmatchtwo", 63), - } - // load only matching container names in cnames := map[string]bool{ containerList[0]: true, containerList[3]: true, } - // load matching bfs container names in - bfscnames := map[string]bool{ - bfsContainerList[0]: true, - bfsContainerList[3]: true, - } - objectList := []string{ generateName("basedir", 63), "allyourbase/" + generateName("arebelongtous", 63), @@ -325,8 +234,6 @@ func (s *genericTraverserSuite) TestServiceTraverserWithWildcards(c *chk.C) { // Generate remote scenarios scenarioHelper{}.generateBlobContainersAndBlobsFromLists(c, bsu, containerList, objectList, objectData) scenarioHelper{}.generateFileSharesAndFilesFromLists(c, fsu, containerList, objectList, objectData) - // Subject ADLS tests to a different container name prefix to avoid conflicts with blob - scenarioHelper{}.generateFilesystemsAndFilesFromLists(c, bfssu, bfsContainerList, objectList, objectData) if testS3 { scenarioHelper{}.generateS3BucketsAndObjectsFromLists(c, s3Client, containerList, objectList, objectData) } @@ -387,16 +294,6 @@ func (s *genericTraverserSuite) TestServiceTraverserWithWildcards(c *chk.C) { err = fileAccountTraverser.Traverse(noPreProccessor, fileDummyProcessor.process, nil) c.Assert(err, chk.IsNil) - // construct a ADLS account traverser - blobFSPipeline := azbfs.NewPipeline(azbfs.NewAnonymousCredential(), azbfs.PipelineOptions{}) - rawBFSSU := scenarioHelper{}.getRawAdlsServiceURLWithSAS(c).URL() - rawBFSSU.Path = "/bfsmatchobjectmatch*" // set the container name to contain a wildcard and not conflict with blob - bfsAccountTraverser := newBlobFSAccountTraverser(&rawBFSSU, blobFSPipeline, ctx, func(common.EntityType) {}) - - // invoke the blobFS account traversal with a dummy processor - bfsDummyProcessor := dummyProcessor{} - err = bfsAccountTraverser.Traverse(noPreProccessor, bfsDummyProcessor.process, nil) - var s3DummyProcessor dummyProcessor var gcpDummyProcessor dummyProcessor if testS3 { @@ -457,15 +354,4 @@ func (s *genericTraverserSuite) TestServiceTraverserWithWildcards(c *chk.C) { c.Assert(cnamePresent, chk.Equals, true) c.Assert(correspondingLocalFile.name, chk.Equals, storedObject.name) } - - // Test ADLSG2 separately due to different container naming - c.Assert(len(bfsDummyProcessor.record), chk.Equals, len(localIndexer.indexMap)*2) - for _, storedObject := range bfsDummyProcessor.record { - correspondingLocalFile, present := localIndexer.indexMap[storedObject.relativePath] - _, cnamePresent := bfscnames[storedObject.ContainerName] - - c.Assert(present, chk.Equals, true) - c.Assert(cnamePresent, chk.Equals, true) - c.Assert(correspondingLocalFile.name, chk.Equals, storedObject.name) - } } diff --git a/cmd/zt_generic_traverser_test.go b/cmd/zt_generic_traverser_test.go index c094a0590..ca5a94ecd 100644 --- a/cmd/zt_generic_traverser_test.go +++ b/cmd/zt_generic_traverser_test.go @@ -37,7 +37,6 @@ import ( "github.com/minio/minio-go" chk "gopkg.in/check.v1" - "github.com/Azure/azure-storage-azcopy/v10/azbfs" 
"github.com/Azure/azure-storage-azcopy/v10/common" "github.com/Azure/azure-storage-azcopy/v10/ste" ) @@ -513,10 +512,6 @@ func (s *genericTraverserSuite) TestTraverserWithSingleObject(c *chk.C) { shareURL, shareName := createNewAzureShare(c, fsu) defer deleteShare(c, shareURL) - bfsu := GetBFSSU() - filesystemURL, _ := createNewFilesystem(c, bfsu) - defer deleteFilesystem(c, filesystemURL) - s3Client, err := createS3ClientWithMinio(createS3ResOptions{}) s3Enabled := err == nil && !isS3Disabled() gcpClient, err := createGCPClientWithGCSSDK() @@ -593,25 +588,6 @@ func (s *genericTraverserSuite) TestTraverserWithSingleObject(c *chk.C) { c.Assert(localDummyProcessor.record[0].name, chk.Equals, fileDummyProcessor.record[0].name) } - // set up the filesystem with a single file - bfsList := []string{storedObjectName} - scenarioHelper{}.generateBFSPathsFromList(c, filesystemURL, bfsList) - - // construct a BlobFS traverser - accountName, accountKey := getAccountAndKey() - bfsPipeline := azbfs.NewPipeline(azbfs.NewSharedKeyCredential(accountName, accountKey), azbfs.PipelineOptions{}) - rawFileURL := filesystemURL.NewRootDirectoryURL().NewFileURL(bfsList[0]).URL() - bfsTraverser := newBlobFSTraverser(&rawFileURL, bfsPipeline, ctx, false, func(common.EntityType) {}) - - // Construct and run a dummy processor for bfs - bfsDummyProcessor := dummyProcessor{} - err = bfsTraverser.Traverse(noPreProccessor, bfsDummyProcessor.process, nil) - c.Assert(err, chk.IsNil) - c.Assert(len(bfsDummyProcessor.record), chk.Equals, 1) - - c.Assert(localDummyProcessor.record[0].relativePath, chk.Equals, bfsDummyProcessor.record[0].relativePath) - c.Assert(localDummyProcessor.record[0].name, chk.Equals, bfsDummyProcessor.record[0].name) - if s3Enabled { // set up the bucket with a single file s3List := []string{storedObjectName} @@ -734,17 +710,6 @@ func (s *genericTraverserSuite) TestTraverserContainerAndLocalDirectory(c *chk.C err = azureFileTraverser.Traverse(noPreProccessor, fileDummyProcessor.process, nil) c.Assert(err, chk.IsNil) - // construct a directory URL and pipeline - accountName, accountKey := getAccountAndKey() - bfsPipeline := azbfs.NewPipeline(azbfs.NewSharedKeyCredential(accountName, accountKey), azbfs.PipelineOptions{}) - rawFilesystemURL := filesystemURL.NewRootDirectoryURL().URL() - - // construct and run a FS traverser - bfsTraverser := newBlobFSTraverser(&rawFilesystemURL, bfsPipeline, ctx, isRecursiveOn, func(common.EntityType) {}) - bfsDummyProcessor := dummyProcessor{} - err = bfsTraverser.Traverse(noPreProccessor, bfsDummyProcessor.process, nil) - c.Assert(err, chk.IsNil) - s3DummyProcessor := dummyProcessor{} gcpDummyProcessor := dummyProcessor{} if s3Enabled { @@ -776,13 +741,11 @@ func (s *genericTraverserSuite) TestTraverserContainerAndLocalDirectory(c *chk.C c.Assert(len(blobDummyProcessor.record), chk.Equals, localFileOnlyCount) if isRecursiveOn { c.Assert(len(fileDummyProcessor.record), chk.Equals, localTotalCount) - c.Assert(len(bfsDummyProcessor.record), chk.Equals, localTotalCount) } else { // in real usage, folders get stripped out in ToNewCopyTransfer when non-recursive, // but that doesn't run here in this test, // so we have to count files only on the processor c.Assert(fileDummyProcessor.countFilesOnly(), chk.Equals, localTotalCount) - c.Assert(bfsDummyProcessor.countFilesOnly(), chk.Equals, localTotalCount) } if s3Enabled { @@ -793,7 +756,7 @@ func (s *genericTraverserSuite) TestTraverserContainerAndLocalDirectory(c *chk.C } // if s3dummyprocessor is empty, it's A-OK because no 
records will be tested - for _, storedObject := range append(append(append(append(blobDummyProcessor.record, fileDummyProcessor.record...), bfsDummyProcessor.record...), s3DummyProcessor.record...), gcpDummyProcessor.record...) { + for _, storedObject := range append(append(append(blobDummyProcessor.record, fileDummyProcessor.record...), s3DummyProcessor.record...), gcpDummyProcessor.record...) { if isRecursiveOn || storedObject.entityType == common.EEntityType.File() { // folder enumeration knowingly NOT consistent when non-recursive (since the folders get stripped out by ToNewCopyTransfer when non-recursive anyway) correspondingLocalFile, present := localIndexer.indexMap[storedObject.relativePath] @@ -819,10 +782,6 @@ func (s *genericTraverserSuite) TestTraverserWithVirtualAndLocalDirectory(c *chk shareURL, shareName := createNewAzureShare(c, fsu) defer deleteShare(c, shareURL) - bfsu := GetBFSSU() - filesystemURL, _ := createNewFilesystem(c, bfsu) - defer deleteFilesystem(c, filesystemURL) - s3Client, err := createS3ClientWithMinio(createS3ResOptions{}) s3Enabled := err == nil && !isS3Disabled() gcpClient, err := createGCPClientWithGCSSDK() @@ -845,9 +804,6 @@ func (s *genericTraverserSuite) TestTraverserWithVirtualAndLocalDirectory(c *chk // set up an Azure File Share with the same files scenarioHelper{}.generateAzureFilesFromList(c, shareURL, fileList) - // set up the filesystem with the same files - scenarioHelper{}.generateBFSPathsFromList(c, filesystemURL, fileList) - if s3Enabled { // Set up the bucket with the same files scenarioHelper{}.generateObjects(c, s3Client, bucketName, fileList) @@ -895,16 +851,6 @@ func (s *genericTraverserSuite) TestTraverserWithVirtualAndLocalDirectory(c *chk err = azureFileTraverser.Traverse(noPreProccessor, fileDummyProcessor.process, nil) c.Assert(err, chk.IsNil) - // construct a filesystem URL & pipeline - accountName, accountKey := getAccountAndKey() - bfsPipeline := azbfs.NewPipeline(azbfs.NewSharedKeyCredential(accountName, accountKey), azbfs.PipelineOptions{}) - rawFilesystemURL := filesystemURL.NewRootDirectoryURL().NewDirectoryURL(virDirName).URL() - - // construct and run a FS traverser - bfsTraverser := newBlobFSTraverser(&rawFilesystemURL, bfsPipeline, ctx, isRecursiveOn, func(common.EntityType) {}) - bfsDummyProcessor := dummyProcessor{} - err = bfsTraverser.Traverse(noPreProccessor, bfsDummyProcessor.process, nil) - localTotalCount := len(localIndexer.indexMap) localFileOnlyCount := 0 for _, x := range localIndexer.indexMap { @@ -942,14 +888,12 @@ func (s *genericTraverserSuite) TestTraverserWithVirtualAndLocalDirectory(c *chk c.Assert(len(blobDummyProcessor.record), chk.Equals, localFileOnlyCount) if isRecursiveOn { c.Assert(len(fileDummyProcessor.record), chk.Equals, localTotalCount) - c.Assert(len(bfsDummyProcessor.record), chk.Equals, localTotalCount) } else { // only files matter when not recursive (since ToNewCopyTransfer strips out everything else when non-recursive) c.Assert(fileDummyProcessor.countFilesOnly(), chk.Equals, localTotalCount) - c.Assert(bfsDummyProcessor.countFilesOnly(), chk.Equals, localTotalCount) } // if s3 testing is disabled the s3 dummy processors' records will be empty. This is OK for appending. Nothing will happen. - for _, storedObject := range append(append(append(append(blobDummyProcessor.record, fileDummyProcessor.record...), bfsDummyProcessor.record...), s3DummyProcessor.record...), gcpDummyProcessor.record...) 
{ + for _, storedObject := range append(append(append(blobDummyProcessor.record, fileDummyProcessor.record...), s3DummyProcessor.record...), gcpDummyProcessor.record...) { if isRecursiveOn || storedObject.entityType == common.EEntityType.File() { // folder enumeration knowingly NOT consistent when non-recursive (since the folders get stripped out by ToNewCopyTransfer when non-recursive anyway) correspondingLocalFile, present := localIndexer.indexMap[storedObject.relativePath] From 6e17c4d2f64655db82103326c2b4572f7e480ec3 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Tue, 2 May 2023 09:08:41 -0700 Subject: [PATCH 07/16] Temporarily disable HNS POSIX tests; support oauth and shared key sip --- e2etest/zt_preserve_posix_properties_test.go | 134 ++++++++++--------- ste/mgr-JobPartMgr.go | 17 ++- 2 files changed, 78 insertions(+), 73 deletions(-) diff --git a/e2etest/zt_preserve_posix_properties_test.go b/e2etest/zt_preserve_posix_properties_test.go index 49c1c51b0..9e4b116e5 100644 --- a/e2etest/zt_preserve_posix_properties_test.go +++ b/e2etest/zt_preserve_posix_properties_test.go @@ -42,70 +42,72 @@ func TestPOSIX_SpecialFilesToBlob(t *testing.T) { ) } -// Block/char device rep is untested due to difficulty to test -func TestPOSIX_SpecialFilesToHNS(t *testing.T) { - ptr := func(u uint32) *uint32 { - return &u - } - - RunScenarios( - t, - eOperation.Copy(), - eTestFromTo.Other(common.EFromTo.LocalBlob()), // no blobblob since that's just metadata and we already test that - eValidate.Auto(), - anonymousAuthOnly, // this is a small test, so running it with all cred types (which will really just be oauth and anon) is fine - anonymousAuthOnly, - params{ - recursive: true, - preservePOSIXProperties: true, - symlinkHandling: common.ESymlinkHandlingType.Preserve(), - }, - nil, - testFiles{ - defaultSize: "1K", - shouldTransfer: []interface{}{ - folder(""), - f("fifo", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFIFO) } }), // fifo should work - f("sock", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFSOCK) } }), // sock should work - "a", - symlink("b", "a"), //symlink to real target should succeed - symlink("d", "c"), //symlink to nowhere should succeed - }, - }, - EAccountType.HierarchicalNamespaceEnabled(), EAccountType.Standard(), "", - ) -} +// *** TESTS DISABLED UNTIL POSIX PROPS HNS PR *** -// Block/char device rep is untested due to difficulty to test -func TestPOSIX_SpecialFilesFromHNS(t *testing.T) { - ptr := func(u uint32) *uint32 { - return &u - } - - RunScenarios( - t, - eOperation.Copy(), - eTestFromTo.Other(common.EFromTo.BlobLocal()), // no blobblob since that's just metadata and we already test that - eValidate.Auto(), - anonymousAuthOnly, // this is a small test, so running it with all cred types (which will really just be oauth and anon) is fine - anonymousAuthOnly, - params{ - recursive: true, - preservePOSIXProperties: true, - symlinkHandling: common.ESymlinkHandlingType.Preserve(), - }, - nil, - testFiles{ - defaultSize: "1K", - shouldTransfer: []interface{}{ - folder(""), - f("fifo", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFIFO) } }), // fifo should work - f("sock", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFSOCK) } }), // sock should work - "a", - symlink("b", "a"), //symlink to real target should succeed - symlink("d", "c"), //symlink to nowhere should succeed - }, - 
}, - EAccountType.Standard(), EAccountType.HierarchicalNamespaceEnabled(), "", - ) -} \ No newline at end of file +//// Block/char device rep is untested due to difficulty to test +//func TestPOSIX_SpecialFilesToHNS(t *testing.T) { +// ptr := func(u uint32) *uint32 { +// return &u +// } +// +// RunScenarios( +// t, +// eOperation.Copy(), +// eTestFromTo.Other(common.EFromTo.LocalBlob()), // no blobblob since that's just metadata and we already test that +// eValidate.Auto(), +// anonymousAuthOnly, // this is a small test, so running it with all cred types (which will really just be oauth and anon) is fine +// anonymousAuthOnly, +// params{ +// recursive: true, +// preservePOSIXProperties: true, +// symlinkHandling: common.ESymlinkHandlingType.Preserve(), +// }, +// nil, +// testFiles{ +// defaultSize: "1K", +// shouldTransfer: []interface{}{ +// folder(""), +// f("fifo", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFIFO) } }), // fifo should work +// f("sock", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFSOCK) } }), // sock should work +// "a", +// symlink("b", "a"), //symlink to real target should succeed +// symlink("d", "c"), //symlink to nowhere should succeed +// }, +// }, +// EAccountType.HierarchicalNamespaceEnabled(), EAccountType.Standard(), "", +// ) +//} +// +//// Block/char device rep is untested due to difficulty to test +//func TestPOSIX_SpecialFilesFromHNS(t *testing.T) { +// ptr := func(u uint32) *uint32 { +// return &u +// } +// +// RunScenarios( +// t, +// eOperation.Copy(), +// eTestFromTo.Other(common.EFromTo.BlobLocal()), // no blobblob since that's just metadata and we already test that +// eValidate.Auto(), +// anonymousAuthOnly, // this is a small test, so running it with all cred types (which will really just be oauth and anon) is fine +// anonymousAuthOnly, +// params{ +// recursive: true, +// preservePOSIXProperties: true, +// symlinkHandling: common.ESymlinkHandlingType.Preserve(), +// }, +// nil, +// testFiles{ +// defaultSize: "1K", +// shouldTransfer: []interface{}{ +// folder(""), +// f("fifo", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFIFO) } }), // fifo should work +// f("sock", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFSOCK) } }), // sock should work +// "a", +// symlink("b", "a"), //symlink to real target should succeed +// symlink("d", "c"), //symlink to nowhere should succeed +// }, +// }, +// EAccountType.Standard(), EAccountType.HierarchicalNamespaceEnabled(), "", +// ) +//} \ No newline at end of file diff --git a/ste/mgr-JobPartMgr.go b/ste/mgr-JobPartMgr.go index 465d93fe3..f2b753ca9 100644 --- a/ste/mgr-JobPartMgr.go +++ b/ste/mgr-JobPartMgr.go @@ -569,17 +569,20 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context, sourceBlobToken azbl sourceCred := azblob.NewAnonymousCredential() jobState := jpm.jobMgr.getInMemoryTransitJobState() if fromTo.To().CanForwardOAuthTokens() && jobState.S2SSourceCredentialType.IsAzureOAuth() { - credOption := common.CredentialOpOptions{ - LogInfo: func(str string) { jpm.Log(pipeline.LogInfo, str) }, - LogError: func(str string) { jpm.Log(pipeline.LogError, str) }, - Panic: jpm.Panic, - CallerID: fmt.Sprintf("JobID=%v, Part#=%d", jpm.Plan().JobID, jpm.Plan().PartNum), - Cancel: jpm.jobMgr.Cancel, - } if jpm.sourceCredential == nil { sourceCred = common.CreateBlobCredential(ctx, 
jobState.CredentialInfo.WithType(jobState.S2SSourceCredentialType), credOption) jpm.sourceCredential = sourceCred } + } else if fromTo.IsDownload() && jobState.CredentialInfo.CredentialType.IsAzureOAuth() { + sourceCred = common.CreateBlobCredential(ctx, jobState.CredentialInfo, credOption) + } else if fromTo.IsDownload() && jobState.CredentialInfo.CredentialType == common.ECredentialType.SharedKey() { + lcm := common.GetLifecycleMgr() + var err error + // Convert the shared key credential to a blob credential & re-use it + sourceCred, err = azblob.NewSharedKeyCredential(lcm.GetEnvironmentVariable(common.EEnvironmentVariable.AccountName()), lcm.GetEnvironmentVariable(common.EEnvironmentVariable.AccountKey())) + if err != nil { + jpm.Panic(fmt.Errorf("sanity check: failed to initialize shared key credential: %w", err)) + } } jpm.sourceProviderPipeline = NewBlobPipeline( From 4f7b4eb56e58bdc9336459c127c00dde42d5441c Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Tue, 2 May 2023 09:54:26 -0700 Subject: [PATCH 08/16] Remove DFS override test; limit isdownload/s2s/upload --- cmd/zt_sync_cmd_test.go | 39 --------------------------------------- common/fe-ste-models.go | 6 +++--- 2 files changed, 3 insertions(+), 42 deletions(-) delete mode 100644 cmd/zt_sync_cmd_test.go diff --git a/cmd/zt_sync_cmd_test.go b/cmd/zt_sync_cmd_test.go deleted file mode 100644 index 9ae175c17..000000000 --- a/cmd/zt_sync_cmd_test.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright © 2017 Microsoft -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -package cmd - -import ( - "github.com/Azure/azure-storage-azcopy/v10/common" - chk "gopkg.in/check.v1" -) - -//Test dfs endpoints are cooked to blob endpoints -func (s *cmdIntegrationSuite) TestSyncS2SWithDFS(c *chk.C) { - src := "https://myaccount1.dfs.core.windows.net/container1/" - dst := "https://myaccount2.dfs.core.windows.net/container2/" - - raw := getDefaultSyncRawInput(src, dst) - - cooked, err := raw.cook() - - c.Assert(err, chk.IsNil) - c.Assert(cooked.fromTo, chk.Equals, common.EFromTo.BlobBlob()) -} diff --git a/common/fe-ste-models.go b/common/fe-ste-models.go index 59509eb70..637932b41 100644 --- a/common/fe-ste-models.go +++ b/common/fe-ste-models.go @@ -598,15 +598,15 @@ func (ft *FromTo) From() Location { } func (ft *FromTo) IsDownload() bool { - return ft.From().IsRemote() && ft.To().IsLocal() + return ft.From().IsRemote() && ft.To().IsLocal() && ft.To() != ELocation.None() && ft.To() != ELocation.Unknown() } func (ft *FromTo) IsS2S() bool { - return ft.From().IsRemote() && ft.To().IsRemote() + return ft.From().IsRemote() && ft.To().IsRemote() && ft.To() != ELocation.None() && ft.To() != ELocation.Unknown() } func (ft *FromTo) IsUpload() bool { - return ft.From().IsLocal() && ft.To().IsRemote() + return ft.From().IsLocal() && ft.To().IsRemote() && ft.To() != ELocation.None() && ft.To() != ELocation.Unknown() } func (ft *FromTo) IsDelete() bool { From ac9d3033cfdc79260b980aebcebe995042685800 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Tue, 2 May 2023 12:47:26 -0700 Subject: [PATCH 09/16] Correct test to new behaviour --- cmd/zt_copy_s2smigration_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/zt_copy_s2smigration_test.go b/cmd/zt_copy_s2smigration_test.go index 8479e6101..158d4f346 100644 --- a/cmd/zt_copy_s2smigration_test.go +++ b/cmd/zt_copy_s2smigration_test.go @@ -1329,7 +1329,7 @@ func (s *cmdIntegrationSuite) TestCopyWithDFSResource(c *chk.C) { c.Assert(err, chk.IsNil) // validate that the right number of transfers were scheduled - c.Assert(len(mockedRPC.transfers), chk.Equals, 1) + c.Assert(len(mockedRPC.transfers), chk.Equals, 2) // c.Assert(mockedRPC.transfers[0].Destination, chk.Equals, "/file") }) From 4f80e6e38d26c544ad1f3001b19db05626c36597 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Wed, 3 May 2023 11:41:06 -0700 Subject: [PATCH 10/16] POSIX properties into Blob metadata on up/download --- common/fe-ste-models.go | 24 ++-- e2etest/zt_preserve_posix_properties_test.go | 2 +- ste/downloader-blobFS.go | 35 ++++-- ste/downloader-blobFS_linux.go | 111 +++++++++++++++++++ ste/downloader-blobFS_other.go | 7 ++ ste/mgr-JobPartMgr.go | 27 +++++ ste/sender-blobFSFromLocal.go | 34 ++++++ ste/sender-blobFolders_other.go | 7 -- ste/sourceInfoProvider-Blob.go | 3 +- 9 files changed, 219 insertions(+), 31 deletions(-) create mode 100644 ste/downloader-blobFS_linux.go create mode 100644 ste/downloader-blobFS_other.go delete mode 100644 ste/sender-blobFolders_other.go diff --git a/common/fe-ste-models.go b/common/fe-ste-models.go index 637932b41..e1f1db2b5 100644 --- a/common/fe-ste-models.go +++ b/common/fe-ste-models.go @@ -576,7 +576,7 @@ func (ft *FromTo) Parse(s string) error { return err } -func (ft *FromTo) FromAndTo(s string) (srcLocation, dstLocation Location, err error) { +func (ft FromTo) FromAndTo(s string) (srcLocation, dstLocation Location, err error) { srcLocation = ELocation.Unknown() dstLocation = ELocation.Unknown() val, err := enum.ParseInt(reflect.TypeOf(ft), s, true, true) @@ -589,27 +589,27 @@ func (ft *FromTo) 
FromAndTo(s string) (srcLocation, dstLocation Location, err er return } -func (ft *FromTo) To() Location { - return Location(((1 << 8) - 1) & *ft) +func (ft FromTo) To() Location { + return Location(((1 << 8) - 1) & ft) } -func (ft *FromTo) From() Location { - return Location((((1 << 16) - 1) & *ft) >> 8) +func (ft FromTo) From() Location { + return Location((((1 << 16) - 1) & ft) >> 8) } -func (ft *FromTo) IsDownload() bool { +func (ft FromTo) IsDownload() bool { return ft.From().IsRemote() && ft.To().IsLocal() && ft.To() != ELocation.None() && ft.To() != ELocation.Unknown() } -func (ft *FromTo) IsS2S() bool { +func (ft FromTo) IsS2S() bool { return ft.From().IsRemote() && ft.To().IsRemote() && ft.To() != ELocation.None() && ft.To() != ELocation.Unknown() } -func (ft *FromTo) IsUpload() bool { +func (ft FromTo) IsUpload() bool { return ft.From().IsLocal() && ft.To().IsRemote() && ft.To() != ELocation.None() && ft.To() != ELocation.Unknown() } -func (ft *FromTo) IsDelete() bool { +func (ft FromTo) IsDelete() bool { return ft.To() == ELocation.Unknown() } @@ -617,12 +617,12 @@ func (ft *FromTo) IsSetProperties() bool { return ft.To() == ELocation.None() } -func (ft *FromTo) AreBothFolderAware() bool { +func (ft FromTo) AreBothFolderAware() bool { return ft.From().IsFolderAware() && ft.To().IsFolderAware() } -func (ft *FromTo) IsPropertyOnlyTransfer() bool { - return *ft == EFromTo.BlobNone() || *ft == EFromTo.BlobFSNone() || *ft == EFromTo.FileNone() +func (ft FromTo) IsPropertyOnlyTransfer() bool { + return ft == EFromTo.BlobNone() || ft == EFromTo.BlobFSNone() || ft == EFromTo.FileNone() } // TODO: deletes are not covered by the above Is* routines diff --git a/e2etest/zt_preserve_posix_properties_test.go b/e2etest/zt_preserve_posix_properties_test.go index 9e4b116e5..1c2a92962 100644 --- a/e2etest/zt_preserve_posix_properties_test.go +++ b/e2etest/zt_preserve_posix_properties_test.go @@ -19,7 +19,7 @@ func TestPOSIX_SpecialFilesToBlob(t *testing.T) { eOperation.Copy(), eTestFromTo.Other(common.EFromTo.LocalBlob(), common.EFromTo.BlobLocal()), // no blobblob since that's just metadata and we already test that eValidate.Auto(), - anonymousAuthOnly, // this is a small test, so running it with all cred types (which will really just be oauth and anon) is fine + allCredentialTypes, // this relies upon a working source info provider; this validates appropriate creds are supplied to it. 
anonymousAuthOnly, params{ recursive: true, diff --git a/ste/downloader-blobFS.go b/ste/downloader-blobFS.go index f34207fde..cfbd6c139 100644 --- a/ste/downloader-blobFS.go +++ b/ste/downloader-blobFS.go @@ -30,18 +30,40 @@ import ( "github.com/Azure/azure-storage-azcopy/v10/common" ) -type blobFSDownloader struct{} +type blobFSDownloader struct { + jptm IJobPartTransferMgr + txInfo TransferInfo +} func newBlobFSDownloader() downloader { return &blobFSDownloader{} } func (bd *blobFSDownloader) Prologue(jptm IJobPartTransferMgr, srcPipeline pipeline.Pipeline) { - // noop + bd.jptm = jptm } func (bd *blobFSDownloader) Epilogue() { - //noop + if bd.jptm != nil { + if bd.jptm.IsLive() && bd.jptm.Info().PreservePOSIXProperties { + bsip, err := newBlobSourceInfoProvider(bd.jptm) + if err != nil { + bd.jptm.FailActiveDownload("get blob source info provider", err) + } + unixstat, _ := bsip.(IUNIXPropertyBearingSourceInfoProvider) + if ubd, ok := (interface{})(bd).(unixPropertyAwareDownloader); ok && unixstat.HasUNIXProperties() { + adapter, err := unixstat.GetUNIXProperties() + if err != nil { + bd.jptm.FailActiveDownload("get unix properties", err) + } + + stage, err := ubd.ApplyUnixProperties(adapter) + if err != nil { + bd.jptm.FailActiveDownload("set unix properties: "+stage, err) + } + } + } + } } // Returns a chunk-func for ADLS gen2 downloads @@ -88,9 +110,4 @@ func (bd *blobFSDownloader) GenerateDownloadFunc(jptm IJobPartTransferMgr, srcPi return } }) -} - -func (bd *blobFSDownloader) SetFolderProperties(jptm IJobPartTransferMgr) error { - // no-op (BlobFS is folder aware, but we don't currently preserve properties from its folders) - return nil -} +} \ No newline at end of file diff --git a/ste/downloader-blobFS_linux.go b/ste/downloader-blobFS_linux.go new file mode 100644 index 000000000..a6891ae25 --- /dev/null +++ b/ste/downloader-blobFS_linux.go @@ -0,0 +1,111 @@ +// +build linux + +package ste + +import ( + "fmt" + "github.com/Azure/azure-storage-azcopy/v10/common" + "os" + "syscall" + "time" +) + +func (bd *blobFSDownloader) ApplyUnixProperties(adapter common.UnixStatAdapter) (stage string, err error) { + // At this point, mode has already been applied. Let's work out what we need to apply, and apply the rest. + destination := bd.txInfo.getDownloadPath() + + // First, grab our file descriptor and such. + fi, err := os.Stat(destination) + if err != nil { + return "stat", err + } + + // At this point, mode has already been applied. Let's work out what we need to apply, and apply the rest. + if adapter.Extended() { + stat := fi.Sys().(*syscall.Stat_t) + mask := adapter.StatxMask() + + // stx_attributes is not persisted. 
+ + mode := os.FileMode(common.DEFAULT_FILE_PERM) + if common.StatXReturned(mask, common.STATX_MODE) { + mode = os.FileMode(adapter.FileMode()) + } + + err = os.Chmod(destination, mode) + if err != nil { + return "chmod", err + } + + uid := stat.Uid + if common.StatXReturned(mask, common.STATX_UID) { + uid = adapter.Owner() + } + + gid := stat.Gid + if common.StatXReturned(mask, common.STATX_GID) { + gid = adapter.Group() + } + // set ownership + err = os.Chown(destination, int(uid), int(gid)) + if err != nil { + return "chown", err + } + + atime := time.Unix(stat.Atim.Unix()) + if common.StatXReturned(mask, common.STATX_ATIME) || !adapter.ATime().IsZero() { // workaround for noatime when underlying fs supports atime + atime = adapter.ATime() + } + + mtime := time.Unix(stat.Mtim.Unix()) + if common.StatXReturned(mask, common.STATX_MTIME) { + mtime = adapter.MTime() + } + + // adapt times + err = os.Chtimes(destination, atime, mtime) + if err != nil { + return "chtimes", err + } + } else { + err = os.Chmod(destination, os.FileMode(adapter.FileMode())) // only write permissions + if err != nil { + return "chmod", err + } + err = os.Chown(destination, int(adapter.Owner()), int(adapter.Group())) + if err != nil { + return "chown", err + } + err = os.Chtimes(destination, adapter.ATime(), adapter.MTime()) + if err != nil { + return "chtimes", err + } + } + + return +} + +func (bd *blobFSDownloader) SetFolderProperties(jptm IJobPartTransferMgr) error { + sip, err := newBlobSourceInfoProvider(jptm) + if err != nil { + return err + } + + // inform the downloader + bd.txInfo = jptm.Info() + + usip := sip.(IUNIXPropertyBearingSourceInfoProvider) + if usip.HasUNIXProperties() { + props, err := usip.GetUNIXProperties() + if err != nil { + return err + } + stage, err := bd.ApplyUnixProperties(props) + + if err != nil { + return fmt.Errorf("set unix properties: %s; %w", stage, err) + } + } + + return nil +} \ No newline at end of file diff --git a/ste/downloader-blobFS_other.go b/ste/downloader-blobFS_other.go new file mode 100644 index 000000000..6f19abaca --- /dev/null +++ b/ste/downloader-blobFS_other.go @@ -0,0 +1,7 @@ +// +build !linux + +package ste + +func (bd *blobFSDownloader) SetFolderProperties(jptm IJobPartTransferMgr) error { + return nil +} \ No newline at end of file diff --git a/ste/mgr-JobPartMgr.go b/ste/mgr-JobPartMgr.go index f2b753ca9..b531949b7 100644 --- a/ste/mgr-JobPartMgr.go +++ b/ste/mgr-JobPartMgr.go @@ -691,6 +691,33 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context, sourceBlobToken azbl jpm.pacer, jpm.jobMgr.HttpClient(), jpm.jobMgr.PipelineNetworkStats()) + + // Just in case we need to talk to blob while we're at it + var blobCred = azblob.NewAnonymousCredential() + if credInfo.CredentialType == common.ECredentialType.SharedKey() { + lcm := common.GetLifecycleMgr() + var err error + // Convert the shared key credential to a blob credential & re-use it + blobCred, err = azblob.NewSharedKeyCredential(lcm.GetEnvironmentVariable(common.EEnvironmentVariable.AccountName()), lcm.GetEnvironmentVariable(common.EEnvironmentVariable.AccountKey())) + if err != nil { + jpm.Panic(fmt.Errorf("sanity check: failed to initialize shared key credential: %w", err)) + } + } else if credInfo.CredentialType != common.ECredentialType.Anonymous() { + blobCred = common.CreateBlobCredential(ctx, credInfo, credOption) + } + + jpm.secondaryPipeline = NewBlobPipeline( + blobCred, + azblob.PipelineOptions{ + Log: jpm.jobMgr.PipelineLogInfo(), + Telemetry: azblob.TelemetryOptions{ + Value: 
userAgent, + }, + }, + xferRetryOption, + jpm.pacer, + jpm.jobMgr.HttpClient(), + jpm.jobMgr.PipelineNetworkStats()) case fromTo.IsS2S() && fromTo.To() == common.ELocation.File(), fromTo.IsUpload() && fromTo.To() == common.ELocation.File(), fromTo.IsDownload() && fromTo.From() == common.ELocation.File(), diff --git a/ste/sender-blobFSFromLocal.go b/ste/sender-blobFSFromLocal.go index 09703d3f3..c3621ad32 100644 --- a/ste/sender-blobFSFromLocal.go +++ b/ste/sender-blobFSFromLocal.go @@ -23,7 +23,9 @@ package ste import ( "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-azcopy/v10/common" + "github.com/Azure/azure-storage-blob-go/azblob" "math" + "strings" ) type blobFSUploader struct { @@ -91,4 +93,36 @@ func (u *blobFSUploader) Epilogue() { jptm.FailActiveUpload("Getting hash", errNoHash) // don't return, since need cleanup below } } + + // Write POSIX data + if jptm.IsLive() { + if jptm.Info().PreservePOSIXProperties { + sip, err := newLocalSourceInfoProvider(jptm) // never returns an error (as of yet) + if err != nil { + jptm.FailActiveUpload("Creating local source info provider for POSIX properties", err) + return // Defensively handle the error just in case + } + + if unixSIP, ok := sip.(IUNIXPropertyBearingSourceInfoProvider); ok { + stat, err := unixSIP.GetUNIXProperties() + if err != nil { + jptm.FailActiveUpload("Getting POSIX properties from source", err) + return + } + + blobPipeline := jptm.(*jobPartTransferMgr).jobPartMgr.(*jobPartMgr).secondaryPipeline + bURLParts := azblob.NewBlobURLParts(u.fileOrDirURL.URL()) + bURLParts.Host = strings.ReplaceAll(bURLParts.Host, ".dfs", ".blob") // switch back to blob + blobURL := azblob.NewBlobURL(bURLParts.URL(), blobPipeline) + + meta := azblob.Metadata{} + common.AddStatToBlobMetadata(stat, meta) + + _, err = blobURL.SetMetadata(jptm.Context(), meta, azblob.BlobAccessConditions{}, common.ToClientProvidedKeyOptions(jptm.CpkInfo(), jptm.CpkScopeInfo())) + if err != nil { + jptm.FailActiveSend("Putting POSIX properties in blob metadata", err) + } + } + } + } } diff --git a/ste/sender-blobFolders_other.go b/ste/sender-blobFolders_other.go deleted file mode 100644 index 7680a7a14..000000000 --- a/ste/sender-blobFolders_other.go +++ /dev/null @@ -1,7 +0,0 @@ -// +build !linux - -package ste - -func (b blobFolderSender) getExtraProperties() error { - return nil -} diff --git a/ste/sourceInfoProvider-Blob.go b/ste/sourceInfoProvider-Blob.go index a3f6160f1..12df77173 100644 --- a/ste/sourceInfoProvider-Blob.go +++ b/ste/sourceInfoProvider-Blob.go @@ -38,8 +38,7 @@ type blobSourceInfoProvider struct { } func (p *blobSourceInfoProvider) IsDFSSource() bool { - fromTo := p.jptm.FromTo() - return fromTo.From() == common.ELocation.BlobFS() + return p.jptm.FromTo().From() == common.ELocation.BlobFS() } func (p *blobSourceInfoProvider) internalPresignedURL(useHNS bool) (*url.URL, error) { From 4cff74340e7c872d92010dff85073ae41e7bdddf Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Wed, 3 May 2023 17:59:50 -0700 Subject: [PATCH 11/16] Add symlink & special file creation support on download --- e2etest/zt_preserve_posix_properties_test.go | 134 +++++++++---------- ste/downloader-blobFS.go | 16 +++ ste/downloader-blobFS_linux.go | 81 +++++++++++ ste/sender-blobFSFromLocal.go | 69 +++++++++- ste/xfer-remoteToLocal-symlink.go | 43 ++++++ ste/xfer.go | 2 +- 6 files changed, 272 insertions(+), 73 deletions(-) diff --git a/e2etest/zt_preserve_posix_properties_test.go b/e2etest/zt_preserve_posix_properties_test.go index 
1c2a92962..748dea2e7 100644 --- a/e2etest/zt_preserve_posix_properties_test.go +++ b/e2etest/zt_preserve_posix_properties_test.go @@ -44,70 +44,70 @@ func TestPOSIX_SpecialFilesToBlob(t *testing.T) { // *** TESTS DISABLED UNTIL POSIX PROPS HNS PR *** -//// Block/char device rep is untested due to difficulty to test -//func TestPOSIX_SpecialFilesToHNS(t *testing.T) { -// ptr := func(u uint32) *uint32 { -// return &u -// } -// -// RunScenarios( -// t, -// eOperation.Copy(), -// eTestFromTo.Other(common.EFromTo.LocalBlob()), // no blobblob since that's just metadata and we already test that -// eValidate.Auto(), -// anonymousAuthOnly, // this is a small test, so running it with all cred types (which will really just be oauth and anon) is fine -// anonymousAuthOnly, -// params{ -// recursive: true, -// preservePOSIXProperties: true, -// symlinkHandling: common.ESymlinkHandlingType.Preserve(), -// }, -// nil, -// testFiles{ -// defaultSize: "1K", -// shouldTransfer: []interface{}{ -// folder(""), -// f("fifo", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFIFO) } }), // fifo should work -// f("sock", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFSOCK) } }), // sock should work -// "a", -// symlink("b", "a"), //symlink to real target should succeed -// symlink("d", "c"), //symlink to nowhere should succeed -// }, -// }, -// EAccountType.HierarchicalNamespaceEnabled(), EAccountType.Standard(), "", -// ) -//} -// -//// Block/char device rep is untested due to difficulty to test -//func TestPOSIX_SpecialFilesFromHNS(t *testing.T) { -// ptr := func(u uint32) *uint32 { -// return &u -// } -// -// RunScenarios( -// t, -// eOperation.Copy(), -// eTestFromTo.Other(common.EFromTo.BlobLocal()), // no blobblob since that's just metadata and we already test that -// eValidate.Auto(), -// anonymousAuthOnly, // this is a small test, so running it with all cred types (which will really just be oauth and anon) is fine -// anonymousAuthOnly, -// params{ -// recursive: true, -// preservePOSIXProperties: true, -// symlinkHandling: common.ESymlinkHandlingType.Preserve(), -// }, -// nil, -// testFiles{ -// defaultSize: "1K", -// shouldTransfer: []interface{}{ -// folder(""), -// f("fifo", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFIFO) } }), // fifo should work -// f("sock", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFSOCK) } }), // sock should work -// "a", -// symlink("b", "a"), //symlink to real target should succeed -// symlink("d", "c"), //symlink to nowhere should succeed -// }, -// }, -// EAccountType.Standard(), EAccountType.HierarchicalNamespaceEnabled(), "", -// ) -//} \ No newline at end of file +// Block/char device rep is untested due to difficulty to test +func TestPOSIX_SpecialFilesToHNS(t *testing.T) { + ptr := func(u uint32) *uint32 { + return &u + } + + RunScenarios( + t, + eOperation.Copy(), + eTestFromTo.Other(common.EFromTo.LocalBlob()), // no blobblob since that's just metadata and we already test that + eValidate.Auto(), + anonymousAuthOnly, // this is a small test, so running it with all cred types (which will really just be oauth and anon) is fine + anonymousAuthOnly, + params{ + recursive: true, + preservePOSIXProperties: true, + symlinkHandling: common.ESymlinkHandlingType.Preserve(), + }, + nil, + testFiles{ + defaultSize: "1K", + shouldTransfer: []interface{}{ + folder(""), + f("fifo", 
with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFIFO) } }), // fifo should work + f("sock", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFSOCK) } }), // sock should work + "a", + symlink("b", "a"), //symlink to real target should succeed + symlink("d", "c"), //symlink to nowhere should succeed + }, + }, + EAccountType.HierarchicalNamespaceEnabled(), EAccountType.Standard(), "", + ) +} + +// Block/char device rep is untested due to difficulty to test +func TestPOSIX_SpecialFilesFromHNS(t *testing.T) { + ptr := func(u uint32) *uint32 { + return &u + } + + RunScenarios( + t, + eOperation.Copy(), + eTestFromTo.Other(common.EFromTo.BlobLocal()), // no blobblob since that's just metadata and we already test that + eValidate.Auto(), + anonymousAuthOnly, // this is a small test, so running it with all cred types (which will really just be oauth and anon) is fine + anonymousAuthOnly, + params{ + recursive: true, + preservePOSIXProperties: true, + symlinkHandling: common.ESymlinkHandlingType.Preserve(), + }, + nil, + testFiles{ + defaultSize: "1K", + shouldTransfer: []interface{}{ + folder(""), + f("fifo", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFIFO) } }), // fifo should work + f("sock", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFSOCK) } }), // sock should work + "a", + symlink("b", "a"), //symlink to real target should succeed + symlink("d", "c"), //symlink to nowhere should succeed + }, + }, + EAccountType.Standard(), EAccountType.HierarchicalNamespaceEnabled(), "", + ) +} \ No newline at end of file diff --git a/ste/downloader-blobFS.go b/ste/downloader-blobFS.go index cfbd6c139..265abf282 100644 --- a/ste/downloader-blobFS.go +++ b/ste/downloader-blobFS.go @@ -23,6 +23,7 @@ package ste import ( "errors" "net/url" + "os" "time" "github.com/Azure/azure-pipeline-go/pipeline" @@ -41,6 +42,7 @@ func newBlobFSDownloader() downloader { func (bd *blobFSDownloader) Prologue(jptm IJobPartTransferMgr, srcPipeline pipeline.Pipeline) { bd.jptm = jptm + bd.txInfo = jptm.Info() // Inform the downloader } func (bd *blobFSDownloader) Epilogue() { @@ -110,4 +112,18 @@ func (bd *blobFSDownloader) GenerateDownloadFunc(jptm IJobPartTransferMgr, srcPi return } }) +} + +func (bd *blobFSDownloader) CreateSymlink(jptm IJobPartTransferMgr) error { + sip, err := newBlobSourceInfoProvider(jptm) + if err != nil { + return err + } + symsip := sip.(ISymlinkBearingSourceInfoProvider) // blob always implements this + symlinkInfo, _ := symsip.ReadLink() + + // create the link + err = os.Symlink(symlinkInfo, jptm.Info().Destination) + + return err } \ No newline at end of file diff --git a/ste/downloader-blobFS_linux.go b/ste/downloader-blobFS_linux.go index a6891ae25..244248c71 100644 --- a/ste/downloader-blobFS_linux.go +++ b/ste/downloader-blobFS_linux.go @@ -5,11 +5,92 @@ package ste import ( "fmt" "github.com/Azure/azure-storage-azcopy/v10/common" + "golang.org/x/sys/unix" + "io" "os" "syscall" "time" ) +// CreateFile covers the following UNIX properties: +// File Mode, File Type +// TODO: Consolidate and reduce duplication later +func (bd *blobFSDownloader) CreateFile(jptm IJobPartTransferMgr, destination string, size int64, writeThrough bool, t FolderCreationTracker) (file io.WriteCloser, needChunks bool, err error) { + var sip ISourceInfoProvider + sip, err = newBlobSourceInfoProvider(jptm) + if err != nil { + return + 
} + + unixSIP := sip.(IUNIXPropertyBearingSourceInfoProvider) // Blob may have unix properties. + + err = common.CreateParentDirectoryIfNotExist(destination, t) + if err != nil { + return + } + + // try to remove the file before we create something else over it + _ = os.Remove(destination) + + needChunks = size > 0 + needMakeFile := true + var mode = uint32(common.DEFAULT_FILE_PERM) + if jptm.Info().PreservePOSIXProperties && unixSIP.HasUNIXProperties() { + var stat common.UnixStatAdapter + stat, err = unixSIP.GetUNIXProperties() + + if stat.Extended() { + if stat.StatxMask()&common.STATX_MODE == common.STATX_MODE { // We need to retain access to the file until we're well & done with it + mode = stat.FileMode() | common.DEFAULT_FILE_PERM + } + } else { + mode = stat.FileMode() | common.DEFAULT_FILE_PERM + } + + if mode != 0 { // Folders & Symlinks are not necessary to handle + switch { + case mode&common.S_IFBLK == common.S_IFBLK || mode&common.S_IFCHR == common.S_IFCHR: + // the file is representative of a device and does not need to be written to + err = unix.Mknod(destination, mode, int(stat.RDevice())) + + needChunks = false + needMakeFile = false + case mode&common.S_IFIFO == common.S_IFIFO || mode&common.S_IFSOCK == common.S_IFSOCK: + // the file is a pipe and does not need to be written to + err = unix.Mknod(destination, mode, 0) + + needChunks = false + needMakeFile = false + } + } + } + + if !needMakeFile { + return + } + + flags := os.O_RDWR | os.O_CREATE | os.O_TRUNC + if writeThrough { + flags |= os.O_SYNC + } + + file, err = os.OpenFile(destination, flags, os.FileMode(mode)) // os.FileMode is uint32 on Linux. + if err != nil { + return + } + + if size == 0 { + return + } + + err = syscall.Fallocate(int(file.(*os.File).Fd()), 0, 0, size) + if err == syscall.ENOTSUP { + err = file.(*os.File).Truncate(size) // err will get returned at the end + } + + return +} + func (bd *blobFSDownloader) ApplyUnixProperties(adapter common.UnixStatAdapter) (stage string, err error) { // At this point, mode has already been applied. Let's work out what we need to apply, and apply the rest. 
destination := bd.txInfo.getDownloadPath() diff --git a/ste/sender-blobFSFromLocal.go b/ste/sender-blobFSFromLocal.go index c3621ad32..907e493f0 100644 --- a/ste/sender-blobFSFromLocal.go +++ b/ste/sender-blobFSFromLocal.go @@ -21,6 +21,8 @@ package ste import ( + "errors" + "fmt" "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-azcopy/v10/common" "github.com/Azure/azure-storage-blob-go/azblob" @@ -68,6 +70,14 @@ func (u *blobFSUploader) GenerateUploadFunc(id common.ChunkID, blockIndex int32, }) } +func (u *blobFSUploader) GetBlobURL() azblob.BlobURL{ + blobPipeline := u.jptm.(*jobPartTransferMgr).jobPartMgr.(*jobPartMgr).secondaryPipeline // pull the secondary (blob) pipeline + bURLParts := azblob.NewBlobURLParts(u.fileOrDirURL.URL()) + bURLParts.Host = strings.ReplaceAll(bURLParts.Host, ".dfs", ".blob") // switch back to blob + + return azblob.NewBlobURL(bURLParts.URL(), blobPipeline) +} + func (u *blobFSUploader) Epilogue() { jptm := u.jptm @@ -110,15 +120,17 @@ func (u *blobFSUploader) Epilogue() { return } - blobPipeline := jptm.(*jobPartTransferMgr).jobPartMgr.(*jobPartMgr).secondaryPipeline - bURLParts := azblob.NewBlobURLParts(u.fileOrDirURL.URL()) - bURLParts.Host = strings.ReplaceAll(bURLParts.Host, ".dfs", ".blob") // switch back to blob - blobURL := azblob.NewBlobURL(bURLParts.URL(), blobPipeline) + blobURL := u.GetBlobURL() meta := azblob.Metadata{} common.AddStatToBlobMetadata(stat, meta) + delete(meta, common.POSIXFolderMeta) // hdi_isfolder is illegal to set on HNS accounts - _, err = blobURL.SetMetadata(jptm.Context(), meta, azblob.BlobAccessConditions{}, common.ToClientProvidedKeyOptions(jptm.CpkInfo(), jptm.CpkScopeInfo())) + _, err = blobURL.SetMetadata( + jptm.Context(), + meta, + azblob.BlobAccessConditions{}, + azblob.ClientProvidedKeyOptions{}) // cpk isn't used for dfs if err != nil { jptm.FailActiveSend("Putting POSIX properties in blob metadata", err) } @@ -126,3 +138,50 @@ func (u *blobFSUploader) Epilogue() { } } } + +func (u *blobFSUploader) SendSymlink(linkData string) error { + sip, err := newLocalSourceInfoProvider(u.jptm) + if err != nil { + return fmt.Errorf("when creating local source info provider: %w", err) + } + + meta := azblob.Metadata{} // meta isn't traditionally supported for dfs, but still exists + + if u.jptm.Info().PreservePOSIXProperties { + if unixSIP, ok := sip.(IUNIXPropertyBearingSourceInfoProvider); ok { + statAdapter, err := unixSIP.GetUNIXProperties() + if err != nil { + return err + } + + if !(statAdapter.FileMode()&common.S_IFLNK == common.S_IFLNK) { // sanity check this is actually targeting the symlink + return errors.New("sanity check: GetUNIXProperties did not return symlink properties") + } + + common.AddStatToBlobMetadata(statAdapter, meta) + } + } + + meta["is_symlink"] = "true" + blobHeaders := azblob.BlobHTTPHeaders{ // translate headers, since those still apply + ContentType: u.creationTimeHeaders.ContentType, + ContentEncoding: u.creationTimeHeaders.ContentEncoding, + ContentLanguage: u.creationTimeHeaders.ContentLanguage, + ContentDisposition: u.creationTimeHeaders.ContentDisposition, + CacheControl: u.creationTimeHeaders.CacheControl, + } + + u.GetBlobURL().ToBlockBlobURL().Upload( + u.jptm.Context(), + strings.NewReader(linkData), + blobHeaders, + meta, + azblob.BlobAccessConditions{}, + azblob.AccessTierNone, // dfs uses default tier + nil, // dfs doesn't support tags + azblob.ClientProvidedKeyOptions{}, // cpk isn't used for dfs + azblob.ImmutabilityPolicyOptions{}) // dfs doesn't support 
immutability policy + + //_, err = s.destBlockBlobURL.Upload(s.jptm.Context(), strings.NewReader(linkData), s.headersToApply, s.metadataToApply, azblob.BlobAccessConditions{}, s.destBlobTier, s.blobTagsToApply, s.cpkToApply, azblob.ImmutabilityPolicyOptions{}) + return err +} diff --git a/ste/xfer-remoteToLocal-symlink.go b/ste/xfer-remoteToLocal-symlink.go index 463e29dd0..507ffa3d1 100644 --- a/ste/xfer-remoteToLocal-symlink.go +++ b/ste/xfer-remoteToLocal-symlink.go @@ -3,6 +3,7 @@ package ste import ( "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-azcopy/v10/common" + "os" ) func remoteToLocal_symlink(jptm IJobPartTransferMgr, p pipeline.Pipeline, pacer pacer, df downloaderFactory) { @@ -16,6 +17,48 @@ func remoteToLocal_symlink(jptm IJobPartTransferMgr, p pipeline.Pipeline, pacer jptm.ReportTransferDone() return } + // if the force Write flags is set to false or prompt + // then check the file exists at the remote location + // if it does, react accordingly + if jptm.GetOverwriteOption() != common.EOverwriteOption.True() { + dstProps, err := common.OSStat(info.Destination) + if err == nil { + // if the error is nil, then file exists locally + shouldOverwrite := false + + // if necessary, prompt to confirm user's intent + if jptm.GetOverwriteOption() == common.EOverwriteOption.Prompt() { + shouldOverwrite = jptm.GetOverwritePrompter().ShouldOverwrite(info.Destination, common.EEntityType.File()) + } else if jptm.GetOverwriteOption() == common.EOverwriteOption.IfSourceNewer() { + // only overwrite if source lmt is newer (after) the destination + if jptm.LastModifiedTime().After(dstProps.ModTime()) { + shouldOverwrite = true + } + } + + if !shouldOverwrite { + // logging as Warning so that it turns up even in compact logs, and because previously we use Error here + jptm.LogAtLevelForCurrentTransfer(pipeline.LogWarning, "File already exists, so will be skipped") + jptm.SetStatus(common.ETransferStatus.SkippedEntityAlreadyExists()) + jptm.ReportTransferDone() + return + } else { + err = os.Remove(info.Destination) + if err != nil { + jptm.FailActiveSend("deleting old file", err) + jptm.ReportTransferDone() + return + } + } + } + } else { + err := os.Remove(info.Destination) + if err != nil { + jptm.FailActiveSend("deleting old file", err) + jptm.ReportTransferDone() + return + } + } dl, ok := df().(symlinkDownloader) if !ok { diff --git a/ste/xfer.go b/ste/xfer.go index cf8b919d6..bc40d2f04 100644 --- a/ste/xfer.go +++ b/ste/xfer.go @@ -137,7 +137,7 @@ func computeJobXfer(fromTo common.FromTo, blobType common.BlobType) newJobXfer { case common.ELocation.File(): return newFileSourceInfoProvider case common.ELocation.BlobFS(): - return newBlobSourceInfoProvider + return newBlobSourceInfoProvider // Blob source info provider pulls info from blob and dfs case common.ELocation.S3(): return newS3SourceInfoProvider case common.ELocation.GCP(): From 94ce5039c78f4a88b86ac073404ec0db7db35e95 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Thu, 4 May 2023 13:21:55 -0700 Subject: [PATCH 12/16] Reduce code dupe --- common/unixStatAdapter.go | 5 ++ go.mod | 2 +- ste/sender-blobFS.go | 77 ++++++++++++++++++++++++++++++- ste/sender-blobFSFromLocal.go | 87 +---------------------------------- 4 files changed, 83 insertions(+), 88 deletions(-) diff --git a/common/unixStatAdapter.go b/common/unixStatAdapter.go index 6f5b89abc..8e05e84cb 100644 --- a/common/unixStatAdapter.go +++ b/common/unixStatAdapter.go @@ -344,6 +344,10 @@ func ClearStatFromBlobMetadata(metadata 
azblob.Metadata) { } func AddStatToBlobMetadata(s UnixStatAdapter, metadata azblob.Metadata) { + if s == nil { + return + } + applyMode := func(mode os.FileMode) { modes := map[uint32]string { S_IFCHR: POSIXCharDeviceMeta, @@ -351,6 +355,7 @@ func AddStatToBlobMetadata(s UnixStatAdapter, metadata azblob.Metadata) { S_IFSOCK: POSIXSocketMeta, S_IFIFO: POSIXFIFOMeta, S_IFDIR: POSIXFolderMeta, + S_IFLNK: POSIXSymlinkMeta, } for modeToTest, metaToApply := range modes { diff --git a/go.mod b/go.mod index caf8ce0d6..cc8bdf52e 100644 --- a/go.mod +++ b/go.mod @@ -60,4 +60,4 @@ require ( gopkg.in/yaml.v2 v2.4.0 // indirect ) -go 1.17 +go 1.19 diff --git a/ste/sender-blobFS.go b/ste/sender-blobFS.go index 82ac5a514..e25fc8ced 100644 --- a/ste/sender-blobFS.go +++ b/ste/sender-blobFS.go @@ -23,7 +23,9 @@ package ste import ( "context" "fmt" + "github.com/Azure/azure-storage-blob-go/azblob" "net/url" + "strings" "time" "github.com/Azure/azure-pipeline-go/pipeline" @@ -34,6 +36,7 @@ import ( type blobFSSenderBase struct { jptm IJobPartTransferMgr + sip ISourceInfoProvider fileOrDirURL URLHolder chunkSize int64 numChunks uint32 @@ -71,6 +74,7 @@ func newBlobFSSenderBase(jptm IJobPartTransferMgr, destination string, p pipelin } return &blobFSSenderBase{ jptm: jptm, + sip: sip, fileOrDirURL: h, chunkSize: chunkSize, numChunks: numChunks, @@ -212,9 +216,45 @@ func (u *blobFSSenderBase) doEnsureDirExists(d azbfs.DirectoryURL) error { return err } +func (u *blobFSSenderBase) GetBlobURL() azblob.BlobURL{ + blobPipeline := u.jptm.(*jobPartTransferMgr).jobPartMgr.(*jobPartMgr).secondaryPipeline // pull the secondary (blob) pipeline + bURLParts := azblob.NewBlobURLParts(u.fileOrDirURL.URL()) + bURLParts.Host = strings.ReplaceAll(bURLParts.Host, ".dfs", ".blob") // switch back to blob + + return azblob.NewBlobURL(bURLParts.URL(), blobPipeline) +} + +func (u *blobFSSenderBase) GetSourcePOSIXProperties() (common.UnixStatAdapter, error) { + if unixSIP, ok := u.sip.(IUNIXPropertyBearingSourceInfoProvider); ok { + statAdapter, err := unixSIP.GetUNIXProperties() + if err != nil { + return nil, err + } + + return statAdapter, nil + } else { + return nil, nil // no properties present! + } +} + +func (u *blobFSSenderBase) SetPOSIXProperties() error { + adapter, err := u.GetSourcePOSIXProperties() + if err != nil { + return fmt.Errorf("failed to get POSIX properties") + } else if adapter == nil { + return nil + } + + meta := azblob.Metadata{} + common.AddStatToBlobMetadata(adapter, meta) + delete(meta, common.POSIXFolderMeta) // Can't be set on HNS accounts. 
+ + _, err = u.GetBlobURL().SetMetadata(u.jptm.Context(), meta, azblob.BlobAccessConditions{}, azblob.ClientProvidedKeyOptions{}) + return err +} + func (u *blobFSSenderBase) SetFolderProperties() error { - // we don't currently preserve any properties for BlobFS folders - return nil + return u.SetPOSIXProperties() } func (u *blobFSSenderBase) DirUrlToString() string { @@ -225,3 +265,36 @@ func (u *blobFSSenderBase) DirUrlToString() string { dirUrl.RawQuery = "" return dirUrl.String() } + +func (u *blobFSSenderBase) SendSymlink(linkData string) error { + meta := azblob.Metadata{} // meta isn't traditionally supported for dfs, but still exists + adapter, err := u.GetSourcePOSIXProperties() + if err != nil { + return fmt.Errorf("when polling for POSIX properties: %w", err) + } else if adapter == nil { + return nil // No-op + } + + common.AddStatToBlobMetadata(adapter, meta) + meta[common.POSIXSymlinkMeta] = "true" // just in case there isn't any metadata + blobHeaders := azblob.BlobHTTPHeaders{ // translate headers, since those still apply + ContentType: u.creationTimeHeaders.ContentType, + ContentEncoding: u.creationTimeHeaders.ContentEncoding, + ContentLanguage: u.creationTimeHeaders.ContentLanguage, + ContentDisposition: u.creationTimeHeaders.ContentDisposition, + CacheControl: u.creationTimeHeaders.CacheControl, + } + + _, err = u.GetBlobURL().ToBlockBlobURL().Upload( + u.jptm.Context(), + strings.NewReader(linkData), + blobHeaders, + meta, + azblob.BlobAccessConditions{}, + azblob.AccessTierNone, // dfs uses default tier + nil, // dfs doesn't support tags + azblob.ClientProvidedKeyOptions{}, // cpk isn't used for dfs + azblob.ImmutabilityPolicyOptions{}) // dfs doesn't support immutability policy + + return err +} diff --git a/ste/sender-blobFSFromLocal.go b/ste/sender-blobFSFromLocal.go index 907e493f0..4bca93fbb 100644 --- a/ste/sender-blobFSFromLocal.go +++ b/ste/sender-blobFSFromLocal.go @@ -21,13 +21,9 @@ package ste import ( - "errors" - "fmt" "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-azcopy/v10/common" - "github.com/Azure/azure-storage-blob-go/azblob" "math" - "strings" ) type blobFSUploader struct { @@ -70,14 +66,6 @@ func (u *blobFSUploader) GenerateUploadFunc(id common.ChunkID, blockIndex int32, }) } -func (u *blobFSUploader) GetBlobURL() azblob.BlobURL{ - blobPipeline := u.jptm.(*jobPartTransferMgr).jobPartMgr.(*jobPartMgr).secondaryPipeline // pull the secondary (blob) pipeline - bURLParts := azblob.NewBlobURLParts(u.fileOrDirURL.URL()) - bURLParts.Host = strings.ReplaceAll(bURLParts.Host, ".dfs", ".blob") // switch back to blob - - return azblob.NewBlobURL(bURLParts.URL(), blobPipeline) -} - func (u *blobFSUploader) Epilogue() { jptm := u.jptm @@ -107,81 +95,10 @@ func (u *blobFSUploader) Epilogue() { // Write POSIX data if jptm.IsLive() { if jptm.Info().PreservePOSIXProperties { - sip, err := newLocalSourceInfoProvider(jptm) // never returns an error (as of yet) + err := u.SetPOSIXProperties() if err != nil { - jptm.FailActiveUpload("Creating local source info provider for POSIX properties", err) - return // Defensively handle the error just in case - } - - if unixSIP, ok := sip.(IUNIXPropertyBearingSourceInfoProvider); ok { - stat, err := unixSIP.GetUNIXProperties() - if err != nil { - jptm.FailActiveUpload("Getting POSIX properties from source", err) - return - } - - blobURL := u.GetBlobURL() - - meta := azblob.Metadata{} - common.AddStatToBlobMetadata(stat, meta) - delete(meta, common.POSIXFolderMeta) // hdi_isfolder is illegal to set 
on HNS accounts - - _, err = blobURL.SetMetadata( - jptm.Context(), - meta, - azblob.BlobAccessConditions{}, - azblob.ClientProvidedKeyOptions{}) // cpk isn't used for dfs - if err != nil { - jptm.FailActiveSend("Putting POSIX properties in blob metadata", err) - } + jptm.FailActiveUpload("Setting POSIX Properties", err) } } } } - -func (u *blobFSUploader) SendSymlink(linkData string) error { - sip, err := newLocalSourceInfoProvider(u.jptm) - if err != nil { - return fmt.Errorf("when creating local source info provider: %w", err) - } - - meta := azblob.Metadata{} // meta isn't traditionally supported for dfs, but still exists - - if u.jptm.Info().PreservePOSIXProperties { - if unixSIP, ok := sip.(IUNIXPropertyBearingSourceInfoProvider); ok { - statAdapter, err := unixSIP.GetUNIXProperties() - if err != nil { - return err - } - - if !(statAdapter.FileMode()&common.S_IFLNK == common.S_IFLNK) { // sanity check this is actually targeting the symlink - return errors.New("sanity check: GetUNIXProperties did not return symlink properties") - } - - common.AddStatToBlobMetadata(statAdapter, meta) - } - } - - meta["is_symlink"] = "true" - blobHeaders := azblob.BlobHTTPHeaders{ // translate headers, since those still apply - ContentType: u.creationTimeHeaders.ContentType, - ContentEncoding: u.creationTimeHeaders.ContentEncoding, - ContentLanguage: u.creationTimeHeaders.ContentLanguage, - ContentDisposition: u.creationTimeHeaders.ContentDisposition, - CacheControl: u.creationTimeHeaders.CacheControl, - } - - u.GetBlobURL().ToBlockBlobURL().Upload( - u.jptm.Context(), - strings.NewReader(linkData), - blobHeaders, - meta, - azblob.BlobAccessConditions{}, - azblob.AccessTierNone, // dfs uses default tier - nil, // dfs doesn't support tags - azblob.ClientProvidedKeyOptions{}, // cpk isn't used for dfs - azblob.ImmutabilityPolicyOptions{}) // dfs doesn't support immutability policy - - //_, err = s.destBlockBlobURL.Upload(s.jptm.Context(), strings.NewReader(linkData), s.headersToApply, s.metadataToApply, azblob.BlobAccessConditions{}, s.destBlobTier, s.blobTagsToApply, s.cpkToApply, azblob.ImmutabilityPolicyOptions{}) - return err -} From c94ed86f9ac89fc8e740c404f4f0add754dedbe6 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Thu, 4 May 2023 13:29:59 -0700 Subject: [PATCH 13/16] Fix tests --- e2etest/declarativeTestFiles.go | 4 ++++ e2etest/helpers.go | 4 ++++ e2etest/scenario_helpers.go | 13 +++++++++++++ e2etest/zt_preserve_posix_properties_test.go | 13 +++++-------- ste/sender-blobFolders_other.go | 7 +++++++ 5 files changed, 33 insertions(+), 8 deletions(-) create mode 100644 ste/sender-blobFolders_other.go diff --git a/e2etest/declarativeTestFiles.go b/e2etest/declarativeTestFiles.go index 18ee0920f..bb1b98127 100644 --- a/e2etest/declarativeTestFiles.go +++ b/e2etest/declarativeTestFiles.go @@ -114,6 +114,10 @@ type objectUnixStatContainer struct { modTime *time.Time } +func (o *objectUnixStatContainer) HasTimes() bool { + return o != nil && (o.accessTime != nil || o.modTime != nil) +} + func (o *objectUnixStatContainer) Empty() bool { if o == nil { return true diff --git a/e2etest/helpers.go b/e2etest/helpers.go index 9e9e0cdf9..8a3c451fb 100644 --- a/e2etest/helpers.go +++ b/e2etest/helpers.go @@ -64,6 +64,10 @@ const ( defaultBlobFSFileSizeInBytes = 1000 ) +func pointerTo[T any](in T) *T { + return &in +} + // if S3_TESTS_OFF is set at all, S3 tests are disabled. 
func isS3Disabled() bool { return strings.ToLower(os.Getenv("S3_TESTS_OFF")) != "" diff --git a/e2etest/scenario_helpers.go b/e2etest/scenario_helpers.go index 9be00636a..13133ee92 100644 --- a/e2etest/scenario_helpers.go +++ b/e2etest/scenario_helpers.go @@ -163,6 +163,19 @@ func (s scenarioHelper) generateLocalFilesFromList(c asserter, options *generate } if file.creationProperties.lastWriteTime != nil { c.AssertNoErr(os.Chtimes(destFile, time.Now(), *file.creationProperties.lastWriteTime), "set times") + } else if file.creationProperties.posixProperties.HasTimes() { + aTime, mTime := time.Now(), time.Now() + props := file.creationProperties.posixProperties + + if props.modTime != nil { + mTime = *props.modTime + } + + if props.accessTime != nil { + aTime = *props.accessTime + } + + c.AssertNoErr(os.Chtimes(destFile, aTime, mTime), "set times") } } else if file.creationProperties.entityType == common.EEntityType.Symlink() { c.Assert(file.creationProperties.symlinkTarget, notEquals(), nil) diff --git a/e2etest/zt_preserve_posix_properties_test.go b/e2etest/zt_preserve_posix_properties_test.go index 748dea2e7..4626cc953 100644 --- a/e2etest/zt_preserve_posix_properties_test.go +++ b/e2etest/zt_preserve_posix_properties_test.go @@ -6,6 +6,7 @@ package e2etest import ( "github.com/Azure/azure-storage-azcopy/v10/common" "testing" + "time" ) // Block/char device rep is untested due to difficulty to test @@ -80,10 +81,6 @@ func TestPOSIX_SpecialFilesToHNS(t *testing.T) { // Block/char device rep is untested due to difficulty to test func TestPOSIX_SpecialFilesFromHNS(t *testing.T) { - ptr := func(u uint32) *uint32 { - return &u - } - RunScenarios( t, eOperation.Copy(), @@ -100,10 +97,10 @@ func TestPOSIX_SpecialFilesFromHNS(t *testing.T) { testFiles{ defaultSize: "1K", shouldTransfer: []interface{}{ - folder(""), - f("fifo", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFIFO) } }), // fifo should work - f("sock", with{ posixProperties: objectUnixStatContainer{ mode: ptr(common.DEFAULT_FILE_PERM | common.S_IFSOCK) } }), // sock should work - "a", + folder("", with{ posixProperties: objectUnixStatContainer{ modTime: pointerTo(time.Now().Add(time.Second*-5))}}), + f("fifo", with{ posixProperties: objectUnixStatContainer{ mode: pointerTo(uint32(common.DEFAULT_FILE_PERM | common.S_IFIFO)) } }), // fifo should work + f("sock", with{ posixProperties: objectUnixStatContainer{ mode: pointerTo(uint32(common.DEFAULT_FILE_PERM | common.S_IFSOCK)) } }), // sock should work + f("a", with{ posixProperties: objectUnixStatContainer{ modTime: pointerTo(time.Now().Add(time.Second*-5))}}), symlink("b", "a"), //symlink to real target should succeed symlink("d", "c"), //symlink to nowhere should succeed }, diff --git a/ste/sender-blobFolders_other.go b/ste/sender-blobFolders_other.go new file mode 100644 index 000000000..ba7115642 --- /dev/null +++ b/ste/sender-blobFolders_other.go @@ -0,0 +1,7 @@ +// +build !linux + +package ste + +func (b blobFolderSender) getExtraProperties() error { + return nil +} \ No newline at end of file From 59bb3ac1b7d0545130990dda66930caffb3a1721 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Fri, 5 May 2023 08:58:46 -0700 Subject: [PATCH 14/16] Prevent delete failure for non-existent file --- e2etest/declarativeTestFiles.go | 2 +- ste/xfer-remoteToLocal-symlink.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/e2etest/declarativeTestFiles.go b/e2etest/declarativeTestFiles.go index bb1b98127..384295f52 100644 --- 
a/e2etest/declarativeTestFiles.go +++ b/e2etest/declarativeTestFiles.go @@ -208,7 +208,7 @@ func (o *objectUnixStatContainer) AddToMetadata(metadata map[string]string) { if o.modTime != nil { mask |= common.STATX_MTIME - metadata[common.POSIXModTimeMeta] = strconv.FormatInt(o.accessTime.UnixNano(), 10) + metadata[common.POSIXModTimeMeta] = strconv.FormatInt(o.modTime.UnixNano(), 10) } metadata[common.LINUXStatxMaskMeta] = strconv.FormatUint(uint64(mask), 10) diff --git a/ste/xfer-remoteToLocal-symlink.go b/ste/xfer-remoteToLocal-symlink.go index 507ffa3d1..870a34f0b 100644 --- a/ste/xfer-remoteToLocal-symlink.go +++ b/ste/xfer-remoteToLocal-symlink.go @@ -44,7 +44,7 @@ func remoteToLocal_symlink(jptm IJobPartTransferMgr, p pipeline.Pipeline, pacer return } else { err = os.Remove(info.Destination) - if err != nil { + if err != nil && !os.IsNotExist(err) { // should not get back a non-existent error, but if we do, it's not a bad thing. jptm.FailActiveSend("deleting old file", err) jptm.ReportTransferDone() return @@ -53,7 +53,7 @@ func remoteToLocal_symlink(jptm IJobPartTransferMgr, p pipeline.Pipeline, pacer } } else { err := os.Remove(info.Destination) - if err != nil { + if err != nil && !os.IsNotExist(err) { // it's OK to fail because it doesn't exist. jptm.FailActiveSend("deleting old file", err) jptm.ReportTransferDone() return From 78e5b458b7e27aedd79deb0b6e330bd0bac46b40 Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Fri, 5 May 2023 09:34:20 -0700 Subject: [PATCH 15/16] Syscall6->SyscallN --- common/mmf_windows.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/mmf_windows.go b/common/mmf_windows.go index 0b3f79cf7..9a69fb1ae 100644 --- a/common/mmf_windows.go +++ b/common/mmf_windows.go @@ -139,7 +139,7 @@ func prefetchVirtualMemory(virtualAddresses *memoryRangeEntry) (err error) { // make system call to prefetch the memory range hProcess, _ := syscall.GetCurrentProcess() - r1, _, e1 := syscall.Syscall6(procPrefetchVirtualMemory.Addr(), 4, uintptr(hProcess), 1, uintptr(unsafe.Pointer(virtualAddresses)), 0, 0, 0) + r1, _, e1 := syscall.SyscallN(procPrefetchVirtualMemory.Addr(), 4, uintptr(hProcess), 1, uintptr(unsafe.Pointer(virtualAddresses)), 0, 0, 0) if r1 == 0 { if e1 != 0 { From 274c4243a7c4421dea2a4c614ec97744baeff43c Mon Sep 17 00:00:00 2001 From: Adele Reed Date: Thu, 4 May 2023 19:07:04 -0700 Subject: [PATCH 16/16] Support setting DFS group/owner/perms --- common/unixStatAdapter.go | 18 +++++------ ste/sender-blobFS.go | 61 ++++++++++++++++++++++++++++++++--- ste/sender-blobFSFromLocal.go | 2 +- 3 files changed, 66 insertions(+), 15 deletions(-) diff --git a/common/unixStatAdapter.go b/common/unixStatAdapter.go index 8e05e84cb..96e6ec857 100644 --- a/common/unixStatAdapter.go +++ b/common/unixStatAdapter.go @@ -324,15 +324,15 @@ const ( // Values cloned from x/sys/unix to avoid dependency S_IFIFO = 0x1000 S_IFLNK = 0xa000 - S_IRUSR = 0x400 - S_IWUSR = 0x200 - S_IXUSR = 0x100 - S_IRGRP = 0x040 - S_IWGRP = 0x020 - S_IXGRP = 0x010 - S_IROTH = 0x004 - S_IWOTH = 0x002 - S_IXOTH = 0x001 + S_IRUSR = 0400 + S_IWUSR = 0200 + S_IXUSR = 0100 + S_IRGRP = 0040 + S_IWGRP = 0020 + S_IXGRP = 0010 + S_IROTH = 0004 + S_IWOTH = 0002 + S_IXOTH = 0001 S_ALLPERM = 0x777 ) diff --git a/ste/sender-blobFS.go b/ste/sender-blobFS.go index e25fc8ced..5f5e127fb 100644 --- a/ste/sender-blobFS.go +++ b/ste/sender-blobFS.go @@ -26,6 +26,7 @@ import ( "github.com/Azure/azure-storage-blob-go/azblob" "net/url" "strings" + "sync" "time" "github.com/Azure/azure-pipeline-go/pipeline" @@ 
-237,7 +238,9 @@ func (u *blobFSSenderBase) GetSourcePOSIXProperties() (common.UnixStatAdapter, e } } -func (u *blobFSSenderBase) SetPOSIXProperties() error { +var HNSSetAccessControlFailedOnce = &sync.Once{} + +func (u *blobFSSenderBase) SetPOSIXProperties(hnsOnly bool) error { adapter, err := u.GetSourcePOSIXProperties() if err != nil { return fmt.Errorf("failed to get POSIX properties") @@ -249,12 +252,57 @@ func (u *blobFSSenderBase) SetPOSIXProperties() error { common.AddStatToBlobMetadata(adapter, meta) delete(meta, common.POSIXFolderMeta) // Can't be set on HNS accounts. - _, err = u.GetBlobURL().SetMetadata(u.jptm.Context(), meta, azblob.BlobAccessConditions{}, azblob.ClientProvidedKeyOptions{}) - return err + var AccessControlURL interface {SetAccessControl(ctx context.Context, permissions azbfs.BlobFSAccessControl) (*azbfs.PathUpdateResponse, error)} + switch u.SendableEntityType() { + case common.EEntityType.File(), common.EEntityType.Symlink(): + AccessControlURL = u.fileURL() + case common.EEntityType.Folder(): + AccessControlURL = u.dirURL() + } + + isRoot := false + if dURL, ok := AccessControlURL.(azbfs.DirectoryURL); ok { + if dURL.IsFileSystemRoot() { + isRoot = true + } + } + + if !hnsOnly && !isRoot { // don't try to set metadata on the container + _, err = u.GetBlobURL().SetMetadata(u.jptm.Context(), meta, azblob.BlobAccessConditions{}, azblob.ClientProvidedKeyOptions{}) + if err != nil { + return err + } + } + + mode := adapter.FileMode() + fields := []uint32{common.S_IRUSR, common.S_IWUSR, common.S_IXUSR, common.S_IRGRP, common.S_IWGRP, common.S_IXGRP, common.S_IROTH, common.S_IWOTH, common.S_IXOTH } + chars := "rwx" + out := "" + for _, field := range fields { + if mode & field == field { + out += string(chars[len(out) % 3]) + } else { + out += "-" + } + } + + _, err = AccessControlURL.SetAccessControl(u.jptm.Context(), azbfs.BlobFSAccessControl{ + Owner: fmt.Sprint(adapter.Owner()), + Group: fmt.Sprint(adapter.Group()), + Permissions: out, + }) + if err != nil { // A user could be targeting a non-HNS account with the dfs endpoint; it's best to warn rather than fail. + u.jptm.LogAtLevelForCurrentTransfer(pipeline.LogError, fmt.Sprintf("Failed to set dfs owner/group: %s", err.Error())) + HNSSetAccessControlFailedOnce.Do(func() { + common.GetLifecycleMgr().Info("One or more files or directories have failed to set access control; check the logs for details. (are you targeting a non-HNS account?)") + }) + } + + return nil } func (u *blobFSSenderBase) SetFolderProperties() error { - return u.SetPOSIXProperties() + return u.SetPOSIXProperties(false) } func (u *blobFSSenderBase) DirUrlToString() string { @@ -295,6 +343,9 @@ func (u *blobFSSenderBase) SendSymlink(linkData string) error { nil, // dfs doesn't support tags azblob.ClientProvidedKeyOptions{}, // cpk isn't used for dfs azblob.ImmutabilityPolicyOptions{}) // dfs doesn't support immutability policy + if err != nil { + return err + } - return err + return u.SetPOSIXProperties(true) // set only the HNS props } diff --git a/ste/sender-blobFSFromLocal.go b/ste/sender-blobFSFromLocal.go index 4bca93fbb..abea5ba20 100644 --- a/ste/sender-blobFSFromLocal.go +++ b/ste/sender-blobFSFromLocal.go @@ -95,7 +95,7 @@ func (u *blobFSUploader) Epilogue() { // Write POSIX data if jptm.IsLive() { if jptm.Info().PreservePOSIXProperties { - err := u.SetPOSIXProperties() + err := u.SetPOSIXProperties(false) // set all posix properties if err != nil { jptm.FailActiveUpload("Setting POSIX Properties", err) }
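
For readers following the final patch, the permission string passed to SetAccessControl is built by walking the nine POSIX permission bits in owner/group/other order and emitting "r", "w", or "x" by output position. The following is a standalone, minimal sketch of that conversion (the helper name modeToSymbolic and the main function are illustrative only, not part of the patch); it mirrors the loop in SetPOSIXProperties and can be run on its own to sanity-check the mapping:

package main

import "fmt"

// Permission bit values matching the octal constants used in common/unixStatAdapter.go.
const (
	sIRUSR = 0400
	sIWUSR = 0200
	sIXUSR = 0100
	sIRGRP = 0040
	sIWGRP = 0020
	sIXGRP = 0010
	sIROTH = 0004
	sIWOTH = 0002
	sIXOTH = 0001
)

// modeToSymbolic is a hypothetical helper that converts a numeric mode into the
// "rwxrwxrwx"-style string expected by the BlobFS access control API. It tests the
// nine permission bits in order and, for each set bit, picks 'r', 'w', or 'x' based
// on the current output position modulo 3; unset bits become '-'.
func modeToSymbolic(mode uint32) string {
	fields := []uint32{sIRUSR, sIWUSR, sIXUSR, sIRGRP, sIWGRP, sIXGRP, sIROTH, sIWOTH, sIXOTH}
	const chars = "rwx"
	out := ""
	for _, field := range fields {
		if mode&field == field {
			out += string(chars[len(out)%3])
		} else {
			out += "-"
		}
	}
	return out
}

func main() {
	fmt.Println(modeToSymbolic(0644)) // prints: rw-r--r--
	fmt.Println(modeToSymbolic(0755)) // prints: rwxr-xr-x
}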