Skip to content

Commit

Permalink
Remove optimization for small files in sender-blockBlobFromURL (#2373)
Browse files Browse the repository at this point in the history
* Remove optimization for small files in PutBlobFromURL

* Update comment
  • Loading branch information
nakulkar-msft authored Sep 13, 2023
1 parent 7e08e2f commit 08a60ee
Showing 1 changed file with 9 additions and 54 deletions.
63 changes: 9 additions & 54 deletions ste/sender-blockBlobFromURL.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@
package ste

import (
"bytes"
"fmt"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/streaming"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
"sync/atomic"
Expand Down Expand Up @@ -64,10 +62,15 @@ func newURLToBlockBlobCopier(jptm IJobPartTransferMgr, destination string, pacer

// Returns a chunk-func for blob copies
func (c *urlToBlockBlobCopier) GenerateCopyFunc(id common.ChunkID, blockIndex int32, adjustedChunkSize int64, chunkIsWholeFile bool) chunkFunc {
if blockIndex == 0 && adjustedChunkSize == 0 {
setPutListNeed(&c.atomicPutListIndicator, putListNotNeeded)
return c.generateCreateEmptyBlob(id)
}
/*
* There was a optimization here to use PutBlob for zero-byte blobs instead of PutBlobFromURL.
* It was removed because of these reasons:
* 1. Both apis are different in some aspects. For put blob service verifies the content md5.
* This is not required if check-md5 is false. Using same calls helps us be consistent.
* 2. If the source only has list (and no read) permissions, we will still put the blob here
* While it is arguable that content can be inferred from size, it is better to fail transfer
* for blobs of all sizes.
*/
// Small blobs from all sources will be copied over to destination using PutBlobFromUrl
if c.NumChunks() == 1 && adjustedChunkSize <= int64(blockblob.MaxUploadBlobBytes) {
/*
Expand All @@ -82,54 +85,6 @@ func (c *urlToBlockBlobCopier) GenerateCopyFunc(id common.ChunkID, blockIndex in
return c.generatePutBlockFromURL(id, blockIndex, adjustedChunkSize)
}

// generateCreateEmptyBlob generates a func to create empty blob in destination.
// This could be replaced by sync version of copy blob from URL.
func (c *urlToBlockBlobCopier) generateCreateEmptyBlob(id common.ChunkID) chunkFunc {
return createSendToRemoteChunkFunc(c.jptm, id, func() {
jptm := c.jptm

jptm.LogChunkStatus(id, common.EWaitReason.S2SCopyOnWire())
// Create blob and finish.
if !ValidateTier(jptm, c.destBlobTier, c.destBlockBlobClient, c.jptm.Context(), false) {
c.destBlobTier = ""
}

blobTags := c.blobTagsToApply
setTags := separateSetTagsRequired(blobTags)
if setTags || len(blobTags) == 0 {
blobTags = nil
}

// TODO: Remove this snippet once service starts supporting CPK with blob tier
destBlobTier := &c.destBlobTier
if c.jptm.IsSourceEncrypted() {
destBlobTier = nil
}

_, err := c.destBlockBlobClient.Upload(c.jptm.Context(), streaming.NopCloser(bytes.NewReader(nil)),
&blockblob.UploadOptions{
HTTPHeaders: &c.headersToApply,
Metadata: c.metadataToApply,
Tier: destBlobTier,
Tags: blobTags,
CPKInfo: c.jptm.CpkInfo(),
CPKScopeInfo: c.jptm.CpkScopeInfo(),
})
if err != nil {
jptm.FailActiveSend("Creating empty blob", err)
return
}

atomic.AddInt32(&c.atomicChunksWritten, 1)

if setTags {
if _, err := c.destBlockBlobClient.SetTags(jptm.Context(), c.blobTagsToApply, nil); err != nil {
c.jptm.Log(common.LogWarning, err.Error())
}
}
})
}

// generatePutBlockFromURL generates a func to copy the block of src data from given startIndex till the given chunkSize.
func (c *urlToBlockBlobCopier) generatePutBlockFromURL(id common.ChunkID, blockIndex int32, adjustedChunkSize int64) chunkFunc {
return createSendToRemoteChunkFunc(c.jptm, id, func() {
Expand Down

0 comments on commit 08a60ee

Please sign in to comment.