From 4fb2677c4a6849b6f140214fcb84dce2cc6c7899 Mon Sep 17 00:00:00 2001 From: Kyle Brennan Date: Mon, 25 Sep 2023 13:36:05 -0400 Subject: [PATCH] [content-service] download s3 content using s5cmd (#18783) * [content-service] download s3 content using s5cmd Fixes ENG-884 * No pie for you * [content-service] tune s5cmd based on testing Results in https://gist.github.com/kylos101/8c49b65d257cf9f642a45877081efc26 --- components/content-service/pkg/storage/s3.go | 41 ++++++++++++-------- components/ws-daemon/leeway.Dockerfile | 6 +++ 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/components/content-service/pkg/storage/s3.go b/components/content-service/pkg/storage/s3.go index 0f36c5b795f773..8641dd9f06a21c 100644 --- a/components/content-service/pkg/storage/s3.go +++ b/components/content-service/pkg/storage/s3.go @@ -9,6 +9,7 @@ import ( "errors" "fmt" "os" + "os/exec" "path/filepath" "strings" @@ -289,33 +290,39 @@ func (s3st *s3Storage) DownloadSnapshot(ctx context.Context, destination string, return s3st.download(ctx, destination, name, mappings) } +// download object using s5cmd (prior to which we used aws sdk) func (s3st *s3Storage) download(ctx context.Context, destination string, obj string, mappings []archive.IDMapping) (found bool, err error) { - downloader := s3manager.NewDownloader(s3st.client, func(d *s3manager.Downloader) { - d.Concurrency = defaultCopyConcurrency - d.PartSize = defaultPartSize * megabytes - d.BufferProvider = s3manager.NewPooledBufferedWriterReadFromProvider(25 * megabytes) - }) - - s3File, err := os.CreateTemp("", "temporal-s3-file") + tempFile, err := os.CreateTemp("", "temporal-s3-file") if err != nil { return true, xerrors.Errorf("creating temporal file: %s", err.Error()) } - defer os.Remove(s3File.Name()) - - _, err = downloader.Download(ctx, s3File, &s3.GetObjectInput{ - Bucket: aws.String(s3st.Config.Bucket), - Key: aws.String(obj), - }) + tempFile.Close() + + args := []string{ + "cp", + // # of file parts to download at once + "--concurrency", "20", + // size in MB of each part + "--part-size", "25", + destination, + tempFile.Name(), + } + cmd := exec.Command("s5cmd", args...) + out, err := cmd.CombinedOutput() if err != nil { - return false, err + log.WithError(err).WithField("out", string(out)).Error("unexpected error downloading file") + return true, xerrors.Errorf("unexpected error downloading file") } - _, err = s3File.Seek(0, 0) + tempFile, err = os.Open(tempFile.Name()) if err != nil { - return false, err + return true, xerrors.Errorf("unexpected error opening downloaded file") } - err = archive.ExtractTarbal(ctx, s3File, destination, archive.WithUIDMapping(mappings), archive.WithGIDMapping(mappings)) + defer os.Remove(tempFile.Name()) + defer tempFile.Close() + + err = archive.ExtractTarbal(ctx, tempFile, destination, archive.WithUIDMapping(mappings), archive.WithGIDMapping(mappings)) if err != nil { return true, xerrors.Errorf("tar %s: %s", destination, err.Error()) } diff --git a/components/ws-daemon/leeway.Dockerfile b/components/ws-daemon/leeway.Dockerfile index 243f57a33c23d4..6d08a2b104237b 100644 --- a/components/ws-daemon/leeway.Dockerfile +++ b/components/ws-daemon/leeway.Dockerfile @@ -9,6 +9,11 @@ RUN apk add --no-cache curl file \ && chmod +x runc.amd64 \ && if ! file runc.amd64 | grep -iq "ELF 64-bit LSB pie executable"; then echo "runc.amd64 is not a binary file"; exit 1;fi +RUN curl -OsSL https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_Linux-64bit.tar.gz \ + && tar -xzvf s5cmd_2.2.2_Linux-64bit.tar.gz s5cmd \ + && chmod +x s5cmd \ + && if ! file s5cmd | grep -iq "ELF 64-bit LSB executable"; then echo "s5cmd is not a binary file"; exit 1;fi + FROM ubuntu:22.04 # trigger manual rebuild increasing the value @@ -46,6 +51,7 @@ RUN apt update \ /var/tmp/* COPY --from=dl /dl/runc.amd64 /usr/bin/runc +COPY --from=dl /dl/s5cmd /usr/bin/s5cmd # Add gitpod user for operations (e.g. checkout because of the post-checkout hook!) RUN groupadd -r -g 33333 gitpod \