Skip to content

Commit

Permalink
[content-service] download s3 content using s5cmd (#18783)
Browse files Browse the repository at this point in the history
* [content-service] download s3 content using s5cmd

Fixes ENG-884

* No pie for you

* [content-service] tune s5cmd based on testing

Test results are in https://gist.github.com/kylos101/8c49b65d257cf9f642a45877081efc26
  • Loading branch information
kylos101 authored Sep 25, 2023
1 parent 02d54f5 commit 4fb2677
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 17 deletions.
41 changes: 24 additions & 17 deletions components/content-service/pkg/storage/s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"

Expand Down Expand Up @@ -289,33 +290,39 @@ func (s3st *s3Storage) DownloadSnapshot(ctx context.Context, destination string,
return s3st.download(ctx, destination, name, mappings)
}

// download fetches the object using s5cmd (this code previously used the AWS SDK downloader)
func (s3st *s3Storage) download(ctx context.Context, destination string, obj string, mappings []archive.IDMapping) (found bool, err error) {
downloader := s3manager.NewDownloader(s3st.client, func(d *s3manager.Downloader) {
d.Concurrency = defaultCopyConcurrency
d.PartSize = defaultPartSize * megabytes
d.BufferProvider = s3manager.NewPooledBufferedWriterReadFromProvider(25 * megabytes)
})

s3File, err := os.CreateTemp("", "temporal-s3-file")
tempFile, err := os.CreateTemp("", "temporal-s3-file")
if err != nil {
return true, xerrors.Errorf("creating temporal file: %s", err.Error())
}
defer os.Remove(s3File.Name())

_, err = downloader.Download(ctx, s3File, &s3.GetObjectInput{
Bucket: aws.String(s3st.Config.Bucket),
Key: aws.String(obj),
})
tempFile.Close()

args := []string{
"cp",
// # of file parts to download at once
"--concurrency", "20",
// size in MB of each part
"--part-size", "25",
destination,
tempFile.Name(),
}
cmd := exec.Command("s5cmd", args...)
out, err := cmd.CombinedOutput()
if err != nil {
return false, err
log.WithError(err).WithField("out", string(out)).Error("unexpected error downloading file")
return true, xerrors.Errorf("unexpected error downloading file")
}

_, err = s3File.Seek(0, 0)
tempFile, err = os.Open(tempFile.Name())
if err != nil {
return false, err
return true, xerrors.Errorf("unexpected error opening downloaded file")
}

err = archive.ExtractTarbal(ctx, s3File, destination, archive.WithUIDMapping(mappings), archive.WithGIDMapping(mappings))
defer os.Remove(tempFile.Name())
defer tempFile.Close()

err = archive.ExtractTarbal(ctx, tempFile, destination, archive.WithUIDMapping(mappings), archive.WithGIDMapping(mappings))
if err != nil {
return true, xerrors.Errorf("tar %s: %s", destination, err.Error())
}
Expand Down
6 changes: 6 additions & 0 deletions components/ws-daemon/leeway.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ RUN apk add --no-cache curl file \
&& chmod +x runc.amd64 \
&& if ! file runc.amd64 | grep -iq "ELF 64-bit LSB pie executable"; then echo "runc.amd64 is not a binary file"; exit 1;fi

RUN curl -OsSL https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_Linux-64bit.tar.gz \
&& tar -xzvf s5cmd_2.2.2_Linux-64bit.tar.gz s5cmd \
&& chmod +x s5cmd \
&& if ! file s5cmd | grep -iq "ELF 64-bit LSB executable"; then echo "s5cmd is not a binary file"; exit 1;fi

FROM ubuntu:22.04

# trigger manual rebuild increasing the value
Expand Down Expand Up @@ -46,6 +51,7 @@ RUN apt update \
/var/tmp/*

COPY --from=dl /dl/runc.amd64 /usr/bin/runc
COPY --from=dl /dl/s5cmd /usr/bin/s5cmd

# Add gitpod user for operations (e.g. checkout because of the post-checkout hook!)
RUN groupadd -r -g 33333 gitpod \
Expand Down

0 comments on commit 4fb2677

Please sign in to comment.