Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[content-service] download s3 content using s5cmd #18783

Merged
merged 3 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 24 additions & 17 deletions components/content-service/pkg/storage/s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"

Expand Down Expand Up @@ -289,33 +290,39 @@ func (s3st *s3Storage) DownloadSnapshot(ctx context.Context, destination string,
return s3st.download(ctx, destination, name, mappings)
}

// download retrieves the object using s5cmd; earlier versions used the AWS SDK downloader.
func (s3st *s3Storage) download(ctx context.Context, destination string, obj string, mappings []archive.IDMapping) (found bool, err error) {
downloader := s3manager.NewDownloader(s3st.client, func(d *s3manager.Downloader) {
d.Concurrency = defaultCopyConcurrency
d.PartSize = defaultPartSize * megabytes
d.BufferProvider = s3manager.NewPooledBufferedWriterReadFromProvider(25 * megabytes)
})

s3File, err := os.CreateTemp("", "temporal-s3-file")
tempFile, err := os.CreateTemp("", "temporal-s3-file")
if err != nil {
return true, xerrors.Errorf("creating temporal file: %s", err.Error())
}
defer os.Remove(s3File.Name())

_, err = downloader.Download(ctx, s3File, &s3.GetObjectInput{
Bucket: aws.String(s3st.Config.Bucket),
Key: aws.String(obj),
})
tempFile.Close()

args := []string{
"cp",
// number of file parts to download concurrently
"--concurrency", "20",
// size in MB of each part
"--part-size", "25",
destination,
tempFile.Name(),
}
cmd := exec.Command("s5cmd", args...)
out, err := cmd.CombinedOutput()
if err != nil {
return false, err
log.WithError(err).WithField("out", string(out)).Error("unexpected error downloading file")
return true, xerrors.Errorf("unexpected error downloading file")
}

_, err = s3File.Seek(0, 0)
tempFile, err = os.Open(tempFile.Name())
if err != nil {
return false, err
return true, xerrors.Errorf("unexpected error opening downloaded file")
}

err = archive.ExtractTarbal(ctx, s3File, destination, archive.WithUIDMapping(mappings), archive.WithGIDMapping(mappings))
defer os.Remove(tempFile.Name())
defer tempFile.Close()

err = archive.ExtractTarbal(ctx, tempFile, destination, archive.WithUIDMapping(mappings), archive.WithGIDMapping(mappings))
if err != nil {
return true, xerrors.Errorf("tar %s: %s", destination, err.Error())
}
Expand Down
6 changes: 6 additions & 0 deletions components/ws-daemon/leeway.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ RUN apk add --no-cache curl file \
&& chmod +x runc.amd64 \
&& if ! file runc.amd64 | grep -iq "ELF 64-bit LSB pie executable"; then echo "runc.amd64 is not a binary file"; exit 1;fi

RUN curl -OsSL https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_Linux-64bit.tar.gz \
&& tar -xzvf s5cmd_2.2.2_Linux-64bit.tar.gz s5cmd \
&& chmod +x s5cmd \
&& if ! file s5cmd | grep -iq "ELF 64-bit LSB executable"; then echo "s5cmd is not a binary file"; exit 1;fi

FROM ubuntu:22.04

# trigger a manual rebuild by increasing the value
Expand Down Expand Up @@ -46,6 +51,7 @@ RUN apt update \
/var/tmp/*

COPY --from=dl /dl/runc.amd64 /usr/bin/runc
COPY --from=dl /dl/s5cmd /usr/bin/s5cmd

# Add gitpod user for operations (e.g. checkout because of the post-checkout hook!)
RUN groupadd -r -g 33333 gitpod \
Expand Down
Loading