From 0c8d91ecba234e2041f622d810bef48b39c8b548 Mon Sep 17 00:00:00 2001 From: Paul Van Eck Date: Tue, 3 May 2022 21:47:05 -0700 Subject: [PATCH] feat: Add archive extraction support for http(s) This enables archive extraction for the zip format and the gz/tar/tar.gz formats. If a user provides an http(s) link to one of these formats, pullman will now automatically extract the contents into the destination directory. Signed-off-by: Paul Van Eck --- pullman/helpers.go | 191 ++++++++++++++++++++ pullman/helpers_test.go | 191 ++++++++++++++++++++ pullman/storageproviders/http/downloader.go | 44 ++++- 3 files changed, 425 insertions(+), 1 deletion(-) create mode 100644 pullman/helpers_test.go diff --git a/pullman/helpers.go b/pullman/helpers.go index eea88dad..06d891b8 100644 --- a/pullman/helpers.go +++ b/pullman/helpers.go @@ -14,12 +14,61 @@ package pullman import ( + "archive/tar" + "archive/zip" + "bufio" + "bytes" + "compress/gzip" "fmt" "hash/fnv" + "io" "os" "path/filepath" + "strings" ) +type FileFormat struct { + MagicBytes []byte + Offset int + Extension string +} + +// Magic byte values pulled from: https://en.wikipedia.org/wiki/List_of_file_signatures +var fileFormats = []FileFormat{ + { + MagicBytes: []byte{0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30}, + Offset: 257, + Extension: "tar", + }, + { + MagicBytes: []byte{0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00}, + Offset: 257, + Extension: "tar", + }, + { + MagicBytes: []byte{0x1F, 0x8B}, + Offset: 0, + Extension: "gz", + }, + { + MagicBytes: []byte{0x50, 0x4B, 0x03, 0x04}, + Offset: 0, + Extension: "zip", + }, + + { + MagicBytes: []byte{0x50, 0x4B, 0x05, 0x06}, + Offset: 0, + Extension: "zip", + }, + + { + MagicBytes: []byte{0x50, 0x4B, 0x07, 0x08}, + Offset: 0, + Extension: "zip", + }, +} + // OpenFile will check the path and the filesystem for mismatch errors func OpenFile(path string) (*os.File, error) { // resource paths need to be compatible with a local filesystem download @@ -57,3 +106,145 @@ 
func HashStrings(strings ...string) string {
 	return fmt.Sprintf("%#x", h.Sum64())
 }
+
+// ExtractZip extracts the zip archive at filePath into the dest directory.
+//
+// Directory entries are created with mode 0755; every other entry is written
+// through OpenFile. An entry whose name would resolve outside dest (the
+// "zip slip" attack) aborts the extraction with an error. Files written
+// before a failure are left in place.
+func ExtractZip(filePath string, dest string) error {
+	zipReader, err := zip.OpenReader(filePath)
+	if err != nil {
+		return fmt.Errorf("unable to open '%s' for reading: %w", filePath, err)
+	}
+	defer zipReader.Close()
+
+	root := filepath.Clean(dest)
+	for _, zipFileEntry := range zipReader.File {
+		destFilePath := filepath.Join(root, zipFileEntry.Name)
+
+		// Zip slip vulnerability check. Comparing the path relative to root
+		// (instead of a string prefix) also works when dest is "." or "/",
+		// where the cleaned join no longer starts with "dest/".
+		rel, relErr := filepath.Rel(root, destFilePath)
+		if relErr != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
+			return fmt.Errorf("%s: illegal file path", destFilePath)
+		}
+
+		if zipFileEntry.FileInfo().IsDir() {
+			if err := os.MkdirAll(destFilePath, 0755); err != nil {
+				return fmt.Errorf("error creating new directory %s: %w", destFilePath, err)
+			}
+			continue
+		}
+
+		if err := writeZipEntry(zipFileEntry, destFilePath); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// writeZipEntry copies one zip entry into destFilePath. Both handles are
+// closed before it returns, so extracting a large archive never holds more
+// than one entry open at a time, and a failed close of the written file is
+// reported instead of being dropped.
+func writeZipEntry(entry *zip.File, destFilePath string) (err error) {
+	file, fileErr := OpenFile(destFilePath)
+	if fileErr != nil {
+		return fmt.Errorf("unable to open local file '%s' for writing: %w", destFilePath, fileErr)
+	}
+	defer func() {
+		if closeErr := file.Close(); closeErr != nil && err == nil {
+			err = fmt.Errorf("error closing local file '%s': %w", destFilePath, closeErr)
+		}
+	}()
+
+	zippedRc, openErr := entry.Open()
+	if openErr != nil {
+		return fmt.Errorf("error opening zip file entry: %w", openErr)
+	}
+	defer zippedRc.Close()
+
+	if _, err = io.Copy(file, zippedRc); err != nil {
+		return fmt.Errorf("error writing zip resource to local file '%s': %w", destFilePath, err)
+	}
+	return nil
+}
+
+// Extract a tar archive file into the provided destination directory.
+// Directory entries are created with mode 0755; every other entry is written
+// through OpenFile. An entry whose name would resolve outside dest (for
+// example through ".." components) aborts the extraction with an error,
+// matching the zip-slip guard in ExtractZip. Files written before a failure
+// are left in place.
+func ExtractTar(filePath string, dest string) error {
+	tarFile, err := os.Open(filePath)
+	if err != nil {
+		return fmt.Errorf("unable to open '%s' for reading: %w", filePath, err)
+	}
+	defer tarFile.Close()
+
+	root := filepath.Clean(dest)
+	tr := tar.NewReader(tarFile)
+	for {
+		header, err := tr.Next()
+		if err == io.EOF {
+			// End of archive: every entry was extracted.
+			return nil
+		}
+		if err != nil {
+			return fmt.Errorf("error reading tar archive entry: %w", err)
+		}
+		if header == nil {
+			continue
+		}
+
+		destFilePath := filepath.Join(root, header.Name)
+
+		// Tar slip check: the joined path must stay inside root. Working on
+		// the relative path keeps the check valid when dest is "." or "/".
+		rel, relErr := filepath.Rel(root, destFilePath)
+		if relErr != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
+			return fmt.Errorf("%s: illegal file path", destFilePath)
+		}
+
+		if header.Typeflag == tar.TypeDir {
+			if err := os.MkdirAll(destFilePath, 0755); err != nil {
+				return fmt.Errorf("error creating new directory %s: %w", destFilePath, err)
+			}
+			continue
+		}
+
+		if err := copyToLocalFile(destFilePath, tr, "tar"); err != nil {
+			return err
+		}
+	}
+}
+
+// copyToLocalFile streams src into the file OpenFile returns for destFilePath
+// and closes that file before returning, so callers that loop over many
+// entries never accumulate open handles. kind names the source format
+// ("tar", "gzip") and only appears in the error text. A failed close is
+// reported when the copy itself succeeded.
+func copyToLocalFile(destFilePath string, src io.Reader, kind string) (err error) {
+	file, fileErr := OpenFile(destFilePath)
+	if fileErr != nil {
+		return fmt.Errorf("unable to open local file '%s' for writing: %w", destFilePath, fileErr)
+	}
+	defer func() {
+		if closeErr := file.Close(); closeErr != nil && err == nil {
+			err = fmt.Errorf("error closing local file '%s': %w", destFilePath, closeErr)
+		}
+	}()
+
+	if _, err = io.Copy(file, src); err != nil {
+		return fmt.Errorf("error writing %s resource to local file '%s': %w", kind, destFilePath, err)
+	}
+	return nil
+}
+
+// ExtractGzip decompresses the gzip file at filePath into the file at dest.
+//
+// filePath and dest must name different files: the source is still being read
+// while dest is written, so decompressing a file onto itself cannot produce a
+// valid result and is rejected up front.
+func ExtractGzip(filePath string, dest string) error {
+	if filepath.Clean(filePath) == filepath.Clean(dest) {
+		return fmt.Errorf("unable to decompress '%s' onto itself", filePath)
+	}
+
+	gzipFile, err := os.Open(filePath)
+	if err != nil {
+		return fmt.Errorf("unable to open '%s' for reading: %w", filePath, err)
+	}
+	defer gzipFile.Close()
+
+	gzr, err := gzip.NewReader(gzipFile)
+	if err != nil {
+		return fmt.Errorf("unable to create gzip reader: %w", err)
+	}
+	defer gzr.Close()
+
+	return copyToLocalFile(dest, gzr, "gzip")
+}
+
+// Get the file type based on the first few hundred bytes of the stream.
+// If the file isn't one of the expected formats, nil is returned.
+// If an error occurs while determining the file format, nil is returned.
+// Files shorter than the deepest signature are still checked against every
+// signature that fits within the bytes available.
+func GetFileFormat(filePath string) *FileFormat {
+	file, err := os.Open(filePath)
+	if err != nil {
+		return nil
+	}
+	defer file.Close()
+
+	// Peek exactly as far as the deepest signature in the table reaches
+	// (offset + length), rather than a hard-coded byte count that can fall
+	// short of it.
+	peekLen := 0
+	for _, format := range fileFormats {
+		if end := format.Offset + len(format.MagicBytes); end > peekLen {
+			peekLen = end
+		}
+	}
+
+	// Peek returns the bytes it could read together with io.EOF when the
+	// file is shorter than peekLen; that is not a failure here, because the
+	// signatures located at offset 0 can still be matched.
+	fileBytes, err := bufio.NewReaderSize(file, peekLen).Peek(peekLen)
+	if err != nil && err != io.EOF {
+		return nil
+	}
+
+	for _, format := range fileFormats {
+		end := format.Offset + len(format.MagicBytes)
+		if end > len(fileBytes) {
+			// Not enough data for this signature; try the next one.
+			continue
+		}
+		if bytes.Equal(fileBytes[format.Offset:end], format.MagicBytes) {
+			// Hand back a copy so callers cannot alter the shared table.
+			match := format
+			return &match
+		}
+	}
+	return nil
+}
diff --git a/pullman/helpers_test.go b/pullman/helpers_test.go
new file mode 100644
index 00000000..f0e1c43a
--- /dev/null
+++ b/pullman/helpers_test.go
@@ -0,0 +1,191 @@
+// Copyright 2022 IBM Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package pullman + +import ( + "archive/tar" + "archive/zip" + "bytes" + "compress/gzip" + "io/ioutil" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "sigs.k8s.io/controller-runtime/pkg/log/zap" +) + +const generatedDirectory = "generated" + +var zipFilePath = filepath.Join(generatedDirectory, "test-archive.zip") +var tarFilePath = filepath.Join(generatedDirectory, "test-archive.tar") +var tarGzFilePath = filepath.Join(generatedDirectory, "test-archive.tar.gz") +var log = zap.New(zap.UseDevMode(true)) + +var files = []struct { + Name, Body string +}{ + {"nested/path/file1.txt", "Foo"}, + {"file2.txt", "Bar"}, + {"file3.txt", "Fun"}, +} + +func generateZip() { + buf := new(bytes.Buffer) + zipWriter := zip.NewWriter(buf) + + for _, file := range files { + f, err := zipWriter.Create(file.Name) + if err != nil { + log.Error(err, "Failed to add file to test zip file") + os.Exit(1) + } + _, err = f.Write([]byte(file.Body)) + if err != nil { + log.Error(err, "Failed to write file to test zip file") + os.Exit(1) + } + } + + if err := zipWriter.Close(); err != nil { + log.Error(err, "Failed to close zip writer") + } + + writeBytes(buf.Bytes(), zipFilePath) +} + +func generateTar() { + buf := new(bytes.Buffer) + tarWriter := tar.NewWriter(buf) + defer tarWriter.Close() + + for _, file := range files { + header := &tar.Header{ + Name: file.Name, + Mode: 0600, + Size: int64(len(file.Body)), + } + + if err := tarWriter.WriteHeader(header); err != nil { + log.Error(err, "Failed to write header to test tar file") + os.Exit(1) + } + if _, err := tarWriter.Write([]byte(file.Body)); err != nil { + log.Error(err, "Failed to write header to test tar file") + os.Exit(1) + } + } + + if err := tarWriter.Close(); err != nil { + log.Error(err, "Failed to close tar writer") + } + + writeBytes(buf.Bytes(), tarFilePath) +} + +func generateTarGz() { + buf := new(bytes.Buffer) + gzipWriter := gzip.NewWriter(buf) + defer gzipWriter.Close() + tarWriter 
:= tar.NewWriter(gzipWriter) + defer tarWriter.Close() + + for _, file := range files { + header := &tar.Header{ + Name: file.Name, + Mode: 0600, + Size: int64(len(file.Body)), + } + + if err := tarWriter.WriteHeader(header); err != nil { + log.Error(err, "Failed to write header to test tar.gz file") + os.Exit(1) + } + if _, err := tarWriter.Write([]byte(file.Body)); err != nil { + log.Error(err, "Failed to write header to test tar.gz file") + os.Exit(1) + } + } + + if err := gzipWriter.Close(); err != nil { + log.Error(err, "Failed to close gzip writer") + } + + writeBytes(buf.Bytes(), tarGzFilePath) +} + +func writeBytes(bytes []byte, outputPath string) { + if err := os.MkdirAll(filepath.Dir(outputPath), os.ModePerm); err != nil { + log.Error(err, "Failed to create archive parent directories") + os.Exit(1) + } + + if err := ioutil.WriteFile(outputPath, bytes, 0777); err != nil { + log.Error(err, "Failed to write archive file to disk") + os.Exit(1) + } +} + +func tearDown() { + os.RemoveAll(generatedDirectory) +} + +func Test_ExtractZip(t *testing.T) { + generateZip() + defer tearDown() + + err := ExtractZip(zipFilePath, generatedDirectory) + assert.NoError(t, err) + + for _, file := range files { + contents, err := os.ReadFile(filepath.Join(generatedDirectory, file.Name)) + assert.NoError(t, err) + assert.Equal(t, file.Body, string(contents)) + } +} + +func Test_ExtractTar(t *testing.T) { + generateTar() + defer tearDown() + + err := ExtractTar(tarFilePath, generatedDirectory) + assert.NoError(t, err) + + for _, file := range files { + contents, err := os.ReadFile(filepath.Join(generatedDirectory, file.Name)) + assert.NoError(t, err) + assert.Equal(t, file.Body, string(contents)) + } + +} + +func Test_ExtractTarGz(t *testing.T) { + generateTarGz() + defer tearDown() + + newFilePath := strings.TrimSuffix(tarGzFilePath, ".gz") + err := ExtractGzip(tarGzFilePath, newFilePath) + assert.NoError(t, err) + err = ExtractTar(newFilePath, generatedDirectory) + 
assert.NoError(t, err) + + for _, file := range files { + contents, err := os.ReadFile(filepath.Join(generatedDirectory, file.Name)) + assert.NoError(t, err) + assert.Equal(t, file.Body, string(contents)) + } + +} diff --git a/pullman/storageproviders/http/downloader.go b/pullman/storageproviders/http/downloader.go index 2e320d19..1185ed70 100644 --- a/pullman/storageproviders/http/downloader.go +++ b/pullman/storageproviders/http/downloader.go @@ -20,6 +20,9 @@ import ( "fmt" "io" "net/http" + "os" + "path/filepath" + "strings" "time" "github.com/go-logr/logr" @@ -68,13 +71,22 @@ type httpFetcher struct { var _ fetcher = (*httpFetcher)(nil) func (c *httpFetcher) download(ctx context.Context, req *http.Request, filename string) error { - + fmt.Println(filename) resp, err := c.httpClient.Do(req) if err != nil { return fmt.Errorf("error getting resource '%s': %w", req.URL.String(), err) } defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + body, readErr := io.ReadAll(resp.Body) + if readErr != nil { + return fmt.Errorf("error getting resource '%s'. HTTP Status Code: %d, Error: %w", req.URL.String(), resp.StatusCode, readErr) + } + return fmt.Errorf("error getting resource '%s'. HTTP Status Code: %d, Error: %s", req.URL.String(), resp.StatusCode, string(body)) + } + + // Download file first. file, fileErr := pullman.OpenFile(filename) if fileErr != nil { return fmt.Errorf("unable to open local file '%s' for writing: %w", filename, fileErr) @@ -85,5 +97,35 @@ func (c *httpFetcher) download(ctx context.Context, req *http.Request, filename return fmt.Errorf("error writing resource to local file '%s': %w", filename, err) } + fileFormat := pullman.GetFileFormat(filename) + + // Check if the file format is a supported archive format and automatically extract it. 
+ if fileFormat != nil { + switch fileFormat.Extension { + case "gz": + decompressedFilePath := strings.TrimSuffix(filename, ".gz") + if err := pullman.ExtractGzip(filename, decompressedFilePath); err != nil { + return fmt.Errorf("error decompressing gzip file '%s': %w", filename, err) + } + os.Remove(filename) + fileFormat = pullman.GetFileFormat(decompressedFilePath) + if fileFormat != nil && fileFormat.Extension == "tar" { + if err := pullman.ExtractTar(decompressedFilePath, filepath.Dir(decompressedFilePath)); err != nil { + return fmt.Errorf("error extracting tar file '%s': %w", filename, err) + } + os.Remove(decompressedFilePath) + } + case "tar": + if err := pullman.ExtractTar(filename, filepath.Dir(filename)); err != nil { + return fmt.Errorf("error extracting tar file '%s': %w", filename, err) + } + os.Remove(filename) + case "zip": + if err := pullman.ExtractZip(filename, filepath.Dir(filename)); err != nil { + return fmt.Errorf("error extracting zip file '%s': %w", filename, err) + } + os.Remove(filename) + } + } return nil }