Skip to content

Commit

Permalink
feat: Add archive extraction support for http(s)
Browse files Browse the repository at this point in the history
This enables archive extraction for the zip format and the
gz/tar/tar.gz formats. If a user provides an http(s) link to one of
these formats, pullman will now automatically extract the contents
into the destination directory.

Signed-off-by: Paul Van Eck <[email protected]>
  • Loading branch information
pvaneck committed Jun 7, 2022
1 parent 473fff4 commit 0c8d91e
Show file tree
Hide file tree
Showing 3 changed files with 425 additions and 1 deletion.
191 changes: 191 additions & 0 deletions pullman/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,61 @@
package pullman

import (
"archive/tar"
"archive/zip"
"bufio"
"bytes"
"compress/gzip"
"fmt"
"hash/fnv"
"io"
"os"
"path/filepath"
"strings"
)

// FileFormat describes a file type that is recognized by a fixed byte
// signature located at a known position in the file.
type FileFormat struct {
// MagicBytes is the byte signature that identifies the format.
MagicBytes []byte
// Offset is the zero-based byte position in the file at which MagicBytes
// is expected to start.
Offset int
// Extension is the short label of the format (the entries in fileFormats
// use "tar", "gz" and "zip").
Extension string
}

// fileFormats is the table of archive signatures that GetFileFormat scans, in
// order, to identify a file.
//
// Signature values are taken from
// https://en.wikipedia.org/wiki/List_of_file_signatures and are written here as
// byte strings; each \xNN escape is exactly one byte.
var fileFormats = []FileFormat{
	// tar: "ustar", a NUL byte, then "00" (75 73 74 61 72 00 30 30).
	{MagicBytes: []byte("ustar\x0000"), Offset: 257, Extension: "tar"},
	// tar: "ustar", two spaces, then a NUL byte (75 73 74 61 72 20 20 00).
	{MagicBytes: []byte("ustar  \x00"), Offset: 257, Extension: "tar"},
	// gzip (1F 8B).
	{MagicBytes: []byte("\x1f\x8b"), Offset: 0, Extension: "gz"},
	// zip: "PK" 03 04.
	{MagicBytes: []byte("PK\x03\x04"), Offset: 0, Extension: "zip"},
	// zip: "PK" 05 06 (listed by the reference above as an empty archive).
	{MagicBytes: []byte("PK\x05\x06"), Offset: 0, Extension: "zip"},
	// zip: "PK" 07 08 (listed by the reference above as a spanned archive).
	{MagicBytes: []byte("PK\x07\x08"), Offset: 0, Extension: "zip"},
}

// OpenFile will check the path and the filesystem for mismatch errors
func OpenFile(path string) (*os.File, error) {
// resource paths need to be compatible with a local filesystem download
Expand Down Expand Up @@ -57,3 +106,145 @@ func HashStrings(strings ...string) string {

return fmt.Sprintf("%#x", h.Sum64())
}

// ExtractZip extracts the zip archive at filePath into the directory dest.
//
// Directory entries are created with mode 0755. Every other entry is written
// to a file opened with OpenFile. An entry whose name would resolve to a
// location outside of dest aborts the extraction with an error. Entries that
// were extracted before an error occurred are left on disk.
func ExtractZip(filePath string, dest string) error {
	zipReader, err := zip.OpenReader(filePath)
	if err != nil {
		return fmt.Errorf("unable to open '%s' for reading: %w", filePath, err)
	}
	defer zipReader.Close()

	parentPrefix := ".." + string(os.PathSeparator)
	for _, zipFileEntry := range zipReader.File {
		destFilePath := filepath.Join(dest, zipFileEntry.Name)

		// Zip slip vulnerability check. Comparing the path relative to dest
		// (instead of a string prefix on the joined path) also works when
		// dest is ".", empty, or a filesystem root, where filepath.Join
		// normalizes away the prefix a plain HasPrefix test would look for.
		rel, relErr := filepath.Rel(dest, destFilePath)
		if relErr != nil || rel == ".." || strings.HasPrefix(rel, parentPrefix) {
			return fmt.Errorf("%s: illegal file path", destFilePath)
		}

		if zipFileEntry.FileInfo().IsDir() {
			if err := os.MkdirAll(destFilePath, 0755); err != nil {
				return fmt.Errorf("error creating new directory %s: %w", destFilePath, err)
			}
			continue
		}

		// Each entry is handled in its own call so that its file handles are
		// released before the next entry is processed.
		if err := extractZipEntry(zipFileEntry, destFilePath); err != nil {
			return err
		}
	}
	return nil
}

// extractZipEntry copies the contents of a single zip entry into the file at
// destFilePath and closes both the entry reader and the file before returning.
func extractZipEntry(entry *zip.File, destFilePath string) (err error) {
	file, fileErr := OpenFile(destFilePath)
	if fileErr != nil {
		return fmt.Errorf("unable to open local file '%s' for writing: %w", destFilePath, fileErr)
	}
	defer func() {
		// A failed close of a written file can mean lost data, so report it
		// unless an earlier error is already being returned.
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("error closing local file '%s': %w", destFilePath, closeErr)
		}
	}()

	zippedRc, err := entry.Open()
	if err != nil {
		return fmt.Errorf("error opening zip file entry: %w", err)
	}
	defer zippedRc.Close()

	if _, err = io.Copy(file, zippedRc); err != nil {
		return fmt.Errorf("error writing zip resource to local file '%s': %w", destFilePath, err)
	}
	return nil
}

// ExtractTar extracts the tar archive at filePath into the directory dest.
//
// Directory entries are created with mode 0755. Every other entry, whatever
// its type flag, is written as a file opened with OpenFile. An entry whose
// name would resolve to a location outside of dest aborts the extraction with
// an error. Entries that were extracted before an error occurred are left on
// disk.
func ExtractTar(filePath string, dest string) error {
	tarFile, err := os.Open(filePath)
	if err != nil {
		return fmt.Errorf("unable to open '%s' for reading: %w", filePath, err)
	}
	defer tarFile.Close()

	parentPrefix := ".." + string(os.PathSeparator)
	tr := tar.NewReader(tarFile)
	for {
		header, err := tr.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return fmt.Errorf("error reading tar archive entry: %w", err)
		}
		if header == nil {
			continue
		}

		destFilePath := filepath.Join(dest, header.Name)

		// Path traversal check: entry names come from the archive and may
		// contain ".." components, so refuse anything that escapes dest.
		rel, relErr := filepath.Rel(dest, destFilePath)
		if relErr != nil || rel == ".." || strings.HasPrefix(rel, parentPrefix) {
			return fmt.Errorf("%s: illegal file path", destFilePath)
		}

		if header.Typeflag == tar.TypeDir {
			if err := os.MkdirAll(destFilePath, 0755); err != nil {
				return fmt.Errorf("error creating new directory %s: %w", destFilePath, err)
			}
			continue
		}

		// Each entry is handled in its own call so that its file handle is
		// released before the next entry is processed.
		if err := writeTarEntry(tr, destFilePath); err != nil {
			return err
		}
	}
}

// writeTarEntry copies the current tar entry from src into the file at
// destFilePath and closes the file before returning.
func writeTarEntry(src io.Reader, destFilePath string) (err error) {
	file, fileErr := OpenFile(destFilePath)
	if fileErr != nil {
		return fmt.Errorf("unable to open local file '%s' for writing: %w", destFilePath, fileErr)
	}
	defer func() {
		// A failed close of a written file can mean lost data, so report it
		// unless an earlier error is already being returned.
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("error closing local file '%s': %w", destFilePath, closeErr)
		}
	}()

	if _, err = io.Copy(file, src); err != nil {
		return fmt.Errorf("error writing tar resource to local file '%s': %w", destFilePath, err)
	}
	return nil
}

// ExtractGzip decompresses the gzip file at filePath and writes the result to
// the file path dest, which is opened with OpenFile.
func ExtractGzip(filePath string, dest string) error {
	compressed, openErr := os.Open(filePath)
	if openErr != nil {
		return fmt.Errorf("unable to open '%s' for reading: %w", filePath, openErr)
	}
	defer compressed.Close()

	decompressor, gzErr := gzip.NewReader(compressed)
	if gzErr != nil {
		return fmt.Errorf("unable to create gzip reader: %w", gzErr)
	}
	defer decompressor.Close()

	out, outErr := OpenFile(dest)
	if outErr != nil {
		return fmt.Errorf("unable to open local file '%s' for writing: %w", dest, outErr)
	}
	defer out.Close()

	_, copyErr := io.Copy(out, decompressor)
	if copyErr == nil {
		return nil
	}
	return fmt.Errorf("error writing gzip resource to local file '%s': %w", dest, copyErr)
}

// GetFileFormat identifies the format of the file at filePath by comparing its
// leading bytes against the signatures in fileFormats, in table order.
//
// It returns a pointer to a copy of the first matching entry. It returns nil
// if no signature matches or if the file cannot be opened or read. Files that
// are shorter than the longest signature window are still checked against
// every signature that fits within their length.
func GetFileFormat(filePath string) *FileFormat {
	file, err := os.Open(filePath)
	if err != nil {
		return nil
	}
	defer file.Close()

	// Read just enough to cover the furthest signature in the table.
	peekLen := 0
	for _, format := range fileFormats {
		if end := format.Offset + len(format.MagicBytes); end > peekLen {
			peekLen = end
		}
	}

	r := bufio.NewReader(file)
	fileBytes, err := r.Peek(peekLen)
	// Peek reports io.EOF when the file is shorter than peekLen but still
	// returns the bytes it could read; only other errors are fatal.
	if err != nil && err != io.EOF {
		return nil
	}

	for _, format := range fileFormats {
		end := format.Offset + len(format.MagicBytes)
		if end > len(fileBytes) {
			// File is too short to contain this signature.
			continue
		}
		if bytes.Equal(fileBytes[format.Offset:end], format.MagicBytes) {
			match := format
			return &match
		}
	}
	return nil
}
191 changes: 191 additions & 0 deletions pullman/helpers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
// Copyright 2022 IBM Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pullman

import (
"archive/tar"
"archive/zip"
"bytes"
"compress/gzip"
"io/ioutil"
"os"
"path/filepath"
"strings"
"testing"

"github.com/stretchr/testify/assert"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

// generatedDirectory is the directory in which the test archives are created
// and into which they are extracted; tearDown removes it.
const generatedDirectory = "generated"

var (
	// Locations of the generated test archives.
	zipFilePath   = filepath.Join(generatedDirectory, "test-archive.zip")
	tarFilePath   = filepath.Join(generatedDirectory, "test-archive.tar")
	tarGzFilePath = filepath.Join(generatedDirectory, "test-archive.tar.gz")

	// log is the logger used by the archive generation helpers.
	log = zap.New(zap.UseDevMode(true))
)

// files lists the entries placed into every generated archive together with
// the content each one is expected to have after extraction.
var files = []struct {
	Name string
	Body string
}{
	{Name: "nested/path/file1.txt", Body: "Foo"},
	{Name: "file2.txt", Body: "Bar"},
	{Name: "file3.txt", Body: "Fun"},
}

// generateZip builds a zip archive in memory containing every entry of the
// files fixture and writes it to zipFilePath. Any failure is logged and
// terminates the process with exit code 1.
func generateZip() {
	buf := new(bytes.Buffer)
	zipWriter := zip.NewWriter(buf)

	for _, file := range files {
		f, err := zipWriter.Create(file.Name)
		if err != nil {
			log.Error(err, "Failed to add file to test zip file")
			os.Exit(1)
		}
		if _, err := f.Write([]byte(file.Body)); err != nil {
			log.Error(err, "Failed to write file to test zip file")
			os.Exit(1)
		}
	}

	// Close finishes the archive; if it fails the buffer does not hold a
	// complete zip file, so stop instead of writing it to disk.
	if err := zipWriter.Close(); err != nil {
		log.Error(err, "Failed to close zip writer")
		os.Exit(1)
	}

	writeBytes(buf.Bytes(), zipFilePath)
}

// generateTar builds a tar archive in memory containing every entry of the
// files fixture and writes it to tarFilePath. Any failure is logged and
// terminates the process with exit code 1.
func generateTar() {
	buf := new(bytes.Buffer)
	tarWriter := tar.NewWriter(buf)

	for _, file := range files {
		header := &tar.Header{
			Name: file.Name,
			Mode: 0600,
			Size: int64(len(file.Body)),
		}

		if err := tarWriter.WriteHeader(header); err != nil {
			log.Error(err, "Failed to write header to test tar file")
			os.Exit(1)
		}
		if _, err := tarWriter.Write([]byte(file.Body)); err != nil {
			log.Error(err, "Failed to write file body to test tar file")
			os.Exit(1)
		}
	}

	// The writer is closed exactly once, before the buffer is read, so the
	// archive is complete when it is written to disk. If closing fails the
	// buffer does not hold a complete archive, so stop instead of writing it.
	if err := tarWriter.Close(); err != nil {
		log.Error(err, "Failed to close tar writer")
		os.Exit(1)
	}

	writeBytes(buf.Bytes(), tarFilePath)
}

// generateTarGz builds a gzip-compressed tar archive in memory containing
// every entry of the files fixture and writes it to tarGzFilePath. Any failure
// is logged and terminates the process with exit code 1.
func generateTarGz() {
	buf := new(bytes.Buffer)
	gzipWriter := gzip.NewWriter(buf)
	tarWriter := tar.NewWriter(gzipWriter)

	for _, file := range files {
		header := &tar.Header{
			Name: file.Name,
			Mode: 0600,
			Size: int64(len(file.Body)),
		}

		if err := tarWriter.WriteHeader(header); err != nil {
			log.Error(err, "Failed to write header to test tar.gz file")
			os.Exit(1)
		}
		if _, err := tarWriter.Write([]byte(file.Body)); err != nil {
			log.Error(err, "Failed to write file body to test tar.gz file")
			os.Exit(1)
		}
	}

	// Close order matters: the tar writer must be closed first so that its
	// final padding and end-of-archive trailer pass through the gzip writer
	// before the gzip stream itself is finished.
	if err := tarWriter.Close(); err != nil {
		log.Error(err, "Failed to close tar writer")
		os.Exit(1)
	}
	if err := gzipWriter.Close(); err != nil {
		log.Error(err, "Failed to close gzip writer")
		os.Exit(1)
	}

	writeBytes(buf.Bytes(), tarGzFilePath)
}

// writeBytes stores the given bytes in the file at outputPath, creating the
// parent directories of outputPath first. Any failure is logged and terminates
// the process with exit code 1.
func writeBytes(bytes []byte, outputPath string) {
	parentDir := filepath.Dir(outputPath)
	mkdirErr := os.MkdirAll(parentDir, os.ModePerm)
	if mkdirErr != nil {
		log.Error(mkdirErr, "Failed to create archive parent directories")
		os.Exit(1)
	}

	writeErr := ioutil.WriteFile(outputPath, bytes, 0777)
	if writeErr != nil {
		log.Error(writeErr, "Failed to write archive file to disk")
		os.Exit(1)
	}
}

// tearDown deletes generatedDirectory and everything inside it. The result of
// the removal is not checked.
func tearDown() {
	_ = os.RemoveAll(generatedDirectory)
}

// Test_ExtractZip generates the zip fixture, extracts it into
// generatedDirectory and checks that every fixture file has the expected
// content.
func Test_ExtractZip(t *testing.T) {
	generateZip()
	defer tearDown()

	extractErr := ExtractZip(zipFilePath, generatedDirectory)
	assert.NoError(t, extractErr)

	for i := range files {
		want := files[i]
		extractedPath := filepath.Join(generatedDirectory, want.Name)
		got, readErr := os.ReadFile(extractedPath)
		assert.NoError(t, readErr)
		assert.Equal(t, want.Body, string(got))
	}
}

// Test_ExtractTar generates the tar fixture, extracts it into
// generatedDirectory and checks that every fixture file has the expected
// content.
func Test_ExtractTar(t *testing.T) {
	generateTar()
	defer tearDown()

	extractErr := ExtractTar(tarFilePath, generatedDirectory)
	assert.NoError(t, extractErr)

	for i := range files {
		want := files[i]
		extractedPath := filepath.Join(generatedDirectory, want.Name)
		got, readErr := os.ReadFile(extractedPath)
		assert.NoError(t, readErr)
		assert.Equal(t, want.Body, string(got))
	}
}

// Test_ExtractTarGz generates the tar.gz fixture, decompresses it to a tar
// file with ExtractGzip, extracts that tar file with ExtractTar and checks that
// every fixture file has the expected content.
func Test_ExtractTarGz(t *testing.T) {
	generateTarGz()
	defer tearDown()

	// Dropping the ".gz" suffix gives the path of the intermediate tar file.
	intermediateTar := strings.TrimSuffix(tarGzFilePath, ".gz")

	gunzipErr := ExtractGzip(tarGzFilePath, intermediateTar)
	assert.NoError(t, gunzipErr)

	untarErr := ExtractTar(intermediateTar, generatedDirectory)
	assert.NoError(t, untarErr)

	for i := range files {
		want := files[i]
		extractedPath := filepath.Join(generatedDirectory, want.Name)
		got, readErr := os.ReadFile(extractedPath)
		assert.NoError(t, readErr)
		assert.Equal(t, want.Body, string(got))
	}
}
Loading

0 comments on commit 0c8d91e

Please sign in to comment.