From 07541a61dc3b19452cdecd3bf800788cdc78e67a Mon Sep 17 00:00:00 2001 From: Wayne Starr Date: Tue, 20 Feb 2024 15:09:07 -0600 Subject: [PATCH] fix: multi-part tarballs being mismatched sizes (#2314) ## Description This fixes multipart tarballs being different sizes with `--max-package-size` ## Related Issue Fixes #2313 ## Type of change - [X] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Other (security config, docs update, etc) ## Checklist before merging - [X] Test, docs, adr added or updated as needed - [X] [Contributor Guide Steps](https://github.com/defenseunicorns/zarf/blob/main/CONTRIBUTING.md#developer-workflow) followed --------- Co-authored-by: Austin Abro <37223396+AustinAbro321@users.noreply.github.com> Co-authored-by: Lucas Rodriguez Co-authored-by: Lucas Rodriguez --- .gitignore | 1 + src/pkg/packager/sources/split.go | 5 ++++ src/pkg/utils/io.go | 28 ++++++++++-------- src/test/e2e/05_tarball_test.go | 35 ++++++++++++++++++++--- src/test/packages/05-multi-part/zarf.yaml | 9 ++++-- 5 files changed, 59 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 4270dbb1fb..3d96988028 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ *.bak *.key *.crt +*.dat *.run.zstd *.tar *.tar.gz diff --git a/src/pkg/packager/sources/split.go b/src/pkg/packager/sources/split.go index 1aade2eee5..34d736fabb 100644 --- a/src/pkg/packager/sources/split.go +++ b/src/pkg/packager/sources/split.go @@ -85,6 +85,11 @@ func (s *SplitTarballSource) Collect(dir string) (string, error) { if _, err = io.Copy(pkgFile, f); err != nil { return "", fmt.Errorf("unable to copy file %s: %w", file, err) } + + // Close the file when done copying + if err := f.Close(); err != nil { + return "", fmt.Errorf("unable to close file %s: %w", file, err) + } } if err := utils.SHAsMatch(reassembled, pkgData.Sha256Sum); err != nil { diff --git a/src/pkg/utils/io.go b/src/pkg/utils/io.go index 7bda78ff7f..bb6ae79db2 100755 --- a/src/pkg/utils/io.go +++ b/src/pkg/utils/io.go @@ -339,7 +339,7 @@ func ReadFileByChunks(path string, chunkSizeBytes int) (chunks [][]byte, sha256s // - fileNames: list of file paths srcFile was split across // - sha256sum: sha256sum of the srcFile before splitting // - err: any errors encountered -func SplitFile(srcFile string, chunkSizeBytes int) (err error) { +func SplitFile(srcPath string, chunkSizeBytes int) (err error) { var fileNames []string var sha256sum string hash := sha256.New() @@ -353,7 +353,7 @@ func SplitFile(srcFile string, chunkSizeBytes int) (err error) { buf := make([]byte, bufferSize) // get file size - fi, err := os.Stat(srcFile) + fi, err := os.Stat(srcPath) if err != nil { return err } @@ -364,15 +364,15 @@ func SplitFile(srcFile string, chunkSizeBytes int) (err error) { progressBar := message.NewProgressBar(fileSize, title) defer progressBar.Stop() - // open file - file, err := os.Open(srcFile) - defer file.Close() + // open srcFile + srcFile, err := os.Open(srcPath) if err != nil { return err } + defer srcFile.Close() // create file path starting from part 001 - path := fmt.Sprintf("%s.part001", srcFile) + path := fmt.Sprintf("%s.part001", srcPath) chunkFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0644) if err != nil { return err @@ -384,7 +384,7 @@ func SplitFile(srcFile string, chunkSizeBytes int) (err error) { chunkBytesRemaining := chunkSizeBytes // Loop over the tarball hashing as we go and breaking it into chunks based on the chunkSizeBytes for { - bytesRead, err := file.Read(buf) + bytesRead, err := srcFile.Read(buf) if err != nil { if err == io.EOF { @@ -404,10 +404,14 @@ func SplitFile(srcFile string, chunkSizeBytes int) (err error) { if err != nil { return err } + err = chunkFile.Close() + if err != nil { + return err + } // create new file - path = fmt.Sprintf("%s.part%03d", srcFile, len(fileNames)+1) - chunkFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0644) + path = fmt.Sprintf("%s.part%03d", srcPath, len(fileNames)+1) + chunkFile, err = os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0644) if err != nil { return err } @@ -435,8 +439,8 @@ func SplitFile(srcFile string, chunkSizeBytes int) (err error) { title := fmt.Sprintf("[%d/%d] MB bytes written", progressBar.GetCurrent()/1000/1000, fileSize/1000/1000) progressBar.UpdateTitle(title) } - file.Close() - _ = os.RemoveAll(srcFile) + srcFile.Close() + _ = os.RemoveAll(srcPath) // calculate sha256 sum sha256sum = fmt.Sprintf("%x", hash.Sum(nil)) @@ -452,7 +456,7 @@ func SplitFile(srcFile string, chunkSizeBytes int) (err error) { } // write header file - path = fmt.Sprintf("%s.part000", srcFile) + path = fmt.Sprintf("%s.part000", srcPath) if err := os.WriteFile(path, jsonData, 0644); err != nil { return fmt.Errorf("unable to write the file %s: %w", path, err) } diff --git a/src/test/e2e/05_tarball_test.go b/src/test/e2e/05_tarball_test.go index 1e0e3c8473..f6df5f7e1b 100644 --- a/src/test/e2e/05_tarball_test.go +++ b/src/test/e2e/05_tarball_test.go @@ -5,6 +5,7 @@ package test import ( + "encoding/json" "fmt" "os" "path/filepath" @@ -28,14 +29,29 @@ func TestMultiPartPackage(t *testing.T) { e2e.CleanFiles(deployPath, outputFile) - // Create the package with a max size of 1MB - stdOut, stdErr, err := e2e.Zarf("package", "create", createPath, "--max-package-size=1", "--confirm") + // Create the package with a max size of 20MB + stdOut, stdErr, err := e2e.Zarf("package", "create", createPath, "--max-package-size=20", "--confirm") require.NoError(t, err, stdOut, stdErr) parts, err := filepath.Glob("zarf-package-multi-part-*") require.NoError(t, err) - // Length is 7 because there are 6 parts and 1 manifest - require.Len(t, parts, 7) + // Length is 4 because there are 3 parts and 1 manifest + require.Len(t, parts, 4) + // Check the file sizes are even + part1FileInfo, err := os.Stat(parts[1]) + require.NoError(t, err) + require.Equal(t, int64(20000000), part1FileInfo.Size()) + part2FileInfo, err := os.Stat(parts[2]) + require.NoError(t, err) + require.Equal(t, int64(20000000), part2FileInfo.Size()) + // Check the package data is correct + pkgData := types.ZarfSplitPackageData{} + part0File, err := os.ReadFile(parts[0]) + require.NoError(t, err) + err = json.Unmarshal(part0File, &pkgData) + require.NoError(t, err) + require.Equal(t, pkgData.Count, 3) + fmt.Printf("%#v", pkgData) stdOut, stdErr, err = e2e.Zarf("package", "deploy", deployPath, "--confirm") require.NoError(t, err, stdOut, stdErr) @@ -45,6 +61,17 @@ func TestMultiPartPackage(t *testing.T) { // deploying package combines parts back into single archive, check dir again to find all files parts, err = filepath.Glob("zarf-package-multi-part-*") + require.NoError(t, err) + // Length is 1 because `zarf package deploy` will recombine the file + require.Len(t, parts, 1) + // Ensure that the number of pkgData bytes was correct + fullFileInfo, err := os.Stat(parts[0]) + require.NoError(t, err) + require.Equal(t, pkgData.Bytes, fullFileInfo.Size()) + // Ensure that the pkgData shasum was correct (should be checked during deploy as well, but this is to double check) + err = utils.SHAsMatch(parts[0], pkgData.Sha256Sum) + require.NoError(t, err) + e2e.CleanFiles(parts...) e2e.CleanFiles(outputFile) } diff --git a/src/test/packages/05-multi-part/zarf.yaml b/src/test/packages/05-multi-part/zarf.yaml index 78b47ba606..0fb4a75652 100644 --- a/src/test/packages/05-multi-part/zarf.yaml +++ b/src/test/packages/05-multi-part/zarf.yaml @@ -6,8 +6,11 @@ metadata: components: - name: big-ol-file required: true - description: Single 5 MB file needed to demonstrate a multi-part package + description: Include a 50 MB file needed to demonstrate a multi-part package + actions: + onCreate: + before: + - cmd: dd if=/dev/urandom of=multi-part-demo.dat bs=1048576 count=50 files: - - source: https://zarf-public.s3-us-gov-west-1.amazonaws.com/examples/multi-part-demo.dat - shasum: 22ebd38c2f5e04821c87c924c910be57d2169c292f85b2936d53cae24ebf8055 + - source: multi-part-demo.dat target: multi-part-demo.dat