Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: test and refactor split file #2708

Merged
merged 2 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/pkg/layout/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@
return fmt.Errorf("unable to split the package archive into multiple files: must be less than 1,000 files")
}
message.Notef("Package is larger than %dMB, splitting into multiple files", maxPackageSizeMB)
err := utils.SplitFile(destinationTarball, chunkSize)
err := splitFile(destinationTarball, chunkSize)

Check warning on line 246 in src/pkg/layout/package.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/package.go#L246

Added line #L246 was not covered by tests
if err != nil {
return fmt.Errorf("unable to split the package archive into multiple files: %w", err)
}
Expand Down
109 changes: 109 additions & 0 deletions src/pkg/layout/split.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: 2021-Present The Zarf Authors

// Package layout contains functions for interacting with Zarf's package layout on disk.
package layout

import (
"crypto/sha256"
"encoding/json"
"errors"
"fmt"
"io"
"os"

"github.com/defenseunicorns/pkg/helpers/v2"
"github.com/defenseunicorns/zarf/src/pkg/message"
"github.com/defenseunicorns/zarf/src/types"
)

// splitFile will split the file into chunks and remove the original file.
func splitFile(srcPath string, chunkSize int) error {
srcFile, err := os.Open(srcPath)
if err != nil {
return err

Check warning on line 24 in src/pkg/layout/split.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/split.go#L24

Added line #L24 was not covered by tests
}
defer srcFile.Close()
fi, err := srcFile.Stat()
if err != nil {
return err

Check warning on line 29 in src/pkg/layout/split.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/split.go#L29

Added line #L29 was not covered by tests
}

title := fmt.Sprintf("[0/%d] MB bytes written", fi.Size()/1000/1000)
progressBar := message.NewProgressBar(fi.Size(), title)
defer progressBar.Close()

hash := sha256.New()
fileCount := 0
for {
path := fmt.Sprintf("%s.part%03d", srcPath, fileCount+1)
dstFile, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, helpers.ReadAllWriteUser)
if err != nil {
return err

Check warning on line 42 in src/pkg/layout/split.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/split.go#L42

Added line #L42 was not covered by tests
}
defer dstFile.Close()

written, copyErr := io.CopyN(dstFile, srcFile, int64(chunkSize))
if copyErr != nil && !errors.Is(copyErr, io.EOF) {
return err

Check warning on line 48 in src/pkg/layout/split.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/split.go#L48

Added line #L48 was not covered by tests
}
progressBar.Add(int(written))
title := fmt.Sprintf("[%d/%d] MB bytes written", progressBar.GetCurrent()/1000/1000, fi.Size()/1000/1000)
progressBar.Updatef(title)

_, err = dstFile.Seek(0, io.SeekStart)
if err != nil {
return err

Check warning on line 56 in src/pkg/layout/split.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/split.go#L56

Added line #L56 was not covered by tests
}
_, err = io.Copy(hash, dstFile)
if err != nil {
return err

Check warning on line 60 in src/pkg/layout/split.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/split.go#L60

Added line #L60 was not covered by tests
}
err = dstFile.Close()
if err != nil {
return err

Check warning on line 64 in src/pkg/layout/split.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/split.go#L64

Added line #L64 was not covered by tests
}

// EOF error could be returned on 0 bytes written.
if written == 0 {
err = os.Remove(path)
if err != nil {
return err

Check warning on line 71 in src/pkg/layout/split.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/split.go#L71

Added line #L71 was not covered by tests
}
break
}

fileCount++
if errors.Is(copyErr, io.EOF) {
break
}
}

// Remove original file
err = srcFile.Close()
if err != nil {
return err

Check warning on line 85 in src/pkg/layout/split.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/split.go#L85

Added line #L85 was not covered by tests
}
err = os.Remove(srcPath)
if err != nil {
return err

Check warning on line 89 in src/pkg/layout/split.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/split.go#L89

Added line #L89 was not covered by tests
}

// Write header file
data := types.ZarfSplitPackageData{
Count: fileCount,
Bytes: fi.Size(),
Sha256Sum: fmt.Sprintf("%x", hash.Sum(nil)),
}
b, err := json.Marshal(data)
if err != nil {
return fmt.Errorf("unable to marshal the split package data: %w", err)

Check warning on line 100 in src/pkg/layout/split.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/split.go#L100

Added line #L100 was not covered by tests
}
path := fmt.Sprintf("%s.part000", srcPath)
if err := os.WriteFile(path, b, helpers.ReadAllWriteUser); err != nil {
return fmt.Errorf("unable to write the file %s: %w", path, err)

Check warning on line 104 in src/pkg/layout/split.go

View check run for this annotation

Codecov / codecov/patch

src/pkg/layout/split.go#L104

Added line #L104 was not covered by tests
}
progressBar.Successf("Package split across %d files", fileCount+1)

return nil
}
96 changes: 96 additions & 0 deletions src/pkg/layout/split_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: 2021-Present The Zarf Authors

package layout

import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"testing"

"github.com/defenseunicorns/zarf/src/types"
"github.com/stretchr/testify/require"
)

// TestSplitFile verifies that splitFile removes the source file, writes the
// expected number of correctly sized chunk files, and records the chunk count,
// original byte count, and SHA-256 sum in the ".part000" header file.
func TestSplitFile(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name                 string
		fileSize             int
		chunkSize            int
		expectedFileSize     int64
		expectedLastFileSize int64
		expectedFileCount    int
		expectedSha256Sum    string
	}{
		{
			name:                 "split evenly",
			fileSize:             2048,
			chunkSize:            16,
			expectedFileSize:     16,
			expectedLastFileSize: 16,
			expectedFileCount:    128,
			expectedSha256Sum:    "93ecad679eff0df493aaf5d7d615211b0f1d7a919016efb15c98f0b8efb1ba43",
		},
		{
			name:                 "split with remainder",
			fileSize:             2048,
			chunkSize:            10,
			expectedFileSize:     10,
			expectedLastFileSize: 8,
			expectedFileCount:    205,
			expectedSha256Sum:    "fe8460f4d53d3578aa37191acf55b3db7bbcb706056f4b6b02a0c70f24b0d95a",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			dir := t.TempDir()
			name := "random"
			p := filepath.Join(dir, name)

			// Fill the source with a single repeated byte (the chunk size) so the
			// expected checksum is deterministic for each case.
			b := make([]byte, tt.fileSize)
			for i := range b {
				b[i] = byte(tt.chunkSize)
			}
			// os.WriteFile creates, writes, and closes the file, surfacing any error from all three steps.
			require.NoError(t, os.WriteFile(p, b, 0o600))

			err := splitFile(p, tt.chunkSize)
			require.NoError(t, err)

			// The original must be gone, leaving the header plus one file per chunk.
			_, err = os.Stat(p)
			require.ErrorIs(t, err, os.ErrNotExist)
			entries, err := os.ReadDir(dir)
			require.NoError(t, err)
			require.Len(t, entries, tt.expectedFileCount+1)

			// os.ReadDir returns entries sorted by filename, so entries[0] is the
			// ".part000" header and the chunks follow in numeric order.
			for i, entry := range entries[1:] {
				require.Equal(t, fmt.Sprintf("%s.part%03d", name, i+1), entry.Name())

				fi, err := entry.Info()
				require.NoError(t, err)
				// len(entries)-2 is the index of the final chunk within entries[1:].
				if i == len(entries)-2 {
					require.Equal(t, tt.expectedLastFileSize, fi.Size())
				} else {
					require.Equal(t, tt.expectedFileSize, fi.Size())
				}
			}

			b, err = os.ReadFile(filepath.Join(dir, fmt.Sprintf("%s.part000", name)))
			require.NoError(t, err)
			var data types.ZarfSplitPackageData
			err = json.Unmarshal(b, &data)
			require.NoError(t, err)
			require.Equal(t, tt.expectedFileCount, data.Count)
			require.Equal(t, int64(tt.fileSize), data.Bytes)
			require.Equal(t, tt.expectedSha256Sum, data.Sha256Sum)
		})
	}
}
142 changes: 0 additions & 142 deletions src/pkg/utils/io.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,13 @@
package utils

import (
"crypto/sha256"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"

"github.com/defenseunicorns/pkg/helpers/v2"
"github.com/defenseunicorns/zarf/src/config"
"github.com/defenseunicorns/zarf/src/pkg/message"
"github.com/defenseunicorns/zarf/src/types"
)

const (
Expand Down Expand Up @@ -73,141 +69,3 @@ func GetFinalExecutableCommand() (string, error) {

return zarfCommand, err
}

// SplitFile takes a srcPath and splits the file into chunk files of at most
// chunkSizeBytes bytes each, named "<srcPath>.part001", "<srcPath>.part002", ...
// It then writes a metadata file "<srcPath>.part000" containing:
//   - sha256sum of the original file
//   - number of bytes in the original file
//   - number of files the srcFile was split into
//
// SplitFile deletes the original file once all chunks are written.
//
// It returns an error if chunkSizeBytes is not positive or if reading the
// source, writing a chunk, or writing the metadata file fails.
func SplitFile(srcPath string, chunkSizeBytes int) (err error) {
	// A zero chunk size would produce a zero-length read buffer, and Read would
	// then return (0, nil) forever; reject it instead of spinning.
	if chunkSizeBytes <= 0 {
		return fmt.Errorf("unable to split the file %s: chunk size must be greater than zero, got %d", srcPath, chunkSizeBytes)
	}

	var fileNames []string
	hash := sha256.New()

	// Set buffer size to some multiple of 4096 KiB for modern file system cluster sizes
	bufferSize := 16 * 1024 * 1024 // 16 MiB
	// if chunkSizeBytes is less than bufferSize, use chunkSizeBytes as bufferSize for simplicity.
	// This also guarantees a single read never spans more than two chunks.
	if chunkSizeBytes < bufferSize {
		bufferSize = chunkSizeBytes
	}
	buf := make([]byte, bufferSize)

	// get file size
	fi, err := os.Stat(srcPath)
	if err != nil {
		return err
	}
	fileSize := fi.Size()

	// start progress bar
	title := fmt.Sprintf("[0/%d] MB bytes written", fileSize/1000/1000)
	progressBar := message.NewProgressBar(fileSize, title)
	defer progressBar.Close()

	// open srcFile
	srcFile, err := os.Open(srcPath)
	if err != nil {
		return err
	}
	defer srcFile.Close()

	// create file path starting from part 001; O_TRUNC discards any stale part file contents
	path := fmt.Sprintf("%s.part001", srcPath)
	chunkFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, helpers.ReadAllWriteUser)
	if err != nil {
		return err
	}
	fileNames = append(fileNames, path)
	// Close whichever chunk file is current if we return early. On the success
	// path it has already been closed, so the error here is deliberately ignored.
	defer func() { _ = chunkFile.Close() }()

	// setup counter for tracking how many bytes are left to write to file
	chunkBytesRemaining := chunkSizeBytes
	// Loop over the tarball hashing as we go and breaking it into chunks based on the chunkSizeBytes
	for {
		bytesRead, readErr := srcFile.Read(buf)

		// Per the io.Reader contract, process any bytes returned before looking at the error.
		if bytesRead > 0 {
			data := buf[:bytesRead]

			// Pass data to hash (hash.Hash.Write never returns an error)
			hash.Write(data)

			// handle if we should split the data between two chunks
			if chunkBytesRemaining < bytesRead {
				// write the remaining chunk size to file
				if _, err := chunkFile.Write(data[:chunkBytesRemaining]); err != nil {
					return err
				}
				if err := chunkFile.Close(); err != nil {
					return err
				}

				// create new file
				path = fmt.Sprintf("%s.part%03d", srcPath, len(fileNames)+1)
				chunkFile, err = os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, helpers.ReadAllWriteUser)
				if err != nil {
					return err
				}
				fileNames = append(fileNames, path)

				// write to new file where we left off
				if _, err := chunkFile.Write(data[chunkBytesRemaining:]); err != nil {
					return err
				}

				// set chunkBytesRemaining considering how many bytes were actually written to the new file
				chunkBytesRemaining = chunkSizeBytes - (bytesRead - chunkBytesRemaining)
			} else {
				if _, err := chunkFile.Write(data); err != nil {
					return err
				}
				chunkBytesRemaining -= bytesRead
			}

			// update progress bar with the bytes actually processed
			progressBar.Add(bytesRead)
			title := fmt.Sprintf("[%d/%d] MB bytes written", progressBar.GetCurrent()/1000/1000, fileSize/1000/1000)
			progressBar.Updatef(title)
		}

		if readErr != nil {
			if readErr == io.EOF {
				// At end of file, break out of loop
				break
			}
			return readErr
		}
	}

	// Close the final chunk explicitly so a failed flush is reported before the source is deleted.
	if err := chunkFile.Close(); err != nil {
		return err
	}
	// The source was only read, so a Close error carries no data-loss risk.
	_ = srcFile.Close()
	// Best-effort removal of the original file; a failure here is intentionally not fatal.
	_ = os.RemoveAll(srcPath)

	// Marshal the data into a json file.
	jsonData, err := json.Marshal(types.ZarfSplitPackageData{
		Count:     len(fileNames),
		Bytes:     fileSize,
		Sha256Sum: fmt.Sprintf("%x", hash.Sum(nil)),
	})
	if err != nil {
		return fmt.Errorf("unable to marshal the split package data: %w", err)
	}

	// write header file
	path = fmt.Sprintf("%s.part000", srcPath)
	if err := os.WriteFile(path, jsonData, helpers.ReadAllWriteUser); err != nil {
		return fmt.Errorf("unable to write the file %s: %w", path, err)
	}
	// The reported total includes the header file.
	fileNames = append(fileNames, path)
	progressBar.Successf("Package split across %d files", len(fileNames))

	return nil
}
Loading