Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace (custom) base64 encoding of paths with simple quoting. #100

Merged
merged 2 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion basedirs/basedirs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,8 @@ func TestBaseDirs(t *testing.T) {
UIDs: []uint32{101},
FTs: expectedFTsBam,
Modtime: dbModTime,
}}))
},
}))

dcss, err = bd.calculateForGroup(2)
So(err, ShouldBeNil)
Expand Down
34 changes: 19 additions & 15 deletions cmd/stat.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,30 +37,34 @@ import (
"github.com/wtsi-ssg/wrstat/v5/summary"
)

const reportFrequency = 10 * time.Minute
const statOutputFileSuffix = ".stats"
const statUserGroupSummaryOutputFileSuffix = ".byusergroup"
const statGroupSummaryOutputFileSuffix = ".bygroup"
const statDGUTASummaryOutputFileSuffix = ".dguta"
const statLogOutputFileSuffix = ".log"
const lstatTimeout = 10 * time.Second
const lstatAttempts = 3

var statDebug bool
var statCh string
const (
reportFrequency = 10 * time.Minute
statOutputFileSuffix = ".stats"
statUserGroupSummaryOutputFileSuffix = ".byusergroup"
statGroupSummaryOutputFileSuffix = ".bygroup"
statDGUTASummaryOutputFileSuffix = ".dguta"
statLogOutputFileSuffix = ".log"
lstatTimeout = 10 * time.Second
lstatAttempts = 3
)

var (
statDebug bool
statCh string
)

// statCmd represents the stat command.
var statCmd = &cobra.Command{
Use: "stat",
Short: "Stat paths",
Long: `Stat paths in a given file.

Given a file containing a base64 encoded absolute file path per line (eg. as
produced by 'wrstat walk'), this creates a new file with stats for each of those
file paths. The new file is named after the input file with a ".stats" suffix.
Given a file containing a quoted absolute file path per line (eg. as produced
by 'wrstat walk'), this creates a new file with stats for each of those file
paths. The new file is named after the input file with a ".stats" suffix.

The output file format is 11 tab separated columns with the following contents:
1. Base64 encoded path to the file.
1. Quoted path to the file.
2. File size in bytes. If this is greater than the number of bytes in blocks
allocated, this will be the number of bytes in allocated blocks. (This is to
account for files with holes in them; as a byproduct, symbolic links will
Expand Down
26 changes: 14 additions & 12 deletions cmd/walk.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,14 @@ const (
)

// options for this cmd.
var outputDir string
var depGroup string
var walkInodesPerJob int
var walkNumOfJobs int
var walkID string
var walkCh string
var (
outputDir string
depGroup string
walkInodesPerJob int
walkNumOfJobs int
walkID string
walkCh string
)

// walkCmd represents the walk command.
var walkCmd = &cobra.Command{
Expand All @@ -68,11 +70,10 @@ user that can sudo without a password when running wrstat, and supply the --sudo
option to this command.

For each entry recursively within the directory of interest, their paths are
quickly retrieved (without doing any expensive stat calls) and written (base64
encoded) to output files in the given output directory. The number of files is
such that they will each contain about --inodes_per_stat entries (or if
--num_stats was supplied greater than zero, then there will be that number of
output files).
quickly retrieved (without doing any expensive stat calls) and written (quoted)
to output files in the given output directory. The number of files is such that
they will each contain about --inodes_per_stat entries (or if --num_stats was
supplied greater than zero, then there will be that number of output files).

For each output file, a 'wrstat stat' job is then added to wr's queue with the
given dependency group. For the meaning of the --ch option which is passed
Expand Down Expand Up @@ -149,7 +150,8 @@ func statRepGrp(dir, unique string) string {

// walkDirAndScheduleStats does the main work.
func walkDirAndScheduleStats(desiredDir, outputDir string, statJobs, inodes int, depGroup, repGroup,
yamlPath string, s *scheduler.Scheduler) {
yamlPath string, s *scheduler.Scheduler,
) {
n := statJobs
if n == 0 {
n = calculateSplitBasedOnInodes(inodes, desiredDir)
Expand Down
24 changes: 15 additions & 9 deletions combine/dguta_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"testing"
"time"

. "github.com/smartystreets/goconvey/convey"
"github.com/wtsi-ssg/wrstat/v5/dguta"
"github.com/wtsi-ssg/wrstat/v5/fs"
"github.com/wtsi-ssg/wrstat/v5/internal/encode"
"github.com/wtsi-ssg/wrstat/v5/summary"
)

Expand Down Expand Up @@ -172,18 +172,23 @@ func TestOldFile(t *testing.T) {
So(ds.Mtime, ShouldEqual, time.Unix(amtime2, 0))

Convey("and the DirGUTAges are set as expected", func() {
expectedSizes := [17]int64{tfs, tfs, tfs, tfs, tfs, tfs, tfs, tfs - 2,
tfs - 3, tfs, tfs, tfs, tfs, tfs, tfs, tfs - 6, tfs - 7}
expectedSizes := [17]int64{
tfs, tfs, tfs, tfs, tfs, tfs, tfs, tfs - 2,
tfs - 3, tfs, tfs, tfs, tfs, tfs, tfs, tfs - 6, tfs - 7,
}

expectedCounts := [17]int{expectedCount, expectedCount, expectedCount,
expectedCounts := [17]int{
expectedCount, expectedCount, expectedCount,
expectedCount, expectedCount, expectedCount, expectedCount,
expectedCount - 1, expectedCount - 2, expectedCount, expectedCount,
expectedCount, expectedCount, expectedCount, expectedCount,
expectedCount - 2, expectedCount - 3}
expectedCount - 2, expectedCount - 3,
}

expectedAtime := amtime3 - 1

expectedMtimes := [17]int64{amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime2,
expectedMtimes := [17]int64{
amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime2,
amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime1, amtime3,
}

Expand Down Expand Up @@ -250,7 +255,8 @@ func createDGUTAFile(t *testing.T, tempDir, fileName, content string) string {
// 1668768807 /lustre 1313 13912 0 0 1 0 1668768807
// /lustre/scratch123 1313 13912 0 0 1 0 1668768807.
func buildDGUTAContent(directory, gid, uid string, filetype, nestedFiles, //nolint:unparam
fileSize, oldestAtime, newestAtime, refTime int64) string {
fileSize, oldestAtime, newestAtime, refTime int64,
) string {
var dgutaContents string

splitDir := recursivePath(directory)
Expand All @@ -263,7 +269,7 @@ func buildDGUTAContent(directory, gid, uid string, filetype, nestedFiles, //noli
continue
}

dgutaContents += encode.Base64Encode(split) + guta +
dgutaContents += strconv.Quote(split) + guta +
fmt.Sprintf("\t%d\t%d\t%d\t%d\n",
nestedFiles, fileSize, oldestAtime, newestAtime)
}
Expand All @@ -285,7 +291,7 @@ func recursivePath(path string) []string {
count := strings.Count(path, "/")
newPath := path

var dgutaContents = make([]string, count+1)
dgutaContents := make([]string, count+1)
dgutaContents[count] = path

for i := count - 1; i >= 0; i-- {
Expand Down
7 changes: 3 additions & 4 deletions combine/stat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ import (
"fmt"
"os"
"path/filepath"
"strconv"
"testing"

. "github.com/smartystreets/goconvey/convey"
"github.com/wtsi-ssg/wrstat/v5/fs"
"github.com/wtsi-ssg/wrstat/v5/internal/encode"
)

// TestStatFiles tests that the stat files concatenate and compress properly.
Expand All @@ -53,7 +53,7 @@ func TestStatFiles(t *testing.T) {
actualContent, err := fs.ReadCompressedFile(outputPath)
So(err, ShouldBeNil)

encodedDir := encode.Base64Encode(dir)
encodedDir := strconv.Quote(dir)

expectedOutput := fmt.Sprintf(
"%s\t5\t345\t152\t217434\t82183\t147\t'f'\t3\t7\t28472\t\n"+
Expand Down Expand Up @@ -82,7 +82,7 @@ func buildStatFiles(t *testing.T) (string, []*os.File, *os.File, string) {

_, err = f.WriteString(fmt.Sprintf(
"%s\t%d\t%d\t%d\t%d\t%d\t%d\t%q\t%d\t%d\t%d\t\n",
encode.Base64Encode(dir),
strconv.Quote(dir),
5+i,
345,
152,
Expand All @@ -93,7 +93,6 @@ func buildStatFiles(t *testing.T) (string, []*os.File, *os.File, string) {
3+i,
7,
28472))

if err != nil {
t.Fatal(err)
}
Expand Down
Loading
Loading