From 725969f7775b66e0888c2183e921486b90f937a3 Mon Sep 17 00:00:00 2001 From: Michael Woolnough Date: Tue, 29 Oct 2024 09:23:03 +0000 Subject: [PATCH] Replace (custom) base64 encoding of paths with simple quoting. --- cmd/stat.go | 34 ++-- cmd/walk.go | 26 +-- combine/dgut_test.go | 9 +- combine/stat_test.go | 7 +- dgut/dgut_test.go | 41 +++-- dgut/parse.go | 9 +- dgut/tree_test.go | 93 +++++++---- internal/encode/encode.go | 63 ------- internal/encode/encode_test.go | 17 -- main_test.go | 290 ++++++++++++++++----------------- stat/file.go | 9 +- stat/file_test.go | 11 +- stat/paths.go | 10 +- stat/paths_test.go | 12 +- summary/dirgut.go | 11 +- summary/dirgut_test.go | 19 ++- summary/usergroup.go | 4 +- summary/usergroup_test.go | 3 +- walk/file.go | 8 +- walk/walk_test.go | 6 +- 20 files changed, 318 insertions(+), 364 deletions(-) delete mode 100644 internal/encode/encode.go delete mode 100644 internal/encode/encode_test.go diff --git a/cmd/stat.go b/cmd/stat.go index 6b690304..550cac1e 100644 --- a/cmd/stat.go +++ b/cmd/stat.go @@ -37,17 +37,21 @@ import ( "github.com/wtsi-ssg/wrstat/v5/summary" ) -const reportFrequency = 10 * time.Minute -const statOutputFileSuffix = ".stats" -const statUserGroupSummaryOutputFileSuffix = ".byusergroup" -const statGroupSummaryOutputFileSuffix = ".bygroup" -const statDGUTSummaryOutputFileSuffix = ".dgut" -const statLogOutputFileSuffix = ".log" -const lstatTimeout = 10 * time.Second -const lstatAttempts = 3 - -var statDebug bool -var statCh string +const ( + reportFrequency = 10 * time.Minute + statOutputFileSuffix = ".stats" + statUserGroupSummaryOutputFileSuffix = ".byusergroup" + statGroupSummaryOutputFileSuffix = ".bygroup" + statDGUTSummaryOutputFileSuffix = ".dgut" + statLogOutputFileSuffix = ".log" + lstatTimeout = 10 * time.Second + lstatAttempts = 3 +) + +var ( + statDebug bool + statCh string +) // statCmd represents the stat command. 
var statCmd = &cobra.Command{ @@ -55,12 +59,12 @@ var statCmd = &cobra.Command{ Short: "Stat paths", Long: `Stat paths in a given file. -Given a file containing a base64 encoded absolute file path per line (eg. as -produced by 'wrstat walk'), this creates a new file with stats for each of those -file paths. The new file is named after the input file with a ".stats" suffix. +Given a file containing a quoted absolute file path per line (eg. as produced +by 'wrstat walk'), this creates a new file with stats for each of those file +paths. The new file is named after the input file with a ".stats" suffix. The output file format is 11 tab separated columns with the following contents: -1. Base64 encoded path to the file. +1. Quoted path to the file. 2. File size in bytes. If this is greater than the number of bytes in blocks allocated, this will be the number of bytes in allocated blocks. (This is to account for files with holes in them; as a byproduct, symbolic links will diff --git a/cmd/walk.go b/cmd/walk.go index 8fea94e2..05aeddc6 100644 --- a/cmd/walk.go +++ b/cmd/walk.go @@ -45,12 +45,14 @@ const ( ) // options for this cmd. -var outputDir string -var depGroup string -var walkInodesPerJob int -var walkNumOfJobs int -var walkID string -var walkCh string +var ( + outputDir string + depGroup string + walkInodesPerJob int + walkNumOfJobs int + walkID string + walkCh string +) // walkCmd represents the walk command. var walkCmd = &cobra.Command{ @@ -68,11 +70,10 @@ user that can sudo without a password when running wrstat, and supply the --sudo option to this command. For each entry recursively within the directory of interest, their paths are -quickly retrieved (without doing any expensive stat calls) and written (base64 -encoded) to output files in the given output directory. The number of files is -such that they will each contain about --inodes_per_stat entries (or if ---num_stats was supplied greater than zero, then there will be that number of -output files). 
+quickly retrieved (without doing any expensive stat calls) and written (quoted) +to output files in the given output directory. The number of files is such that +they will each contain about --inodes_per_stat entries (or if --num_stats was +supplied greater than zero, then there will be that number of output files). For each output file, a 'wrstat stat' job is then added to wr's queue with the given dependency group. For the meaning of the --ch option which is passed @@ -149,7 +150,8 @@ func statRepGrp(dir, unique string) string { // walkDirAndScheduleStats does the main work. func walkDirAndScheduleStats(desiredDir, outputDir string, statJobs, inodes int, depGroup, repGroup, - yamlPath string, s *scheduler.Scheduler) { + yamlPath string, s *scheduler.Scheduler, +) { n := statJobs if n == 0 { n = calculateSplitBasedOnInodes(inodes, desiredDir) diff --git a/combine/dgut_test.go b/combine/dgut_test.go index 5d69c1c8..9e9fd930 100644 --- a/combine/dgut_test.go +++ b/combine/dgut_test.go @@ -30,13 +30,13 @@ import ( "fmt" "os" "path/filepath" + "strconv" "strings" "testing" . "github.com/smartystreets/goconvey/convey" "github.com/wtsi-ssg/wrstat/v5/dgut" "github.com/wtsi-ssg/wrstat/v5/fs" - "github.com/wtsi-ssg/wrstat/v5/internal/encode" "github.com/wtsi-ssg/wrstat/v5/summary" ) @@ -116,13 +116,14 @@ func buildDGUTFiles(t *testing.T) ([]string, string, string, string) { // /lustre 1313 13912 0 1 0 1668768807 // /lustre/scratch123 1313 13912 0 1 0 1668768807. 
func buildDGUTContent(directory, gid, uid string, filetype, nestedFiles, - fileSize, oldestAtime, newestAtime int) string { + fileSize, oldestAtime, newestAtime int, +) string { var DGUTContents string splitDir := recursivePath(directory) for _, split := range splitDir { - DGUTContents += encode.Base64Encode(split) + fmt.Sprintf("\t%s\t%s\t%d\t%d\t%d\t%d\t%d\n", + DGUTContents += strconv.Quote(split) + fmt.Sprintf("\t%s\t%s\t%d\t%d\t%d\t%d\t%d\n", gid, uid, filetype, nestedFiles, fileSize, oldestAtime, newestAtime) } @@ -137,7 +138,7 @@ func recursivePath(path string) []string { count := strings.Count(path, "/") newPath := path - var DGUTContents = make([]string, count+1) + DGUTContents := make([]string, count+1) DGUTContents[count] = path for i := count - 1; i >= 0; i-- { diff --git a/combine/stat_test.go b/combine/stat_test.go index 4ecafabf..8a0ccc33 100644 --- a/combine/stat_test.go +++ b/combine/stat_test.go @@ -30,11 +30,11 @@ import ( "fmt" "os" "path/filepath" + "strconv" "testing" . "github.com/smartystreets/goconvey/convey" "github.com/wtsi-ssg/wrstat/v5/fs" - "github.com/wtsi-ssg/wrstat/v5/internal/encode" ) // TestStatFiles tests that the stat files concatenate and compress properly. 
@@ -53,7 +53,7 @@ func TestStatFiles(t *testing.T) { actualContent, err := fs.ReadCompressedFile(outputPath) So(err, ShouldBeNil) - encodedDir := encode.Base64Encode(dir) + encodedDir := strconv.Quote(dir) expectedOutput := fmt.Sprintf( "%s\t5\t345\t152\t217434\t82183\t147\t'f'\t3\t7\t28472\t\n"+ @@ -82,7 +82,7 @@ func buildStatFiles(t *testing.T) (string, []*os.File, *os.File, string) { _, err = f.WriteString(fmt.Sprintf( "%s\t%d\t%d\t%d\t%d\t%d\t%d\t%q\t%d\t%d\t%d\t\n", - encode.Base64Encode(dir), + strconv.Quote(dir), 5+i, 345, 152, @@ -93,7 +93,6 @@ func buildStatFiles(t *testing.T) (string, []*os.File, *os.File, string) { 3+i, 7, 28472)) - if err != nil { t.Fatal(err) } diff --git a/dgut/dgut_test.go b/dgut/dgut_test.go index 903de596..0ec6e77c 100644 --- a/dgut/dgut_test.go +++ b/dgut/dgut_test.go @@ -28,48 +28,48 @@ package dgut import ( "math" "os" + "strconv" "strings" "testing" . "github.com/smartystreets/goconvey/convey" "github.com/ugorji/go/codec" internaldata "github.com/wtsi-ssg/wrstat/v5/internal/data" - "github.com/wtsi-ssg/wrstat/v5/internal/encode" "github.com/wtsi-ssg/wrstat/v5/summary" bolt "go.etcd.io/bbolt" ) func TestDGUT(t *testing.T) { Convey("You can parse a single line of dgut data", t, func() { - line := encode.Base64Encode("/") + "\t1\t101\t0\t3\t30\t50\t50\n" + line := strconv.Quote("/") + "\t1\t101\t0\t3\t30\t50\t50\n" dir, gut, err := parseDGUTLine(line) So(err, ShouldBeNil) So(dir, ShouldEqual, "/") So(gut, ShouldResemble, &GUT{GID: 1, UID: 101, FT: 0, Count: 3, Size: 30, Atime: 50, Mtime: 50}) Convey("But invalid data won't parse", func() { - _, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\t101\t0\t3\t50\t50\n") + _, _, err = parseDGUTLine(strconv.Quote("/") + "\t1\t101\t0\t3\t50\t50\n") So(err, ShouldEqual, ErrInvalidFormat) - _, _, err = parseDGUTLine(encode.Base64Encode("/") + "\tfoo\t101\t0\t3\t30\t50\t50\n") + _, _, err = parseDGUTLine(strconv.Quote("/") + "\tfoo\t101\t0\t3\t30\t50\t50\n") So(err, ShouldEqual, 
ErrInvalidFormat) - _, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\tfoo\t0\t3\t30\t50\t50\n") + _, _, err = parseDGUTLine(strconv.Quote("/") + "\t1\tfoo\t0\t3\t30\t50\t50\n") So(err, ShouldEqual, ErrInvalidFormat) - _, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\t101\tfoo\t3\t30\t50\t50\n") + _, _, err = parseDGUTLine(strconv.Quote("/") + "\t1\t101\tfoo\t3\t30\t50\t50\n") So(err, ShouldEqual, ErrInvalidFormat) - _, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\t101\t0\tfoo\t30\t50\t50\n") + _, _, err = parseDGUTLine(strconv.Quote("/") + "\t1\t101\t0\tfoo\t30\t50\t50\n") So(err, ShouldEqual, ErrInvalidFormat) - _, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\t101\t0\t3\tfoo\t50\t50\n") + _, _, err = parseDGUTLine(strconv.Quote("/") + "\t1\t101\t0\t3\tfoo\t50\t50\n") So(err, ShouldEqual, ErrInvalidFormat) - _, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\t101\t0\t3\t30\tfoo\t50\n") + _, _, err = parseDGUTLine(strconv.Quote("/") + "\t1\t101\t0\t3\t30\tfoo\t50\n") So(err, ShouldEqual, ErrInvalidFormat) - _, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\t101\t0\t3\t30\t50\tfoo\n") + _, _, err = parseDGUTLine(strconv.Quote("/") + "\t1\t101\t0\t3\t30\t50\tfoo\n") So(err, ShouldEqual, ErrInvalidFormat) So(err.Error(), ShouldEqual, "the provided data was not in dgut format") @@ -147,8 +147,10 @@ func TestDGUT(t *testing.T) { expectedUIDs := []uint32{101, 102} expectedGIDs := []uint32{1, 2} - expectedFTs := []summary.DirGUTFileType{summary.DGUTFileTypeTemp, - summary.DGUTFileTypeBam, summary.DGUTFileTypeCram, summary.DGUTFileTypeDir} + expectedFTs := []summary.DirGUTFileType{ + summary.DGUTFileTypeTemp, + summary.DGUTFileTypeBam, summary.DGUTFileTypeCram, summary.DGUTFileTypeDir, + } const numDirectories = 10 @@ -321,7 +323,8 @@ func TestDGUT(t *testing.T) { c, s, a, m, u, g, t, _, errd = db.DirInfo("/", &Filter{ GIDs: []uint32{1}, UIDs: []uint32{102}, - FTs: []summary.DirGUTFileType{summary.DGUTFileTypeTemp}}) + 
FTs: []summary.DirGUTFileType{summary.DGUTFileTypeTemp}, + }) So(errd, ShouldBeNil) So(c, ShouldEqual, 0) So(s, ShouldEqual, 0) @@ -381,9 +384,9 @@ func TestDGUT(t *testing.T) { }) Convey("Store()ing multiple times", func() { - data = strings.NewReader(encode.Base64Encode("/") + "\t3\t103\t7\t2\t2\t25\t25\n" + - encode.Base64Encode("/a/i") + "\t3\t103\t7\t1\t1\t25\t25\n" + - encode.Base64Encode("/i") + "\t3\t103\t7\t1\t1\t30\t30\n") + data = strings.NewReader(strconv.Quote("/") + "\t3\t103\t7\t2\t2\t25\t25\n" + + strconv.Quote("/a/i") + "\t3\t103\t7\t1\t1\t25\t25\n" + + strconv.Quote("/i") + "\t3\t103\t7\t1\t1\t30\t30\n") Convey("to the same db file doesn't work", func() { err = db.Store(data, 4) @@ -618,8 +621,10 @@ func testData(t *testing.T) (dgutData string, expectedRootGUTs GUTs, expected [] }, } - expectedKeys = []string{"/", "/a", "/a/b", "/a/b/d", "/a/b/d/f", - "/a/b/d/g", "/a/b/e", "/a/b/e/h", "/a/b/e/h/tmp", "/a/c", "/a/c/d"} + expectedKeys = []string{ + "/", "/a", "/a/b", "/a/b/d", "/a/b/d/f", + "/a/b/d/g", "/a/b/e", "/a/b/e/h", "/a/b/e/h/tmp", "/a/c", "/a/c/d", + } return dgutData, expectedRootGUTs, expected, expectedKeys } diff --git a/dgut/parse.go b/dgut/parse.go index b455b0f6..62e6bdb7 100644 --- a/dgut/parse.go +++ b/dgut/parse.go @@ -32,7 +32,6 @@ import ( "strconv" "strings" - "github.com/wtsi-ssg/wrstat/v5/internal/encode" "github.com/wtsi-ssg/wrstat/v5/summary" ) @@ -40,8 +39,10 @@ type Error string func (e Error) Error() string { return string(e) } -const ErrInvalidFormat = Error("the provided data was not in dgut format") -const ErrBlankLine = Error("the provided line had no information") +const ( + ErrInvalidFormat = Error("the provided data was not in dgut format") + ErrBlankLine = Error("the provided line had no information") +) const ( gutDataCols = 8 @@ -114,7 +115,7 @@ func parseDGUTLine(line string) (string, *GUT, error) { return "", nil, ErrBlankLine } - path, err := encode.Base64Decode(parts[0]) + path, err := 
strconv.Unquote(parts[0]) if err != nil { return "", nil, err } diff --git a/dgut/tree_test.go b/dgut/tree_test.go index 411453ba..b2b571fe 100644 --- a/dgut/tree_test.go +++ b/dgut/tree_test.go @@ -26,13 +26,13 @@ package dgut import ( + "strconv" "strings" "testing" "time" . "github.com/smartystreets/goconvey/convey" internaldata "github.com/wtsi-ssg/wrstat/v5/internal/data" - "github.com/wtsi-ssg/wrstat/v5/internal/encode" "github.com/wtsi-ssg/wrstat/v5/internal/fs" "github.com/wtsi-ssg/wrstat/v5/internal/split" "github.com/wtsi-ssg/wrstat/v5/summary" @@ -60,8 +60,10 @@ func TestTree(t *testing.T) { expectedUIDs := []uint32{101, 102} expectedGIDs := []uint32{1, 2} - expectedFTs := []summary.DirGUTFileType{summary.DGUTFileTypeTemp, - summary.DGUTFileTypeBam, summary.DGUTFileTypeCram, summary.DGUTFileTypeDir} + expectedFTs := []summary.DirGUTFileType{ + summary.DGUTFileTypeTemp, + summary.DGUTFileTypeBam, summary.DGUTFileTypeCram, summary.DGUTFileTypeDir, + } expectedUIDsOne := []uint32{101} expectedGIDsOne := []uint32{1} expectedFTsCram := []summary.DirGUTFileType{summary.DGUTFileTypeCram} @@ -78,32 +80,46 @@ func TestTree(t *testing.T) { di, err := tree.DirInfo("/", nil) So(err, ShouldBeNil) So(di, ShouldResemble, &DirInfo{ - Current: &DirSummary{"/", 14 + numDirectories, 85 + numDirectories*directorySize, - expectedAtime, expectedMtime, expectedUIDs, expectedGIDs, expectedFTs, dbModTime}, + Current: &DirSummary{ + "/", 14 + numDirectories, 85 + numDirectories*directorySize, + expectedAtime, expectedMtime, expectedUIDs, expectedGIDs, expectedFTs, dbModTime, + }, Children: []*DirSummary{ - {"/a", 14 + numDirectories, 85 + numDirectories*directorySize, - expectedAtime, expectedMtime, expectedUIDs, expectedGIDs, expectedFTs, dbModTime}, + { + "/a", 14 + numDirectories, 85 + numDirectories*directorySize, + expectedAtime, expectedMtime, expectedUIDs, expectedGIDs, expectedFTs, dbModTime, + }, }, }) di, err = tree.DirInfo("/a", nil) So(err, ShouldBeNil) So(di, 
ShouldResemble, &DirInfo{ - Current: &DirSummary{"/a", 14 + numDirectories, 85 + numDirectories*directorySize, - expectedAtime, expectedMtime, expectedUIDs, expectedGIDs, expectedFTs, dbModTime}, + Current: &DirSummary{ + "/a", 14 + numDirectories, 85 + numDirectories*directorySize, + expectedAtime, expectedMtime, expectedUIDs, expectedGIDs, expectedFTs, dbModTime, + }, Children: []*DirSummary{ - {"/a/b", 9 + 7, 80 + 7*directorySize, expectedAtime, time.Unix(80, 0), - expectedUIDs, expectedGIDsOne, expectedFTs, dbModTime}, - {"/a/c", 5 + 2, 5 + 2*directorySize, time.Unix(90, 0), time.Unix(90, 0), - []uint32{102}, []uint32{2}, expectedFTsCramAndDir, dbModTime}, + { + "/a/b", 9 + 7, 80 + 7*directorySize, expectedAtime, time.Unix(80, 0), + expectedUIDs, expectedGIDsOne, expectedFTs, dbModTime, + }, + { + "/a/c", 5 + 2, 5 + 2*directorySize, time.Unix(90, 0), time.Unix(90, 0), + []uint32{102}, + []uint32{2}, + expectedFTsCramAndDir, dbModTime, + }, }, }) di, err = tree.DirInfo("/a", &Filter{FTs: expectedFTsBam}) So(err, ShouldBeNil) So(di, ShouldResemble, &DirInfo{ - Current: &DirSummary{"/a", 2, 10, time.Unix(80, 0), time.Unix(80, 0), - expectedUIDsOne, expectedGIDsOne, expectedFTsBam, dbModTime}, + Current: &DirSummary{ + "/a", 2, 10, time.Unix(80, 0), time.Unix(80, 0), + expectedUIDsOne, expectedGIDsOne, expectedFTsBam, dbModTime, + }, Children: []*DirSummary{ {"/a/b", 2, 10, time.Unix(80, 0), time.Unix(80, 0), expectedUIDsOne, expectedGIDsOne, expectedFTsBam, dbModTime}, }, @@ -112,17 +128,28 @@ func TestTree(t *testing.T) { di, err = tree.DirInfo("/a/b/e/h/tmp", nil) So(err, ShouldBeNil) So(di, ShouldResemble, &DirInfo{ - Current: &DirSummary{"/a/b/e/h/tmp", 2, 5 + directorySize, time.Unix(80, 0), time.Unix(80, 0), - expectedUIDsOne, expectedGIDsOne, []summary.DirGUTFileType{summary.DGUTFileTypeTemp, - summary.DGUTFileTypeBam, summary.DGUTFileTypeDir}, dbModTime}, + Current: &DirSummary{ + "/a/b/e/h/tmp", 2, 5 + directorySize, time.Unix(80, 0), time.Unix(80, 0), + 
expectedUIDsOne, expectedGIDsOne, + []summary.DirGUTFileType{ + summary.DGUTFileTypeTemp, + summary.DGUTFileTypeBam, summary.DGUTFileTypeDir, + }, + dbModTime, + }, Children: nil, }) di, err = tree.DirInfo("/", &Filter{FTs: []summary.DirGUTFileType{summary.DGUTFileTypeCompressed}}) So(err, ShouldBeNil) So(di, ShouldResemble, &DirInfo{ - Current: &DirSummary{"/", 0, 0, time.Unix(0, 0), time.Unix(0, 0), - []uint32{}, []uint32{}, []summary.DirGUTFileType{}, dbModTime}, + Current: &DirSummary{ + "/", 0, 0, time.Unix(0, 0), time.Unix(0, 0), + []uint32{}, + []uint32{}, + []summary.DirGUTFileType{}, + dbModTime, + }, Children: nil, }) }) @@ -212,8 +239,10 @@ func TestTree(t *testing.T) { _, err := tree.DirInfo("/foo", nil) So(err, ShouldNotBeNil) - di := &DirInfo{Current: &DirSummary{"/", 14, 85, expectedAtime, expectedMtime, - expectedUIDs, expectedGIDs, expectedFTs, dbModTime}} + di := &DirInfo{Current: &DirSummary{ + "/", 14, 85, expectedAtime, expectedMtime, + expectedUIDs, expectedGIDs, expectedFTs, dbModTime, + }} err = tree.addChildInfo(di, []string{"/foo"}, nil) So(err, ShouldNotBeNil) }) @@ -228,11 +257,11 @@ func TestTree(t *testing.T) { So(err, ShouldBeNil) db := NewDB(paths1[0]) - data := strings.NewReader(encode.Base64Encode("/") + "\t1\t11\t6\t1\t1\t20\t20\n" + - encode.Base64Encode("/a") + "\t1\t11\t6\t1\t1\t20\t20\n" + - encode.Base64Encode("/a/b") + "\t1\t11\t6\t1\t1\t20\t20\n" + - encode.Base64Encode("/a/b/c") + "\t1\t11\t6\t1\t1\t20\t20\n" + - encode.Base64Encode("/a/b/c/d") + "\t1\t11\t6\t1\t1\t20\t20\n") + data := strings.NewReader(strconv.Quote("/") + "\t1\t11\t6\t1\t1\t20\t20\n" + + strconv.Quote("/a") + "\t1\t11\t6\t1\t1\t20\t20\n" + + strconv.Quote("/a/b") + "\t1\t11\t6\t1\t1\t20\t20\n" + + strconv.Quote("/a/b/c") + "\t1\t11\t6\t1\t1\t20\t20\n" + + strconv.Quote("/a/b/c/d") + "\t1\t11\t6\t1\t1\t20\t20\n") err = db.Store(data, 20) So(err, ShouldBeNil) @@ -240,11 +269,11 @@ func TestTree(t *testing.T) { So(err, ShouldBeNil) db = NewDB(paths2[0]) - 
data = strings.NewReader(encode.Base64Encode("/") + "\t1\t11\t6\t1\t1\t15\t15\n" + - encode.Base64Encode("/a") + "\t1\t11\t6\t1\t1\t15\t15\n" + - encode.Base64Encode("/a/b") + "\t1\t11\t6\t1\t1\t15\t15\n" + - encode.Base64Encode("/a/b/c") + "\t1\t11\t6\t1\t1\t15\t15\n" + - encode.Base64Encode("/a/b/c/e") + "\t1\t11\t6\t1\t1\t15\t15\n") + data = strings.NewReader(strconv.Quote("/") + "\t1\t11\t6\t1\t1\t15\t15\n" + + strconv.Quote("/a") + "\t1\t11\t6\t1\t1\t15\t15\n" + + strconv.Quote("/a/b") + "\t1\t11\t6\t1\t1\t15\t15\n" + + strconv.Quote("/a/b/c") + "\t1\t11\t6\t1\t1\t15\t15\n" + + strconv.Quote("/a/b/c/e") + "\t1\t11\t6\t1\t1\t15\t15\n") err = db.Store(data, 20) So(err, ShouldBeNil) diff --git a/internal/encode/encode.go b/internal/encode/encode.go deleted file mode 100644 index 700d47c4..00000000 --- a/internal/encode/encode.go +++ /dev/null @@ -1,63 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2024 Genome Research Ltd. - * - * Authors: - * Sendu Bala - * Rosie Kern - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ******************************************************************************/ - -package encode - -import ( - "encoding/base64" - "unsafe" -) - -var encoding = base64.NewEncoding("+/0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") //nolint:gochecknoglobals,lll - -// Base64Encode encodes the given string to a lexically ordered base64. -func Base64Encode(val string) string { - if val == "" { - return "" - } - - buf := make([]byte, encoding.EncodedLen(len(val))) - - encoding.Encode(buf, unsafe.Slice(unsafe.StringData(val), len(val))) - - return unsafe.String(&buf[0], len(buf)) -} - -// Base64Decode decodes the given encoded string from a lexically ordered Base64 -// encoding. -func Base64Decode(val string) (string, error) { - if val == "" { - return "", nil - } - - str, err := encoding.DecodeString(val) - if err != nil { - return "", err - } - - return unsafe.String(&str[0], len(str)), nil -} diff --git a/internal/encode/encode_test.go b/internal/encode/encode_test.go deleted file mode 100644 index 087a40a7..00000000 --- a/internal/encode/encode_test.go +++ /dev/null @@ -1,17 +0,0 @@ -package encode - -import "testing" - -func TestEncodeDecode(t *testing.T) { - for n, test := range [...]string{ - "", - "abc", - "/some/path/", - } { - if output, err := Base64Decode(Base64Encode(test)); err != nil { - t.Errorf("test %d: unexpected error: %s", n+1, err) - } else if output != test { - t.Errorf("test %d: expected output %q, got %q", n+1, test, output) - } - } -} diff --git a/main_test.go b/main_test.go index efdfcee0..59b7dbe1 100644 --- a/main_test.go +++ b/main_test.go @@ -40,6 +40,7 @@ import ( "path/filepath" "slices" "sort" + "strconv" "strings" "syscall" "testing" @@ -50,7 
+51,6 @@ import ( . "github.com/smartystreets/goconvey/convey" "github.com/wtsi-ssg/wrstat/v5/basedirs" "github.com/wtsi-ssg/wrstat/v5/dgut" - "github.com/wtsi-ssg/wrstat/v5/internal/encode" "github.com/wtsi-ssg/wrstat/v5/internal/fixtimes" "github.com/wtsi-ssg/wrstat/v5/summary" ) @@ -637,7 +637,7 @@ func TestWalk(t *testing.T) { "", "/a", "/a/b", "/a/b/c", "/a/b/c/d", "/a/b/c/d/e", "/a/b/c/test.txt", "/a/b/f", "/a/b/f/tes\nt2.csv", "/a/g", "/a/g/h", "/a/test3", } { - expected += encode.Base64Encode(tmp+subPath) + "\n" + expected += strconv.Quote(tmp+subPath) + "\n" } compareFileContents(t, walk1, expected) @@ -818,7 +818,7 @@ func TestStat(t *testing.T) { return err } - _, err = io.WriteString(walkFile, encode.Base64Encode(filepath.Join(tmp, path))+"\n") + _, err = io.WriteString(walkFile, strconv.Quote(filepath.Join(tmp, path))+"\n") So(err, ShouldBeNil) return nil @@ -847,11 +847,11 @@ func TestStat(t *testing.T) { "%[7]s\t4096\t%[1]s\t%[2]s\t%[18]d\t282820\t%[23]d\td\t%[12]d\t2\t%[13]d\n", u.Uid, u.Gid, - encode.Base64Encode(tmp), - encode.Base64Encode(filepath.Join(tmp, "aDirectory")), - encode.Base64Encode(filepath.Join(tmp, "aDirectory", "aFile\nfile")), - encode.Base64Encode(filepath.Join(tmp, "aDirectory", "aSubDirectory")), - encode.Base64Encode(filepath.Join(tmp, "anotherDirectory")), + strconv.Quote(tmp), + strconv.Quote(filepath.Join(tmp, "aDirectory")), + strconv.Quote(filepath.Join(tmp, "aDirectory", "aFile\nfile")), + strconv.Quote(filepath.Join(tmp, "aDirectory", "aSubDirectory")), + strconv.Quote(filepath.Join(tmp, "anotherDirectory")), inodes[4], inodes[2], inodes[0], @@ -885,7 +885,7 @@ func TestStat(t *testing.T) { parent = filepath.Dir(parent) userGroupExpectation = fmt.Sprintf("%s\t%s\t%s\t1\t10\n", - u.Username, g.Name, encode.Base64Encode(parent)) + userGroupExpectation + u.Username, g.Name, strconv.Quote(parent)) + userGroupExpectation walkExpectations = fmt.Sprintf(""+ "%[1]s\t%[2]s\t%[3]s\t0\t1\t10\t%[4]d\t7383773\n"+ 
"%[1]s\t%[2]s\t%[3]s\t1\t5\t16394\t%[4]d\t7383773\n"+ @@ -895,7 +895,7 @@ func TestStat(t *testing.T) { userGroupExpectation += fmt.Sprintf(""+ "%[1]s\t%[2]s\t%[3]s\t1\t10\n"+ "%[1]s\t%[2]s\t%[4]s\t1\t10\n", u.Username, g.Name, - encode.Base64Encode(tmp), encode.Base64Encode(filepath.Join(tmp, "aDirectory"))) + strconv.Quote(tmp), strconv.Quote(filepath.Join(tmp, "aDirectory"))) walkExpectations += fmt.Sprintf(""+ "%[1]s\t%[2]s\t%[3]s\t0\t1\t10\t%[4]d\t7383773\n"+ @@ -945,27 +945,27 @@ func TestCombine(t *testing.T) { "b.bygroup": "e\tf\tg\th\n5\t6\t7\t8\n", "c.bygroup": "", "a.dgut": "" + - encode.Base64Encode("/") + "\t2000\t1000\t0\t1\t10\t1721915848\t7383773\n" + - encode.Base64Encode("/") + "\t2000\t1000\t2\t5\t16394\t1721915848\t7383773\n" + - encode.Base64Encode("/") + "\t2000\t1000\t15\t4\t16384\t1721915848\t314159\n" + - encode.Base64Encode("/some") + "\t2000\t1000\t0\t1\t10\t1721915848\t7383773\n" + - encode.Base64Encode("/some") + "\t2000\t1000\t2\t5\t16394\t1721915848\t7383773\n" + - encode.Base64Encode("/some") + "\t2000\t1000\t15\t4\t16384\t1721915848\t314159\n" + - encode.Base64Encode("/some/directory") + "\t2000\t1000\t0\t1\t10\t1721915848\t7383773\n" + - encode.Base64Encode("/some/directory") + "\t2000\t1000\t2\t5\t16394\t1721915848\t7383773\n" + - encode.Base64Encode("/some/directory") + "\t2000\t1000\t15\t4\t16384\t1721915848\t314159\n" + - encode.Base64Encode("/some/directory/001") + "\t2000\t1000\t0\t1\t10\t1721915848\t7383773\n" + - encode.Base64Encode("/some/directory/001") + "\t2000\t1000\t2\t5\t16394\t1721915848\t7383773\n" + - encode.Base64Encode("/some/directory/001") + "\t2000\t1000\t15\t4\t16384\t1721915848\t314159\n" + - encode.Base64Encode("/some/directory/001/aDirectory") + "\t2000\t1000\t0\t1\t10\t1721915848\t7383773\n" + - encode.Base64Encode("/some/directory/001/aDirectory") + "\t2000\t1000\t2\t3\t8202\t1721915848\t7383773\n" + - encode.Base64Encode("/some/directory/001/aDirectory") + 
"\t2000\t1000\t15\t2\t8192\t1721915848\t314159\n" + - encode.Base64Encode("/some/directory/001/aDirectory/aSubDirectory") + + strconv.Quote("/") + "\t2000\t1000\t0\t1\t10\t1721915848\t7383773\n" + + strconv.Quote("/") + "\t2000\t1000\t2\t5\t16394\t1721915848\t7383773\n" + + strconv.Quote("/") + "\t2000\t1000\t15\t4\t16384\t1721915848\t314159\n" + + strconv.Quote("/some") + "\t2000\t1000\t0\t1\t10\t1721915848\t7383773\n" + + strconv.Quote("/some") + "\t2000\t1000\t2\t5\t16394\t1721915848\t7383773\n" + + strconv.Quote("/some") + "\t2000\t1000\t15\t4\t16384\t1721915848\t314159\n" + + strconv.Quote("/some/directory") + "\t2000\t1000\t0\t1\t10\t1721915848\t7383773\n" + + strconv.Quote("/some/directory") + "\t2000\t1000\t2\t5\t16394\t1721915848\t7383773\n" + + strconv.Quote("/some/directory") + "\t2000\t1000\t15\t4\t16384\t1721915848\t314159\n" + + strconv.Quote("/some/directory/001") + "\t2000\t1000\t0\t1\t10\t1721915848\t7383773\n" + + strconv.Quote("/some/directory/001") + "\t2000\t1000\t2\t5\t16394\t1721915848\t7383773\n" + + strconv.Quote("/some/directory/001") + "\t2000\t1000\t15\t4\t16384\t1721915848\t314159\n" + + strconv.Quote("/some/directory/001/aDirectory") + "\t2000\t1000\t0\t1\t10\t1721915848\t7383773\n" + + strconv.Quote("/some/directory/001/aDirectory") + "\t2000\t1000\t2\t3\t8202\t1721915848\t7383773\n" + + strconv.Quote("/some/directory/001/aDirectory") + "\t2000\t1000\t15\t2\t8192\t1721915848\t314159\n" + + strconv.Quote("/some/directory/001/aDirectory/aSubDirectory") + "\t2000\t1000\t2\t1\t4096\t1721915848\t314159\n" + - encode.Base64Encode("/some/directory/001/aDirectory/aSubDirectory") + + strconv.Quote("/some/directory/001/aDirectory/aSubDirectory") + "\t2000\t1000\t15\t1\t4096\t1721915848\t314159\n" + - encode.Base64Encode("/some/directory/001/anotherDirectory") + "\t2000\t1000\t2\t1\t4096\t1721915848\t282820\n" + - encode.Base64Encode("/some/directory/001/anotherDirectory") + "\t2000\t1000\t15\t1\t4096\t1721915848\t282820\n", + 
strconv.Quote("/some/directory/001/anotherDirectory") + "\t2000\t1000\t2\t1\t4096\t1721915848\t282820\n" + + strconv.Quote("/some/directory/001/anotherDirectory") + "\t2000\t1000\t15\t1\t4096\t1721915848\t282820\n", "a.log": "A log file\nwith 2 lines\n", "b.log": "Another log file, with 1 line\n", "c.log": "Lorem ipsum!!!!", @@ -1824,79 +1824,79 @@ stop;`) GroupA, GroupB, GroupD, UserD, UserA, UserB, UserE), "????????_store3.*.bygroup": fmt.Sprintf("G%d\tU%d\t1\t1024", GroupA, UserA), "????????_A.*.byusergroup.gz": fmt.Sprintf(``+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/")+"\t1\t1\n"+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/simple")+"\t1\t1\n"+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/simple/A")+"\t1\t1\n", UserA, GroupA), + "U%[1]d\tG%[2]d\t"+strconv.Quote("/")+"\t1\t1\n"+ + "U%[1]d\tG%[2]d\t"+strconv.Quote("/simple")+"\t1\t1\n"+ + "U%[1]d\tG%[2]d\t"+strconv.Quote("/simple/A")+"\t1\t1\n", UserA, GroupA), "????????_E.*.byusergroup.gz": fmt.Sprintf(``+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/")+"\t1\t2\n"+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/simple")+"\t1\t2\n"+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/simple/E")+"\t1\t2\n", UserE, GroupE), + "U%[1]d\tG%[2]d\t"+strconv.Quote("/")+"\t1\t2\n"+ + "U%[1]d\tG%[2]d\t"+strconv.Quote("/simple")+"\t1\t2\n"+ + "U%[1]d\tG%[2]d\t"+strconv.Quote("/simple/E")+"\t1\t2\n", UserE, GroupE), "????????_store1.*.byusergroup.gz": fmt.Sprintf(``+ - "U%[1]d\tG%[4]d\t"+encode.Base64Encode("/")+"\t2\t10240\n"+ - "U%[1]d\tG%[4]d\t"+encode.Base64Encode("/objects")+"\t2\t10240\n"+ - "U%[1]d\tG%[4]d\t"+encode.Base64Encode("/objects/store1")+"\t2\t10240\n"+ - "U%[1]d\tG%[4]d\t"+encode.Base64Encode("/objects/store1/data")+"\t2\t10240\n"+ - "U%[1]d\tG%[4]d\t"+encode.Base64Encode("/objects/store1/data/sheets")+"\t2\t10240\n"+ - "U%[2]d\tG%[4]d\t"+encode.Base64Encode("/")+"\t2\t66666\n"+ - "U%[2]d\tG%[4]d\t"+encode.Base64Encode("/objects")+"\t2\t66666\n"+ - "U%[2]d\tG%[4]d\t"+encode.Base64Encode("/objects/store1")+"\t2\t66666\n"+ 
- "U%[2]d\tG%[4]d\t"+encode.Base64Encode("/objects/store1/data")+"\t2\t66666\n"+ - "U%[2]d\tG%[4]d\t"+encode.Base64Encode("/objects/store1/data/dbs")+"\t2\t66666\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/")+"\t3\t6000\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/objects")+"\t3\t6000\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/objects/store1")+"\t3\t6000\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/objects/store1/data")+"\t3\t6000\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/objects/store1/data/temp")+"\t3\t6000\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/objects/store1/data/temp/a")+"\t1\t1000\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/objects/store1/data/temp/b")+"\t1\t2000\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/objects/store1/data/temp/c")+"\t1\t3000", + "U%[1]d\tG%[4]d\t"+strconv.Quote("/")+"\t2\t10240\n"+ + "U%[1]d\tG%[4]d\t"+strconv.Quote("/objects")+"\t2\t10240\n"+ + "U%[1]d\tG%[4]d\t"+strconv.Quote("/objects/store1")+"\t2\t10240\n"+ + "U%[1]d\tG%[4]d\t"+strconv.Quote("/objects/store1/data")+"\t2\t10240\n"+ + "U%[1]d\tG%[4]d\t"+strconv.Quote("/objects/store1/data/sheets")+"\t2\t10240\n"+ + "U%[2]d\tG%[4]d\t"+strconv.Quote("/")+"\t2\t66666\n"+ + "U%[2]d\tG%[4]d\t"+strconv.Quote("/objects")+"\t2\t66666\n"+ + "U%[2]d\tG%[4]d\t"+strconv.Quote("/objects/store1")+"\t2\t66666\n"+ + "U%[2]d\tG%[4]d\t"+strconv.Quote("/objects/store1/data")+"\t2\t66666\n"+ + "U%[2]d\tG%[4]d\t"+strconv.Quote("/objects/store1/data/dbs")+"\t2\t66666\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/")+"\t3\t6000\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/objects")+"\t3\t6000\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/objects/store1")+"\t3\t6000\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/objects/store1/data")+"\t3\t6000\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/objects/store1/data/temp")+"\t3\t6000\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/objects/store1/data/temp/a")+"\t1\t1000\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/objects/store1/data/temp/b")+"\t1\t2000\n"+ + 
"U%[3]d\tG%[4]d\t"+strconv.Quote("/objects/store1/data/temp/c")+"\t1\t3000", UserA, UserB, UserC, GroupA), "????????_store2.*.byusergroup.gz": fmt.Sprintf(``+ - "U%[1]d\tG%[4]d\t"+encode.Base64Encode("/")+"\t1\t100\n"+ - "U%[1]d\tG%[4]d\t"+encode.Base64Encode("/objects")+"\t1\t100\n"+ - "U%[1]d\tG%[4]d\t"+encode.Base64Encode("/objects/store2")+"\t1\t100\n"+ - "U%[1]d\tG%[4]d\t"+encode.Base64Encode("/objects/store2/part0")+"\t1\t100\n"+ - "U%[1]d\tG%[4]d\t"+encode.Base64Encode("/objects/store2/part0/teams")+"\t1\t100\n"+ - "U%[1]d\tG%[4]d\t"+encode.Base64Encode("/objects/store2/part0/teams/team1")+"\t1\t100\n"+ - "U%[1]d\tG%[5]d\t"+encode.Base64Encode("/")+"\t1\t200\n"+ - "U%[1]d\tG%[5]d\t"+encode.Base64Encode("/objects")+"\t1\t200\n"+ - "U%[1]d\tG%[5]d\t"+encode.Base64Encode("/objects/store2")+"\t1\t200\n"+ - "U%[1]d\tG%[5]d\t"+encode.Base64Encode("/objects/store2/part0")+"\t1\t200\n"+ - "U%[1]d\tG%[5]d\t"+encode.Base64Encode("/objects/store2/part0/teams")+"\t1\t200\n"+ - "U%[1]d\tG%[5]d\t"+encode.Base64Encode("/objects/store2/part0/teams/team1")+"\t1\t200\n"+ - "U%[2]d\tG%[5]d\t"+encode.Base64Encode("/")+"\t1\t1000\n"+ - "U%[2]d\tG%[5]d\t"+encode.Base64Encode("/objects")+"\t1\t1000\n"+ - "U%[2]d\tG%[5]d\t"+encode.Base64Encode("/objects/store2")+"\t1\t1000\n"+ - "U%[2]d\tG%[5]d\t"+encode.Base64Encode("/objects/store2/part0")+"\t1\t1000\n"+ - "U%[2]d\tG%[5]d\t"+encode.Base64Encode("/objects/store2/part0/teams")+"\t1\t1000\n"+ - "U%[2]d\tG%[5]d\t"+encode.Base64Encode("/objects/store2/part0/teams/team2")+"\t1\t1000\n"+ - "U%[2]d\tG%[6]d\t"+encode.Base64Encode("/")+"\t1\t1200\n"+ - "U%[2]d\tG%[6]d\t"+encode.Base64Encode("/objects")+"\t1\t1200\n"+ - "U%[2]d\tG%[6]d\t"+encode.Base64Encode("/objects/store2")+"\t1\t1200\n"+ - "U%[2]d\tG%[6]d\t"+encode.Base64Encode("/objects/store2/important")+"\t1\t1200\n"+ - "U%[2]d\tG%[6]d\t"+encode.Base64Encode("/objects/store2/important/docs\t")+"\t1\t1200\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/")+"\t2\t3047\n"+ - 
"U%[3]d\tG%[4]d\t"+encode.Base64Encode("/objects")+"\t2\t3047\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/objects/store2")+"\t2\t3047\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/objects/store2/part1")+"\t2\t3047\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/objects/store2/part1/other")+"\t1\t2048\n"+ - "U%[3]d\tG%[4]d\t"+encode.Base64Encode("/objects/store2/part1/other/my\nDir")+"\t1\t2048\n"+ - "U%[3]d\tG%[6]d\t"+encode.Base64Encode("/")+"\t1\t1024\n"+ - "U%[3]d\tG%[6]d\t"+encode.Base64Encode("/objects")+"\t1\t1024\n"+ - "U%[3]d\tG%[6]d\t"+encode.Base64Encode("/objects/store2")+"\t1\t1024\n"+ - "U%[3]d\tG%[6]d\t"+encode.Base64Encode("/objects/store2/part1")+"\t1\t1024\n"+ - "U%[3]d\tG%[6]d\t"+encode.Base64Encode("/objects/store2/part1/other")+"\t1\t1024\n"+ - "U%[7]d\tG%[5]d\t"+encode.Base64Encode("/")+"\t1\t2048\n"+ - "U%[7]d\tG%[5]d\t"+encode.Base64Encode("/objects")+"\t1\t2048\n"+ - "U%[7]d\tG%[5]d\t"+encode.Base64Encode("/objects/store2")+"\t1\t2048\n"+ - "U%[7]d\tG%[5]d\t"+encode.Base64Encode("/objects/store2/part1")+"\t1\t2048\n"+ - "U%[7]d\tG%[5]d\t"+encode.Base64Encode("/objects/store2/part1/other")+"\t1\t2048\n"+ - "U%[7]d\tG%[5]d\t"+encode.Base64Encode("/objects/store2/part1/other/my\nDir")+"\t1\t2048\n", + "U%[1]d\tG%[4]d\t"+strconv.Quote("/")+"\t1\t100\n"+ + "U%[1]d\tG%[4]d\t"+strconv.Quote("/objects")+"\t1\t100\n"+ + "U%[1]d\tG%[4]d\t"+strconv.Quote("/objects/store2")+"\t1\t100\n"+ + "U%[1]d\tG%[4]d\t"+strconv.Quote("/objects/store2/part0")+"\t1\t100\n"+ + "U%[1]d\tG%[4]d\t"+strconv.Quote("/objects/store2/part0/teams")+"\t1\t100\n"+ + "U%[1]d\tG%[4]d\t"+strconv.Quote("/objects/store2/part0/teams/team1")+"\t1\t100\n"+ + "U%[1]d\tG%[5]d\t"+strconv.Quote("/")+"\t1\t200\n"+ + "U%[1]d\tG%[5]d\t"+strconv.Quote("/objects")+"\t1\t200\n"+ + "U%[1]d\tG%[5]d\t"+strconv.Quote("/objects/store2")+"\t1\t200\n"+ + "U%[1]d\tG%[5]d\t"+strconv.Quote("/objects/store2/part0")+"\t1\t200\n"+ + 
"U%[1]d\tG%[5]d\t"+strconv.Quote("/objects/store2/part0/teams")+"\t1\t200\n"+ + "U%[1]d\tG%[5]d\t"+strconv.Quote("/objects/store2/part0/teams/team1")+"\t1\t200\n"+ + "U%[2]d\tG%[5]d\t"+strconv.Quote("/")+"\t1\t1000\n"+ + "U%[2]d\tG%[5]d\t"+strconv.Quote("/objects")+"\t1\t1000\n"+ + "U%[2]d\tG%[5]d\t"+strconv.Quote("/objects/store2")+"\t1\t1000\n"+ + "U%[2]d\tG%[5]d\t"+strconv.Quote("/objects/store2/part0")+"\t1\t1000\n"+ + "U%[2]d\tG%[5]d\t"+strconv.Quote("/objects/store2/part0/teams")+"\t1\t1000\n"+ + "U%[2]d\tG%[5]d\t"+strconv.Quote("/objects/store2/part0/teams/team2")+"\t1\t1000\n"+ + "U%[2]d\tG%[6]d\t"+strconv.Quote("/")+"\t1\t1200\n"+ + "U%[2]d\tG%[6]d\t"+strconv.Quote("/objects")+"\t1\t1200\n"+ + "U%[2]d\tG%[6]d\t"+strconv.Quote("/objects/store2")+"\t1\t1200\n"+ + "U%[2]d\tG%[6]d\t"+strconv.Quote("/objects/store2/important")+"\t1\t1200\n"+ + "U%[2]d\tG%[6]d\t"+strconv.Quote("/objects/store2/important/docs\t")+"\t1\t1200\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/")+"\t2\t3047\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/objects")+"\t2\t3047\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/objects/store2")+"\t2\t3047\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/objects/store2/part1")+"\t2\t3047\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/objects/store2/part1/other")+"\t1\t2048\n"+ + "U%[3]d\tG%[4]d\t"+strconv.Quote("/objects/store2/part1/other/my\nDir")+"\t1\t2048\n"+ + "U%[3]d\tG%[6]d\t"+strconv.Quote("/")+"\t1\t1024\n"+ + "U%[3]d\tG%[6]d\t"+strconv.Quote("/objects")+"\t1\t1024\n"+ + "U%[3]d\tG%[6]d\t"+strconv.Quote("/objects/store2")+"\t1\t1024\n"+ + "U%[3]d\tG%[6]d\t"+strconv.Quote("/objects/store2/part1")+"\t1\t1024\n"+ + "U%[3]d\tG%[6]d\t"+strconv.Quote("/objects/store2/part1/other")+"\t1\t1024\n"+ + "U%[7]d\tG%[5]d\t"+strconv.Quote("/")+"\t1\t2048\n"+ + "U%[7]d\tG%[5]d\t"+strconv.Quote("/objects")+"\t1\t2048\n"+ + "U%[7]d\tG%[5]d\t"+strconv.Quote("/objects/store2")+"\t1\t2048\n"+ + "U%[7]d\tG%[5]d\t"+strconv.Quote("/objects/store2/part1")+"\t1\t2048\n"+ + 
"U%[7]d\tG%[5]d\t"+strconv.Quote("/objects/store2/part1/other")+"\t1\t2048\n"+ + "U%[7]d\tG%[5]d\t"+strconv.Quote("/objects/store2/part1/other/my\nDir")+"\t1\t2048\n", UserA, UserB, UserD, GroupA, GroupB, GroupD, UserE), "????????_store3.*.byusergroup.gz": fmt.Sprintf(``+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/")+"\t1\t1024\n"+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/objects")+"\t1\t1024\n"+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/objects/store3")+"\t1\t1024", + "U%[1]d\tG%[2]d\t"+strconv.Quote("/")+"\t1\t1024\n"+ + "U%[1]d\tG%[2]d\t"+strconv.Quote("/objects")+"\t1\t1024\n"+ + "U%[1]d\tG%[2]d\t"+strconv.Quote("/objects/store3")+"\t1\t1024", UserA, GroupA), "????????_A.*.logs.gz": "", "????????_E.*.logs.gz": "", @@ -1904,58 +1904,58 @@ stop;`) "????????_store2.*.logs.gz": "", "????????_store3.*.logs.gz": "", "????????_A.*.stats.gz": fmt.Sprintf(""+ - encode.Base64Encode("/simple/A/a.file")+"\t1\t%[1]d\t%[2]d\t166\t166\t166\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/simple/A")+"\t0\t%[1]d\t%[2]d\t166\t166\t166\td\t\x00\t2\t32", + strconv.Quote("/simple/A/a.file")+"\t1\t%[1]d\t%[2]d\t166\t166\t166\tf\t\x00\t1\t34\n"+ + strconv.Quote("/simple/A")+"\t0\t%[1]d\t%[2]d\t166\t166\t166\td\t\x00\t2\t32", UserA, GroupA), "????????_E.*.stats.gz": fmt.Sprintf(""+ - encode.Base64Encode("/simple/E/b.tmp")+"\t2\t%[1]d\t%[2]d\t171\t171\t171\tf\t\x00\t2\t34\n"+ - encode.Base64Encode("/simple/E")+"\t0\t%[1]d\t%[2]d\t171\t171\t171\td\t\x00\t3\t32", + strconv.Quote("/simple/E/b.tmp")+"\t2\t%[1]d\t%[2]d\t171\t171\t171\tf\t\x00\t2\t34\n"+ + strconv.Quote("/simple/E")+"\t0\t%[1]d\t%[2]d\t171\t171\t171\td\t\x00\t3\t32", UserE, GroupE), "????????_store1.*.stats.gz": fmt.Sprintf(""+ - encode.Base64Encode("/objects/store1")+"\t0\t0\t0\t10\t10\t10\td\t\x00\t3\t32\n"+ - encode.Base64Encode("/objects/store1/data")+"\t0\t0\t0\t42\t42\t42\td\t\x00\t5\t32\n"+ - encode.Base64Encode("/objects/store1/data/temp")+"\t0\t%[1]d\t%[2]d\t69\t69\t69\td\t\x00\t5\t32\n"+ - 
encode.Base64Encode("/objects/store1/data/temp/c/c.bed")+"\t512\t%[1]d\t%[2]d\t75\t75\t75\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store1/data/temp/c")+"\t0\t%[1]d\t%[2]d\t75\t75\t75\td\t\x00\t2\t32\n"+ - encode.Base64Encode("/objects/store1/data/dbs/dbA.db")+"\t512\t%[3]d\t%[2]d\t33\t33\t33\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store1/data/dbs/dbB.db")+"\t512\t%[3]d\t%[2]d\t38\t38\t38\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store1/data/dbs")+"\t0\t%[3]d\t%[2]d\t38\t38\t38\td\t\x00\t2\t32\n"+ - encode.Base64Encode("/objects/store1/data/sheets/doc1.txt")+"\t512\t%[4]d\t%[2]d\t19\t19\t19\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store1/data/sheets/doc2.txt")+"\t512\t%[4]d\t%[2]d\t24\t24\t24\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store1/data/sheets")+"\t0\t%[4]d\t%[2]d\t24\t24\t24\td\t\x00\t2\t32\n"+ - encode.Base64Encode("/objects/store1/data/temp/a/a.bed")+"\t512\t%[1]d\t%[2]d\t53\t53\t53\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store1/data/temp/a")+"\t0\t%[1]d\t%[2]d\t53\t53\t53\td\t\x00\t2\t32\n"+ - encode.Base64Encode("/objects/store1/data/temp/b/b.bed")+"\t512\t%[1]d\t%[2]d\t64\t64\t64\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store1/data/temp/b")+"\t0\t%[1]d\t%[2]d\t64\t64\t64\td\t\x00\t2\t32", + strconv.Quote("/objects/store1")+"\t0\t0\t0\t10\t10\t10\td\t\x00\t3\t32\n"+ + strconv.Quote("/objects/store1/data")+"\t0\t0\t0\t42\t42\t42\td\t\x00\t5\t32\n"+ + strconv.Quote("/objects/store1/data/temp")+"\t0\t%[1]d\t%[2]d\t69\t69\t69\td\t\x00\t5\t32\n"+ + strconv.Quote("/objects/store1/data/temp/c/c.bed")+"\t512\t%[1]d\t%[2]d\t75\t75\t75\tf\t\x00\t1\t34\n"+ + strconv.Quote("/objects/store1/data/temp/c")+"\t0\t%[1]d\t%[2]d\t75\t75\t75\td\t\x00\t2\t32\n"+ + strconv.Quote("/objects/store1/data/dbs/dbA.db")+"\t512\t%[3]d\t%[2]d\t33\t33\t33\tf\t\x00\t1\t34\n"+ + strconv.Quote("/objects/store1/data/dbs/dbB.db")+"\t512\t%[3]d\t%[2]d\t38\t38\t38\tf\t\x00\t1\t34\n"+ + 
strconv.Quote("/objects/store1/data/dbs")+"\t0\t%[3]d\t%[2]d\t38\t38\t38\td\t\x00\t2\t32\n"+ + strconv.Quote("/objects/store1/data/sheets/doc1.txt")+"\t512\t%[4]d\t%[2]d\t19\t19\t19\tf\t\x00\t1\t34\n"+ + strconv.Quote("/objects/store1/data/sheets/doc2.txt")+"\t512\t%[4]d\t%[2]d\t24\t24\t24\tf\t\x00\t1\t34\n"+ + strconv.Quote("/objects/store1/data/sheets")+"\t0\t%[4]d\t%[2]d\t24\t24\t24\td\t\x00\t2\t32\n"+ + strconv.Quote("/objects/store1/data/temp/a/a.bed")+"\t512\t%[1]d\t%[2]d\t53\t53\t53\tf\t\x00\t1\t34\n"+ + strconv.Quote("/objects/store1/data/temp/a")+"\t0\t%[1]d\t%[2]d\t53\t53\t53\td\t\x00\t2\t32\n"+ + strconv.Quote("/objects/store1/data/temp/b/b.bed")+"\t512\t%[1]d\t%[2]d\t64\t64\t64\tf\t\x00\t1\t34\n"+ + strconv.Quote("/objects/store1/data/temp/b")+"\t0\t%[1]d\t%[2]d\t64\t64\t64\td\t\x00\t2\t32", UserC, GroupA, UserB, UserA), "????????_store2.*.stats.gz": fmt.Sprintf(""+ - encode.Base64Encode("/objects/store2")+"\t0\t0\t0\t148\t148\t148\td\t\x00\t5\t32\n"+ - encode.Base64Encode("/objects/store2/part1/other.bed")+"\t512\t%[1]d\t%[2]d\t119\t119\t119\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store2/part1")+"\t0\t0\t0\t123\t123\t123\td\t\x00\t3\t32\n"+ - encode.Base64Encode("/objects/store2/part1/other/my.tmp.gz")+"\t512\t%[1]d\t%[3]d\t128\t128\t128\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store2/part1/other")+"\t0\t%[1]d\t%[2]d\t133\t133\t133\td\t\x00\t3\t32\n"+ - encode.Base64Encode("/objects/store2/part1/other/my\nDir/my.tmp.old")+ + strconv.Quote("/objects/store2")+"\t0\t0\t0\t148\t148\t148\td\t\x00\t5\t32\n"+ + strconv.Quote("/objects/store2/part1/other.bed")+"\t512\t%[1]d\t%[2]d\t119\t119\t119\tf\t\x00\t1\t34\n"+ + strconv.Quote("/objects/store2/part1")+"\t0\t0\t0\t123\t123\t123\td\t\x00\t3\t32\n"+ + strconv.Quote("/objects/store2/part1/other/my.tmp.gz")+"\t512\t%[1]d\t%[3]d\t128\t128\t128\tf\t\x00\t1\t34\n"+ + strconv.Quote("/objects/store2/part1/other")+"\t0\t%[1]d\t%[2]d\t133\t133\t133\td\t\x00\t3\t32\n"+ + 
strconv.Quote("/objects/store2/part1/other/my\nDir/my.tmp.old")+ "\t512\t%[1]d\t%[2]d\t139\t139\t139\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store2/part1/other/my\nDir/another.file")+ + strconv.Quote("/objects/store2/part1/other/my\nDir/another.file")+ "\t512\t%[7]d\t%[5]d\t145\t145\t145\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store2/part1/other/my\nDir")+"\t0\t%[1]d\t%[2]d\t145\t145\t145\td\t\x00\t2\t32\n"+ - encode.Base64Encode("/objects/store2/important")+"\t0\t0\t0\t152\t152\t152\td\t\x00\t3\t32\n"+ - encode.Base64Encode("/objects/store2/important/docs\t/my.doc")+ + strconv.Quote("/objects/store2/part1/other/my\nDir")+"\t0\t%[1]d\t%[2]d\t145\t145\t145\td\t\x00\t2\t32\n"+ + strconv.Quote("/objects/store2/important")+"\t0\t0\t0\t152\t152\t152\td\t\x00\t3\t32\n"+ + strconv.Quote("/objects/store2/important/docs\t/my.doc")+ "\t512\t%[4]d\t%[3]d\t157\t157\t157\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store2/important/docs\t")+"\t0\t%[4]d\t%[3]d\t157\t157\t157\td\t\x00\t2\t32\n"+ - encode.Base64Encode("/objects/store2/part0")+"\t0\t0\t0\t87\t87\t87\td\t\x00\t3\t32\n"+ - encode.Base64Encode("/objects/store2/part0/teams")+"\t0\t0\t0\t109\t109\t109\td\t\x00\t4\t32\n"+ - encode.Base64Encode("/objects/store2/part0/teams/team2/c.txt")+ + strconv.Quote("/objects/store2/important/docs\t")+"\t0\t%[4]d\t%[3]d\t157\t157\t157\td\t\x00\t2\t32\n"+ + strconv.Quote("/objects/store2/part0")+"\t0\t0\t0\t87\t87\t87\td\t\x00\t3\t32\n"+ + strconv.Quote("/objects/store2/part0/teams")+"\t0\t0\t0\t109\t109\t109\td\t\x00\t4\t32\n"+ + strconv.Quote("/objects/store2/part0/teams/team2/c.txt")+ "\t512\t%[4]d\t%[5]d\t115\t115\t115\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store2/part0/teams/team2")+"\t0\t%[4]d\t%[5]d\t115\t115\t115\td\t\x00\t2\t32\n"+ - encode.Base64Encode("/objects/store2/part0/teams/team1/a.txt")+"\t100\t%[6]d\t%[2]d\t98\t98\t98\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store2/part0/teams/team1/b.txt")+ + 
strconv.Quote("/objects/store2/part0/teams/team2")+"\t0\t%[4]d\t%[5]d\t115\t115\t115\td\t\x00\t2\t32\n"+ + strconv.Quote("/objects/store2/part0/teams/team1/a.txt")+"\t100\t%[6]d\t%[2]d\t98\t98\t98\tf\t\x00\t1\t34\n"+ + strconv.Quote("/objects/store2/part0/teams/team1/b.txt")+ "\t200\t%[6]d\t%[5]d\t104\t104\t104\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store2/part0/teams/team1")+"\t0\t%[6]d\t%[2]d\t104\t104\t104\td\t\x00\t2\t32", + strconv.Quote("/objects/store2/part0/teams/team1")+"\t0\t%[6]d\t%[2]d\t104\t104\t104\td\t\x00\t2\t32", UserD, GroupA, GroupD, UserB, GroupB, UserA, UserE), "????????_store3.*.stats.gz": fmt.Sprintf(""+ - encode.Base64Encode("/objects/store3/aFile")+"\t512\t%d\t%d\t160\t160\t160\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/objects/store3")+"\t0\t0\t0\t160\t160\t160\td\t\x00\t2\t32", + strconv.Quote("/objects/store3/aFile")+"\t512\t%d\t%d\t160\t160\t160\tf\t\x00\t1\t34\n"+ + strconv.Quote("/objects/store3")+"\t0\t0\t0\t160\t160\t160\td\t\x00\t2\t32", UserA, GroupA), "simple/*basedirs.userusage.tsv": fmt.Sprintf(``+ @@ -1971,22 +1971,22 @@ stop;`) "simple/????????_A.*.bygroup": fmt.Sprintf("G%d\tU%d\t1\t1", GroupA, UserA), "simple/????????_E.*.bygroup": fmt.Sprintf("G%d\tU%d\t1\t2", GroupE, UserE), "simple/????????_A.*.byusergroup.gz": fmt.Sprintf(``+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/")+"\t1\t1\n"+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/simple")+"\t1\t1\n"+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/simple/A")+"\t1\t1\n", UserA, GroupA), + "U%[1]d\tG%[2]d\t"+strconv.Quote("/")+"\t1\t1\n"+ + "U%[1]d\tG%[2]d\t"+strconv.Quote("/simple")+"\t1\t1\n"+ + "U%[1]d\tG%[2]d\t"+strconv.Quote("/simple/A")+"\t1\t1\n", UserA, GroupA), "simple/????????_E.*.byusergroup.gz": fmt.Sprintf(``+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/")+"\t1\t2\n"+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/simple")+"\t1\t2\n"+ - "U%[1]d\tG%[2]d\t"+encode.Base64Encode("/simple/E")+"\t1\t2\n", UserE, GroupE), + 
"U%[1]d\tG%[2]d\t"+strconv.Quote("/")+"\t1\t2\n"+ + "U%[1]d\tG%[2]d\t"+strconv.Quote("/simple")+"\t1\t2\n"+ + "U%[1]d\tG%[2]d\t"+strconv.Quote("/simple/E")+"\t1\t2\n", UserE, GroupE), "simple/????????_A.*.logs.gz": "", "simple/????????_E.*.logs.gz": "", "simple/????????_A.*.stats.gz": fmt.Sprintf(""+ - encode.Base64Encode("/simple/A/a.file")+"\t1\t%[1]d\t%[2]d\t166\t166\t166\tf\t\x00\t1\t34\n"+ - encode.Base64Encode("/simple/A")+"\t0\t%[1]d\t%[2]d\t166\t166\t166\td\t\x00\t2\t32", + strconv.Quote("/simple/A/a.file")+"\t1\t%[1]d\t%[2]d\t166\t166\t166\tf\t\x00\t1\t34\n"+ + strconv.Quote("/simple/A")+"\t0\t%[1]d\t%[2]d\t166\t166\t166\td\t\x00\t2\t32", UserA, GroupA), "simple/????????_E.*.stats.gz": fmt.Sprintf(""+ - encode.Base64Encode("/simple/E/b.tmp")+"\t2\t%[1]d\t%[2]d\t171\t171\t171\tf\t\x00\t2\t34\n"+ - encode.Base64Encode("/simple/E")+"\t0\t%[1]d\t%[2]d\t171\t171\t171\td\t\x00\t3\t32", + strconv.Quote("/simple/E/b.tmp")+"\t2\t%[1]d\t%[2]d\t171\t171\t171\tf\t\x00\t2\t34\n"+ + strconv.Quote("/simple/E")+"\t0\t%[1]d\t%[2]d\t171\t171\t171\td\t\x00\t3\t32", UserE, GroupE), } { files, errr := fs.Glob(os.DirFS(tmpTemp), filepath.Join("final", file)) diff --git a/stat/file.go b/stat/file.go index fc0828f0..3c14b818 100644 --- a/stat/file.go +++ b/stat/file.go @@ -31,9 +31,8 @@ import ( "fmt" "io/fs" "os" + "strconv" "syscall" - - "github.com/wtsi-ssg/wrstat/v5/internal/encode" ) type FileType string @@ -57,7 +56,7 @@ const ( // FileStats contains all the file stats needed by wrstat, interpreted in our // custom way. 
type FileStats struct { - Base64Path string + QuotedPath string Size int64 UID uint32 GID uint32 @@ -75,7 +74,7 @@ type FileStats struct { func (fs *FileStats) ToString() string { return fmt.Sprintf( "%s\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%d\t%d\t%d\n", - fs.Base64Path, fs.Size, fs.UID, fs.GID, + fs.QuotedPath, fs.Size, fs.UID, fs.GID, fs.Atim, fs.Mtim, fs.Ctim, fs.Type, fs.Ino, fs.Nlink, fs.Dev) } @@ -94,7 +93,7 @@ func (fs *FileStats) correctSize(stat *syscall.Stat_t) { // calculated correctly (the info only contains the basename). func File(absPath string, info os.FileInfo) *FileStats { fs := &FileStats{ - Base64Path: encode.Base64Encode(absPath), + QuotedPath: strconv.Quote(absPath), Size: info.Size(), Type: modeToType(info.Mode()), } diff --git a/stat/file_test.go b/stat/file_test.go index e15e8199..b3a14c32 100644 --- a/stat/file_test.go +++ b/stat/file_test.go @@ -30,11 +30,11 @@ import ( "io/fs" "os" "path/filepath" + "strconv" "syscall" "testing" . "github.com/smartystreets/goconvey/convey" - "github.com/wtsi-ssg/wrstat/v5/internal/encode" ) func TestStatFile(t *testing.T) { @@ -70,11 +70,6 @@ func TestStatFile(t *testing.T) { So(modeToType(fs.ModeIrregular), ShouldEqual, "X") }) - Convey("base64Encode() works correctly", t, func() { - So(encode.Base64Encode("/a/path/reg"), ShouldEqual, "9q2jQ43oO0xmNKQ=") - So(encode.Base64Encode("/a/path/link"), ShouldEqual, "9q2jQ43oO0xgOKtf") - }) - Convey("File() returns the correct interpretation of FileInfo", t, func() { dir, err := os.MkdirTemp("", "wrstat_statfile_test") So(err, ShouldBeNil) @@ -111,7 +106,7 @@ func testFileStats(path string, size int64, filetype string) { stats := File("/abs/path/to/file", info) So(stats, ShouldNotBeNil) - So(len(stats.Base64Path), ShouldBeGreaterThan, 0) + So(len(stats.QuotedPath), ShouldBeGreaterThan, 0) So(stats.Size, ShouldEqual, size) stat, ok := info.Sys().(*syscall.Stat_t) @@ -128,7 +123,7 @@ func testFileStats(path string, size int64, filetype string) { So(stats.ToString(), 
ShouldEqual, fmt.Sprintf( "%s\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%d\t%d\t%d\n", - encode.Base64Encode("/abs/path/to/file"), size, stat.Uid, stat.Gid, + strconv.Quote("/abs/path/to/file"), size, stat.Uid, stat.Gid, stat.Atim.Sec, stat.Mtim.Sec, stat.Ctim.Sec, filetype, stat.Ino, stat.Nlink, stat.Dev)) } diff --git a/stat/paths.go b/stat/paths.go index 0e71e389..4ac14b76 100644 --- a/stat/paths.go +++ b/stat/paths.go @@ -29,11 +29,11 @@ import ( "bufio" "io" "io/fs" + "strconv" "sync" "time" "github.com/inconshreveable/log15" - "github.com/wtsi-ssg/wrstat/v5/internal/encode" "github.com/wtsi-ssg/wrstat/v5/reporter" ) @@ -86,9 +86,9 @@ func (p *Paths) AddOperation(name string, op Operation) error { return nil } -// Scan scans through the given reader which should consist of a base64 encoded -// absolute file path per line. It calls our Statter.Lstat() on each, and passes -// the absolute path and FileInfo to any Operation callbacks you've added. +// Scan scans through the given reader which should consist of a quoted absolute +// file path per line. It calls our Statter.Lstat() on each, and passes the +// absolute path and FileInfo to any Operation callbacks you've added. // // Operations are run concurrently (so should not do something like write to the // same file) and their errors logged, but otherwise ignored. @@ -105,7 +105,7 @@ func (p *Paths) Scan(paths io.Reader) error { var wg sync.WaitGroup for scanner.Scan() { - path, err := encode.Base64Decode(scanner.Text()) + path, err := strconv.Unquote(scanner.Text()) if err != nil { return err } diff --git a/stat/paths_test.go b/stat/paths_test.go index 460c7cf9..ba6d9ba0 100644 --- a/stat/paths_test.go +++ b/stat/paths_test.go @@ -30,17 +30,19 @@ import ( "io/fs" "os" "path/filepath" + "strconv" "strings" "sync/atomic" "testing" "time" .
"github.com/smartystreets/goconvey/convey" - "github.com/wtsi-ssg/wrstat/v5/internal/encode" ) -const errTestFail = Error("test fail") -const errTestFileDetails = Error("file details wrong") +const ( + errTestFail = Error("test fail") + errTestFileDetails = Error("file details wrong") +) func TestPaths(t *testing.T) { statterTimeout := 1 * time.Second @@ -196,8 +198,8 @@ func createScanInput(t *testing.T) io.Reader { t.Helper() pathEmpty, pathContent := createTestFiles(t) - r := strings.NewReader(encode.Base64Encode(pathEmpty) + "\n" + - encode.Base64Encode("/foo/bar") + "\n" + encode.Base64Encode(pathContent)) + r := strings.NewReader(strconv.Quote(pathEmpty) + "\n" + + strconv.Quote("/foo/bar") + "\n" + strconv.Quote(pathContent)) return r } diff --git a/summary/dirgut.go b/summary/dirgut.go index 2da250a0..600d8883 100644 --- a/summary/dirgut.go +++ b/summary/dirgut.go @@ -31,11 +31,10 @@ import ( "io/fs" "path/filepath" "sort" + "strconv" "strings" "syscall" "time" - - "github.com/wtsi-ssg/wrstat/v5/internal/encode" ) // DirGUTFileType is one of the special file types that the @@ -83,9 +82,11 @@ const ErrInvalidType = Error("not a valid file type") // String lets you convert a DirGUTFileType to a meaningful string. 
func (d DirGUTFileType) String() string { - return [...]string{"other", "temp", "vcf", "vcf.gz", "bcf", "sam", "bam", + return [...]string{ + "other", "temp", "vcf", "vcf.gz", "bcf", "sam", "bam", "cram", "fasta", "fastq", "fastq.gz", "ped/bed", "compressed", "text", - "log", "dir"}[d] + "log", "dir", + }[d] } // FileTypeStringToDirGUTFileType converts the String() representation of a @@ -476,7 +477,7 @@ func (d *DirGroupUserType) Output(output StringCloser) error { for j, dgut := range dguts { s := summaries[j] _, errw := output.WriteString(fmt.Sprintf("%s\t%s\t%d\t%d\t%d\t%d\n", - encode.Base64Encode(dir), + strconv.Quote(dir), dgut, s.count, s.size, s.atime, s.mtime)) diff --git a/summary/dirgut_test.go b/summary/dirgut_test.go index 2b9350ac..cd7c22b0 100644 --- a/summary/dirgut_test.go +++ b/summary/dirgut_test.go @@ -35,7 +35,6 @@ import ( "time" . "github.com/smartystreets/goconvey/convey" - "github.com/wtsi-ssg/wrstat/v5/internal/encode" ) func TestDirGUTFileType(t *testing.T) { @@ -255,7 +254,8 @@ func TestDirGUTFileType(t *testing.T) { So(d.pathToTypes("/foo/bar.asd"), ShouldResemble, []DirGUTFileType{DGUTFileTypeOther}) So(pathToTypesMap(d, "/foo/.tmp.asd"), ShouldResemble, map[DirGUTFileType]bool{ - DGUTFileTypeOther: true, DGUTFileTypeTemp: true}) + DGUTFileTypeOther: true, DGUTFileTypeTemp: true, + }) So(d.pathToTypes("/foo/bar.vcf"), ShouldResemble, []DirGUTFileType{DGUTFileTypeVCF}) So(d.pathToTypes("/foo/bar.vcf.gz"), ShouldResemble, []DirGUTFileType{DGUTFileTypeVCFGz}) @@ -264,7 +264,8 @@ func TestDirGUTFileType(t *testing.T) { So(d.pathToTypes("/foo/bar.sam"), ShouldResemble, []DirGUTFileType{DGUTFileTypeSam}) So(d.pathToTypes("/foo/bar.bam"), ShouldResemble, []DirGUTFileType{DGUTFileTypeBam}) So(pathToTypesMap(d, "/foo/.tmp.cram"), ShouldResemble, map[DirGUTFileType]bool{ - DGUTFileTypeCram: true, DGUTFileTypeTemp: true}) + DGUTFileTypeCram: true, DGUTFileTypeTemp: true, + }) So(d.pathToTypes("/foo/bar.fa"), ShouldResemble, 
[]DirGUTFileType{DGUTFileTypeFasta}) So(d.pathToTypes("/foo/bar.fq"), ShouldResemble, []DirGUTFileType{DGUTFileTypeFastq}) @@ -390,12 +391,12 @@ func TestDirGUT(t *testing.T) { So(errr, ShouldBeNil) output := string(o) - So(output, ShouldContainSubstring, encode.Base64Encode("/a/b/c/d")+"\t2\t10\t7\t1\t2\t200\t200\n") - So(output, ShouldContainSubstring, encode.Base64Encode("/a/b/c")+"\t"+cuidKey+"\t2\t30\t0\t0\n") - So(output, ShouldContainSubstring, encode.Base64Encode("/a/b")+"\t"+cuidKey+"\t3\t60\t0\t0\n") - So(output, ShouldContainSubstring, encode.Base64Encode("/a/b")+"\t2\t2\t13\t1\t5\t0\t0\n") - So(output, ShouldContainSubstring, encode.Base64Encode("/a/b")+"\t2\t2\t6\t1\t3\t100\t0\n") - So(output, ShouldContainSubstring, encode.Base64Encode("/")+"\t3\t2\t13\t1\t6\t0\t0\n") + So(output, ShouldContainSubstring, strconv.Quote("/a/b/c/d")+"\t2\t10\t7\t1\t2\t200\t200\n") + So(output, ShouldContainSubstring, strconv.Quote("/a/b/c")+"\t"+cuidKey+"\t2\t30\t0\t0\n") + So(output, ShouldContainSubstring, strconv.Quote("/a/b")+"\t"+cuidKey+"\t3\t60\t0\t0\n") + So(output, ShouldContainSubstring, strconv.Quote("/a/b")+"\t2\t2\t13\t1\t5\t0\t0\n") + So(output, ShouldContainSubstring, strconv.Quote("/a/b")+"\t2\t2\t6\t1\t3\t100\t0\n") + So(output, ShouldContainSubstring, strconv.Quote("/")+"\t3\t2\t13\t1\t6\t0\t0\n") So(checkFileIsSorted(outPath), ShouldBeTrue) }) diff --git a/summary/usergroup.go b/summary/usergroup.go index 09a32a58..44b657d5 100644 --- a/summary/usergroup.go +++ b/summary/usergroup.go @@ -33,8 +33,6 @@ import ( "sort" "strconv" "syscall" - - "github.com/wtsi-ssg/wrstat/v5/internal/encode" ) type Error string @@ -330,7 +328,7 @@ func outputDirectorySummariesForGroup(output StringCloser, username, groupname s for i, s := range summaries { _, errw := output.WriteString(fmt.Sprintf("%s\t%s\t%s\t%d\t%d\n", - username, groupname, encode.Base64Encode(dirs[i]), s.count, s.size)) + username, groupname, strconv.Quote(dirs[i]), s.count, s.size)) if errw != nil { 
return errw } diff --git a/summary/usergroup_test.go b/summary/usergroup_test.go index 8f0b427b..2dd9f0eb 100644 --- a/summary/usergroup_test.go +++ b/summary/usergroup_test.go @@ -37,7 +37,6 @@ import ( "time" . "github.com/smartystreets/goconvey/convey" - "github.com/wtsi-ssg/wrstat/v5/internal/encode" ) func TestUsergroup(t *testing.T) { @@ -104,7 +103,7 @@ func TestUsergroup(t *testing.T) { So(errl, ShouldBeNil) So(output, ShouldContainSubstring, os.Getenv("USER")+"\t"+ - g.Name+"\t"+encode.Base64Encode("/a/b/c")+"\t2\t30\n") + g.Name+"\t"+strconv.Quote("/a/b/c")+"\t2\t30\n") So(checkFileIsSorted(outPath), ShouldBeTrue) }) diff --git a/walk/file.go b/walk/file.go index eb6988ca..397b4a00 100644 --- a/walk/file.go +++ b/walk/file.go @@ -30,9 +30,8 @@ import ( "io" "os" "path/filepath" + "strconv" "sync" - - "github.com/wtsi-ssg/wrstat/v5/internal/encode" ) const userOnlyPerm = 0700 @@ -97,13 +96,12 @@ func NewFiles(outDir string, n int) (*Files, error) { // WritePaths returns a PathCallback function suitable for passing to New(). // -// Paths are written base64 encoded 1 per line to our output files in a -// round-robin. +// Paths are written quoted 1 per line to our output files in a round-robin. // // It will terminate the walk if writes to our output files fail. func (f *Files) WritePaths() PathCallback { return func(entry *Dirent) error { - return f.writePath(encode.Base64Encode(entry.Path)) + return f.writePath(strconv.Quote(entry.Path)) } } diff --git a/walk/walk_test.go b/walk/walk_test.go index a8663d09..a04bb94f 100644 --- a/walk/walk_test.go +++ b/walk/walk_test.go @@ -38,7 +38,6 @@ import ( "testing" . 
"github.com/smartystreets/goconvey/convey" - "github.com/wtsi-ssg/wrstat/v5/internal/encode" ) const permNoWrite = 0500 @@ -237,7 +236,7 @@ func prepareTestDirs(t *testing.T) (string, string, map[string]int) { pathsEncoded := make(map[string]int, len(paths)) for k, v := range paths { - pathsEncoded[encode.Base64Encode(k)] = v + pathsEncoded[strconv.Quote(k)] = v } return walkDir, outDir, pathsEncoded @@ -314,7 +313,8 @@ func removeAndSymlink(t *testing.T, path, dest string) { } func testOutputToFiles(ignoreSymlinks bool, walkDir, outDir string, cb ErrorCallback, - expectedPaths map[string]int) (int, int, int) { + expectedPaths map[string]int, +) (int, int, int) { files, err := NewFiles(outDir, 1) So(err, ShouldBeNil)