Skip to content

Commit

Permalink
Replace (custom) base64 encoding of paths with simple quoting. (#100)
Browse files Browse the repository at this point in the history
  • Loading branch information
mjkw31 authored Oct 29, 2024
1 parent 5a09d23 commit c5adb64
Show file tree
Hide file tree
Showing 21 changed files with 520 additions and 467 deletions.
3 changes: 2 additions & 1 deletion basedirs/basedirs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,8 @@ func TestBaseDirs(t *testing.T) {
UIDs: []uint32{101},
FTs: expectedFTsBam,
Modtime: dbModTime,
}}))
},
}))

dcss, err = bd.calculateForGroup(2)
So(err, ShouldBeNil)
Expand Down
34 changes: 19 additions & 15 deletions cmd/stat.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,30 +37,34 @@ import (
"github.com/wtsi-ssg/wrstat/v5/summary"
)

const reportFrequency = 10 * time.Minute
const statOutputFileSuffix = ".stats"
const statUserGroupSummaryOutputFileSuffix = ".byusergroup"
const statGroupSummaryOutputFileSuffix = ".bygroup"
const statDGUTASummaryOutputFileSuffix = ".dguta"
const statLogOutputFileSuffix = ".log"
const lstatTimeout = 10 * time.Second
const lstatAttempts = 3

var statDebug bool
var statCh string
const (
reportFrequency = 10 * time.Minute
statOutputFileSuffix = ".stats"
statUserGroupSummaryOutputFileSuffix = ".byusergroup"
statGroupSummaryOutputFileSuffix = ".bygroup"
statDGUTASummaryOutputFileSuffix = ".dguta"
statLogOutputFileSuffix = ".log"
lstatTimeout = 10 * time.Second
lstatAttempts = 3
)

var (
statDebug bool
statCh string
)

// statCmd represents the stat command.
var statCmd = &cobra.Command{
Use: "stat",
Short: "Stat paths",
Long: `Stat paths in a given file.
Given a file containing a base64 encoded absolute file path per line (eg. as
produced by 'wrstat walk'), this creates a new file with stats for each of those
file paths. The new file is named after the input file with a ".stats" suffix.
Given a file containing a quoted absolute file path per line (eg. as produced
by 'wrstat walk'), this creates a new file with stats for each of those file
paths. The new file is named after the input file with a ".stats" suffix.
The output file format is 11 tab separated columns with the following contents:
1. Base64 encoded path to the file.
1. Quoted path to the file.
2. File size in bytes. If this is greater than the number of bytes in blocks
allocated, this will be the number of bytes in allocated blocks. (This is to
account for files with holes in them; as a byproduct, symbolic links will
Expand Down
26 changes: 14 additions & 12 deletions cmd/walk.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,14 @@ const (
)

// options for this cmd.
var outputDir string
var depGroup string
var walkInodesPerJob int
var walkNumOfJobs int
var walkID string
var walkCh string
var (
outputDir string
depGroup string
walkInodesPerJob int
walkNumOfJobs int
walkID string
walkCh string
)

// walkCmd represents the walk command.
var walkCmd = &cobra.Command{
Expand All @@ -68,11 +70,10 @@ user that can sudo without a password when running wrstat, and supply the --sudo
option to this command.
For each entry recursively within the directory of interest, their paths are
quickly retrieved (without doing any expensive stat calls) and written (base64
encoded) to output files in the given output directory. The number of files is
such that they will each contain about --inodes_per_stat entries (or if
--num_stats was supplied greater than zero, then there will be that number of
output files).
quickly retrieved (without doing any expensive stat calls) and written (quoted)
to output files in the given output directory. The number of files is such that
they will each contain about --inodes_per_stat entries (or if --num_stats was
supplied greater than zero, then there will be that number of output files).
For each output file, a 'wrstat stat' job is then added to wr's queue with the
given dependency group. For the meaning of the --ch option which is passed
Expand Down Expand Up @@ -149,7 +150,8 @@ func statRepGrp(dir, unique string) string {

// walkDirAndScheduleStats does the main work.
func walkDirAndScheduleStats(desiredDir, outputDir string, statJobs, inodes int, depGroup, repGroup,
yamlPath string, s *scheduler.Scheduler) {
yamlPath string, s *scheduler.Scheduler,
) {
n := statJobs
if n == 0 {
n = calculateSplitBasedOnInodes(inodes, desiredDir)
Expand Down
24 changes: 15 additions & 9 deletions combine/dguta_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"testing"
"time"

. "github.com/smartystreets/goconvey/convey"
"github.com/wtsi-ssg/wrstat/v5/dguta"
"github.com/wtsi-ssg/wrstat/v5/fs"
"github.com/wtsi-ssg/wrstat/v5/internal/encode"
"github.com/wtsi-ssg/wrstat/v5/summary"
)

Expand Down Expand Up @@ -172,18 +172,23 @@ func TestOldFile(t *testing.T) {
So(ds.Mtime, ShouldEqual, time.Unix(amtime2, 0))

Convey("and the DirGUTAges are set as expected", func() {
expectedSizes := [17]int64{tfs, tfs, tfs, tfs, tfs, tfs, tfs, tfs - 2,
tfs - 3, tfs, tfs, tfs, tfs, tfs, tfs, tfs - 6, tfs - 7}
expectedSizes := [17]int64{
tfs, tfs, tfs, tfs, tfs, tfs, tfs, tfs - 2,
tfs - 3, tfs, tfs, tfs, tfs, tfs, tfs, tfs - 6, tfs - 7,
}

expectedCounts := [17]int{expectedCount, expectedCount, expectedCount,
expectedCounts := [17]int{
expectedCount, expectedCount, expectedCount,
expectedCount, expectedCount, expectedCount, expectedCount,
expectedCount - 1, expectedCount - 2, expectedCount, expectedCount,
expectedCount, expectedCount, expectedCount, expectedCount,
expectedCount - 2, expectedCount - 3}
expectedCount - 2, expectedCount - 3,
}

expectedAtime := amtime3 - 1

expectedMtimes := [17]int64{amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime2,
expectedMtimes := [17]int64{
amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime2,
amtime2, amtime2, amtime2, amtime2, amtime2, amtime2, amtime1, amtime3,
}

Expand Down Expand Up @@ -250,7 +255,8 @@ func createDGUTAFile(t *testing.T, tempDir, fileName, content string) string {
// 1668768807 /lustre 1313 13912 0 0 1 0 1668768807
// /lustre/scratch123 1313 13912 0 0 1 0 1668768807.
func buildDGUTAContent(directory, gid, uid string, filetype, nestedFiles, //nolint:unparam
fileSize, oldestAtime, newestAtime, refTime int64) string {
fileSize, oldestAtime, newestAtime, refTime int64,
) string {
var dgutaContents string

splitDir := recursivePath(directory)
Expand All @@ -263,7 +269,7 @@ func buildDGUTAContent(directory, gid, uid string, filetype, nestedFiles, //noli
continue
}

dgutaContents += encode.Base64Encode(split) + guta +
dgutaContents += strconv.Quote(split) + guta +
fmt.Sprintf("\t%d\t%d\t%d\t%d\n",
nestedFiles, fileSize, oldestAtime, newestAtime)
}
Expand All @@ -285,7 +291,7 @@ func recursivePath(path string) []string {
count := strings.Count(path, "/")
newPath := path

var dgutaContents = make([]string, count+1)
dgutaContents := make([]string, count+1)
dgutaContents[count] = path

for i := count - 1; i >= 0; i-- {
Expand Down
7 changes: 3 additions & 4 deletions combine/stat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ import (
"fmt"
"os"
"path/filepath"
"strconv"
"testing"

. "github.com/smartystreets/goconvey/convey"
"github.com/wtsi-ssg/wrstat/v5/fs"
"github.com/wtsi-ssg/wrstat/v5/internal/encode"
)

// TestStatFiles tests that the stat files concatenate and compress properly.
Expand All @@ -53,7 +53,7 @@ func TestStatFiles(t *testing.T) {
actualContent, err := fs.ReadCompressedFile(outputPath)
So(err, ShouldBeNil)

encodedDir := encode.Base64Encode(dir)
encodedDir := strconv.Quote(dir)

expectedOutput := fmt.Sprintf(
"%s\t5\t345\t152\t217434\t82183\t147\t'f'\t3\t7\t28472\t\n"+
Expand Down Expand Up @@ -82,7 +82,7 @@ func buildStatFiles(t *testing.T) (string, []*os.File, *os.File, string) {

_, err = f.WriteString(fmt.Sprintf(
"%s\t%d\t%d\t%d\t%d\t%d\t%d\t%q\t%d\t%d\t%d\t\n",
encode.Base64Encode(dir),
strconv.Quote(dir),
5+i,
345,
152,
Expand All @@ -93,7 +93,6 @@ func buildStatFiles(t *testing.T) (string, []*os.File, *os.File, string) {
3+i,
7,
28472))

if err != nil {
t.Fatal(err)
}
Expand Down
Loading

0 comments on commit c5adb64

Please sign in to comment.