Skip to content

Commit

Permalink
Fix steps to work with paths with \n \t in them (#93)
Browse files Browse the repository at this point in the history
* Use custom, lexically ordered, base64 encoding for all paths that will be read by WRStat.
  • Loading branch information
mjkw31 authored Sep 25, 2024
1 parent 63865b8 commit 90e08f0
Show file tree
Hide file tree
Showing 12 changed files with 241 additions and 178 deletions.
3 changes: 2 additions & 1 deletion combine/dgut_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
. "github.com/smartystreets/goconvey/convey"
"github.com/wtsi-ssg/wrstat/v5/dgut"
"github.com/wtsi-ssg/wrstat/v5/fs"
"github.com/wtsi-ssg/wrstat/v5/internal/encode"
"github.com/wtsi-ssg/wrstat/v5/summary"
)

Expand Down Expand Up @@ -121,7 +122,7 @@ func buildDGUTContent(directory, gid, uid string, filetype, nestedFiles,
splitDir := recursivePath(directory)

for _, split := range splitDir {
DGUTContents += split + fmt.Sprintf("\t%s\t%s\t%d\t%d\t%d\t%d\t%d\n",
DGUTContents += encode.Base64Encode(split) + fmt.Sprintf("\t%s\t%s\t%d\t%d\t%d\t%d\t%d\n",
gid, uid, filetype, nestedFiles, fileSize, oldestAtime, newestAtime)
}

Expand Down
6 changes: 3 additions & 3 deletions combine/stat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@
package combine

import (
b64 "encoding/base64"
"fmt"
"os"
"path/filepath"
"testing"

. "github.com/smartystreets/goconvey/convey"
"github.com/wtsi-ssg/wrstat/v5/fs"
"github.com/wtsi-ssg/wrstat/v5/internal/encode"
)

// TestStatFiles tests that the stat files concatenate and compress properly.
Expand All @@ -53,7 +53,7 @@ func TestStatFiles(t *testing.T) {
actualContent, err := fs.ReadCompressedFile(outputPath)
So(err, ShouldBeNil)

encodedDir := b64.StdEncoding.EncodeToString([]byte(dir))
encodedDir := encode.Base64Encode(dir)

expectedOutput := fmt.Sprintf(
"%s\t5\t345\t152\t217434\t82183\t147\t'f'\t3\t7\t28472\t\n"+
Expand Down Expand Up @@ -82,7 +82,7 @@ func buildStatFiles(t *testing.T) (string, []*os.File, *os.File, string) {

_, err = f.WriteString(fmt.Sprintf(
"%s\t%d\t%d\t%d\t%d\t%d\t%d\t%q\t%d\t%d\t%d\t\n",
b64.StdEncoding.EncodeToString([]byte(dir)),
encode.Base64Encode(dir),
5+i,
345,
152,
Expand Down
25 changes: 13 additions & 12 deletions dgut/dgut_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,41 +34,42 @@ import (
. "github.com/smartystreets/goconvey/convey"
"github.com/ugorji/go/codec"
internaldata "github.com/wtsi-ssg/wrstat/v5/internal/data"
"github.com/wtsi-ssg/wrstat/v5/internal/encode"
"github.com/wtsi-ssg/wrstat/v5/summary"
bolt "go.etcd.io/bbolt"
)

func TestDGUT(t *testing.T) {
Convey("You can parse a single line of dgut data", t, func() {
line := "/\t1\t101\t0\t3\t30\t50\t50\n"
line := encode.Base64Encode("/") + "\t1\t101\t0\t3\t30\t50\t50\n"
dir, gut, err := parseDGUTLine(line)
So(err, ShouldBeNil)
So(dir, ShouldEqual, "/")
So(gut, ShouldResemble, &GUT{GID: 1, UID: 101, FT: 0, Count: 3, Size: 30, Atime: 50, Mtime: 50})

Convey("But invalid data won't parse", func() {
_, _, err = parseDGUTLine("/\t1\t101\t0\t3\t50\t50\n")
_, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\t101\t0\t3\t50\t50\n")
So(err, ShouldEqual, ErrInvalidFormat)

_, _, err = parseDGUTLine("/\tfoo\t101\t0\t3\t30\t50\t50\n")
_, _, err = parseDGUTLine(encode.Base64Encode("/") + "\tfoo\t101\t0\t3\t30\t50\t50\n")
So(err, ShouldEqual, ErrInvalidFormat)

_, _, err = parseDGUTLine("/\t1\tfoo\t0\t3\t30\t50\t50\n")
_, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\tfoo\t0\t3\t30\t50\t50\n")
So(err, ShouldEqual, ErrInvalidFormat)

_, _, err = parseDGUTLine("/\t1\t101\tfoo\t3\t30\t50\t50\n")
_, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\t101\tfoo\t3\t30\t50\t50\n")
So(err, ShouldEqual, ErrInvalidFormat)

_, _, err = parseDGUTLine("/\t1\t101\t0\tfoo\t30\t50\t50\n")
_, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\t101\t0\tfoo\t30\t50\t50\n")
So(err, ShouldEqual, ErrInvalidFormat)

_, _, err = parseDGUTLine("/\t1\t101\t0\t3\tfoo\t50\t50\n")
_, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\t101\t0\t3\tfoo\t50\t50\n")
So(err, ShouldEqual, ErrInvalidFormat)

_, _, err = parseDGUTLine("/\t1\t101\t0\t3\t30\tfoo\t50\n")
_, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\t101\t0\t3\t30\tfoo\t50\n")
So(err, ShouldEqual, ErrInvalidFormat)

_, _, err = parseDGUTLine("/\t1\t101\t0\t3\t30\t50\tfoo\n")
_, _, err = parseDGUTLine(encode.Base64Encode("/") + "\t1\t101\t0\t3\t30\t50\tfoo\n")
So(err, ShouldEqual, ErrInvalidFormat)

So(err.Error(), ShouldEqual, "the provided data was not in dgut format")
Expand Down Expand Up @@ -379,9 +380,9 @@ func TestDGUT(t *testing.T) {
})

Convey("Store()ing multiple times", func() {
data = strings.NewReader("/\t3\t103\t7\t2\t2\t25\t25\n" +
"/a/i\t3\t103\t7\t1\t1\t25\t25\n" +
"/i\t3\t103\t7\t1\t1\t30\t30\n")
data = strings.NewReader(encode.Base64Encode("/") + "\t3\t103\t7\t2\t2\t25\t25\n" +
encode.Base64Encode("/a/i") + "\t3\t103\t7\t1\t1\t25\t25\n" +
encode.Base64Encode("/i") + "\t3\t103\t7\t1\t1\t30\t30\n")

Convey("to the same db file doesn't work", func() {
err = db.Store(data, 4)
Expand Down
8 changes: 7 additions & 1 deletion dgut/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"strconv"
"strings"

"github.com/wtsi-ssg/wrstat/v5/internal/encode"
"github.com/wtsi-ssg/wrstat/v5/summary"
)

Expand Down Expand Up @@ -113,12 +114,17 @@ func parseDGUTLine(line string) (string, *GUT, error) {
return "", nil, ErrBlankLine
}

path, err := encode.Base64Decode(parts[0])
if err != nil {
return "", nil, err
}

ints, err := gutLinePartsToInts(parts)
if err != nil {
return "", nil, err
}

return parts[0], &GUT{
return path, &GUT{
GID: uint32(ints[0]),
UID: uint32(ints[1]),
FT: summary.DirGUTFileType(ints[2]),
Expand Down
21 changes: 11 additions & 10 deletions dgut/tree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (

. "github.com/smartystreets/goconvey/convey"
internaldata "github.com/wtsi-ssg/wrstat/v5/internal/data"
"github.com/wtsi-ssg/wrstat/v5/internal/encode"
"github.com/wtsi-ssg/wrstat/v5/internal/fs"
"github.com/wtsi-ssg/wrstat/v5/internal/split"
"github.com/wtsi-ssg/wrstat/v5/summary"
Expand Down Expand Up @@ -227,23 +228,23 @@ func TestTree(t *testing.T) {
So(err, ShouldBeNil)

db := NewDB(paths1[0])
data := strings.NewReader("/\t1\t11\t6\t1\t1\t20\t20\n" +
"/a\t1\t11\t6\t1\t1\t20\t20\n" +
"/a/b\t1\t11\t6\t1\t1\t20\t20\n" +
"/a/b/c\t1\t11\t6\t1\t1\t20\t20\n" +
"/a/b/c/d\t1\t11\t6\t1\t1\t20\t20\n")
data := strings.NewReader(encode.Base64Encode("/") + "\t1\t11\t6\t1\t1\t20\t20\n" +
encode.Base64Encode("/a") + "\t1\t11\t6\t1\t1\t20\t20\n" +
encode.Base64Encode("/a/b") + "\t1\t11\t6\t1\t1\t20\t20\n" +
encode.Base64Encode("/a/b/c") + "\t1\t11\t6\t1\t1\t20\t20\n" +
encode.Base64Encode("/a/b/c/d") + "\t1\t11\t6\t1\t1\t20\t20\n")
err = db.Store(data, 20)
So(err, ShouldBeNil)

paths2, err := testMakeDBPaths(t)
So(err, ShouldBeNil)

db = NewDB(paths2[0])
data = strings.NewReader("/\t1\t11\t6\t1\t1\t15\t15\n" +
"/a\t1\t11\t6\t1\t1\t15\t15\n" +
"/a/b\t1\t11\t6\t1\t1\t15\t15\n" +
"/a/b/c\t1\t11\t6\t1\t1\t15\t15\n" +
"/a/b/c/e\t1\t11\t6\t1\t1\t15\t15\n")
data = strings.NewReader(encode.Base64Encode("/") + "\t1\t11\t6\t1\t1\t15\t15\n" +
encode.Base64Encode("/a") + "\t1\t11\t6\t1\t1\t15\t15\n" +
encode.Base64Encode("/a/b") + "\t1\t11\t6\t1\t1\t15\t15\n" +
encode.Base64Encode("/a/b/c") + "\t1\t11\t6\t1\t1\t15\t15\n" +
encode.Base64Encode("/a/b/c/e") + "\t1\t11\t6\t1\t1\t15\t15\n")
err = db.Store(data, 20)
So(err, ShouldBeNil)

Expand Down
34 changes: 31 additions & 3 deletions internal/encode/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,37 @@

package encode

import "encoding/base64"
import (
"encoding/base64"
"unsafe"
)

// Base64Encode encodes the given string in base64.
var encoding = base64.NewEncoding("+/0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") //nolint:gochecknoglobals,lll

// Base64Encode encodes the given string using a lexically ordered base64 alphabet.
func Base64Encode(val string) string {
return base64.StdEncoding.EncodeToString([]byte(val))
if val == "" {
return ""
}

buf := make([]byte, encoding.EncodedLen(len(val)))

encoding.Encode(buf, unsafe.Slice(unsafe.StringData(val), len(val)))

return unsafe.String(&buf[0], len(buf))
}

// Base64Decode decodes the given encoded string from a lexically ordered Base64
// encoding.
//
// An empty input decodes to an empty string. An error is returned, unwrapped,
// if val is not valid under the package's encoding.
func Base64Decode(val string) (string, error) {
	if val == "" {
		return "", nil
	}

	str, err := encoding.DecodeString(val)
	if err != nil {
		return "", err
	}

	// The base64 decoder skips '\r' and '\n', so a non-empty input such as
	// "\n" decodes to zero bytes without an error; taking &str[0] would then
	// panic with an index-out-of-range.
	if len(str) == 0 {
		return "", nil
	}

	// str is not referenced again after this point, so its backing array is
	// reinterpreted as the returned string rather than copied.
	return unsafe.String(&str[0], len(str)), nil
}
17 changes: 17 additions & 0 deletions internal/encode/encode_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package encode

import "testing"

// TestEncodeDecode checks that passing a value through Base64Encode and then
// Base64Decode returns the original value without error, for a small table of
// inputs that includes the empty string.
func TestEncodeDecode(t *testing.T) {
	inputs := []string{
		"",
		"abc",
		"/some/path/",
	}

	for idx, want := range inputs {
		// Test cases are reported using 1-based numbering.
		caseNum := idx + 1

		got, err := Base64Decode(Base64Encode(want))

		switch {
		case err != nil:
			t.Errorf("test %d: unexpected error: %s", caseNum, err)
		case got != want:
			t.Errorf("test %d: expected output %q, got %q", caseNum, want, got)
		}
	}
}
Loading

0 comments on commit 90e08f0

Please sign in to comment.