Skip to content

Commit

Permalink
Create new filePath type that uses a sync.Pool to avoid string alloca…
Browse files Browse the repository at this point in the history
…tions
  • Loading branch information
mjkw31 committed Nov 27, 2024
1 parent 10fc1d5 commit aefa764
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 20 deletions.
57 changes: 54 additions & 3 deletions walk/dirent.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,65 @@ package walk
import (
"io/fs"
"os"
"sync"
"unsafe"

"github.com/wtsi-hgi/godirwalk"
)

// maxPathLength is the size, in bytes, of the fixed buffer carried by every
// filePath; a path longer than this cannot be held in full.
const maxPathLength = 4096

// filepathPool recycles filePath buffers so that building a path can reuse an
// existing buffer instead of allocating. When the pool is empty it supplies a
// freshly zeroed filePath.
var filepathPool = sync.Pool{ //nolint:gochecknoglobals
	New: func() any { return &filePath{} },
}

// filePath is a path stored in a fixed-size byte buffer, intended to be
// obtained from and returned to filepathPool rather than allocated per path.
type filePath struct {
// buf holds the path bytes; only the first len bytes are meaningful.
buf [maxPathLength]byte
// len is the number of valid bytes at the start of buf.
len int
}

// newFilePath takes a filePath from filepathPool and fills it with a copy of
// path. At most maxPathLength bytes are copied, so a longer path is silently
// truncated to fit the buffer. Call Done on the result to hand the buffer back
// to the pool.
func newFilePath(path string) *filePath {
	fp := filepathPool.Get().(*filePath) //nolint:errcheck,forcetypeassert

	// copy reports how many bytes it actually wrote, which is the new length.
	written := copy(fp.buf[:], path)
	fp.len = written

	return fp
}

// Done empties f and gives it back to filepathPool for reuse. After calling
// Done, neither f nor anything obtained from its Bytes or String methods may
// be used: the buffer can be handed to another caller and overwritten.
func (f *filePath) Done() {
	f.len = 0 // reset before the buffer becomes visible to other pool users
	filepathPool.Put(f)
}

// Sub returns a new filePath, taken from filepathPool, holding f's path
// followed by the name of d, plus a trailing '/' when d is a directory. f
// itself is not modified. Call Done on the result to return it to the pool.
//
// The result never exceeds maxPathLength bytes: anything that does not fit
// (part of the name, or the trailing '/') is dropped, matching the truncating
// behaviour of newFilePath. Writing with bounded copies rather than append is
// deliberate: append on a full buffer would reallocate, leaving the pooled
// buffer without the new bytes while recording a length past its end, which
// makes later Bytes calls panic and String read out of bounds.
func (f *filePath) Sub(d *godirwalk.Dirent) *filePath {
	c := filepathPool.Get().(*filePath) //nolint:errcheck,forcetypeassert

	// Parent path first, then as much of the child name as fits.
	n := copy(c.buf[:], f.buf[:f.len])
	n += copy(c.buf[n:], d.Name())

	// Directories are marked with a trailing separator when there is room.
	if d.IsDir() && n < maxPathLength {
		c.buf[n] = '/'
		n++
	}

	c.len = n

	return c
}

// Bytes returns the path as a byte slice. The slice is a view onto f's
// internal buffer, not a copy, so it is only meaningful until f is changed or
// handed back to the pool with Done.
func (f *filePath) Bytes() []byte {
	end := f.len

	return f.buf[:end]
}

// String returns the path as a string without copying: the string shares
// memory with f's internal buffer. It must therefore only be used while f is
// still live and unmodified; once f has been passed to Done the buffer may be
// reused and the string's contents can change. Callers that need to keep the
// path should copy it first, e.g. string(f.Bytes()).
func (f *filePath) String() string {
	start := &f.buf[0]

	return unsafe.String(start, f.len)
}

// Dirent represents a file system directory entry (a file or a directory),
// providing information about the entry's path, type and inode.
type Dirent struct {
// Path is the complete path to the directory entry (including both
// directory and basename)
Path string
Path *filePath

// Type is the type bits of the file mode of this entry.
Type os.FileMode
Expand All @@ -46,8 +97,8 @@ type Dirent struct {

// newDirentForDirectoryPath returns a Dirent for the given directory, with
// a Type for directories and no Inode.
func newDirentForDirectoryPath(dir string) *Dirent {
return &Dirent{Path: dir, Type: fs.ModeDir}
func newDirentForDirectoryPath(dir string) Dirent {
return Dirent{Path: newFilePath(dir), Type: fs.ModeDir}
}

// IsDir returns true if we are a directory.
Expand Down
2 changes: 1 addition & 1 deletion walk/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func (f *Files) WritePaths() PathCallback {
var quoted [10240]byte

return func(entry *Dirent) error {
return f.writePath(append(strconv.AppendQuote(quoted[:0], entry.Path), '\n'))
return f.writePath(append(strconv.AppendQuote(quoted[:0], entry.Path.String()), '\n'))
}
}

Expand Down
29 changes: 13 additions & 16 deletions walk/walk.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ package walk

import (
"context"
"io/fs"
"os"
"path/filepath"
"slices"
Expand Down Expand Up @@ -79,7 +78,7 @@ func New(cb PathCallback, includDirs, ignoreSymlinks bool) *Walker {
type ErrorCallback func(path string, err error)

type pathRequest struct {
path string
path *filePath
response chan []Dirent
}

Expand Down Expand Up @@ -112,7 +111,7 @@ func (w *Walker) Walk(dir string, errCB ErrorCallback) error {
}

go func() {
walkDirectory(ctx, Dirent{Path: dir, Type: fs.ModeDir},
walkDirectory(ctx, newDirentForDirectoryPath(dir),
flowControl, createPathRequestor(requestCh), w.sendDirs)
close(direntCh)
}()
Expand All @@ -125,8 +124,8 @@ func (w *Walker) Walk(dir string, errCB ErrorCallback) error {
return w.sendDirentsToPathCallback(direntCh)
}

func createPathRequestor(requestCh chan *pathRequest) func(string) []Dirent {
return func(path string) []Dirent {
func createPathRequestor(requestCh chan *pathRequest) func(*filePath) []Dirent {
return func(path *filePath) []Dirent {
pr := pathRequestPool.Get().(*pathRequest) //nolint:errcheck,forcetypeassert
defer pathRequestPool.Put(pr)

Expand All @@ -143,6 +142,8 @@ func (w *Walker) sendDirentsToPathCallback(direntCh <-chan Dirent) error {
if err := w.pathCB(&dirent); err != nil {
return err
}

dirent.Path.Done()
}

return nil
Expand All @@ -151,7 +152,7 @@ func (w *Walker) sendDirentsToPathCallback(direntCh <-chan Dirent) error {
type heap []*pathRequest

func pathCompare(a, b *pathRequest) int {
return strings.Compare(b.path, a.path)
return strings.Compare(b.path.String(), a.path.String())
}

func (h *heap) Insert(req *pathRequest) {
Expand Down Expand Up @@ -205,30 +206,26 @@ Loop:
case <-ctx.Done():
break Loop
case request := <-requests:
children, err := godirwalk.ReadDirents(request.path, buffer)
children, err := godirwalk.ReadDirents(request.path.String(), buffer)
if err != nil {
errCB(request.path, err)
errCB(string(request.path.Bytes()), err)
}

request.response <- w.childrenToDirents(children, request.path)
}
}
}

func (w *Walker) childrenToDirents(children godirwalk.Dirents, parent string) []Dirent {
func (w *Walker) childrenToDirents(children godirwalk.Dirents, parent *filePath) []Dirent {
dirents := make([]Dirent, 0, len(children))

for _, child := range children {
dirent := Dirent{
Path: filepath.Join(parent, child.Name()),
Path: parent.Sub(child),
Type: child.ModeType(),
Inode: child.Inode(),
}

if dirent.IsDir() {
dirent.Path += "/"
}

if w.ignoreSymlinks && dirent.IsSymlink() {
continue
}
Expand All @@ -237,7 +234,7 @@ func (w *Walker) childrenToDirents(children godirwalk.Dirents, parent string) []
}

sort.Slice(dirents, func(i, j int) bool {
return dirents[i].Path < dirents[j].Path
return dirents[i].Path.String() < dirents[j].Path.String()
})

return dirents
Expand Down Expand Up @@ -274,7 +271,7 @@ var controllerPool = sync.Pool{ //nolint:gochecknoglobals
}

func walkDirectory(ctx context.Context, dirent Dirent,
flowControl *flowController, request func(string) []Dirent, sendDirs bool) {
flowControl *flowController, request func(*filePath) []Dirent, sendDirs bool) {
children := request(dirent.Path)
childChans := make([]*flowController, len(children))
control := flowControl.GetControl()
Expand Down

0 comments on commit aefa764

Please sign in to comment.