Skip to content

Commit

Permalink
feat: add --batch-size cli arg
Browse files Browse the repository at this point in the history
Controls the max number of paths
to batch up before applying them to a sequence of formatters.

Signed-off-by: Brian McGee <[email protected]>
  • Loading branch information
brianmcgee committed Jul 3, 2024
1 parent 4cc7e00 commit 976d156
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 12 deletions.
6 changes: 3 additions & 3 deletions cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ type Format struct {
Version bool `name:"version" short:"V" help:"Print version."`
Init bool `name:"init" short:"i" help:"Create a new treefmt.toml."`

Stdin bool `help:"Format the context passed in via stdin."`
OnUnmatched log.Level `name:"on-unmatched" short:"u" default:"warn" help:"Log paths that did not match any formatters at the specified log level, with fatal exiting the process with an error. Possible values are <debug|info|warn|error|fatal>."`
CpuProfile string `optional:"" help:"The file into which a cpu profile will be written."`
BatchSize int `default:"1024" short:"b" help:"Specify the maximum number of paths to apply to a sequence of formatters."`

Paths []string `name:"paths" arg:"" type:"path" optional:"" help:"Paths to format. Defaults to formatting the whole tree."`
Stdin bool `help:"Format the context passed in via stdin."`

CpuProfile string `optional:"" help:"The file into which a cpu profile will be written."`

formatters map[string]*format.Formatter
globalExcludes []glob.Glob
Expand Down
21 changes: 12 additions & 9 deletions cli/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,19 @@ import (
"golang.org/x/sync/errgroup"
)

const (
BatchSize = 1024
var (
ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled")
ErrInvalidBatchSize = errors.New("batch size must be >= 1")
)

var ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled")

func (f *Format) Run() (err error) {
// set log level and other options
f.configureLogging()

if f.BatchSize < 1 {
return ErrInvalidBatchSize
}

// cpu profiling
if f.CpuProfile != "" {
cpuProfile, err := os.Create(f.CpuProfile)
Expand Down Expand Up @@ -146,7 +149,7 @@ func (f *Format) Run() (err error) {

// create a channel for files needing to be processed
// we use a multiple of batch size here as a rudimentary concurrency optimization based on the host machine
f.filesCh = make(chan *walk.File, BatchSize*runtime.NumCPU())
f.filesCh = make(chan *walk.File, f.BatchSize*runtime.NumCPU())

// create a channel for files that have been processed
f.processedCh = make(chan *walk.File, cap(f.filesCh))
Expand All @@ -165,7 +168,7 @@ func (f *Format) Run() (err error) {
func (f *Format) updateCache(ctx context.Context) func() error {
return func() error {
// used to batch updates for more efficient txs
batch := make([]*walk.File, 0, BatchSize)
batch := make([]*walk.File, 0, f.BatchSize)

// apply a batch
processBatch := func() error {
Expand Down Expand Up @@ -212,7 +215,7 @@ func (f *Format) updateCache(ctx context.Context) func() error {

// append to batch and process if we have enough
batch = append(batch, file)
if len(batch) == BatchSize {
if len(batch) == f.BatchSize {
if err := processBatch(); err != nil {
return err
}
Expand Down Expand Up @@ -242,7 +245,7 @@ func (f *Format) updateCache(ctx context.Context) func() error {
func (f *Format) walkFilesystem(ctx context.Context) func() error {
return func() error {
eg, ctx := errgroup.WithContext(ctx)
pathsCh := make(chan string, BatchSize)
pathsCh := make(chan string, f.BatchSize)

// By default, we use the cli arg, but if the stdin flag has been set we force a filesystem walk
// since we will only be processing one file from a temp directory
Expand Down Expand Up @@ -352,7 +355,7 @@ func (f *Format) applyFormatters(ctx context.Context) func() error {
}

// process the batch if it's full, or we've been asked to flush partial batches
if flush || len(batch) == BatchSize {
if flush || len(batch) == f.BatchSize {

// copy the batch as we re-use it for the next batch
tasks := make([]*format.Task, len(batch))
Expand Down
27 changes: 27 additions & 0 deletions cli/format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,33 @@ func TestOnUnmatched(t *testing.T) {
checkOutput("DEBU", out)
}

func TestBatchSize(t *testing.T) {
as := require.New(t)

// capture current cwd, so we can replace it after the test is finished
cwd, err := os.Getwd()
as.NoError(err)

t.Cleanup(func() {
// return to the previous working directory
as.NoError(os.Chdir(cwd))
})

tempDir := test.TempExamples(t)

// 0 batch size
_, err = cmd(t, "-C", tempDir, "--allow-missing-formatter", "-b", "0")
as.ErrorIs(err, ErrInvalidBatchSize)

_, err = cmd(t, "-C", tempDir, "-c", "--allow-missing-formatter", "-b", "1")
as.NoError(err)
assertStats(t, as, 31, 31, 21, 0)

_, err = cmd(t, "-C", tempDir, "-c", "--allow-missing-formatter", "-b", "100")
as.NoError(err)
assertStats(t, as, 31, 31, 21, 0)
}

func TestCpuProfile(t *testing.T) {
as := require.New(t)
tempDir := test.TempExamples(t)
Expand Down

0 comments on commit 976d156

Please sign in to comment.