From 4f1f3d6dbc376f4b0b0a82e3d755ebbf960601f2 Mon Sep 17 00:00:00 2001
From: Brian McGee <brian@bmcgee.ie>
Date: Mon, 14 Oct 2024 10:48:16 +0100
Subject: [PATCH] fix: do not cache formatting errors

This was a regression introduced when the new `walk.Reader` approach was applied.

When a formatter errors out during processing, we should not update those files in the cache to ensure they are retried during later invocations.

I've extended the `walk.ReleaseFunc` to accept an optional `error` which can be used to indicate the file participated in a batch that had errors when being formatted.

This allows the hook implementation to decide what it should do.

Closes #449

Signed-off-by: Brian McGee <brian@bmcgee.ie>
---
 cmd/format/format.go | 18 ++++++++++--
 cmd/root_test.go     | 65 ++++++++++++++++++++++++++++++++++++++++++--
 walk/cached.go       | 22 +++++++++------
 walk/cached_test.go  |  2 +-
 walk/stdin.go        | 32 ++++++++++++----------
 walk/walk.go         | 10 +++----
 6 files changed, 116 insertions(+), 33 deletions(-)

diff --git a/cmd/format/format.go b/cmd/format/format.go
index 8a69b98e..46eb6eed 100644
--- a/cmd/format/format.go
+++ b/cmd/format/format.go
@@ -377,7 +377,7 @@ func applyFormatters(
 
 			if release {
 				// release the file as there is no more processing to be done on it
-				if err := file.Release(); err != nil {
+				if err := file.Release(nil); err != nil {
 					return fmt.Errorf("failed to release file: %w", err)
 				}
 			}
@@ -420,6 +420,20 @@ func postProcessing(
 				// grab the underlying file reference
 				file := task.File
 
+				// check if there were any errors processing the file
+				if len(task.Errors) > 0 {
+					// release the file, passing the first task error
+					// note: task errors are related to the batch in which a task was applied
+					// this does not necessarily indicate this file had a problem being formatted, but this approach
+					// serves our purpose for now of indicating some sort of error condition to the release hooks
+					if err := file.Release(task.Errors[0]); err != nil {
+						return fmt.Errorf("failed to release file: %w", err)
+					}
+
+					// continue processing next task
+					continue
+				}
+
 				// check if the file has changed
 				changed, newInfo, err := file.Stat()
 				if err != nil {
@@ -451,7 +465,7 @@ func postProcessing(
 					file.Info = newInfo
 				}
 
-				if err := file.Release(); err != nil {
+				if err := file.Release(nil); err != nil {
 					return fmt.Errorf("failed to release file: %w", err)
 				}
 			}
diff --git a/cmd/root_test.go b/cmd/root_test.go
index b037f4d7..62bc7452 100644
--- a/cmd/root_test.go
+++ b/cmd/root_test.go
@@ -473,6 +473,58 @@ func TestCache(t *testing.T) {
 		stats.Formatted: 32,
 		stats.Changed:   0,
 	})
+
+	// test that formatting errors are not cached
+
+	// update the config with a failing formatter
+	cfg = &config.Config{
+		FormatterConfigs: map[string]*config.Formatter{
+			// fails to execute
+			"fail": {
+				Command:  "touch",
+				Options:  []string{"--bad-arg"},
+				Includes: []string{"*.hs"},
+			},
+		},
+	}
+	test.WriteConfig(t, configPath, cfg)
+
+	// running should match but not format anything
+	_, statz, err = treefmt(t, "--config-file", configPath, "--tree-root", tempDir)
+	as.NoError(err)
+
+	assertStats(t, as, statz, map[stats.Type]int32{
+		stats.Traversed: 32,
+		stats.Matched:   6,
+		stats.Formatted: 0,
+		stats.Changed:   0,
+	})
+
+	// running again should provide the same result
+	_, statz, err = treefmt(t, "--config-file", configPath, "--tree-root", tempDir, "-vv")
+	as.NoError(err)
+
+	assertStats(t, as, statz, map[stats.Type]int32{
+		stats.Traversed: 32,
+		stats.Matched:   6,
+		stats.Formatted: 0,
+		stats.Changed:   0,
+	})
+
+	// let's fix the haskell config so it no longer fails
+	cfg.FormatterConfigs["fail"].Options = nil
+	test.WriteConfig(t, configPath, cfg)
+
+	// we should now format the haskell files
+	_, statz, err = treefmt(t, "--config-file", configPath, "--tree-root", tempDir)
+	as.NoError(err)
+
+	assertStats(t, as, statz, map[stats.Type]int32{
+		stats.Traversed: 32,
+		stats.Matched:   6,
+		stats.Formatted: 6,
+		stats.Changed:   6,
+	})
 }
 
 func TestChangeWorkingDirectory(t *testing.T) {
@@ -547,9 +599,6 @@ func TestFailOnChange(t *testing.T) {
 	_, _, err := treefmt(t, "--fail-on-change", "--config-file", configPath, "--tree-root", tempDir)
 	as.ErrorIs(err, format2.ErrFailOnChange)
 
-	// we have second precision mod time tracking
-	time.Sleep(time.Second)
-
 	// test with no cache
 	t.Setenv("TREEFMT_FAIL_ON_CHANGE", "true")
 	test.WriteConfig(t, configPath, cfg)
@@ -1204,6 +1253,16 @@ func treefmt(t *testing.T, args ...string) ([]byte, *stats.Stats, error) {
 	root.SetOut(tempOut)
 	root.SetErr(tempOut)
 
+	// record the start time
+	start := time.Now()
+
+	defer func() {
+		// Wait until we tick over into the next second before continuing.
+		// This ensures we correctly detect changes within tests as treefmt compares modtime at second level precision.
+		waitUntil := start.Truncate(time.Second).Add(time.Second)
+		time.Sleep(time.Until(waitUntil))
+	}()
+
 	if err := root.Execute(); err != nil {
 		return nil, nil, err
 	}
diff --git a/walk/cached.go b/walk/cached.go
index 50a0e82a..aa396c87 100644
--- a/walk/cached.go
+++ b/walk/cached.go
@@ -24,8 +24,9 @@ type CachedReader struct {
 	delegate Reader
 
 	eg *errgroup.Group
-	// releaseCh contains files which have been released after processing and can be updated in the cache.
-	releaseCh chan *File
+
+	// updateCh contains files which have been released after processing and should be updated in the cache.
+	updateCh chan *File
 }
 
 // process updates cached file entries by batching file updates and flushing them to the database periodically.
@@ -59,7 +60,7 @@ func (c *CachedReader) process() error {
 		})
 	}
 
-	for file := range c.releaseCh {
+	for file := range c.updateCh {
 		batch = append(batch, file)
 		if len(batch) == c.batchSize {
 			if err := flush(); err != nil {
@@ -94,9 +95,14 @@ func (c *CachedReader) Read(ctx context.Context, files []*File) (n int, err erro
 				return err
 			}
 
-			// set a release function which inserts this file into the release channel for updating
-			file.AddReleaseFunc(func() error {
-				c.releaseCh <- file
+			// set a release function which inserts this file into the update channel
+			file.AddReleaseFunc(func(formatErr error) error {
+				// in the event of a formatting error, we do not want to update this file in the cache
+				// this ensures later invocations will try and re-format this file
+				if formatErr == nil {
+					c.updateCh <- file
+				}
+
 				return nil
 			})
 		}
@@ -116,7 +122,7 @@ func (c *CachedReader) Read(ctx context.Context, files []*File) (n int, err erro
 // Close waits for any processing to complete.
 func (c *CachedReader) Close() error {
 	// close the release channel
-	close(c.releaseCh)
+	close(c.updateCh)
 
 	// wait for any pending releases to be processed
 	return c.eg.Wait()
@@ -138,7 +144,7 @@ func NewCachedReader(db *bolt.DB, batchSize int, delegate Reader) (*CachedReader
 		delegate:  delegate,
 		log:       log.WithPrefix("walk[cache]"),
 		eg:        eg,
-		releaseCh: make(chan *File, batchSize*runtime.NumCPU()),
+		updateCh:  make(chan *File, batchSize*runtime.NumCPU()),
 	}
 
 	// start the processing loop
diff --git a/walk/cached_test.go b/walk/cached_test.go
index b26273a2..37bf0e75 100644
--- a/walk/cached_test.go
+++ b/walk/cached_test.go
@@ -50,7 +50,7 @@ func TestCachedReader(t *testing.T) {
 					changeCount++
 				}
 
-				as.NoError(file.Release())
+				as.NoError(file.Release(nil))
 			}
 
 			cancel()
diff --git a/walk/stdin.go b/walk/stdin.go
index 385e60e0..1f86fc09 100644
--- a/walk/stdin.go
+++ b/walk/stdin.go
@@ -54,22 +54,26 @@ func (s StdinReader) Read(_ context.Context, files []*File) (n int, err error) {
 	}
 
 	// dump the temp file to stdout and remove it once the file is finished being processed
-	files[0].AddReleaseFunc(func() error {
-		// open the temp file
-		file, err := os.Open(file.Name())
-		if err != nil {
-			return fmt.Errorf("failed to open temp file %s: %w", file.Name(), err)
-		}
-
-		// dump file into stdout
-		if _, err = io.Copy(os.Stdout, file); err != nil {
-			return fmt.Errorf("failed to copy %s to stdout: %w", file.Name(), err)
-		}
-
-		if err = file.Close(); err != nil {
-			return fmt.Errorf("failed to close temp file %s: %w", file.Name(), err)
+	files[0].AddReleaseFunc(func(formatErr error) error {
+		// if formatting was successful, we dump its contents into os.Stdout
+		if formatErr == nil {
+			// open the temp file
+			file, err := os.Open(file.Name())
+			if err != nil {
+				return fmt.Errorf("failed to open temp file %s: %w", file.Name(), err)
+			}
+
+			// dump file into stdout
+			if _, err = io.Copy(os.Stdout, file); err != nil {
+				return fmt.Errorf("failed to copy %s to stdout: %w", file.Name(), err)
+			}
+
+			if err = file.Close(); err != nil {
+				return fmt.Errorf("failed to close temp file %s: %w", file.Name(), err)
+			}
 		}
 
+		// clean up the temp file
 		if err = os.Remove(file.Name()); err != nil {
 			return fmt.Errorf("failed to remove temp file %s: %w", file.Name(), err)
 		}
diff --git a/walk/walk.go b/walk/walk.go
index 7805a438..a32ede8a 100644
--- a/walk/walk.go
+++ b/walk/walk.go
@@ -27,7 +27,7 @@ const (
 	BatchSize = 1024
 )
 
-type ReleaseFunc func() error
+type ReleaseFunc func(formatErr error) error
 
 // File represents a file object with its path, relative path, file info, and potential cache entry.
 type File struct {
@@ -41,11 +41,11 @@ type File struct {
 	releaseFuncs []ReleaseFunc
 }
 
-// Release invokes all registered release functions for the File.
-// If any release function returns an error, Release stops and returns that error.
-func (f *File) Release() error {
+// Release calls all registered release functions for the File and returns an error if any function fails.
+// Accepts formatErr, which indicates if an error occurred when formatting this file.
+func (f *File) Release(formatErr error) error {
 	for _, fn := range f.releaseFuncs {
-		if err := fn(); err != nil {
+		if err := fn(formatErr); err != nil {
 			return err
 		}
 	}