diff --git a/go.mod b/go.mod index cd9ba4f..270c944 100644 --- a/go.mod +++ b/go.mod @@ -31,6 +31,7 @@ require ( github.com/spf13/cobra v1.8.0 github.com/stretchr/testify v1.8.4 github.com/tidwall/pretty v1.2.1 + go.uber.org/dig v1.17.1 golang.org/x/exp v0.0.0-20240213143201-ec583247a57a golang.org/x/mod v0.15.0 golang.org/x/oauth2 v0.17.0 diff --git a/go.sum b/go.sum index 1de506b..a0c4a6b 100644 --- a/go.sum +++ b/go.sum @@ -410,6 +410,8 @@ gitlab.com/digitalxero/go-conventional-commit v1.0.7/go.mod h1:05Xc2BFsSyC5tKhK0 go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +go.uber.org/dig v1.17.1 h1:Tga8Lz8PcYNsWsyHMZ1Vm0OQOUaJNDyvPImgbAu9YSc= +go.uber.org/dig v1.17.1/go.mod h1:Us0rSJiThwCv2GteUN0Q7OKvU7n5J4dxZ9JKUXozFdE= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= diff --git a/pkg/runutil/backoff.go b/pkg/runutil/backoff.go new file mode 100644 index 0000000..d2f61c2 --- /dev/null +++ b/pkg/runutil/backoff.go @@ -0,0 +1,56 @@ +package runutil + +import ( + "math" + "math/rand" + "time" +) + +// Backoff is an interface to calculate the wait times between attemts of doing +// a task. The first attempt must always return 0s. The Duration function +// can be used together with the [Wait] function for a cancelable backoff sleep. +type Backoff interface { + Duration(int) time.Duration +} + +// StaticBackoff always returns the same sleep duration to any but the 0th +// attempt. +type StaticBackoff struct { + Sleep time.Duration +} + +func (b StaticBackoff) Duration(attempt int) time.Duration { + if attempt == 0 { + return 0 + } + return b.Sleep +} + +// ExponentialBackoff is a typical exponentail backoff with Jitter, based on +// this blog post: +// https://aws.amazon.com/ru/blogs/architecture/exponential-backoff-and-jitter/ +type ExponentialBackoff struct { + Initial time.Duration + Max time.Duration + JitterProportion float64 +} + +func (b ExponentialBackoff) Duration(attempt int) time.Duration { + if attempt == 0 { + return time.Duration(0) + } + + var ( + maxWait = math.Pow(2., float64(attempt-1)) + minWait = maxWait * (1. - b.JitterProportion) + jitter = maxWait * b.JitterProportion * rand.Float64() + totalWait = minWait + jitter + ) + + // Note: We must do the min() before muliplying with b.Initial, because it + // is a time.Duration with nano second resolution and we might hit a number + // overflow quite fast which results in not wait time at all. + totalWait = min(totalWait, float64(b.Max)/float64(b.Initial)) + + return time.Duration(float64(b.Initial) * totalWait) +} diff --git a/pkg/runutil/backoff_test.go b/pkg/runutil/backoff_test.go new file mode 100644 index 0000000..96535aa --- /dev/null +++ b/pkg/runutil/backoff_test.go @@ -0,0 +1,107 @@ +package runutil + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBackoffTypes(t *testing.T) { + assert.Implements(t, new(Backoff), ExponentialBackoff{}) + assert.Implements(t, new(Backoff), StaticBackoff{}) +} + +func TestStaticBackoff(t *testing.T) { + bo := StaticBackoff{Sleep: 10 * time.Millisecond} + assert.Equal(t, time.Duration(0), bo.Duration(0)) + for i := 1; i < 10; i++ { + assert.Equal(t, 10*time.Millisecond, bo.Duration(i)) + } +} + +func TestExponentialBackoffWithoutJitter(t *testing.T) { + cases := []struct { + bo ExponentialBackoff + want []int + }{ + { + bo: ExponentialBackoff{Initial: time.Second, Max: time.Minute}, + want: []int{0, 1, 2, 4, 8, 16, 32, 60, 60, 60, 60}, + }, + { + bo: ExponentialBackoff{Initial: 2 * time.Second, Max: time.Minute}, + want: []int{0, 2, 4, 8, 16, 32, 60, 60, 60, 60, 60}, + }, + { + bo: ExponentialBackoff{Initial: 3 * time.Second, Max: time.Minute}, + want: []int{0, 3, 6, 12, 24, 48, 60, 60, 60}, + }, + } + + for _, tc := range cases { + name := fmt.Sprintf("i=%v,m=%v", tc.bo.Initial, tc.bo.Max) + t.Run(name, func(t *testing.T) { + require.Equal(t, 0., tc.bo.JitterProportion, + "jitter contains randomness and cannot be tested here") + for attempt, expected := range tc.want { + want := time.Duration(expected) * time.Second + have := tc.bo.Duration(attempt) + assert.Equal(t, want, have) + } + }) + } +} + +func TestExponentialBackoffWithJitter(t *testing.T) { + cases := []struct { + bo ExponentialBackoff + min []int + max []int + }{ + { + // This is just a sanitiy check for the test itself. + bo: ExponentialBackoff{Initial: 2 * time.Second, Max: time.Minute}, + min: []int{0, 2, 4, 8, 16, 32, 60, 60, 60, 60, 60}, + max: []int{0, 2, 4, 8, 16, 32, 60, 60, 60, 60, 60}, + }, + { + bo: ExponentialBackoff{Initial: 2 * time.Second, Max: time.Minute, JitterProportion: 0.5}, + min: []int{0, 1, 2, 4, 8, 16, 30, 30, 30, 30, 30}, + max: []int{0, 2, 4, 8, 16, 32, 60, 60, 60, 60, 60}, + }, + } + + for _, tc := range cases { + name := fmt.Sprintf("i=%v,m=%v,j=%v", tc.bo.Initial, tc.bo.Max, tc.bo.JitterProportion) + t.Run(name, func(t *testing.T) { + for attempt := range tc.min { + wantMin := time.Duration(tc.min[attempt]) * time.Second + wantMax := time.Duration(tc.max[attempt]) * time.Second + have := tc.bo.Duration(attempt) + + assert.GreaterOrEqual(t, have, wantMin, "attempt #%d", attempt) + assert.LessOrEqual(t, have, wantMax, "attempt #%d", attempt) + } + }) + } +} + +func TestExponentialBackoffWithHighAttempts(t *testing.T) { + bo := ExponentialBackoff{ + Initial: time.Minute, + Max: 5 * time.Minute, + JitterProportion: 0.5, + } + + cases := []int{1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8} + + for _, attempt := range cases { + t.Run(fmt.Sprint(attempt), func(t *testing.T) { + duration := bo.Duration(attempt) + assert.Greater(t, duration, time.Duration(0)) + }) + } +} diff --git a/pkg/runutil/declarative.go b/pkg/runutil/declarative.go new file mode 100644 index 0000000..ca3b2fc --- /dev/null +++ b/pkg/runutil/declarative.go @@ -0,0 +1,30 @@ +package runutil + +import ( + "context" +) + +// DeclarativeWorker is an alternative to building the worker behaviour with +// chained functions.If automatically chains worker functions based on defined +// field in the most sensful order. +// +// It satisfies the Worker interface for easier use. +type DeclarativeWorker struct { + Name string + Worker Worker + Retry Backoff +} + +func (w DeclarativeWorker) Run(ctx context.Context) error { + worker := w.Worker + + if w.Name != "" { + worker = NamedWorker(worker, w.Name) + } + + if w.Retry != nil { + worker = Retry(worker, w.Retry) + } + + return worker.Run(ctx) +} diff --git a/pkg/runutil/dig.go b/pkg/runutil/dig.go new file mode 100644 index 0000000..e468668 --- /dev/null +++ b/pkg/runutil/dig.go @@ -0,0 +1,64 @@ +package runutil + +import ( + "context" + + "go.uber.org/dig" +) + +// WorkerConfiger is for Workers that configure themselfes. This means they can define repeats, backoff and jitter themselves. +// +// func (w *CommitFetcher) Workers() []runutil.Worker { +// return []runutil.Worker{ +// runutil.DeclarativeWorker{ +// Name: "Commits", +// Worker: runutil.Repeat(5*time.Second, runutil.JobFunc(w.fetchCommits)), +// Retry: runutil.ExponentialBackoff{ +// Initial: time.Second, +// Max: time.Minute, +// JitterProportion: 0.5, +// }, +// }, +// runutil.DeclarativeWorker{ +// Name: "PRs", +// Worker: runutil.Repeat(5*time.Second, runutil.JobFunc(w.fetchPRs)), +// Retry: runutil.ExponentialBackoff{ +// Initial: time.Second, +// Max: time.Minute, +// JitterProportion: 0.5, +// }, +// }, +// } +// } +type WorkerConfiger interface { + Workers() []Worker +} + +// WorkerGroup is a input parameter struct for Dig to retrieve all instances +// that implement the WorkerConfigerer. +type WorkerGroup struct { + dig.In + All []WorkerConfiger `group:"worker"` +} + +// ProvideWorker injects a WorkerConfiger, which can later be started with +// RunProvidedWorkers. +func ProvideWorker(c *dig.Container, fn any) error { + return c.Provide(fn, dig.Group("worker"), dig.As(new(WorkerConfiger))) +} + +// RunProvidedWorkers starts all workers there were injected using +// RunAllWorkers. +func RunProvidedWorkers(ctx context.Context, c *dig.Container) error { + return c.Invoke(func(in WorkerGroup) error { + workers := []Worker{} + for _, c := range in.All { + for _, w := range c.Workers() { + workers = append(workers, + NamedWorkerFromType(w, c), + ) + } + } + return RunAllWorkers(ctx, workers...) + }) +} diff --git a/pkg/runutil/names.go b/pkg/runutil/names.go new file mode 100644 index 0000000..8a43a25 --- /dev/null +++ b/pkg/runutil/names.go @@ -0,0 +1,26 @@ +package runutil + +import ( + "context" + "fmt" + "strings" + + "github.com/rebuy-de/rebuy-go-sdk/v8/pkg/logutil" +) + +// NamedWorker assigns a new logutil subsystem on startup. See logutil.Start. +func NamedWorker(worker Worker, name string, a ...any) Worker { + return WorkerFunc(func(ctx context.Context) error { + ctx = logutil.Start(ctx, fmt.Sprintf(name, a...)) + return worker.Run(ctx) + }) +} + +// NamedWorkerFromType assigns a new logutil subsystem on startup based on the +// provided type name. See logutil.Start. +func NamedWorkerFromType(worker Worker, t any) Worker { + name := fmt.Sprintf("%T", t) + name = strings.Trim(name, "*") + name = strings.Replace(name, ".", "/", 1) + return NamedWorker(worker, name) +} diff --git a/pkg/runutil/repeat.go b/pkg/runutil/repeat.go new file mode 100644 index 0000000..b1484f1 --- /dev/null +++ b/pkg/runutil/repeat.go @@ -0,0 +1,82 @@ +package runutil + +import ( + "context" + "time" + + "github.com/rebuy-de/rebuy-go-sdk/v8/pkg/logutil" + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext" + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer" +) + +type jobWorker struct { + wait time.Duration + job Job + startImmediately bool +} + +// Repeat reruns a job indefinitely until the context gets cancelled. The job +// will run at most once in the given time interval. This means the wait +// duration is not the sleep between executions, but the time between the start +// of runs (based on [time.Ticker]). +func Repeat(wait time.Duration, job Job, opts ...RepeatOption) Worker { + w := &jobWorker{ + wait: wait, + job: job, + } + + for _, o := range opts { + o(w) + } + + return w +} + +type RepeatOption func(*jobWorker) + +func WithStartImmediately() RepeatOption { + return func(w *jobWorker) { + w.startImmediately = true + } +} + +func (w jobWorker) Run(ctx context.Context) error { + if w.startImmediately { + err := w.runOnce(ctx) + if err != nil { + return err + } + } + + ticker := time.NewTicker(w.wait) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return nil + case <-ticker.C: + err := w.runOnce(ctx) + if err != nil { + return err + } + } + } +} + +func (w jobWorker) runOnce(ctx context.Context) error { + span, ctx := tracer.StartSpanFromContext( + ctx, "runutil.job", + tracer.Tag(ext.SpanKind, ext.SpanKindInternal), + tracer.Tag(ext.ResourceName, logutil.GetSubsystem(ctx)), + ) + err := w.job.RunOnce(ctx) + if err != nil { + span.Finish(tracer.WithError(err)) + return err + } else { + span.Finish() + } + + return nil +} diff --git a/pkg/runutil/retry.go b/pkg/runutil/retry.go new file mode 100644 index 0000000..99bc1d9 --- /dev/null +++ b/pkg/runutil/retry.go @@ -0,0 +1,29 @@ +package runutil + +import ( + "context" + + "github.com/rebuy-de/rebuy-go-sdk/v8/pkg/logutil" +) + +// Retry restarts a Worker forever when it exists. This happens regardless of +// whether the worker returns an error or nil. The worker only stops with +// restarting, when the context gets cancelled. +func Retry(worker Worker, bo Backoff) Worker { + return WorkerFunc(func(ctx context.Context) error { + var attempt int + for ctx.Err() == nil { + Wait(ctx, bo.Duration(attempt)) + + err := worker.Run(ctx) + if err != nil { + attempt += 1 + logutil.Get(ctx).Warnf("worker failed %d times: %s", attempt, err.Error()) + } else { + attempt = 0 + } + } + + return nil + }) +} diff --git a/pkg/runutil/run.go b/pkg/runutil/run.go new file mode 100644 index 0000000..0190c39 --- /dev/null +++ b/pkg/runutil/run.go @@ -0,0 +1,93 @@ +package runutil + +import ( + "context" + "errors" + "sync" +) + +// WorkerExitedPrematurely indicates that a worker exited in [RunAllWorkers] +// while the context was not cancelled yet. +var ErrWorkerExitedPrematurely = errors.New("worker exited prematurely") + +// RunAllWorkers starts all workers in goroutines and waits until all are +// exited. +// +// Behaviour: +// - The execution for all workers get cancelled when the first worker +// exists, regardless of the exit code. +// - Err is nil, if the context gets cancelled and the workers return a nil +// error too. +// - Err contains [WorkerExitedPrematurely], if the workers return a nil error +// while the context was not cancelled. +// - Err contains all errors, returned by the workers. +func RunAllWorkers(ctx context.Context, workers ...Worker) error { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + var wg sync.WaitGroup + wg.Add(len(workers)) + + var errs collector[error] + + for _, w := range workers { + w := w + go func() { + defer wg.Done() + defer cancel() + err := w.Run(ctx) + if err != nil { + errs.Append(err) + } else if ctx.Err() == nil { + // It means that the works exited itself, if the worker returns + // nil and the context was not cancelled yet. + errs.Append(ErrWorkerExitedPrematurely) + } + // Otherwise the Context was cancelled and the worker did not + // return an error, which means it shut down gracefully. + }() + } + + wg.Wait() + + return errors.Join(errs.Result()...) +} + +// RunAllJobs runs all jobs in parallel and return their errors. +func RunAllJobs(ctx context.Context, jobs ...Job) error { + var wg sync.WaitGroup + wg.Add(len(jobs)) + + var errs collector[error] + + for _, j := range jobs { + j := j + go func() { + defer wg.Done() + err := j.RunOnce(ctx) + if err != nil { + errs.Append(err) + } + }() + } + + wg.Wait() + + return errors.Join(errs.Result()...) +} + +// collector is a helper type for a concurrency-safe append to slices. +type collector[T any] struct { + result []T + mux sync.Mutex +} + +func (c *collector[T]) Append(value T) { + c.mux.Lock() + defer c.mux.Unlock() + c.result = append(c.result, value) +} + +func (c *collector[T]) Result() []T { + return c.result +} diff --git a/pkg/runutil/run_test.go b/pkg/runutil/run_test.go new file mode 100644 index 0000000..18697e3 --- /dev/null +++ b/pkg/runutil/run_test.go @@ -0,0 +1,120 @@ +package runutil + +import ( + "context" + "errors" + "sync" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestRunAllWorkersExitedPrematurely(t *testing.T) { + ctx := context.Background() + + err := RunAllWorkers(ctx, + WorkerFunc(func(ctx context.Context) error { + return nil + }), + WorkerFunc(func(ctx context.Context) error { + return nil + }), + WorkerFunc(func(ctx context.Context) error { + return nil + }), + ) + + require.ErrorIs(t, err, ErrWorkerExitedPrematurely) +} + +func TestRunnAllWorkersNoErrorOnCancel(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + + // This waitgroup makes sure all go routines are started before cancelling + // the context. + var wg sync.WaitGroup + wg.Add(3) + + go func() { + wg.Wait() + cancel() + }() + + err := RunAllWorkers(ctx, + WorkerFunc(func(ctx context.Context) error { + wg.Done() + <-ctx.Done() + return nil + }), + WorkerFunc(func(ctx context.Context) error { + wg.Done() + <-ctx.Done() + return nil + }), + WorkerFunc(func(ctx context.Context) error { + wg.Done() + <-ctx.Done() + return nil + }), + ) + + require.NoError(t, err) +} + +func TestRunnAllWorkersPassthroughErrorsOnCancel(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + + // This waitgroup makes sure all go routines are started before cancelling + // the context. + var wg sync.WaitGroup + wg.Add(3) + + var omg = errors.New("some error") + + go func() { + wg.Wait() + cancel() + }() + + err := RunAllWorkers(ctx, + WorkerFunc(func(ctx context.Context) error { + wg.Done() + <-ctx.Done() + return nil + }), + WorkerFunc(func(ctx context.Context) error { + wg.Done() + <-ctx.Done() + return omg + }), + WorkerFunc(func(ctx context.Context) error { + wg.Done() + <-ctx.Done() + return nil + }), + ) + + require.ErrorIs(t, err, omg) +} + +func TestRunnAllWorkersPassthroughErrors(t *testing.T) { + ctx := context.Background() + + var omg = errors.New("some error") + + err := RunAllWorkers(ctx, + WorkerFunc(func(ctx context.Context) error { + <-ctx.Done() + return nil + }), + WorkerFunc(func(ctx context.Context) error { + return omg + }), + WorkerFunc(func(ctx context.Context) error { + <-ctx.Done() + return nil + }), + ) + + require.ErrorIs(t, err, omg) +} diff --git a/pkg/runutil/types.go b/pkg/runutil/types.go new file mode 100644 index 0000000..31082d0 --- /dev/null +++ b/pkg/runutil/types.go @@ -0,0 +1,28 @@ +package runutil + +import "context" + +// Worker is a service that is supposed to run continuously until the context +// gets cancelled. +type Worker interface { + Run(ctx context.Context) error +} + +// WorkerFunc is a helper to cast a function directly to a Worker. +type WorkerFunc func(ctx context.Context) error + +func (fn WorkerFunc) Run(ctx context.Context) error { + return fn(ctx) +} + +// Job is a function that runs once and exits afterwards. +type Job interface { + RunOnce(ctx context.Context) error +} + +// JobFunc is a helper to cast a function directly to a Job. +type JobFunc func(ctx context.Context) error + +func (fn JobFunc) RunOnce(ctx context.Context) error { + return fn(ctx) +} diff --git a/pkg/runutil/wait.go b/pkg/runutil/wait.go new file mode 100644 index 0000000..6561d16 --- /dev/null +++ b/pkg/runutil/wait.go @@ -0,0 +1,17 @@ +package runutil + +import ( + "context" + "time" +) + +// Wait is similar to [time.Sleep], but stops blocking when the context gets +// cancelled. +func Wait(ctx context.Context, d time.Duration) { + select { + case <-ctx.Done(): + return + case <-time.After(d): + return + } +} diff --git a/pkg/runutil/wait_test.go b/pkg/runutil/wait_test.go new file mode 100644 index 0000000..8e26b9b --- /dev/null +++ b/pkg/runutil/wait_test.go @@ -0,0 +1,30 @@ +package runutil + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestWait(t *testing.T) { + stopwatch := time.Now() + for i := 0; i < 4; i++ { + Wait(context.Background(), 10*time.Millisecond) + } + duration := time.Since(stopwatch) + + require.Greater(t, duration, 40*time.Millisecond) +} + +func TestWaitCancel(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + stopwatch := time.Now() + Wait(ctx, 10*time.Second) + duration := time.Since(stopwatch) + + require.Less(t, duration, time.Second) +}