Move cooldown time to Tendermint #384

Open · wants to merge 2 commits into base: main
13 changes: 3 additions & 10 deletions server/start.go
@@ -197,10 +197,6 @@
return fmt.Errorf("failed to initialize telemetry: %w", err)
}

- restartCoolDownDuration := time.Second * time.Duration(serverCtx.Config.SelfRemediation.RestartCooldownSeconds)
- // Set the first restart time to be now - restartCoolDownDuration so that the first restart can trigger whenever
- canRestartAfter := time.Now().Add(-restartCoolDownDuration)

serverCtx.Logger.Info("Starting Process")
for {
err = startInProcess(
@@ -210,7 +206,6 @@
tracerProviderOptions,
nodeMetricsProvider,
apiMetrics,
- canRestartAfter,
)
errCode, ok := err.(ErrorCode)
exitCode = errCode.Code
@@ -221,7 +216,6 @@
break
}
serverCtx.Logger.Info("restarting node...")
- canRestartAfter = time.Now().Add(restartCoolDownDuration)
}
return nil
},
@@ -314,7 +308,7 @@
restartCh := make(chan struct{})

// Wait for SIGINT or SIGTERM signal
- return WaitForQuitSignals(ctx, restartCh, time.Now())
+ return WaitForQuitSignals(ctx, restartCh)

Codecov / codecov/patch warning on line 311 in server/start.go: added line #L311 was not covered by tests.
}

func startInProcess(
@@ -324,7 +318,6 @@
tracerProviderOptions []trace.TracerProviderOption,
nodeMetricsProvider *node.NodeMetrics,
apiMetrics *telemetry.Metrics,
- canRestartAfter time.Time,
) error {
cfg := ctx.Config
home := cfg.RootDir
@@ -461,7 +454,7 @@
// we do not need to start Rosetta or handle any Tendermint related processes.
if gRPCOnly {
// wait for signal capture and gracefully return
- return WaitForQuitSignals(ctx, restartCh, canRestartAfter)
+ return WaitForQuitSignals(ctx, restartCh)

Codecov / codecov/patch warning on line 457 in server/start.go: added line #L457 was not covered by tests.
}

var rosettaSrv crgserver.Server
@@ -534,5 +527,5 @@
}()

// wait for signal capture and gracefully return
- return WaitForQuitSignals(ctx, restartCh, canRestartAfter)
+ return WaitForQuitSignals(ctx, restartCh)

Codecov / codecov/patch warning on line 530 in server/start.go: added line #L530 was not covered by tests.
}
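The PR title says the cooldown is moving into Tendermint, so on the SDK side each call site in server/start.go simply drops the cooldown argument. A minimal before/after sketch of one call site, taken from the hunks above (nothing here adds behavior beyond removing the parameter):

// Before this PR: the server passed a cooldown deadline along with the restart channel.
return WaitForQuitSignals(ctx, restartCh, canRestartAfter)

// After this PR: only the context and restart channel are passed; per the PR title,
// cooldown enforcement is expected to live on the Tendermint side instead.
return WaitForQuitSignals(ctx, restartCh)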
8 changes: 2 additions & 6 deletions server/util.go
@@ -411,7 +411,7 @@ func TrapSignal(cleanupFunc func()) {
}

// WaitForQuitSignals waits for SIGINT and SIGTERM and returns.
- func WaitForQuitSignals(ctx *Context, restartCh chan struct{}, canRestartAfter time.Time) ErrorCode {
+ func WaitForQuitSignals(ctx *Context, restartCh chan struct{}) ErrorCode {
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
if restartCh != nil {
@@ -420,11 +420,7 @@ func WaitForQuitSignals(ctx *Context, restartCh chan struct{}, canRestartAfter t
case sig := <-sigs:
return ErrorCode{Code: int(sig.(syscall.Signal)) + 128}
case <-restartCh:
- // If it's in the restart cooldown period
- if time.Now().Before(canRestartAfter) {
- ctx.Logger.Info("Restarting too frequently, can only restart after %s", canRestartAfter)
- continue
- }
ctx.Logger.Info("Received signal to restart the app")
return ErrorCode{Code: RestartErrorCode}
}
}
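To make the net effect of the server/util.go change easier to read, here is a rough sketch of WaitForQuitSignals after this PR, pieced together from the hunks above. The control flow that GitHub collapsed (the scaffolding around the shown select, and whatever happens when restartCh is nil) is an assumption, not part of the diff:

// Sketch only: reconstructed from the diff; the collapsed lines are assumed.
func WaitForQuitSignals(ctx *Context, restartCh chan struct{}) ErrorCode {
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
	if restartCh != nil {
		// With the cooldown check removed, a restart request is honored immediately.
		select {
		case sig := <-sigs:
			return ErrorCode{Code: int(sig.(syscall.Signal)) + 128}
		case <-restartCh:
			ctx.Logger.Info("Received signal to restart the app")
			return ErrorCode{Code: RestartErrorCode}
		}
	}
	// Assumed fallback when no restart channel is provided: wait for an OS signal only.
	sig := <-sigs
	return ErrorCode{Code: int(sig.(syscall.Signal)) + 128}
}

The call sites in server/start.go shown above all switch to this two-argument form.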
27 changes: 1 addition & 26 deletions server/util_test.go
@@ -407,30 +407,8 @@ func TestInterceptConfigsWithBadPermissions(t *testing.T) {
}

func TestWaitForQuitSignals(t *testing.T) {
t.Run("WithRestartChannelAndCanRestartAfterNotReached", func(t *testing.T) {
restartCh := make(chan struct{})
go func() {
time.Sleep(100 * time.Millisecond)
restartCh <- struct{}{}
}()

go func() {
time.Sleep(200 * time.Millisecond)
syscall.Kill(syscall.Getpid(), syscall.SIGTERM)
}()

errCode := server.WaitForQuitSignals(
&server.Context{Logger: log.NewNopLogger()},
restartCh,
time.Now().Add(500*time.Millisecond),
)
expectedCode := int(syscall.SIGTERM) + 128
if errCode.Code != expectedCode {
t.Errorf("Expected error code %d, got %d", expectedCode, errCode.Code)
}
})

t.Run("WithRestartChannelAndCanRestartAfterReached", func(t *testing.T) {
t.Run("WithRestartChannel", func(t *testing.T) {
restartCh := make(chan struct{})
go func() {
time.Sleep(100 * time.Millisecond)
@@ -440,7 +418,6 @@ func TestWaitForQuitSignals(t *testing.T) {
errCode := server.WaitForQuitSignals(
&server.Context{Logger: log.NewNopLogger()},
restartCh,
- time.Now().Add(-100*time.Millisecond),
)
if errCode.Code != server.RestartErrorCode {
t.Errorf("Expected error code %d, got %d", server.RestartErrorCode, errCode.Code)
@@ -459,7 +436,6 @@ func TestWaitForQuitSignals(t *testing.T) {
errCode := server.WaitForQuitSignals(
&server.Context{Logger: log.NewNopLogger()},
make(chan struct{}),
- time.Now(),
)
expectedCode := int(syscall.SIGINT) + 128
if errCode.Code != expectedCode {
@@ -476,7 +452,6 @@ func TestWaitForQuitSignals(t *testing.T) {
errCode := server.WaitForQuitSignals(
&server.Context{Logger: log.NewNopLogger()},
make(chan struct{}),
- time.Now(),
)
expectedCode := int(syscall.SIGTERM) + 128
if errCode.Code != expectedCode {