From 0b420fb8af027a0c7371fcf758f3cc7027661e35 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 11:48:13 -0300 Subject: [PATCH 01/34] Initial V2 impl for `rpc_client` --- operator/rpc_client_v2_test.go | 259 +++++++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 operator/rpc_client_v2_test.go diff --git a/operator/rpc_client_v2_test.go b/operator/rpc_client_v2_test.go new file mode 100644 index 00000000..ee7f87bf --- /dev/null +++ b/operator/rpc_client_v2_test.go @@ -0,0 +1,259 @@ +package operator + +import ( + "context" + "fmt" + "sync" + "testing" + "time" + + "github.com/Layr-Labs/eigensdk-go/logging" + "github.com/NethermindEth/near-sffl/core/types/messages" + "github.com/stretchr/testify/assert" +) + +type RpcClient interface { + Call(method string, args interface{}, reply *bool) error +} + +var _ = RpcClient(&MockRpcClient{}) + +type MockRpcClient struct { + call func(method string, args interface{}, reply *bool) error +} + +func (self *MockRpcClient) Call(method string, args interface{}, reply *bool) error { + return self.call(method, args, reply) +} + +func NoopRpcClient() *MockRpcClient { + return &MockRpcClient{ + call: func(method string, args interface{}, reply *bool) error { return nil }, + } +} + +type Listener interface { + IncError() + IncSuccess() +} + +type MockListener struct { + incError func() + incSuccess func() +} + +var _ = Listener(&MockListener{}) + +func (self *MockListener) IncError() { + self.incError() +} + +func (self *MockListener) IncSuccess() { + self.incSuccess() +} + +func NoopListener() *MockListener { + return &MockListener{ + incError: func() {}, + incSuccess: func() {}, + } +} + +type AggRpcClient struct { + listener Listener + rpcClient RpcClient + logger logging.Logger + actCh chan (func() error) + + once sync.Once + closeCh chan struct{} +} + +func NewAggRpcClient(listener Listener, rpcClient RpcClient, logger logging.Logger) AggRpcClient { + return AggRpcClient{ + listener: listener, + rpcClient: rpcClient, + logger: logger, + actCh: make(chan func() error, 10), + + once: sync.Once{}, + closeCh: make(chan struct{}), + } +} + +func (self *AggRpcClient) Start(ctx context.Context) { + defer func() { + self.closeCh <- struct{}{} + }() + + for { + select { + case <-ctx.Done(): + self.logger.Debug("AggRpcClient: context done") + return + case <-self.closeCh: + self.logger.Debug("AggRpcClient: close message received") + return + case action, ok := <-self.actCh: + if !ok { + continue + } + self.logger.Debug("AggRpcClient: action message received") + err := retryTimes(self.logger, 3, action) + if err != nil { + self.logger.Error("AggRpcClient: action failed after retrying", "err", err) + self.listener.IncError() + } else { + self.logger.Debug("AggRpcClient: action executed successfully") + self.listener.IncSuccess() + } + } + } +} + +func (self *AggRpcClient) Close() { + self.once.Do(func() { + self.logger.Debug("AggRpcClient: close") + + close(self.actCh) + self.closeCh <- struct{}{} + + <-self.closeCh + close(self.closeCh) + }) +} + +func (self *AggRpcClient) SendProcessSignedCheckpointTaskResponse(message *messages.SignedCheckpointTaskResponse) { + self.actCh <- func() error { + var ignore bool + return self.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", message, &ignore) + } +} + +func (self *AggRpcClient) SendSignedStateRootUpdateMessage(message *messages.SignedStateRootUpdateMessage) { + self.actCh <- func() error { + var ignore bool + return self.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", message, &ignore) + } +} + +func (self *AggRpcClient) SendSignedOperatorSetUpdateMessage(message *messages.SignedOperatorSetUpdateMessage) { + self.actCh <- func() error { + var ignore bool + return self.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", message, &ignore) + } + +} + +func shouldRetryLater(err error) bool { + return err == assert.AnError +} + +func retryTimes(logger logging.Logger, times int, action func() error) error { + for i := 0; i < times; i++ { + err := action() + if err == nil { + return nil + } + logger.Error("Error in action", err) + } + return fmt.Errorf("Failed after %d retries", times) +} + +func TestSendSuccessfulMessages(t *testing.T) { + ctx := context.Background() + logger, _ := logging.NewZapLogger(logging.Development) + + successCount := 0 + listener := MockListener{ + incSuccess: func() { logger.Debug("IncSuccess"); successCount++ }, + incError: func() { logger.Debug("IncError") }, + } + + rpcClientCallCount := 0 + rpcClient := MockRpcClient{ + call: func(method string, args interface{}, reply *bool) error { + logger.Debug("MockRpcClient.Call", "method", method, "args", args) + rpcClientCallCount++ + return nil + }, + } + + client := NewAggRpcClient(&listener, &rpcClient, logger) + go client.Start(ctx) + + client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) + client.SendSignedOperatorSetUpdateMessage(&messages.SignedOperatorSetUpdateMessage{}) + + time.Sleep(500 * time.Millisecond) + client.Close() + + assert.Equal(t, 2, successCount) + assert.Equal(t, 2, rpcClientCallCount) +} + +func TestCloseWithContext(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + logger, _ := logging.NewZapLogger(logging.Development) + listener := NoopListener() + rpcClient := NoopRpcClient() + + client := NewAggRpcClient(listener, rpcClient, logger) + go client.Start(ctx) + + time.Sleep(1 * time.Second) +} + +func TestMultipleConcurrentClose(t *testing.T) { + ctx := context.Background() + logger, _ := logging.NewZapLogger(logging.Development) + listener := NoopListener() + rpcClient := NoopRpcClient() + + client := NewAggRpcClient(listener, rpcClient, logger) + go client.Start(ctx) + + wg := sync.WaitGroup{} + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + client.Close() + }() + } + wg.Wait() +} + +func TestRetry(t *testing.T) { + ctx := context.Background() + logger, _ := logging.NewZapLogger(logging.Development) + listener := NoopListener() + + rpcFailCount := 0 + rpcSuccess := false + + rpcClient := MockRpcClient{ + call: func(method string, args interface{}, reply *bool) error { + if rpcFailCount < 2 { + rpcFailCount++ + return assert.AnError + } + + rpcSuccess = true + return nil + }, + } + + client := NewAggRpcClient(listener, &rpcClient, logger) + go client.Start(ctx) + + client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) + + time.Sleep(500 * time.Millisecond) + client.Close() + + assert.Equal(t, 2, rpcFailCount) + assert.True(t, rpcSuccess) +} From 27f918bc829a7ec165e4583d919686f9c539818e Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 11:57:45 -0300 Subject: [PATCH 02/34] Add `retry later` --- operator/rpc_client_v2_test.go | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/operator/rpc_client_v2_test.go b/operator/rpc_client_v2_test.go index ee7f87bf..86935e71 100644 --- a/operator/rpc_client_v2_test.go +++ b/operator/rpc_client_v2_test.go @@ -103,6 +103,9 @@ func (self *AggRpcClient) Start(ctx context.Context) { if err != nil { self.logger.Error("AggRpcClient: action failed after retrying", "err", err) self.listener.IncError() + + self.logger.Debug("AggRpcClient: retrying later") + self.actCh <- action } else { self.logger.Debug("AggRpcClient: action executed successfully") self.listener.IncSuccess() @@ -257,3 +260,35 @@ func TestRetry(t *testing.T) { assert.Equal(t, 2, rpcFailCount) assert.True(t, rpcSuccess) } + +func TestRetryLater(t *testing.T) { + ctx := context.Background() + logger, _ := logging.NewZapLogger(logging.Development) + listener := NoopListener() + + rpcFailCount := 0 + rpcSuccess := false + + rpcClient := MockRpcClient{ + call: func(method string, args interface{}, reply *bool) error { + if rpcFailCount < 3 { + rpcFailCount++ + return assert.AnError + } + + rpcSuccess = true + return nil + }, + } + + client := NewAggRpcClient(listener, &rpcClient, logger) + go client.Start(ctx) + + client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) + + time.Sleep(500 * time.Millisecond) + client.Close() + + assert.Equal(t, 3, rpcFailCount) + assert.True(t, rpcSuccess) +} From ae9904eb91f8326a2fa936621b86732d6de8349e Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 12:25:11 -0300 Subject: [PATCH 03/34] Add `submittedAt` to actions --- operator/rpc_client_v2_test.go | 41 +++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/operator/rpc_client_v2_test.go b/operator/rpc_client_v2_test.go index 86935e71..d671e3bd 100644 --- a/operator/rpc_client_v2_test.go +++ b/operator/rpc_client_v2_test.go @@ -59,11 +59,16 @@ func NoopListener() *MockListener { } } +type action struct { + run func() error + submittedAt time.Time +} + type AggRpcClient struct { listener Listener rpcClient RpcClient logger logging.Logger - actCh chan (func() error) + actionCh chan action once sync.Once closeCh chan struct{} @@ -74,7 +79,7 @@ func NewAggRpcClient(listener Listener, rpcClient RpcClient, logger logging.Logg listener: listener, rpcClient: rpcClient, logger: logger, - actCh: make(chan func() error, 10), + actionCh: make(chan action, 10), once: sync.Once{}, closeCh: make(chan struct{}), @@ -94,18 +99,18 @@ func (self *AggRpcClient) Start(ctx context.Context) { case <-self.closeCh: self.logger.Debug("AggRpcClient: close message received") return - case action, ok := <-self.actCh: + case action, ok := <-self.actionCh: if !ok { continue } self.logger.Debug("AggRpcClient: action message received") - err := retryTimes(self.logger, 3, action) + err := retryTimes(self.logger, 3, action.run) if err != nil { self.logger.Error("AggRpcClient: action failed after retrying", "err", err) self.listener.IncError() self.logger.Debug("AggRpcClient: retrying later") - self.actCh <- action + self.actionCh <- action } else { self.logger.Debug("AggRpcClient: action executed successfully") self.listener.IncSuccess() @@ -118,7 +123,7 @@ func (self *AggRpcClient) Close() { self.once.Do(func() { self.logger.Debug("AggRpcClient: close") - close(self.actCh) + close(self.actionCh) self.closeCh <- struct{}{} <-self.closeCh @@ -127,23 +132,29 @@ func (self *AggRpcClient) Close() { } func (self *AggRpcClient) SendProcessSignedCheckpointTaskResponse(message *messages.SignedCheckpointTaskResponse) { - self.actCh <- func() error { - var ignore bool - return self.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", message, &ignore) + self.actionCh <- action{ + run: func() error { + var ignore bool + return self.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", message, &ignore) + }, } } func (self *AggRpcClient) SendSignedStateRootUpdateMessage(message *messages.SignedStateRootUpdateMessage) { - self.actCh <- func() error { - var ignore bool - return self.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", message, &ignore) + self.actionCh <- action{ + run: func() error { + var ignore bool + return self.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", message, &ignore) + }, } } func (self *AggRpcClient) SendSignedOperatorSetUpdateMessage(message *messages.SignedOperatorSetUpdateMessage) { - self.actCh <- func() error { - var ignore bool - return self.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", message, &ignore) + self.actionCh <- action{ + run: func() error { + var ignore bool + return self.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", message, &ignore) + }, } } From 9e1e8490042f220c9788870808cb6f1100231f84 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 12:41:45 -0300 Subject: [PATCH 04/34] Extract retry and retry later strategies --- operator/rpc_client_v2_test.go | 73 +++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/operator/rpc_client_v2_test.go b/operator/rpc_client_v2_test.go index d671e3bd..a335f1bf 100644 --- a/operator/rpc_client_v2_test.go +++ b/operator/rpc_client_v2_test.go @@ -59,6 +59,35 @@ func NoopListener() *MockListener { } } +type RetryStrategy func(action func() error) error + +func noRetry(action func() error) error { + return action() +} + +func retryNTimes(logger logging.Logger, times int) RetryStrategy { + return func(action func() error) error { + for i := 0; i < times; i++ { + err := action() + if err == nil { + return nil + } + logger.Error("Error in action", err) + } + return fmt.Errorf("Failed after %d retries", times) + } +} + +type RetryLaterPredicate func(err error) bool + +func neverRetryLater(_ error) bool { + return false +} + +func alwaysRetryLater(_ error) bool { + return true +} + type action struct { run func() error submittedAt time.Time @@ -86,7 +115,7 @@ func NewAggRpcClient(listener Listener, rpcClient RpcClient, logger logging.Logg } } -func (self *AggRpcClient) Start(ctx context.Context) { +func (self *AggRpcClient) Start(ctx context.Context, retry RetryStrategy, retryLater RetryLaterPredicate) { defer func() { self.closeCh <- struct{}{} }() @@ -104,13 +133,17 @@ func (self *AggRpcClient) Start(ctx context.Context) { continue } self.logger.Debug("AggRpcClient: action message received") - err := retryTimes(self.logger, 3, action.run) + err := retry(action.run) if err != nil { self.logger.Error("AggRpcClient: action failed after retrying", "err", err) self.listener.IncError() - self.logger.Debug("AggRpcClient: retrying later") - self.actionCh <- action + if retryLater(err) { + self.logger.Debug("AggRpcClient: retrying later") + self.actionCh <- action + } else { + self.logger.Debug("AggRpcClient: not retrying later") + } } else { self.logger.Debug("AggRpcClient: action executed successfully") self.listener.IncSuccess() @@ -159,21 +192,6 @@ func (self *AggRpcClient) SendSignedOperatorSetUpdateMessage(message *messages.S } -func shouldRetryLater(err error) bool { - return err == assert.AnError -} - -func retryTimes(logger logging.Logger, times int, action func() error) error { - for i := 0; i < times; i++ { - err := action() - if err == nil { - return nil - } - logger.Error("Error in action", err) - } - return fmt.Errorf("Failed after %d retries", times) -} - func TestSendSuccessfulMessages(t *testing.T) { ctx := context.Background() logger, _ := logging.NewZapLogger(logging.Development) @@ -194,7 +212,7 @@ func TestSendSuccessfulMessages(t *testing.T) { } client := NewAggRpcClient(&listener, &rpcClient, logger) - go client.Start(ctx) + go client.Start(ctx, noRetry, neverRetryLater) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) client.SendSignedOperatorSetUpdateMessage(&messages.SignedOperatorSetUpdateMessage{}) @@ -215,7 +233,7 @@ func TestCloseWithContext(t *testing.T) { rpcClient := NoopRpcClient() client := NewAggRpcClient(listener, rpcClient, logger) - go client.Start(ctx) + go client.Start(ctx, noRetry, neverRetryLater) time.Sleep(1 * time.Second) } @@ -227,7 +245,7 @@ func TestMultipleConcurrentClose(t *testing.T) { rpcClient := NoopRpcClient() client := NewAggRpcClient(listener, rpcClient, logger) - go client.Start(ctx) + go client.Start(ctx, noRetry, neverRetryLater) wg := sync.WaitGroup{} for i := 0; i < 10; i++ { @@ -240,7 +258,7 @@ func TestMultipleConcurrentClose(t *testing.T) { wg.Wait() } -func TestRetry(t *testing.T) { +func TestRetryNTimes(t *testing.T) { ctx := context.Background() logger, _ := logging.NewZapLogger(logging.Development) listener := NoopListener() @@ -259,9 +277,10 @@ func TestRetry(t *testing.T) { return nil }, } + retryStrategy := retryNTimes(logger, 3) client := NewAggRpcClient(listener, &rpcClient, logger) - go client.Start(ctx) + go client.Start(ctx, retryStrategy, neverRetryLater) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) @@ -282,7 +301,7 @@ func TestRetryLater(t *testing.T) { rpcClient := MockRpcClient{ call: func(method string, args interface{}, reply *bool) error { - if rpcFailCount < 3 { + if rpcFailCount < 2 { rpcFailCount++ return assert.AnError } @@ -293,13 +312,13 @@ func TestRetryLater(t *testing.T) { } client := NewAggRpcClient(listener, &rpcClient, logger) - go client.Start(ctx) + go client.Start(ctx, noRetry, alwaysRetryLater) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) time.Sleep(500 * time.Millisecond) client.Close() - assert.Equal(t, 3, rpcFailCount) + assert.Equal(t, 2, rpcFailCount) assert.True(t, rpcSuccess) } From 890ac87d0a3097547e73aeef5f320f36642343f2 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 12:44:55 -0300 Subject: [PATCH 05/34] Add `retryIfRecentEnough` --- operator/rpc_client_v2_test.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/operator/rpc_client_v2_test.go b/operator/rpc_client_v2_test.go index a335f1bf..4a3767bf 100644 --- a/operator/rpc_client_v2_test.go +++ b/operator/rpc_client_v2_test.go @@ -78,16 +78,22 @@ func retryNTimes(logger logging.Logger, times int) RetryStrategy { } } -type RetryLaterPredicate func(err error) bool +type RetryLaterPredicate func(action action, err error) bool -func neverRetryLater(_ error) bool { +func neverRetryLater(_ action, _ error) bool { return false } -func alwaysRetryLater(_ error) bool { +func alwaysRetryLater(_ action, _ error) bool { return true } +func retryIfRecentEnough(ttl time.Duration) RetryLaterPredicate { + return func(action action, err error) bool { + return time.Since(action.submittedAt) < ttl + } +} + type action struct { run func() error submittedAt time.Time @@ -138,7 +144,7 @@ func (self *AggRpcClient) Start(ctx context.Context, retry RetryStrategy, retryL self.logger.Error("AggRpcClient: action failed after retrying", "err", err) self.listener.IncError() - if retryLater(err) { + if retryLater(action, err) { self.logger.Debug("AggRpcClient: retrying later") self.actionCh <- action } else { From 9a46a4f932b1442438f8f5d47f3455da27201f03 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 12:50:07 -0300 Subject: [PATCH 06/34] Add `TestRetryLaterIfRecentEnough` --- operator/rpc_client_v2_test.go | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/operator/rpc_client_v2_test.go b/operator/rpc_client_v2_test.go index 4a3767bf..73aecdfe 100644 --- a/operator/rpc_client_v2_test.go +++ b/operator/rpc_client_v2_test.go @@ -88,7 +88,7 @@ func alwaysRetryLater(_ action, _ error) bool { return true } -func retryIfRecentEnough(ttl time.Duration) RetryLaterPredicate { +func retryLaterIfRecentEnough(ttl time.Duration) RetryLaterPredicate { return func(action action, err error) bool { return time.Since(action.submittedAt) < ttl } @@ -172,6 +172,7 @@ func (self *AggRpcClient) Close() { func (self *AggRpcClient) SendProcessSignedCheckpointTaskResponse(message *messages.SignedCheckpointTaskResponse) { self.actionCh <- action{ + submittedAt: time.Now(), run: func() error { var ignore bool return self.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", message, &ignore) @@ -181,6 +182,7 @@ func (self *AggRpcClient) SendProcessSignedCheckpointTaskResponse(message *messa func (self *AggRpcClient) SendSignedStateRootUpdateMessage(message *messages.SignedStateRootUpdateMessage) { self.actionCh <- action{ + submittedAt: time.Now(), run: func() error { var ignore bool return self.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", message, &ignore) @@ -190,6 +192,7 @@ func (self *AggRpcClient) SendSignedStateRootUpdateMessage(message *messages.Sig func (self *AggRpcClient) SendSignedOperatorSetUpdateMessage(message *messages.SignedOperatorSetUpdateMessage) { self.actionCh <- action{ + submittedAt: time.Now(), run: func() error { var ignore bool return self.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", message, &ignore) @@ -328,3 +331,30 @@ func TestRetryLater(t *testing.T) { assert.Equal(t, 2, rpcFailCount) assert.True(t, rpcSuccess) } + +func TestRetryLaterIfRecentEnough(t *testing.T) { + ctx := context.Background() + logger, _ := logging.NewZapLogger(logging.Development) + listener := NoopListener() + + rpcFailCount := 0 + + rpcClient := MockRpcClient{ + call: func(method string, args interface{}, reply *bool) error { + time.Sleep(100 * time.Millisecond) + + rpcFailCount++ + return assert.AnError + }, + } + + client := NewAggRpcClient(listener, &rpcClient, logger) + go client.Start(ctx, noRetry, retryLaterIfRecentEnough(500*time.Millisecond)) + + client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) + + time.Sleep(500 * time.Millisecond) + client.Close() + + assert.Equal(t, 5, rpcFailCount) +} From 4fef16a1400042e29061661e37d3305cecce969b Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 12:58:38 -0300 Subject: [PATCH 07/34] Rename retry mechanism --- operator/rpc_client_v2_test.go | 85 ++++++++-------------------------- 1 file changed, 20 insertions(+), 65 deletions(-) diff --git a/operator/rpc_client_v2_test.go b/operator/rpc_client_v2_test.go index 73aecdfe..cbda1dbb 100644 --- a/operator/rpc_client_v2_test.go +++ b/operator/rpc_client_v2_test.go @@ -2,7 +2,6 @@ package operator import ( "context" - "fmt" "sync" "testing" "time" @@ -59,44 +58,32 @@ func NoopListener() *MockListener { } } -type RetryStrategy func(action func() error) error +type RetryPredicate func(action action, err error) bool -func noRetry(action func() error) error { - return action() -} - -func retryNTimes(logger logging.Logger, times int) RetryStrategy { - return func(action func() error) error { - for i := 0; i < times; i++ { - err := action() - if err == nil { - return nil - } - logger.Error("Error in action", err) - } - return fmt.Errorf("Failed after %d retries", times) - } -} - -type RetryLaterPredicate func(action action, err error) bool - -func neverRetryLater(_ action, _ error) bool { +func neverRetry(_ action, _ error) bool { return false } -func alwaysRetryLater(_ action, _ error) bool { +func alwaysRetry(_ action, _ error) bool { return true } -func retryLaterIfRecentEnough(ttl time.Duration) RetryLaterPredicate { +func retryIfRecentEnough(ttl time.Duration) RetryPredicate { return func(action action, err error) bool { return time.Since(action.submittedAt) < ttl } } +func retryAtMost(retries int) RetryPredicate { + return func(action action, err error) bool { + return action.retryCount < retries + } +} + type action struct { run func() error submittedAt time.Time + retryCount int } type AggRpcClient struct { @@ -121,7 +108,7 @@ func NewAggRpcClient(listener Listener, rpcClient RpcClient, logger logging.Logg } } -func (self *AggRpcClient) Start(ctx context.Context, retry RetryStrategy, retryLater RetryLaterPredicate) { +func (self *AggRpcClient) Start(ctx context.Context, retryLater RetryPredicate) { defer func() { self.closeCh <- struct{}{} }() @@ -139,13 +126,14 @@ func (self *AggRpcClient) Start(ctx context.Context, retry RetryStrategy, retryL continue } self.logger.Debug("AggRpcClient: action message received") - err := retry(action.run) + err := action.run() if err != nil { self.logger.Error("AggRpcClient: action failed after retrying", "err", err) self.listener.IncError() if retryLater(action, err) { self.logger.Debug("AggRpcClient: retrying later") + action.retryCount++ self.actionCh <- action } else { self.logger.Debug("AggRpcClient: not retrying later") @@ -221,7 +209,7 @@ func TestSendSuccessfulMessages(t *testing.T) { } client := NewAggRpcClient(&listener, &rpcClient, logger) - go client.Start(ctx, noRetry, neverRetryLater) + go client.Start(ctx, neverRetry) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) client.SendSignedOperatorSetUpdateMessage(&messages.SignedOperatorSetUpdateMessage{}) @@ -242,7 +230,7 @@ func TestCloseWithContext(t *testing.T) { rpcClient := NoopRpcClient() client := NewAggRpcClient(listener, rpcClient, logger) - go client.Start(ctx, noRetry, neverRetryLater) + go client.Start(ctx, neverRetry) time.Sleep(1 * time.Second) } @@ -254,7 +242,7 @@ func TestMultipleConcurrentClose(t *testing.T) { rpcClient := NoopRpcClient() client := NewAggRpcClient(listener, rpcClient, logger) - go client.Start(ctx, noRetry, neverRetryLater) + go client.Start(ctx, neverRetry) wg := sync.WaitGroup{} for i := 0; i < 10; i++ { @@ -267,40 +255,7 @@ func TestMultipleConcurrentClose(t *testing.T) { wg.Wait() } -func TestRetryNTimes(t *testing.T) { - ctx := context.Background() - logger, _ := logging.NewZapLogger(logging.Development) - listener := NoopListener() - - rpcFailCount := 0 - rpcSuccess := false - - rpcClient := MockRpcClient{ - call: func(method string, args interface{}, reply *bool) error { - if rpcFailCount < 2 { - rpcFailCount++ - return assert.AnError - } - - rpcSuccess = true - return nil - }, - } - retryStrategy := retryNTimes(logger, 3) - - client := NewAggRpcClient(listener, &rpcClient, logger) - go client.Start(ctx, retryStrategy, neverRetryLater) - - client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) - - time.Sleep(500 * time.Millisecond) - client.Close() - - assert.Equal(t, 2, rpcFailCount) - assert.True(t, rpcSuccess) -} - -func TestRetryLater(t *testing.T) { +func TestUnboundedRetry(t *testing.T) { ctx := context.Background() logger, _ := logging.NewZapLogger(logging.Development) listener := NoopListener() @@ -321,7 +276,7 @@ func TestRetryLater(t *testing.T) { } client := NewAggRpcClient(listener, &rpcClient, logger) - go client.Start(ctx, noRetry, alwaysRetryLater) + go client.Start(ctx, alwaysRetry) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) @@ -349,7 +304,7 @@ func TestRetryLaterIfRecentEnough(t *testing.T) { } client := NewAggRpcClient(listener, &rpcClient, logger) - go client.Start(ctx, noRetry, retryLaterIfRecentEnough(500*time.Millisecond)) + go client.Start(ctx, retryIfRecentEnough(500*time.Millisecond)) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) From 8e558f5a1d5724ebb815ccc72bdb461093c8c084 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 13:00:50 -0300 Subject: [PATCH 08/34] Test `retryAtMost` --- operator/rpc_client_v2_test.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/operator/rpc_client_v2_test.go b/operator/rpc_client_v2_test.go index cbda1dbb..672a95b8 100644 --- a/operator/rpc_client_v2_test.go +++ b/operator/rpc_client_v2_test.go @@ -287,6 +287,31 @@ func TestUnboundedRetry(t *testing.T) { assert.True(t, rpcSuccess) } +func TestRetryAtMost(t *testing.T) { + ctx := context.Background() + logger, _ := logging.NewZapLogger(logging.Development) + listener := NoopListener() + + rpcFailCount := 0 + + rpcClient := MockRpcClient{ + call: func(method string, args interface{}, reply *bool) error { + rpcFailCount++ + return assert.AnError + }, + } + + client := NewAggRpcClient(listener, &rpcClient, logger) + go client.Start(ctx, retryAtMost(4)) + + client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) + + time.Sleep(500 * time.Millisecond) + client.Close() + + assert.Equal(t, 5, rpcFailCount) // 1 run, 4 retries +} + func TestRetryLaterIfRecentEnough(t *testing.T) { ctx := context.Background() logger, _ := logging.NewZapLogger(logging.Development) From 3950423f4618c2c0a0af2c97bc4b0c41e4efa26d Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 13:03:33 -0300 Subject: [PATCH 09/34] Store `retryPredicate` in struct member --- operator/rpc_client_v2_test.go | 53 +++++++++++++++++----------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/operator/rpc_client_v2_test.go b/operator/rpc_client_v2_test.go index 672a95b8..754800ac 100644 --- a/operator/rpc_client_v2_test.go +++ b/operator/rpc_client_v2_test.go @@ -87,28 +87,30 @@ type action struct { } type AggRpcClient struct { - listener Listener - rpcClient RpcClient - logger logging.Logger - actionCh chan action + listener Listener + rpcClient RpcClient + shouldRetry RetryPredicate + logger logging.Logger + actionCh chan action once sync.Once closeCh chan struct{} } -func NewAggRpcClient(listener Listener, rpcClient RpcClient, logger logging.Logger) AggRpcClient { +func NewAggRpcClient(listener Listener, rpcClient RpcClient, retryPredicate RetryPredicate, logger logging.Logger) AggRpcClient { return AggRpcClient{ - listener: listener, - rpcClient: rpcClient, - logger: logger, - actionCh: make(chan action, 10), + listener: listener, + rpcClient: rpcClient, + shouldRetry: retryPredicate, + logger: logger, + actionCh: make(chan action, 10), once: sync.Once{}, closeCh: make(chan struct{}), } } -func (self *AggRpcClient) Start(ctx context.Context, retryLater RetryPredicate) { +func (self *AggRpcClient) Start(ctx context.Context) { defer func() { self.closeCh <- struct{}{} }() @@ -131,7 +133,7 @@ func (self *AggRpcClient) Start(ctx context.Context, retryLater RetryPredicate) self.logger.Error("AggRpcClient: action failed after retrying", "err", err) self.listener.IncError() - if retryLater(action, err) { + if self.shouldRetry(action, err) { self.logger.Debug("AggRpcClient: retrying later") action.retryCount++ self.actionCh <- action @@ -208,8 +210,8 @@ func TestSendSuccessfulMessages(t *testing.T) { }, } - client := NewAggRpcClient(&listener, &rpcClient, logger) - go client.Start(ctx, neverRetry) + client := NewAggRpcClient(&listener, &rpcClient, neverRetry, logger) + go client.Start(ctx) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) client.SendSignedOperatorSetUpdateMessage(&messages.SignedOperatorSetUpdateMessage{}) @@ -229,8 +231,8 @@ func TestCloseWithContext(t *testing.T) { listener := NoopListener() rpcClient := NoopRpcClient() - client := NewAggRpcClient(listener, rpcClient, logger) - go client.Start(ctx, neverRetry) + client := NewAggRpcClient(listener, rpcClient, neverRetry, logger) + go client.Start(ctx) time.Sleep(1 * time.Second) } @@ -241,8 +243,8 @@ func TestMultipleConcurrentClose(t *testing.T) { listener := NoopListener() rpcClient := NoopRpcClient() - client := NewAggRpcClient(listener, rpcClient, logger) - go client.Start(ctx, neverRetry) + client := NewAggRpcClient(listener, rpcClient, neverRetry, logger) + go client.Start(ctx) wg := sync.WaitGroup{} for i := 0; i < 10; i++ { @@ -260,9 +262,8 @@ func TestUnboundedRetry(t *testing.T) { logger, _ := logging.NewZapLogger(logging.Development) listener := NoopListener() - rpcFailCount := 0 rpcSuccess := false - + rpcFailCount := 0 rpcClient := MockRpcClient{ call: func(method string, args interface{}, reply *bool) error { if rpcFailCount < 2 { @@ -275,8 +276,8 @@ func TestUnboundedRetry(t *testing.T) { }, } - client := NewAggRpcClient(listener, &rpcClient, logger) - go client.Start(ctx, alwaysRetry) + client := NewAggRpcClient(listener, &rpcClient, alwaysRetry, logger) + go client.Start(ctx) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) @@ -293,7 +294,6 @@ func TestRetryAtMost(t *testing.T) { listener := NoopListener() rpcFailCount := 0 - rpcClient := MockRpcClient{ call: func(method string, args interface{}, reply *bool) error { rpcFailCount++ @@ -301,8 +301,8 @@ func TestRetryAtMost(t *testing.T) { }, } - client := NewAggRpcClient(listener, &rpcClient, logger) - go client.Start(ctx, retryAtMost(4)) + client := NewAggRpcClient(listener, &rpcClient, retryAtMost(4), logger) + go client.Start(ctx) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) @@ -318,7 +318,6 @@ func TestRetryLaterIfRecentEnough(t *testing.T) { listener := NoopListener() rpcFailCount := 0 - rpcClient := MockRpcClient{ call: func(method string, args interface{}, reply *bool) error { time.Sleep(100 * time.Millisecond) @@ -328,8 +327,8 @@ func TestRetryLaterIfRecentEnough(t *testing.T) { }, } - client := NewAggRpcClient(listener, &rpcClient, logger) - go client.Start(ctx, retryIfRecentEnough(500*time.Millisecond)) + client := NewAggRpcClient(listener, &rpcClient, retryIfRecentEnough(500*time.Millisecond), logger) + go client.Start(ctx) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) From e0a82a07476f4d9ba4048609469024084c71f85f Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 14:31:19 -0300 Subject: [PATCH 10/34] Separate impl from tests --- operator/v2/rpc_client_v2.go | 151 ++++++++++++++++++++++ operator/{ => v2}/rpc_client_v2_test.go | 161 ++---------------------- 2 files changed, 161 insertions(+), 151 deletions(-) create mode 100644 operator/v2/rpc_client_v2.go rename operator/{ => v2}/rpc_client_v2_test.go (51%) diff --git a/operator/v2/rpc_client_v2.go b/operator/v2/rpc_client_v2.go new file mode 100644 index 00000000..d18bfc2d --- /dev/null +++ b/operator/v2/rpc_client_v2.go @@ -0,0 +1,151 @@ +package operator2 + +import ( + "context" + "sync" + "time" + + "github.com/Layr-Labs/eigensdk-go/logging" + "github.com/NethermindEth/near-sffl/core/types/messages" +) + +type RpcClient interface { + Call(method string, args interface{}, reply *bool) error +} + +type Listener interface { + IncError() + IncSuccess() +} + +type RetryPredicate func(action action, err error) bool + +func NeverRetry(_ action, _ error) bool { + return false +} + +func AlwaysRetry(_ action, _ error) bool { + return true +} + +func RetryIfRecentEnough(ttl time.Duration) RetryPredicate { + return func(action action, err error) bool { + return time.Since(action.submittedAt) < ttl + } +} + +func RetryAtMost(retries int) RetryPredicate { + return func(action action, err error) bool { + return action.retryCount < retries + } +} + +type action struct { + run func() error + submittedAt time.Time + retryCount int +} + +type AggRpcClient struct { + listener Listener + rpcClient RpcClient + shouldRetry RetryPredicate + logger logging.Logger + actionCh chan action + + once sync.Once + closeCh chan struct{} +} + +func NewAggRpcClient(listener Listener, rpcClient RpcClient, retryPredicate RetryPredicate, logger logging.Logger) AggRpcClient { + return AggRpcClient{ + listener: listener, + rpcClient: rpcClient, + shouldRetry: retryPredicate, + logger: logger, + actionCh: make(chan action, 10), + + once: sync.Once{}, + closeCh: make(chan struct{}), + } +} + +func (self *AggRpcClient) Start(ctx context.Context) { + defer func() { + self.closeCh <- struct{}{} + }() + + for { + select { + case <-ctx.Done(): + self.logger.Debug("AggRpcClient: context done") + return + case <-self.closeCh: + self.logger.Debug("AggRpcClient: close message received") + return + case action, ok := <-self.actionCh: + if !ok { + continue + } + self.logger.Debug("AggRpcClient: action message received") + err := action.run() + if err != nil { + self.logger.Error("AggRpcClient: action failed after retrying", "err", err) + self.listener.IncError() + + if self.shouldRetry(action, err) { + self.logger.Debug("AggRpcClient: retrying later") + action.retryCount++ + self.actionCh <- action + } else { + self.logger.Debug("AggRpcClient: not retrying later") + } + } else { + self.logger.Debug("AggRpcClient: action executed successfully") + self.listener.IncSuccess() + } + } + } +} + +func (self *AggRpcClient) Close() { + self.once.Do(func() { + self.logger.Debug("AggRpcClient: close") + + close(self.actionCh) + self.closeCh <- struct{}{} + + <-self.closeCh + close(self.closeCh) + }) +} + +func (self *AggRpcClient) SendProcessSignedCheckpointTaskResponse(message *messages.SignedCheckpointTaskResponse) { + self.actionCh <- action{ + submittedAt: time.Now(), + run: func() error { + var ignore bool + return self.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", message, &ignore) + }, + } +} + +func (self *AggRpcClient) SendSignedStateRootUpdateMessage(message *messages.SignedStateRootUpdateMessage) { + self.actionCh <- action{ + submittedAt: time.Now(), + run: func() error { + var ignore bool + return self.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", message, &ignore) + }, + } +} + +func (self *AggRpcClient) SendSignedOperatorSetUpdateMessage(message *messages.SignedOperatorSetUpdateMessage) { + self.actionCh <- action{ + submittedAt: time.Now(), + run: func() error { + var ignore bool + return self.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", message, &ignore) + }, + } +} diff --git a/operator/rpc_client_v2_test.go b/operator/v2/rpc_client_v2_test.go similarity index 51% rename from operator/rpc_client_v2_test.go rename to operator/v2/rpc_client_v2_test.go index 754800ac..19a11b9a 100644 --- a/operator/rpc_client_v2_test.go +++ b/operator/v2/rpc_client_v2_test.go @@ -1,4 +1,4 @@ -package operator +package operator2_test import ( "context" @@ -8,14 +8,11 @@ import ( "github.com/Layr-Labs/eigensdk-go/logging" "github.com/NethermindEth/near-sffl/core/types/messages" + operator "github.com/NethermindEth/near-sffl/operator/v2" "github.com/stretchr/testify/assert" ) -type RpcClient interface { - Call(method string, args interface{}, reply *bool) error -} - -var _ = RpcClient(&MockRpcClient{}) +var _ = operator.RpcClient(&MockRpcClient{}) type MockRpcClient struct { call func(method string, args interface{}, reply *bool) error @@ -31,17 +28,12 @@ func NoopRpcClient() *MockRpcClient { } } -type Listener interface { - IncError() - IncSuccess() -} - type MockListener struct { incError func() incSuccess func() } -var _ = Listener(&MockListener{}) +var _ = operator.Listener(&MockListener{}) func (self *MockListener) IncError() { self.incError() @@ -58,139 +50,6 @@ func NoopListener() *MockListener { } } -type RetryPredicate func(action action, err error) bool - -func neverRetry(_ action, _ error) bool { - return false -} - -func alwaysRetry(_ action, _ error) bool { - return true -} - -func retryIfRecentEnough(ttl time.Duration) RetryPredicate { - return func(action action, err error) bool { - return time.Since(action.submittedAt) < ttl - } -} - -func retryAtMost(retries int) RetryPredicate { - return func(action action, err error) bool { - return action.retryCount < retries - } -} - -type action struct { - run func() error - submittedAt time.Time - retryCount int -} - -type AggRpcClient struct { - listener Listener - rpcClient RpcClient - shouldRetry RetryPredicate - logger logging.Logger - actionCh chan action - - once sync.Once - closeCh chan struct{} -} - -func NewAggRpcClient(listener Listener, rpcClient RpcClient, retryPredicate RetryPredicate, logger logging.Logger) AggRpcClient { - return AggRpcClient{ - listener: listener, - rpcClient: rpcClient, - shouldRetry: retryPredicate, - logger: logger, - actionCh: make(chan action, 10), - - once: sync.Once{}, - closeCh: make(chan struct{}), - } -} - -func (self *AggRpcClient) Start(ctx context.Context) { - defer func() { - self.closeCh <- struct{}{} - }() - - for { - select { - case <-ctx.Done(): - self.logger.Debug("AggRpcClient: context done") - return - case <-self.closeCh: - self.logger.Debug("AggRpcClient: close message received") - return - case action, ok := <-self.actionCh: - if !ok { - continue - } - self.logger.Debug("AggRpcClient: action message received") - err := action.run() - if err != nil { - self.logger.Error("AggRpcClient: action failed after retrying", "err", err) - self.listener.IncError() - - if self.shouldRetry(action, err) { - self.logger.Debug("AggRpcClient: retrying later") - action.retryCount++ - self.actionCh <- action - } else { - self.logger.Debug("AggRpcClient: not retrying later") - } - } else { - self.logger.Debug("AggRpcClient: action executed successfully") - self.listener.IncSuccess() - } - } - } -} - -func (self *AggRpcClient) Close() { - self.once.Do(func() { - self.logger.Debug("AggRpcClient: close") - - close(self.actionCh) - self.closeCh <- struct{}{} - - <-self.closeCh - close(self.closeCh) - }) -} - -func (self *AggRpcClient) SendProcessSignedCheckpointTaskResponse(message *messages.SignedCheckpointTaskResponse) { - self.actionCh <- action{ - submittedAt: time.Now(), - run: func() error { - var ignore bool - return self.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", message, &ignore) - }, - } -} - -func (self *AggRpcClient) SendSignedStateRootUpdateMessage(message *messages.SignedStateRootUpdateMessage) { - self.actionCh <- action{ - submittedAt: time.Now(), - run: func() error { - var ignore bool - return self.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", message, &ignore) - }, - } -} - -func (self *AggRpcClient) SendSignedOperatorSetUpdateMessage(message *messages.SignedOperatorSetUpdateMessage) { - self.actionCh <- action{ - submittedAt: time.Now(), - run: func() error { - var ignore bool - return self.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", message, &ignore) - }, - } - -} - func TestSendSuccessfulMessages(t *testing.T) { ctx := context.Background() logger, _ := logging.NewZapLogger(logging.Development) @@ -210,7 +69,7 @@ func TestSendSuccessfulMessages(t *testing.T) { }, } - client := NewAggRpcClient(&listener, &rpcClient, neverRetry, logger) + client := operator.NewAggRpcClient(&listener, &rpcClient, operator.NeverRetry, logger) go client.Start(ctx) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) @@ -231,7 +90,7 @@ func TestCloseWithContext(t *testing.T) { listener := NoopListener() rpcClient := NoopRpcClient() - client := NewAggRpcClient(listener, rpcClient, neverRetry, logger) + client := operator.NewAggRpcClient(listener, rpcClient, operator.NeverRetry, logger) go client.Start(ctx) time.Sleep(1 * time.Second) @@ -243,7 +102,7 @@ func TestMultipleConcurrentClose(t *testing.T) { listener := NoopListener() rpcClient := NoopRpcClient() - client := NewAggRpcClient(listener, rpcClient, neverRetry, logger) + client := operator.NewAggRpcClient(listener, rpcClient, operator.NeverRetry, logger) go client.Start(ctx) wg := sync.WaitGroup{} @@ -276,7 +135,7 @@ func TestUnboundedRetry(t *testing.T) { }, } - client := NewAggRpcClient(listener, &rpcClient, alwaysRetry, logger) + client := operator.NewAggRpcClient(listener, &rpcClient, operator.AlwaysRetry, logger) go client.Start(ctx) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) @@ -301,7 +160,7 @@ func TestRetryAtMost(t *testing.T) { }, } - client := NewAggRpcClient(listener, &rpcClient, retryAtMost(4), logger) + client := operator.NewAggRpcClient(listener, &rpcClient, operator.RetryAtMost(4), logger) go client.Start(ctx) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) @@ -327,7 +186,7 @@ func TestRetryLaterIfRecentEnough(t *testing.T) { }, } - client := NewAggRpcClient(listener, &rpcClient, retryIfRecentEnough(500*time.Millisecond), logger) + client := operator.NewAggRpcClient(listener, &rpcClient, operator.RetryIfRecentEnough(500*time.Millisecond), logger) go client.Start(ctx) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) From 6a7c004d2daf8e82d13e70d6ec90d93087766988 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 14:32:11 -0300 Subject: [PATCH 11/34] Rename to `AggregatorRpcClient` --- operator/v2/rpc_client_v2.go | 16 ++++++++-------- operator/v2/rpc_client_v2_test.go | 12 ++++++------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/operator/v2/rpc_client_v2.go b/operator/v2/rpc_client_v2.go index d18bfc2d..75c37af7 100644 --- a/operator/v2/rpc_client_v2.go +++ b/operator/v2/rpc_client_v2.go @@ -46,7 +46,7 @@ type action struct { retryCount int } -type AggRpcClient struct { +type AggregatorRpcClient struct { listener Listener rpcClient RpcClient shouldRetry RetryPredicate @@ -57,8 +57,8 @@ type AggRpcClient struct { closeCh chan struct{} } -func NewAggRpcClient(listener Listener, rpcClient RpcClient, retryPredicate RetryPredicate, logger logging.Logger) AggRpcClient { - return AggRpcClient{ +func NewAggregatorRpcClient(listener Listener, rpcClient RpcClient, retryPredicate RetryPredicate, logger logging.Logger) AggregatorRpcClient { + return AggregatorRpcClient{ listener: listener, rpcClient: rpcClient, shouldRetry: retryPredicate, @@ -70,7 +70,7 @@ func NewAggRpcClient(listener Listener, rpcClient RpcClient, retryPredicate Retr } } -func (self *AggRpcClient) Start(ctx context.Context) { +func (self *AggregatorRpcClient) Start(ctx context.Context) { defer func() { self.closeCh <- struct{}{} }() @@ -108,7 +108,7 @@ func (self *AggRpcClient) Start(ctx context.Context) { } } -func (self *AggRpcClient) Close() { +func (self *AggregatorRpcClient) Close() { self.once.Do(func() { self.logger.Debug("AggRpcClient: close") @@ -120,7 +120,7 @@ func (self *AggRpcClient) Close() { }) } -func (self *AggRpcClient) SendProcessSignedCheckpointTaskResponse(message *messages.SignedCheckpointTaskResponse) { +func (self *AggregatorRpcClient) SendProcessSignedCheckpointTaskResponse(message *messages.SignedCheckpointTaskResponse) { self.actionCh <- action{ submittedAt: time.Now(), run: func() error { @@ -130,7 +130,7 @@ func (self *AggRpcClient) SendProcessSignedCheckpointTaskResponse(message *messa } } -func (self *AggRpcClient) SendSignedStateRootUpdateMessage(message *messages.SignedStateRootUpdateMessage) { +func (self *AggregatorRpcClient) SendSignedStateRootUpdateMessage(message *messages.SignedStateRootUpdateMessage) { self.actionCh <- action{ submittedAt: time.Now(), run: func() error { @@ -140,7 +140,7 @@ func (self *AggRpcClient) SendSignedStateRootUpdateMessage(message *messages.Sig } } -func (self *AggRpcClient) SendSignedOperatorSetUpdateMessage(message *messages.SignedOperatorSetUpdateMessage) { +func (self *AggregatorRpcClient) SendSignedOperatorSetUpdateMessage(message *messages.SignedOperatorSetUpdateMessage) { self.actionCh <- action{ submittedAt: time.Now(), run: func() error { diff --git a/operator/v2/rpc_client_v2_test.go b/operator/v2/rpc_client_v2_test.go index 19a11b9a..b42b7f5a 100644 --- a/operator/v2/rpc_client_v2_test.go +++ b/operator/v2/rpc_client_v2_test.go @@ -69,7 +69,7 @@ func TestSendSuccessfulMessages(t *testing.T) { }, } - client := operator.NewAggRpcClient(&listener, &rpcClient, operator.NeverRetry, logger) + client := operator.NewAggregatorRpcClient(&listener, &rpcClient, operator.NeverRetry, logger) go client.Start(ctx) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) @@ -90,7 +90,7 @@ func TestCloseWithContext(t *testing.T) { listener := NoopListener() rpcClient := NoopRpcClient() - client := operator.NewAggRpcClient(listener, rpcClient, operator.NeverRetry, logger) + client := operator.NewAggregatorRpcClient(listener, rpcClient, operator.NeverRetry, logger) go client.Start(ctx) time.Sleep(1 * time.Second) @@ -102,7 +102,7 @@ func TestMultipleConcurrentClose(t *testing.T) { listener := NoopListener() rpcClient := NoopRpcClient() - client := operator.NewAggRpcClient(listener, rpcClient, operator.NeverRetry, logger) + client := operator.NewAggregatorRpcClient(listener, rpcClient, operator.NeverRetry, logger) go client.Start(ctx) wg := sync.WaitGroup{} @@ -135,7 +135,7 @@ func TestUnboundedRetry(t *testing.T) { }, } - client := operator.NewAggRpcClient(listener, &rpcClient, operator.AlwaysRetry, logger) + client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.AlwaysRetry, logger) go client.Start(ctx) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) @@ -160,7 +160,7 @@ func TestRetryAtMost(t *testing.T) { }, } - client := operator.NewAggRpcClient(listener, &rpcClient, operator.RetryAtMost(4), logger) + client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.RetryAtMost(4), logger) go client.Start(ctx) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) @@ -186,7 +186,7 @@ func TestRetryLaterIfRecentEnough(t *testing.T) { }, } - client := operator.NewAggRpcClient(listener, &rpcClient, operator.RetryIfRecentEnough(500*time.Millisecond), logger) + client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.RetryIfRecentEnough(500*time.Millisecond), logger) go client.Start(ctx) client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) From 6b0107d5b03eb5677a61d8532ef8aea1c39eb377 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 14:32:53 -0300 Subject: [PATCH 12/34] Don't use `self` --- operator/v2/rpc_client_v2.go | 62 ++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/operator/v2/rpc_client_v2.go b/operator/v2/rpc_client_v2.go index 75c37af7..b13f714f 100644 --- a/operator/v2/rpc_client_v2.go +++ b/operator/v2/rpc_client_v2.go @@ -70,82 +70,82 @@ func NewAggregatorRpcClient(listener Listener, rpcClient RpcClient, retryPredica } } -func (self *AggregatorRpcClient) Start(ctx context.Context) { +func (a *AggregatorRpcClient) Start(ctx context.Context) { defer func() { - self.closeCh <- struct{}{} + a.closeCh <- struct{}{} }() for { select { case <-ctx.Done(): - self.logger.Debug("AggRpcClient: context done") + a.logger.Debug("AggRpcClient: context done") return - case <-self.closeCh: - self.logger.Debug("AggRpcClient: close message received") + case <-a.closeCh: + a.logger.Debug("AggRpcClient: close message received") return - case action, ok := <-self.actionCh: + case action, ok := <-a.actionCh: if !ok { continue } - self.logger.Debug("AggRpcClient: action message received") + a.logger.Debug("AggRpcClient: action message received") err := action.run() if err != nil { - self.logger.Error("AggRpcClient: action failed after retrying", "err", err) - self.listener.IncError() + a.logger.Error("AggRpcClient: action failed after retrying", "err", err) + a.listener.IncError() - if self.shouldRetry(action, err) { - self.logger.Debug("AggRpcClient: retrying later") + if a.shouldRetry(action, err) { + a.logger.Debug("AggRpcClient: retrying later") action.retryCount++ - self.actionCh <- action + a.actionCh <- action } else { - self.logger.Debug("AggRpcClient: not retrying later") + a.logger.Debug("AggRpcClient: not retrying later") } } else { - self.logger.Debug("AggRpcClient: action executed successfully") - self.listener.IncSuccess() + a.logger.Debug("AggRpcClient: action executed successfully") + a.listener.IncSuccess() } } } } -func (self *AggregatorRpcClient) Close() { - self.once.Do(func() { - self.logger.Debug("AggRpcClient: close") +func (a *AggregatorRpcClient) Close() { + a.once.Do(func() { + a.logger.Debug("AggRpcClient: close") - close(self.actionCh) - self.closeCh <- struct{}{} + close(a.actionCh) + a.closeCh <- struct{}{} - <-self.closeCh - close(self.closeCh) + <-a.closeCh + close(a.closeCh) }) } -func (self *AggregatorRpcClient) SendProcessSignedCheckpointTaskResponse(message *messages.SignedCheckpointTaskResponse) { - self.actionCh <- action{ +func (a *AggregatorRpcClient) SendProcessSignedCheckpointTaskResponse(message *messages.SignedCheckpointTaskResponse) { + a.actionCh <- action{ submittedAt: time.Now(), run: func() error { var ignore bool - return self.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", message, &ignore) + return a.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", message, &ignore) }, } } -func (self *AggregatorRpcClient) SendSignedStateRootUpdateMessage(message *messages.SignedStateRootUpdateMessage) { - self.actionCh <- action{ +func (a *AggregatorRpcClient) SendSignedStateRootUpdateMessage(message *messages.SignedStateRootUpdateMessage) { + a.actionCh <- action{ submittedAt: time.Now(), run: func() error { var ignore bool - return self.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", message, &ignore) + return a.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", message, &ignore) }, } } -func (self *AggregatorRpcClient) SendSignedOperatorSetUpdateMessage(message *messages.SignedOperatorSetUpdateMessage) { - self.actionCh <- action{ +func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateMessage(message *messages.SignedOperatorSetUpdateMessage) { + a.actionCh <- action{ submittedAt: time.Now(), run: func() error { var ignore bool - return self.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", message, &ignore) + return a.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", message, &ignore) }, } } From 3fc60c600e19733130af9bb32c49ac978e9f56d2 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 15:01:55 -0300 Subject: [PATCH 13/34] Abstract `rpc.Client` --- operator/v2/rpc_client_v2.go | 5 ++++- operator/v2/rpc_client_v2_test.go | 18 +++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/operator/v2/rpc_client_v2.go b/operator/v2/rpc_client_v2.go index b13f714f..fe238f86 100644 --- a/operator/v2/rpc_client_v2.go +++ b/operator/v2/rpc_client_v2.go @@ -2,6 +2,7 @@ package operator2 import ( "context" + "net/rpc" "sync" "time" @@ -10,9 +11,11 @@ import ( ) type RpcClient interface { - Call(method string, args interface{}, reply *bool) error + Call(serviceMethod string, args any, reply any) error } +var _ RpcClient = (*rpc.Client)(nil) + type Listener interface { IncError() IncSuccess() diff --git a/operator/v2/rpc_client_v2_test.go b/operator/v2/rpc_client_v2_test.go index b42b7f5a..059a7a6b 100644 --- a/operator/v2/rpc_client_v2_test.go +++ b/operator/v2/rpc_client_v2_test.go @@ -15,16 +15,16 @@ import ( var _ = operator.RpcClient(&MockRpcClient{}) type MockRpcClient struct { - call func(method string, args interface{}, reply *bool) error + call func(serviceMethod string, args any, reply any) error } -func (self *MockRpcClient) Call(method string, args interface{}, reply *bool) error { - return self.call(method, args, reply) +func (self *MockRpcClient) Call(serviceMethod string, args any, reply any) error { + return self.call(serviceMethod, args, reply) } func NoopRpcClient() *MockRpcClient { return &MockRpcClient{ - call: func(method string, args interface{}, reply *bool) error { return nil }, + call: func(serviceMethod string, args any, reply any) error { return nil }, } } @@ -62,8 +62,8 @@ func TestSendSuccessfulMessages(t *testing.T) { rpcClientCallCount := 0 rpcClient := MockRpcClient{ - call: func(method string, args interface{}, reply *bool) error { - logger.Debug("MockRpcClient.Call", "method", method, "args", args) + call: func(serviceMethod string, args any, reply any) error { + logger.Debug("MockRpcClient.Call", "method", serviceMethod, "args", args) rpcClientCallCount++ return nil }, @@ -124,7 +124,7 @@ func TestUnboundedRetry(t *testing.T) { rpcSuccess := false rpcFailCount := 0 rpcClient := MockRpcClient{ - call: func(method string, args interface{}, reply *bool) error { + call: func(serviceMethod string, args any, reply any) error { if rpcFailCount < 2 { rpcFailCount++ return assert.AnError @@ -154,7 +154,7 @@ func TestRetryAtMost(t *testing.T) { rpcFailCount := 0 rpcClient := MockRpcClient{ - call: func(method string, args interface{}, reply *bool) error { + call: func(serviceMethod string, args any, reply any) error { rpcFailCount++ return assert.AnError }, @@ -178,7 +178,7 @@ func TestRetryLaterIfRecentEnough(t *testing.T) { rpcFailCount := 0 rpcClient := MockRpcClient{ - call: func(method string, args interface{}, reply *bool) error { + call: func(serviceMethod string, args any, reply any) error { time.Sleep(100 * time.Millisecond) rpcFailCount++ From 658f75245d3386e0ffd7ad6a10c9c5e07c9a194d Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 15:02:30 -0300 Subject: [PATCH 14/34] Add `NewHTTPAggregatorRpcClient` --- operator/v2/rpc_client_v2.go | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/operator/v2/rpc_client_v2.go b/operator/v2/rpc_client_v2.go index fe238f86..40e6f356 100644 --- a/operator/v2/rpc_client_v2.go +++ b/operator/v2/rpc_client_v2.go @@ -2,12 +2,15 @@ package operator2 import ( "context" + "errors" "net/rpc" "sync" "time" "github.com/Layr-Labs/eigensdk-go/logging" + eigentypes "github.com/Layr-Labs/eigensdk-go/types" "github.com/NethermindEth/near-sffl/core/types/messages" + "github.com/ethereum/go-ethereum/common" ) type RpcClient interface { @@ -16,6 +19,37 @@ type RpcClient interface { var _ RpcClient = (*rpc.Client)(nil) +func NewHTTPAggregatorRpcClient(aggregatorIpPortAddr string, operatorId eigentypes.OperatorId, expectedRegistryCoordinatorAddress common.Address, logger logging.Logger) (*rpc.Client, error) { + client, err := rpc.DialHTTP("tcp", aggregatorIpPortAddr) + if err != nil { + logger.Error("Error dialing aggregator rpc client", "err", err) + return nil, err + } + + var aggregatorRegistryCoordinatorAddress string + err = client.Call("Aggregator.GetRegistryCoordinatorAddress", struct{}{}, &aggregatorRegistryCoordinatorAddress) + if err != nil { + logger.Error("Received error when getting registry coordinator address", "err", err) + return nil, err + } + + logger.Debug("Notifying aggregator of initialization") + + var reply bool + err = client.Call("Aggregator.NotifyOperatorInitialization", operatorId, &reply) + if err != nil { + logger.Error("Error notifying aggregator of initialization", "err", err) + return nil, err + } + + if common.HexToAddress(aggregatorRegistryCoordinatorAddress).Cmp(expectedRegistryCoordinatorAddress) != 0 { + logger.Fatal("Registry coordinator address from aggregator does not match the one in the config", "aggregator", aggregatorRegistryCoordinatorAddress, "config", expectedRegistryCoordinatorAddress.String()) + return nil, errors.New("mismatching registry coordinator address from aggregator") + } + + return client, nil +} + type Listener interface { IncError() IncSuccess() From 17c134cd8527dd1a61664cddc9b58e900456253e Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 15:46:15 -0300 Subject: [PATCH 15/34] Add `AggregatorRpcClienter` interface --- operator/v2/rpc_client_v2.go | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/operator/v2/rpc_client_v2.go b/operator/v2/rpc_client_v2.go index 40e6f356..b372133d 100644 --- a/operator/v2/rpc_client_v2.go +++ b/operator/v2/rpc_client_v2.go @@ -83,6 +83,13 @@ type action struct { retryCount int } +type AggregatorRpcClienter interface { + SendSignedCheckpointTaskResponseToAggregator(signedCheckpointTaskResponse *messages.SignedCheckpointTaskResponse) + SendSignedStateRootUpdateToAggregator(signedStateRootUpdateMessage *messages.SignedStateRootUpdateMessage) + SendSignedOperatorSetUpdateToAggregator(signedOperatorSetUpdateMessage *messages.SignedOperatorSetUpdateMessage) + GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (*messages.CheckpointMessages, error) +} + type AggregatorRpcClient struct { listener Listener rpcClient RpcClient @@ -94,6 +101,8 @@ type AggregatorRpcClient struct { closeCh chan struct{} } +var _ AggregatorRpcClienter = (*AggregatorRpcClient)(nil) + func NewAggregatorRpcClient(listener Listener, rpcClient RpcClient, retryPredicate RetryPredicate, logger logging.Logger) AggregatorRpcClient { return AggregatorRpcClient{ listener: listener, @@ -157,7 +166,7 @@ func (a *AggregatorRpcClient) Close() { }) } -func (a *AggregatorRpcClient) SendProcessSignedCheckpointTaskResponse(message *messages.SignedCheckpointTaskResponse) { +func (a *AggregatorRpcClient) SendSignedCheckpointTaskResponseToAggregator(message *messages.SignedCheckpointTaskResponse) { a.actionCh <- action{ submittedAt: time.Now(), run: func() error { @@ -167,7 +176,7 @@ func (a *AggregatorRpcClient) SendProcessSignedCheckpointTaskResponse(message *m } } -func (a *AggregatorRpcClient) SendSignedStateRootUpdateMessage(message *messages.SignedStateRootUpdateMessage) { +func (a *AggregatorRpcClient) SendSignedStateRootUpdateToAggregator(message *messages.SignedStateRootUpdateMessage) { a.actionCh <- action{ submittedAt: time.Now(), run: func() error { @@ -177,7 +186,7 @@ func (a *AggregatorRpcClient) SendSignedStateRootUpdateMessage(message *messages } } -func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateMessage(message *messages.SignedOperatorSetUpdateMessage) { +func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(message *messages.SignedOperatorSetUpdateMessage) { a.actionCh <- action{ submittedAt: time.Now(), run: func() error { @@ -186,3 +195,22 @@ func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateMessage(message *messag }, } } + +func (a *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (*messages.CheckpointMessages, error) { + type Args struct { + FromTimestamp, ToTimestamp uint64 + } + + var checkpointMessages messages.CheckpointMessages + + a.actionCh <- action{ + submittedAt: time.Now(), + run: func() error { + return a.rpcClient.Call("Aggregator.GetAggregatedCheckpointMessages", &Args{fromTimestamp, toTimestamp}, &checkpointMessages) + }, + } + + // TODO: This is a terrible design. We're returning an initially nil pointer that at some point in the future might be fullfiled. + // This is essentially a `Promise` without the proper semantics (ex. wait for completion, error handling, etc) + return &checkpointMessages, nil +} From ca0820ada0f20f404e6e39a71812a9773d3c6e55 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 15:47:53 -0300 Subject: [PATCH 16/34] Remove interface (single implementation) --- operator/v2/rpc_client_v2.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/operator/v2/rpc_client_v2.go b/operator/v2/rpc_client_v2.go index b372133d..7ec25451 100644 --- a/operator/v2/rpc_client_v2.go +++ b/operator/v2/rpc_client_v2.go @@ -83,13 +83,6 @@ type action struct { retryCount int } -type AggregatorRpcClienter interface { - SendSignedCheckpointTaskResponseToAggregator(signedCheckpointTaskResponse *messages.SignedCheckpointTaskResponse) - SendSignedStateRootUpdateToAggregator(signedStateRootUpdateMessage *messages.SignedStateRootUpdateMessage) - SendSignedOperatorSetUpdateToAggregator(signedOperatorSetUpdateMessage *messages.SignedOperatorSetUpdateMessage) - GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (*messages.CheckpointMessages, error) -} - type AggregatorRpcClient struct { listener Listener rpcClient RpcClient @@ -101,8 +94,6 @@ type AggregatorRpcClient struct { closeCh chan struct{} } -var _ AggregatorRpcClienter = (*AggregatorRpcClient)(nil) - func NewAggregatorRpcClient(listener Listener, rpcClient RpcClient, retryPredicate RetryPredicate, logger logging.Logger) AggregatorRpcClient { return AggregatorRpcClient{ listener: listener, From b64cb17b0cc731c5300d28a38dac5b4d8353c4fc Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 15:49:24 -0300 Subject: [PATCH 17/34] Rename client method names --- operator/v2/rpc_client_v2_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/operator/v2/rpc_client_v2_test.go b/operator/v2/rpc_client_v2_test.go index 059a7a6b..0147d8a4 100644 --- a/operator/v2/rpc_client_v2_test.go +++ b/operator/v2/rpc_client_v2_test.go @@ -72,8 +72,8 @@ func TestSendSuccessfulMessages(t *testing.T) { client := operator.NewAggregatorRpcClient(&listener, &rpcClient, operator.NeverRetry, logger) go client.Start(ctx) - client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) - client.SendSignedOperatorSetUpdateMessage(&messages.SignedOperatorSetUpdateMessage{}) + client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) + client.SendSignedOperatorSetUpdateToAggregator(&messages.SignedOperatorSetUpdateMessage{}) time.Sleep(500 * time.Millisecond) client.Close() @@ -138,7 +138,7 @@ func TestUnboundedRetry(t *testing.T) { client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.AlwaysRetry, logger) go client.Start(ctx) - client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) + client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) time.Sleep(500 * time.Millisecond) client.Close() @@ -163,7 +163,7 @@ func TestRetryAtMost(t *testing.T) { client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.RetryAtMost(4), logger) go client.Start(ctx) - client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) + client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) time.Sleep(500 * time.Millisecond) client.Close() @@ -189,7 +189,7 @@ func TestRetryLaterIfRecentEnough(t *testing.T) { client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.RetryIfRecentEnough(500*time.Millisecond), logger) go client.Start(ctx) - client.SendSignedStateRootUpdateMessage(&messages.SignedStateRootUpdateMessage{}) + client.SendSignedCheckpointTaskResponseToAggregator(&messages.SignedCheckpointTaskResponse{}) time.Sleep(500 * time.Millisecond) client.Close() From 5eae2a190a071dfc26f699fb1291d1929542774c Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 16:26:29 -0300 Subject: [PATCH 18/34] Add `TestGetAggregatedCheckpointMessages` Shows a clear data race --- operator/v2/rpc_client_v2_test.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/operator/v2/rpc_client_v2_test.go b/operator/v2/rpc_client_v2_test.go index 0147d8a4..ab817f58 100644 --- a/operator/v2/rpc_client_v2_test.go +++ b/operator/v2/rpc_client_v2_test.go @@ -196,3 +196,34 @@ func TestRetryLaterIfRecentEnough(t *testing.T) { assert.Equal(t, 5, rpcFailCount) } + +func TestGetAggregatedCheckpointMessages(t *testing.T) { + ctx := context.Background() + logger, _ := logging.NewZapLogger(logging.Development) + listener := NoopListener() + + expected := messages.CheckpointMessages{ + StateRootUpdateMessages: []messages.StateRootUpdateMessage{{BlockHeight: 100}}, + } + + rpcClient := MockRpcClient{ + call: func(serviceMethod string, args any, reply any) error { + switch v := reply.(type) { + case *messages.CheckpointMessages: + *v = expected + } + return nil + }, + } + + client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.NeverRetry, logger) + go client.Start(ctx) + + response, err := client.GetAggregatedCheckpointMessages(0, 0) + time.Sleep(200 * time.Millisecond) + + assert.NoError(t, err) + assert.Equal(t, expected, *response) + + client.Close() +} From a0f833f2ec6ae4eca121305d5f85e91f81f4c207 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Wed, 19 Jun 2024 16:41:16 -0300 Subject: [PATCH 19/34] Make `GetAggregatedCheckpointMessages` blocking --- operator/v2/rpc_client_v2.go | 15 ++++++++---- operator/v2/rpc_client_v2_test.go | 40 +++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/operator/v2/rpc_client_v2.go b/operator/v2/rpc_client_v2.go index 7ec25451..23669975 100644 --- a/operator/v2/rpc_client_v2.go +++ b/operator/v2/rpc_client_v2.go @@ -187,21 +187,26 @@ func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(message *m } } -func (a *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (*messages.CheckpointMessages, error) { +// Blocking operation since we want to wait for the return value(s). +func (a *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (messages.CheckpointMessages, error) { type Args struct { FromTimestamp, ToTimestamp uint64 } var checkpointMessages messages.CheckpointMessages - a.actionCh <- action{ + action := action{ submittedAt: time.Now(), run: func() error { return a.rpcClient.Call("Aggregator.GetAggregatedCheckpointMessages", &Args{fromTimestamp, toTimestamp}, &checkpointMessages) }, } + err := action.run() + + for err != nil && a.shouldRetry(action, err) { + action.retryCount++ + err = action.run() + } - // TODO: This is a terrible design. We're returning an initially nil pointer that at some point in the future might be fullfiled. - // This is essentially a `Promise` without the proper semantics (ex. wait for completion, error handling, etc) - return &checkpointMessages, nil + return checkpointMessages, err } diff --git a/operator/v2/rpc_client_v2_test.go b/operator/v2/rpc_client_v2_test.go index ab817f58..06ac39c0 100644 --- a/operator/v2/rpc_client_v2_test.go +++ b/operator/v2/rpc_client_v2_test.go @@ -220,10 +220,46 @@ func TestGetAggregatedCheckpointMessages(t *testing.T) { go client.Start(ctx) response, err := client.GetAggregatedCheckpointMessages(0, 0) - time.Sleep(200 * time.Millisecond) assert.NoError(t, err) - assert.Equal(t, expected, *response) + assert.Equal(t, expected, response) + + client.Close() +} + +func TestGetAggregatedCheckpointMessagesRetry(t *testing.T) { + ctx := context.Background() + logger, _ := logging.NewZapLogger(logging.Development) + listener := NoopListener() + + expected := messages.CheckpointMessages{ + StateRootUpdateMessages: []messages.StateRootUpdateMessage{{BlockHeight: 100}}, + } + + rpcFailCount := 0 + rpcClient := MockRpcClient{ + call: func(serviceMethod string, args any, reply any) error { + if rpcFailCount < 2 { + rpcFailCount++ + return assert.AnError + } + + switch v := reply.(type) { + case *messages.CheckpointMessages: + *v = expected + } + return nil + }, + } + + client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.AlwaysRetry, logger) + go client.Start(ctx) + + response, err := client.GetAggregatedCheckpointMessages(0, 0) + + assert.NoError(t, err) + assert.Equal(t, expected, response) + assert.Equal(t, 2, rpcFailCount) client.Close() } From b5f5207bca6480f3a76a6d545d3c7abbf588466e Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 12:54:55 -0300 Subject: [PATCH 20/34] Replace event loop with blocking code --- operator/v2/rpc_client.go | 152 +++++++++++++ ...c_client_v2_test.go => rpc_client_test.go} | 127 ++--------- operator/v2/rpc_client_v2.go | 212 ------------------ 3 files changed, 175 insertions(+), 316 deletions(-) create mode 100644 operator/v2/rpc_client.go rename operator/v2/{rpc_client_v2_test.go => rpc_client_test.go} (56%) delete mode 100644 operator/v2/rpc_client_v2.go diff --git a/operator/v2/rpc_client.go b/operator/v2/rpc_client.go new file mode 100644 index 00000000..5de8c2ce --- /dev/null +++ b/operator/v2/rpc_client.go @@ -0,0 +1,152 @@ +package operator2 + +import ( + "errors" + "net/rpc" + "time" + + "github.com/Layr-Labs/eigensdk-go/logging" + eigentypes "github.com/Layr-Labs/eigensdk-go/types" + "github.com/NethermindEth/near-sffl/core/types/messages" + "github.com/ethereum/go-ethereum/common" +) + +type RpcClient interface { + Call(serviceMethod string, args any, reply any) error +} + +var _ RpcClient = (*rpc.Client)(nil) + +func NewHTTPAggregatorRpcClient(aggregatorIpPortAddr string, operatorId eigentypes.OperatorId, expectedRegistryCoordinatorAddress common.Address, logger logging.Logger) (*rpc.Client, error) { + client, err := rpc.DialHTTP("tcp", aggregatorIpPortAddr) + if err != nil { + logger.Error("Error dialing aggregator rpc client", "err", err) + return nil, err + } + + var aggregatorRegistryCoordinatorAddress string + err = client.Call("Aggregator.GetRegistryCoordinatorAddress", struct{}{}, &aggregatorRegistryCoordinatorAddress) + if err != nil { + logger.Error("Received error when getting registry coordinator address", "err", err) + return nil, err + } + + logger.Debug("Notifying aggregator of initialization") + + var reply bool + err = client.Call("Aggregator.NotifyOperatorInitialization", operatorId, &reply) + if err != nil { + logger.Error("Error notifying aggregator of initialization", "err", err) + return nil, err + } + + if common.HexToAddress(aggregatorRegistryCoordinatorAddress).Cmp(expectedRegistryCoordinatorAddress) != 0 { + logger.Fatal("Registry coordinator address from aggregator does not match the one in the config", "aggregator", aggregatorRegistryCoordinatorAddress, "config", expectedRegistryCoordinatorAddress.String()) + return nil, errors.New("mismatching registry coordinator address from aggregator") + } + + return client, nil +} + +type RetryStrategy func(submittedAt time.Time, err error) bool + +func NeverRetry(_ time.Time, _ error) bool { + return false +} + +func AlwaysRetry(_ time.Time, _ error) bool { + return true +} + +func RetryIfRecentEnough(ttl time.Duration) RetryStrategy { + return func(submittedAt time.Time, err error) bool { + return time.Since(submittedAt) < ttl + } +} + +func RetryAtMost(retries int) RetryStrategy { + retryCount := 0 + return func(_ time.Time, err error) bool { + result := retryCount < retries + retryCount++ + return result + } +} + +type AggregatorRpcClient struct { + rpcClient RpcClient + shouldRetry RetryStrategy + logger logging.Logger +} + +func NewAggregatorRpcClient(rpcClient RpcClient, retryPredicate RetryStrategy, logger logging.Logger) AggregatorRpcClient { + return AggregatorRpcClient{ + rpcClient: rpcClient, + shouldRetry: retryPredicate, + logger: logger, + } +} + +func (a *AggregatorRpcClient) SendSignedCheckpointTaskResponseToAggregator(message *messages.SignedCheckpointTaskResponse) error { + submittedAt := time.Now() + var ignore bool + action := func() error { + return a.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", message, &ignore) + } + + err := action() + for err != nil && a.shouldRetry(submittedAt, err) { + err = action() + } + + return err +} + +func (a *AggregatorRpcClient) SendSignedStateRootUpdateToAggregator(message *messages.SignedStateRootUpdateMessage) error { + submittedAt := time.Now() + var ignore bool + action := func() error { + return a.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", message, &ignore) + } + + err := action() + for err != nil && a.shouldRetry(submittedAt, err) { + err = action() + } + + return err +} + +func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(message *messages.SignedOperatorSetUpdateMessage) error { + submittedAt := time.Now() + var ignore bool + action := func() error { + return a.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", message, &ignore) + } + + err := action() + for err != nil && a.shouldRetry(submittedAt, err) { + err = action() + } + + return err +} + +func (a *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (messages.CheckpointMessages, error) { + type Args struct { + FromTimestamp, ToTimestamp uint64 + } + + submittedAt := time.Now() + var checkpointMessages messages.CheckpointMessages + action := func() error { + return a.rpcClient.Call("Aggregator.GetAggregatedCheckpointMessages", &Args{fromTimestamp, toTimestamp}, &checkpointMessages) + } + + err := action() + for err != nil && a.shouldRetry(submittedAt, err) { + err = action() + } + + return checkpointMessages, err +} diff --git a/operator/v2/rpc_client_v2_test.go b/operator/v2/rpc_client_test.go similarity index 56% rename from operator/v2/rpc_client_v2_test.go rename to operator/v2/rpc_client_test.go index 06ac39c0..52f720ae 100644 --- a/operator/v2/rpc_client_v2_test.go +++ b/operator/v2/rpc_client_test.go @@ -1,7 +1,6 @@ package operator2_test import ( - "context" "sync" "testing" "time" @@ -15,10 +14,14 @@ import ( var _ = operator.RpcClient(&MockRpcClient{}) type MockRpcClient struct { + lock sync.Mutex call func(serviceMethod string, args any, reply any) error } func (self *MockRpcClient) Call(serviceMethod string, args any, reply any) error { + self.lock.Lock() + defer self.lock.Unlock() + return self.call(serviceMethod, args, reply) } @@ -28,98 +31,40 @@ func NoopRpcClient() *MockRpcClient { } } -type MockListener struct { - incError func() - incSuccess func() -} - -var _ = operator.Listener(&MockListener{}) - -func (self *MockListener) IncError() { - self.incError() -} - -func (self *MockListener) IncSuccess() { - self.incSuccess() -} - -func NoopListener() *MockListener { - return &MockListener{ - incError: func() {}, - incSuccess: func() {}, - } -} - func TestSendSuccessfulMessages(t *testing.T) { - ctx := context.Background() logger, _ := logging.NewZapLogger(logging.Development) - successCount := 0 - listener := MockListener{ - incSuccess: func() { logger.Debug("IncSuccess"); successCount++ }, - incError: func() { logger.Debug("IncError") }, - } - rpcClientCallCount := 0 rpcClient := MockRpcClient{ call: func(serviceMethod string, args any, reply any) error { logger.Debug("MockRpcClient.Call", "method", serviceMethod, "args", args) rpcClientCallCount++ + return nil }, } - client := operator.NewAggregatorRpcClient(&listener, &rpcClient, operator.NeverRetry, logger) - go client.Start(ctx) + client := operator.NewAggregatorRpcClient(&rpcClient, operator.NeverRetry, logger) - client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) - client.SendSignedOperatorSetUpdateToAggregator(&messages.SignedOperatorSetUpdateMessage{}) - - time.Sleep(500 * time.Millisecond) - client.Close() - - assert.Equal(t, 2, successCount) - assert.Equal(t, 2, rpcClientCallCount) -} - -func TestCloseWithContext(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) - defer cancel() - - logger, _ := logging.NewZapLogger(logging.Development) - listener := NoopListener() - rpcClient := NoopRpcClient() - - client := operator.NewAggregatorRpcClient(listener, rpcClient, operator.NeverRetry, logger) - go client.Start(ctx) - - time.Sleep(1 * time.Second) -} - -func TestMultipleConcurrentClose(t *testing.T) { - ctx := context.Background() - logger, _ := logging.NewZapLogger(logging.Development) - listener := NoopListener() - rpcClient := NoopRpcClient() + wg := sync.WaitGroup{} + wg.Add(2) + go func() { + defer wg.Done() + client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) + }() - client := operator.NewAggregatorRpcClient(listener, rpcClient, operator.NeverRetry, logger) - go client.Start(ctx) + go func() { + defer wg.Done() + client.SendSignedOperatorSetUpdateToAggregator(&messages.SignedOperatorSetUpdateMessage{}) + }() - wg := sync.WaitGroup{} - for i := 0; i < 10; i++ { - wg.Add(1) - go func() { - defer wg.Done() - client.Close() - }() - } wg.Wait() + + assert.Equal(t, 2, rpcClientCallCount) } func TestUnboundedRetry(t *testing.T) { - ctx := context.Background() logger, _ := logging.NewZapLogger(logging.Development) - listener := NoopListener() rpcSuccess := false rpcFailCount := 0 @@ -135,22 +80,16 @@ func TestUnboundedRetry(t *testing.T) { }, } - client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.AlwaysRetry, logger) - go client.Start(ctx) + client := operator.NewAggregatorRpcClient(&rpcClient, operator.AlwaysRetry, logger) client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) - time.Sleep(500 * time.Millisecond) - client.Close() - assert.Equal(t, 2, rpcFailCount) assert.True(t, rpcSuccess) } func TestRetryAtMost(t *testing.T) { - ctx := context.Background() logger, _ := logging.NewZapLogger(logging.Development) - listener := NoopListener() rpcFailCount := 0 rpcClient := MockRpcClient{ @@ -160,21 +99,15 @@ func TestRetryAtMost(t *testing.T) { }, } - client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.RetryAtMost(4), logger) - go client.Start(ctx) + client := operator.NewAggregatorRpcClient(&rpcClient, operator.RetryAtMost(4), logger) client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) - time.Sleep(500 * time.Millisecond) - client.Close() - assert.Equal(t, 5, rpcFailCount) // 1 run, 4 retries } func TestRetryLaterIfRecentEnough(t *testing.T) { - ctx := context.Background() logger, _ := logging.NewZapLogger(logging.Development) - listener := NoopListener() rpcFailCount := 0 rpcClient := MockRpcClient{ @@ -186,21 +119,15 @@ func TestRetryLaterIfRecentEnough(t *testing.T) { }, } - client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.RetryIfRecentEnough(500*time.Millisecond), logger) - go client.Start(ctx) + client := operator.NewAggregatorRpcClient(&rpcClient, operator.RetryIfRecentEnough(500*time.Millisecond), logger) client.SendSignedCheckpointTaskResponseToAggregator(&messages.SignedCheckpointTaskResponse{}) - time.Sleep(500 * time.Millisecond) - client.Close() - assert.Equal(t, 5, rpcFailCount) } func TestGetAggregatedCheckpointMessages(t *testing.T) { - ctx := context.Background() logger, _ := logging.NewZapLogger(logging.Development) - listener := NoopListener() expected := messages.CheckpointMessages{ StateRootUpdateMessages: []messages.StateRootUpdateMessage{{BlockHeight: 100}}, @@ -216,21 +143,16 @@ func TestGetAggregatedCheckpointMessages(t *testing.T) { }, } - client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.NeverRetry, logger) - go client.Start(ctx) + client := operator.NewAggregatorRpcClient(&rpcClient, operator.NeverRetry, logger) response, err := client.GetAggregatedCheckpointMessages(0, 0) assert.NoError(t, err) assert.Equal(t, expected, response) - - client.Close() } func TestGetAggregatedCheckpointMessagesRetry(t *testing.T) { - ctx := context.Background() logger, _ := logging.NewZapLogger(logging.Development) - listener := NoopListener() expected := messages.CheckpointMessages{ StateRootUpdateMessages: []messages.StateRootUpdateMessage{{BlockHeight: 100}}, @@ -252,14 +174,11 @@ func TestGetAggregatedCheckpointMessagesRetry(t *testing.T) { }, } - client := operator.NewAggregatorRpcClient(listener, &rpcClient, operator.AlwaysRetry, logger) - go client.Start(ctx) + client := operator.NewAggregatorRpcClient(&rpcClient, operator.AlwaysRetry, logger) response, err := client.GetAggregatedCheckpointMessages(0, 0) assert.NoError(t, err) assert.Equal(t, expected, response) assert.Equal(t, 2, rpcFailCount) - - client.Close() } diff --git a/operator/v2/rpc_client_v2.go b/operator/v2/rpc_client_v2.go deleted file mode 100644 index 23669975..00000000 --- a/operator/v2/rpc_client_v2.go +++ /dev/null @@ -1,212 +0,0 @@ -package operator2 - -import ( - "context" - "errors" - "net/rpc" - "sync" - "time" - - "github.com/Layr-Labs/eigensdk-go/logging" - eigentypes "github.com/Layr-Labs/eigensdk-go/types" - "github.com/NethermindEth/near-sffl/core/types/messages" - "github.com/ethereum/go-ethereum/common" -) - -type RpcClient interface { - Call(serviceMethod string, args any, reply any) error -} - -var _ RpcClient = (*rpc.Client)(nil) - -func NewHTTPAggregatorRpcClient(aggregatorIpPortAddr string, operatorId eigentypes.OperatorId, expectedRegistryCoordinatorAddress common.Address, logger logging.Logger) (*rpc.Client, error) { - client, err := rpc.DialHTTP("tcp", aggregatorIpPortAddr) - if err != nil { - logger.Error("Error dialing aggregator rpc client", "err", err) - return nil, err - } - - var aggregatorRegistryCoordinatorAddress string - err = client.Call("Aggregator.GetRegistryCoordinatorAddress", struct{}{}, &aggregatorRegistryCoordinatorAddress) - if err != nil { - logger.Error("Received error when getting registry coordinator address", "err", err) - return nil, err - } - - logger.Debug("Notifying aggregator of initialization") - - var reply bool - err = client.Call("Aggregator.NotifyOperatorInitialization", operatorId, &reply) - if err != nil { - logger.Error("Error notifying aggregator of initialization", "err", err) - return nil, err - } - - if common.HexToAddress(aggregatorRegistryCoordinatorAddress).Cmp(expectedRegistryCoordinatorAddress) != 0 { - logger.Fatal("Registry coordinator address from aggregator does not match the one in the config", "aggregator", aggregatorRegistryCoordinatorAddress, "config", expectedRegistryCoordinatorAddress.String()) - return nil, errors.New("mismatching registry coordinator address from aggregator") - } - - return client, nil -} - -type Listener interface { - IncError() - IncSuccess() -} - -type RetryPredicate func(action action, err error) bool - -func NeverRetry(_ action, _ error) bool { - return false -} - -func AlwaysRetry(_ action, _ error) bool { - return true -} - -func RetryIfRecentEnough(ttl time.Duration) RetryPredicate { - return func(action action, err error) bool { - return time.Since(action.submittedAt) < ttl - } -} - -func RetryAtMost(retries int) RetryPredicate { - return func(action action, err error) bool { - return action.retryCount < retries - } -} - -type action struct { - run func() error - submittedAt time.Time - retryCount int -} - -type AggregatorRpcClient struct { - listener Listener - rpcClient RpcClient - shouldRetry RetryPredicate - logger logging.Logger - actionCh chan action - - once sync.Once - closeCh chan struct{} -} - -func NewAggregatorRpcClient(listener Listener, rpcClient RpcClient, retryPredicate RetryPredicate, logger logging.Logger) AggregatorRpcClient { - return AggregatorRpcClient{ - listener: listener, - rpcClient: rpcClient, - shouldRetry: retryPredicate, - logger: logger, - actionCh: make(chan action, 10), - - once: sync.Once{}, - closeCh: make(chan struct{}), - } -} - -func (a *AggregatorRpcClient) Start(ctx context.Context) { - defer func() { - a.closeCh <- struct{}{} - }() - - for { - select { - case <-ctx.Done(): - a.logger.Debug("AggRpcClient: context done") - return - case <-a.closeCh: - a.logger.Debug("AggRpcClient: close message received") - return - case action, ok := <-a.actionCh: - if !ok { - continue - } - a.logger.Debug("AggRpcClient: action message received") - err := action.run() - if err != nil { - a.logger.Error("AggRpcClient: action failed after retrying", "err", err) - a.listener.IncError() - - if a.shouldRetry(action, err) { - a.logger.Debug("AggRpcClient: retrying later") - action.retryCount++ - a.actionCh <- action - } else { - a.logger.Debug("AggRpcClient: not retrying later") - } - } else { - a.logger.Debug("AggRpcClient: action executed successfully") - a.listener.IncSuccess() - } - } - } -} - -func (a *AggregatorRpcClient) Close() { - a.once.Do(func() { - a.logger.Debug("AggRpcClient: close") - - close(a.actionCh) - a.closeCh <- struct{}{} - - <-a.closeCh - close(a.closeCh) - }) -} - -func (a *AggregatorRpcClient) SendSignedCheckpointTaskResponseToAggregator(message *messages.SignedCheckpointTaskResponse) { - a.actionCh <- action{ - submittedAt: time.Now(), - run: func() error { - var ignore bool - return a.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", message, &ignore) - }, - } -} - -func (a *AggregatorRpcClient) SendSignedStateRootUpdateToAggregator(message *messages.SignedStateRootUpdateMessage) { - a.actionCh <- action{ - submittedAt: time.Now(), - run: func() error { - var ignore bool - return a.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", message, &ignore) - }, - } -} - -func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(message *messages.SignedOperatorSetUpdateMessage) { - a.actionCh <- action{ - submittedAt: time.Now(), - run: func() error { - var ignore bool - return a.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", message, &ignore) - }, - } -} - -// Blocking operation since we want to wait for the return value(s). -func (a *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (messages.CheckpointMessages, error) { - type Args struct { - FromTimestamp, ToTimestamp uint64 - } - - var checkpointMessages messages.CheckpointMessages - - action := action{ - submittedAt: time.Now(), - run: func() error { - return a.rpcClient.Call("Aggregator.GetAggregatedCheckpointMessages", &Args{fromTimestamp, toTimestamp}, &checkpointMessages) - }, - } - err := action.run() - - for err != nil && a.shouldRetry(action, err) { - action.retryCount++ - err = action.run() - } - - return checkpointMessages, err -} From 24b027ebcaddf6a26fd67240e9659566c11249cc Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 13:47:27 -0300 Subject: [PATCH 21/34] Add `Metricable` interface to `AggregatorRpcClient` --- operator/v2/rpc_client.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/operator/v2/rpc_client.go b/operator/v2/rpc_client.go index 5de8c2ce..8c8ad961 100644 --- a/operator/v2/rpc_client.go +++ b/operator/v2/rpc_client.go @@ -7,8 +7,11 @@ import ( "github.com/Layr-Labs/eigensdk-go/logging" eigentypes "github.com/Layr-Labs/eigensdk-go/types" + "github.com/NethermindEth/near-sffl/core" "github.com/NethermindEth/near-sffl/core/types/messages" + v1 "github.com/NethermindEth/near-sffl/operator" "github.com/ethereum/go-ethereum/common" + "github.com/prometheus/client_golang/prometheus" ) type RpcClient interface { @@ -76,13 +79,17 @@ func RetryAtMost(retries int) RetryStrategy { type AggregatorRpcClient struct { rpcClient RpcClient shouldRetry RetryStrategy + listener v1.RpcClientEventListener logger logging.Logger } +var _ core.Metricable = (*AggregatorRpcClient)(nil) + func NewAggregatorRpcClient(rpcClient RpcClient, retryPredicate RetryStrategy, logger logging.Logger) AggregatorRpcClient { return AggregatorRpcClient{ rpcClient: rpcClient, shouldRetry: retryPredicate, + listener: &v1.SelectiveRpcClientListener{}, logger: logger, } } @@ -150,3 +157,13 @@ func (a *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toT return checkpointMessages, err } + +func (c *AggregatorRpcClient) EnableMetrics(registry *prometheus.Registry) error { + listener, err := v1.MakeRpcClientMetrics(registry) + if err != nil { + return err + } + + c.listener = listener + return nil +} From cf09334085b2aa6f7691d7ab0c82af288d0ffaa4 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 13:48:01 -0300 Subject: [PATCH 22/34] Use listener in `SendSignedCheckpointTaskResponseToAggregator` --- operator/v2/rpc_client.go | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/operator/v2/rpc_client.go b/operator/v2/rpc_client.go index 8c8ad961..6768f7f4 100644 --- a/operator/v2/rpc_client.go +++ b/operator/v2/rpc_client.go @@ -94,16 +94,34 @@ func NewAggregatorRpcClient(rpcClient RpcClient, retryPredicate RetryStrategy, l } } -func (a *AggregatorRpcClient) SendSignedCheckpointTaskResponseToAggregator(message *messages.SignedCheckpointTaskResponse) error { +func (a *AggregatorRpcClient) SendSignedCheckpointTaskResponseToAggregator(signedCheckpointTaskResponse *messages.SignedCheckpointTaskResponse) error { + a.logger.Info("Sending signed task response header to aggregator", "signedCheckpointTaskResponse", signedCheckpointTaskResponse) + submittedAt := time.Now() - var ignore bool + var reply bool action := func() error { - return a.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", message, &ignore) + err := a.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", signedCheckpointTaskResponse, &reply) + if err != nil { + a.logger.Error("Received error from aggregator", "err", err) + } + return err } + retried := false err := action() for err != nil && a.shouldRetry(submittedAt, err) { + a.listener.IncErroredCheckpointSubmissions(retried) err = action() + retried = true + } + + if err != nil { + a.logger.Info("Signed task response header accepted by aggregator", "reply", reply) + a.listener.IncCheckpointTaskResponseSubmissions(retried) + a.listener.ObserveLastCheckpointIdResponded(signedCheckpointTaskResponse.TaskResponse.ReferenceTaskIndex) + a.listener.OnMessagesReceived() + } else { + a.logger.Error("Dropping message after error", "err", err) } return err From 5ec7d284a26fe1d8eb716309b855122fb494e31f Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 13:48:20 -0300 Subject: [PATCH 23/34] Use listener in `SendSignedStateRootUpdateToAggregator` --- operator/v2/rpc_client.go | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/operator/v2/rpc_client.go b/operator/v2/rpc_client.go index 6768f7f4..1c91c7d1 100644 --- a/operator/v2/rpc_client.go +++ b/operator/v2/rpc_client.go @@ -127,16 +127,33 @@ func (a *AggregatorRpcClient) SendSignedCheckpointTaskResponseToAggregator(signe return err } -func (a *AggregatorRpcClient) SendSignedStateRootUpdateToAggregator(message *messages.SignedStateRootUpdateMessage) error { +func (a *AggregatorRpcClient) SendSignedStateRootUpdateToAggregator(signedStateRootUpdateMessage *messages.SignedStateRootUpdateMessage) error { + a.logger.Info("Sending signed state root update message to aggregator", "signedStateRootUpdateMessage", signedStateRootUpdateMessage) + submittedAt := time.Now() - var ignore bool + var reply bool action := func() error { - return a.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", message, &ignore) + err := a.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", signedStateRootUpdateMessage, &reply) + if err != nil { + a.logger.Error("Received error from aggregator", "err", err) + } + return err } + retried := false err := action() for err != nil && a.shouldRetry(submittedAt, err) { + a.listener.IncErroredStateRootUpdateSubmissions(signedStateRootUpdateMessage.Message.RollupId, retried) err = action() + retried = true + } + + if err != nil { + a.logger.Info("Signed state root update message accepted by aggregator", "reply", reply) + a.listener.IncStateRootUpdateSubmissions(signedStateRootUpdateMessage.Message.RollupId, retried) + a.listener.OnMessagesReceived() + } else { + a.logger.Error("Dropping message after error", "err", err) } return err From e576b5f979a8e3563ccbb6db2272979b7ddcaec0 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 14:00:37 -0300 Subject: [PATCH 24/34] Use listener in `SendSignedOperatorSetUpdateToAggregator` --- operator/v2/rpc_client.go | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/operator/v2/rpc_client.go b/operator/v2/rpc_client.go index 1c91c7d1..9b7fd7ce 100644 --- a/operator/v2/rpc_client.go +++ b/operator/v2/rpc_client.go @@ -159,16 +159,34 @@ func (a *AggregatorRpcClient) SendSignedStateRootUpdateToAggregator(signedStateR return err } -func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(message *messages.SignedOperatorSetUpdateMessage) error { +func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(signedOperatorSetUpdateMessage *messages.SignedOperatorSetUpdateMessage) error { + a.logger.Info("Sending operator set update message to aggregator", "signedOperatorSetUpdateMessage", signedOperatorSetUpdateMessage) + submittedAt := time.Now() - var ignore bool + var reply bool action := func() error { - return a.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", message, &ignore) + err := a.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", signedOperatorSetUpdateMessage, &reply) + if err != nil { + a.logger.Error("Received error from aggregator", "err", err) + } + return err } + retried := false err := action() for err != nil && a.shouldRetry(submittedAt, err) { + a.listener.IncErroredOperatorSetUpdateSubmissions(retried) err = action() + retried = true + } + + if err != nil { + a.logger.Info("Signed operator set update message accepted by aggregator", "reply", reply) + a.listener.IncOperatorSetUpdateUpdateSubmissions(retried) + a.listener.ObserveLastOperatorSetUpdateIdResponded(signedOperatorSetUpdateMessage.Message.Id) + a.listener.OnMessagesReceived() + } else { + a.logger.Error("Dropping message after error", "err", err) } return err From 22f52644536b7764c9176d9e71af4b3869b0ee35 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 14:01:50 -0300 Subject: [PATCH 25/34] Add logs to `GetAggregatedCheckpointMessages` --- operator/v2/rpc_client.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/operator/v2/rpc_client.go b/operator/v2/rpc_client.go index 9b7fd7ce..1afbd985 100644 --- a/operator/v2/rpc_client.go +++ b/operator/v2/rpc_client.go @@ -193,6 +193,8 @@ func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(signedOper } func (a *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (messages.CheckpointMessages, error) { + a.logger.Info("Getting checkpoint messages from aggregator", "fromTimestamp", fromTimestamp, "toTimestamp", toTimestamp) + type Args struct { FromTimestamp, ToTimestamp uint64 } @@ -200,7 +202,11 @@ func (a *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toT submittedAt := time.Now() var checkpointMessages messages.CheckpointMessages action := func() error { - return a.rpcClient.Call("Aggregator.GetAggregatedCheckpointMessages", &Args{fromTimestamp, toTimestamp}, &checkpointMessages) + err := a.rpcClient.Call("Aggregator.GetAggregatedCheckpointMessages", &Args{fromTimestamp, toTimestamp}, &checkpointMessages) + if err != nil { + a.logger.Error("Received error from aggregator", "err", err) + } + return err } err := action() From fefaa66cebd7b3fac79e571a97cec53d69ce716e Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 14:17:50 -0300 Subject: [PATCH 26/34] Add `RetryWithDelay` --- operator/v2/rpc_client.go | 43 ++++++++++++++++++++-------------- operator/v2/rpc_client_test.go | 29 +++++++++++++++++++++++ 2 files changed, 55 insertions(+), 17 deletions(-) diff --git a/operator/v2/rpc_client.go b/operator/v2/rpc_client.go index 1afbd985..543f1a8c 100644 --- a/operator/v2/rpc_client.go +++ b/operator/v2/rpc_client.go @@ -61,6 +61,13 @@ func AlwaysRetry(_ time.Time, _ error) bool { return true } +func RetryWithDelay(delay time.Duration, inner RetryStrategy) RetryStrategy { + return func(submittedAt time.Time, err error) bool { + time.Sleep(delay) + return inner(submittedAt, err) + } +} + func RetryIfRecentEnough(ttl time.Duration) RetryStrategy { return func(submittedAt time.Time, err error) bool { return time.Since(submittedAt) < ttl @@ -116,15 +123,16 @@ func (a *AggregatorRpcClient) SendSignedCheckpointTaskResponseToAggregator(signe } if err != nil { - a.logger.Info("Signed task response header accepted by aggregator", "reply", reply) - a.listener.IncCheckpointTaskResponseSubmissions(retried) - a.listener.ObserveLastCheckpointIdResponded(signedCheckpointTaskResponse.TaskResponse.ReferenceTaskIndex) - a.listener.OnMessagesReceived() - } else { a.logger.Error("Dropping message after error", "err", err) + return err } - return err + a.logger.Info("Signed task response header accepted by aggregator", "reply", reply) + a.listener.IncCheckpointTaskResponseSubmissions(retried) + a.listener.ObserveLastCheckpointIdResponded(signedCheckpointTaskResponse.TaskResponse.ReferenceTaskIndex) + a.listener.OnMessagesReceived() + + return nil } func (a *AggregatorRpcClient) SendSignedStateRootUpdateToAggregator(signedStateRootUpdateMessage *messages.SignedStateRootUpdateMessage) error { @@ -149,14 +157,15 @@ func (a *AggregatorRpcClient) SendSignedStateRootUpdateToAggregator(signedStateR } if err != nil { - a.logger.Info("Signed state root update message accepted by aggregator", "reply", reply) - a.listener.IncStateRootUpdateSubmissions(signedStateRootUpdateMessage.Message.RollupId, retried) - a.listener.OnMessagesReceived() - } else { a.logger.Error("Dropping message after error", "err", err) + return err } - return err + a.logger.Info("Signed state root update message accepted by aggregator", "reply", reply) + a.listener.IncStateRootUpdateSubmissions(signedStateRootUpdateMessage.Message.RollupId, retried) + a.listener.OnMessagesReceived() + + return nil } func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(signedOperatorSetUpdateMessage *messages.SignedOperatorSetUpdateMessage) error { @@ -181,15 +190,15 @@ func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(signedOper } if err != nil { - a.logger.Info("Signed operator set update message accepted by aggregator", "reply", reply) - a.listener.IncOperatorSetUpdateUpdateSubmissions(retried) - a.listener.ObserveLastOperatorSetUpdateIdResponded(signedOperatorSetUpdateMessage.Message.Id) - a.listener.OnMessagesReceived() - } else { a.logger.Error("Dropping message after error", "err", err) + return err } - return err + a.logger.Info("Signed operator set update message accepted by aggregator", "reply", reply) + a.listener.IncOperatorSetUpdateUpdateSubmissions(retried) + a.listener.ObserveLastOperatorSetUpdateIdResponded(signedOperatorSetUpdateMessage.Message.Id) + a.listener.OnMessagesReceived() + return nil } func (a *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (messages.CheckpointMessages, error) { diff --git a/operator/v2/rpc_client_test.go b/operator/v2/rpc_client_test.go index 52f720ae..24d25998 100644 --- a/operator/v2/rpc_client_test.go +++ b/operator/v2/rpc_client_test.go @@ -88,6 +88,35 @@ func TestUnboundedRetry(t *testing.T) { assert.True(t, rpcSuccess) } +func TestUnboundedRetry_Delayed(t *testing.T) { + logger, _ := logging.NewZapLogger(logging.Development) + + rpcSuccess := false + rpcFailCount := 0 + rpcClient := MockRpcClient{ + call: func(serviceMethod string, args any, reply any) error { + if rpcFailCount < 2 { + rpcFailCount++ + return assert.AnError + } + + rpcSuccess = true + return nil + }, + } + + client := operator.NewAggregatorRpcClient(&rpcClient, operator.RetryWithDelay(100*time.Millisecond, operator.AlwaysRetry), logger) + + startedAt := time.Now() + client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) + execTime := time.Since(startedAt) + + assert.True(t, execTime > 180*time.Millisecond) + assert.True(t, execTime < 220*time.Millisecond) + assert.Equal(t, 2, rpcFailCount) + assert.True(t, rpcSuccess) +} + func TestRetryAtMost(t *testing.T) { logger, _ := logging.NewZapLogger(logging.Development) From f3d197c868784043ed41789b4827056839f66f3f Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 14:19:17 -0300 Subject: [PATCH 27/34] Rename parameter --- operator/v2/rpc_client.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operator/v2/rpc_client.go b/operator/v2/rpc_client.go index 543f1a8c..417cf49d 100644 --- a/operator/v2/rpc_client.go +++ b/operator/v2/rpc_client.go @@ -92,10 +92,10 @@ type AggregatorRpcClient struct { var _ core.Metricable = (*AggregatorRpcClient)(nil) -func NewAggregatorRpcClient(rpcClient RpcClient, retryPredicate RetryStrategy, logger logging.Logger) AggregatorRpcClient { +func NewAggregatorRpcClient(rpcClient RpcClient, retryStrategy RetryStrategy, logger logging.Logger) AggregatorRpcClient { return AggregatorRpcClient{ rpcClient: rpcClient, - shouldRetry: retryPredicate, + shouldRetry: retryStrategy, listener: &v1.SelectiveRpcClientListener{}, logger: logger, } From fc5665b5a05296be8922da79966dba0dab3df344 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 14:22:31 -0300 Subject: [PATCH 28/34] Add `RetryAnd` to compose rather than decorators --- operator/v2/rpc_client.go | 10 ++++++++-- operator/v2/rpc_client_test.go | 3 ++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/operator/v2/rpc_client.go b/operator/v2/rpc_client.go index 417cf49d..56eb5d59 100644 --- a/operator/v2/rpc_client.go +++ b/operator/v2/rpc_client.go @@ -61,10 +61,10 @@ func AlwaysRetry(_ time.Time, _ error) bool { return true } -func RetryWithDelay(delay time.Duration, inner RetryStrategy) RetryStrategy { +func RetryWithDelay(delay time.Duration) RetryStrategy { return func(submittedAt time.Time, err error) bool { time.Sleep(delay) - return inner(submittedAt, err) + return true } } @@ -83,6 +83,12 @@ func RetryAtMost(retries int) RetryStrategy { } } +func RetryAnd(s1 RetryStrategy, s2 RetryStrategy) RetryStrategy { + return func(submittedAt time.Time, err error) bool { + return s1(submittedAt, err) && s2(submittedAt, err) + } +} + type AggregatorRpcClient struct { rpcClient RpcClient shouldRetry RetryStrategy diff --git a/operator/v2/rpc_client_test.go b/operator/v2/rpc_client_test.go index 24d25998..35e31389 100644 --- a/operator/v2/rpc_client_test.go +++ b/operator/v2/rpc_client_test.go @@ -105,7 +105,8 @@ func TestUnboundedRetry_Delayed(t *testing.T) { }, } - client := operator.NewAggregatorRpcClient(&rpcClient, operator.RetryWithDelay(100*time.Millisecond, operator.AlwaysRetry), logger) + strategy := operator.RetryAnd(operator.RetryWithDelay(100*time.Millisecond), operator.AlwaysRetry) + client := operator.NewAggregatorRpcClient(&rpcClient, strategy, logger) startedAt := time.Now() client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) From f71d64262616ff9f09ddfaa01d9ddcd0b2d52d61 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 14:24:40 -0300 Subject: [PATCH 29/34] Add `DefaultRetryStrategy` --- operator/v2/rpc_client.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/operator/v2/rpc_client.go b/operator/v2/rpc_client.go index 56eb5d59..8fde1832 100644 --- a/operator/v2/rpc_client.go +++ b/operator/v2/rpc_client.go @@ -89,6 +89,14 @@ func RetryAnd(s1 RetryStrategy, s2 RetryStrategy) RetryStrategy { } } +// By defaul, retry with a delay of 2 seconds between calls, +// at most 10 times, and only if the error is recent enough (24 hours) +// TODO: Discuss the "recent enough" part +var DefaultRetryStrategy RetryStrategy = RetryAnd( + RetryWithDelay(2*time.Second), RetryAnd( + RetryAtMost(10), + RetryIfRecentEnough(24*time.Hour))) + type AggregatorRpcClient struct { rpcClient RpcClient shouldRetry RetryStrategy From 6a6db7d54ea023bca7c2a8190996873798d20b1e Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 15:12:38 -0300 Subject: [PATCH 30/34] Start aggregator before operator --- tests/integration/integration_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/integration_test.go b/tests/integration/integration_test.go index 03ff51c1..4826c4bf 100644 --- a/tests/integration/integration_test.go +++ b/tests/integration/integration_test.go @@ -212,14 +212,14 @@ func setupTestEnv(t *testing.T, ctx context.Context) *testEnv { t.Fatalf("Failed to create logger: %s", err.Error()) } - nodeConfig, _, _ := genOperatorConfig(t, ctx, "3", mainnetAnvil, rollupAnvils, rabbitMq) - addresses, registryRollups, registryRollupAuths, _ := deployRegistryRollups(t, rollupAnvils) - operator := startOperator(t, ctx, nodeConfig) config := buildConfig(t, sfflDeploymentRaw, addresses, rollupAnvils, configRaw) aggregator := startAggregator(t, ctx, config, logger) + nodeConfig, _, _ := genOperatorConfig(t, ctx, "3", mainnetAnvil, rollupAnvils, rabbitMq) + operator := startOperator(t, ctx, nodeConfig) + avsReader, err := chainio.BuildAvsReader(common.HexToAddress(sfflDeploymentRaw.Addresses.RegistryCoordinatorAddr), common.HexToAddress(sfflDeploymentRaw.Addresses.OperatorStateRetrieverAddr), mainnetAnvil.HttpClient, logger) if err != nil { t.Fatalf("Cannot create AVS Reader: %s", err.Error()) From 28b0005e1f423029dd5598f7a999edeaedc554dd Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 15:13:59 -0300 Subject: [PATCH 31/34] Replace original with v2 --- operator/mocks/rpc_client.go | 19 +- operator/operator.go | 8 +- operator/rpc_client.go | 488 +++++++++------------------ operator/{v2 => }/rpc_client_test.go | 8 +- operator/v2/rpc_client.go | 251 -------------- 5 files changed, 185 insertions(+), 589 deletions(-) rename operator/{v2 => }/rpc_client_test.go (97%) delete mode 100644 operator/v2/rpc_client.go diff --git a/operator/mocks/rpc_client.go b/operator/mocks/rpc_client.go index 95a56ddb..27e6ddf0 100644 --- a/operator/mocks/rpc_client.go +++ b/operator/mocks/rpc_client.go @@ -5,6 +5,7 @@ // // mockgen -destination=./mocks/rpc_client.go -package=mocks github.com/NethermindEth/near-sffl/operator AggregatorRpcClienter // + // Package mocks is a generated GoMock package. package mocks @@ -69,9 +70,11 @@ func (mr *MockAggregatorRpcClienterMockRecorder) GetAggregatedCheckpointMessages } // SendSignedCheckpointTaskResponseToAggregator mocks base method. -func (m *MockAggregatorRpcClienter) SendSignedCheckpointTaskResponseToAggregator(arg0 *messages.SignedCheckpointTaskResponse) { +func (m *MockAggregatorRpcClienter) SendSignedCheckpointTaskResponseToAggregator(arg0 *messages.SignedCheckpointTaskResponse) error { m.ctrl.T.Helper() - m.ctrl.Call(m, "SendSignedCheckpointTaskResponseToAggregator", arg0) + ret := m.ctrl.Call(m, "SendSignedCheckpointTaskResponseToAggregator", arg0) + ret0, _ := ret[0].(error) + return ret0 } // SendSignedCheckpointTaskResponseToAggregator indicates an expected call of SendSignedCheckpointTaskResponseToAggregator. @@ -81,9 +84,11 @@ func (mr *MockAggregatorRpcClienterMockRecorder) SendSignedCheckpointTaskRespons } // SendSignedOperatorSetUpdateToAggregator mocks base method. -func (m *MockAggregatorRpcClienter) SendSignedOperatorSetUpdateToAggregator(arg0 *messages.SignedOperatorSetUpdateMessage) { +func (m *MockAggregatorRpcClienter) SendSignedOperatorSetUpdateToAggregator(arg0 *messages.SignedOperatorSetUpdateMessage) error { m.ctrl.T.Helper() - m.ctrl.Call(m, "SendSignedOperatorSetUpdateToAggregator", arg0) + ret := m.ctrl.Call(m, "SendSignedOperatorSetUpdateToAggregator", arg0) + ret0, _ := ret[0].(error) + return ret0 } // SendSignedOperatorSetUpdateToAggregator indicates an expected call of SendSignedOperatorSetUpdateToAggregator. @@ -93,9 +98,11 @@ func (mr *MockAggregatorRpcClienterMockRecorder) SendSignedOperatorSetUpdateToAg } // SendSignedStateRootUpdateToAggregator mocks base method. -func (m *MockAggregatorRpcClienter) SendSignedStateRootUpdateToAggregator(arg0 *messages.SignedStateRootUpdateMessage) { +func (m *MockAggregatorRpcClienter) SendSignedStateRootUpdateToAggregator(arg0 *messages.SignedStateRootUpdateMessage) error { m.ctrl.T.Helper() - m.ctrl.Call(m, "SendSignedStateRootUpdateToAggregator", arg0) + ret := m.ctrl.Call(m, "SendSignedStateRootUpdateToAggregator", arg0) + ret0, _ := ret[0].(error) + return ret0 } // SendSignedStateRootUpdateToAggregator indicates an expected call of SendSignedStateRootUpdateToAggregator. diff --git a/operator/operator.go b/operator/operator.go index a3649ab1..db94c649 100644 --- a/operator/operator.go +++ b/operator/operator.go @@ -173,11 +173,13 @@ func NewOperatorFromConfig(c optypes.NodeConfig) (*Operator, error) { return nil, err } - aggregatorRpcClient, err := NewAggregatorRpcClient(c.AggregatorServerIpPortAddress, operatorId, registryCoordinatorAddress, logger) + // TODO: We never close `httpRpcClient` + httpRpcClient, err := NewHTTPAggregatorRpcClient(c.AggregatorServerIpPortAddress, operatorId, registryCoordinatorAddress, logger) if err != nil { - logger.Error("Cannot create AggregatorRpcClient. Is aggregator running?", "err", err) + logger.Error("Cannot create HTTPAggregatorRpcClient. Is aggregator running?", "err", err) return nil, err } + aggregatorRpcClient := NewAggregatorRpcClient(httpRpcClient, DefaultAggregatorRpcRetryStrategy, logger) avsManager, err := NewAvsManager(&c, ethHttpClient, ethWsClient, elChainReader, elChainWriter, txMgr, logger) if err != nil { @@ -219,7 +221,7 @@ func NewOperatorFromConfig(c optypes.NodeConfig) (*Operator, error) { blsKeypair: blsKeyPair, operatorAddr: common.HexToAddress(c.OperatorAddress), aggregatorServerIpPortAddr: c.AggregatorServerIpPortAddress, - aggregatorRpcClient: aggregatorRpcClient, + aggregatorRpcClient: &aggregatorRpcClient, registryCoordinatorAddr: registryCoordinatorAddress, operatorId: operatorId, taskResponseWait: time.Duration(c.TaskResponseWaitMs) * time.Millisecond, diff --git a/operator/rpc_client.go b/operator/rpc_client.go index 88f18158..d5769679 100644 --- a/operator/rpc_client.go +++ b/operator/rpc_client.go @@ -2,421 +2,259 @@ package operator import ( "errors" - "fmt" - "net" "net/rpc" - "sync" "time" "github.com/Layr-Labs/eigensdk-go/logging" eigentypes "github.com/Layr-Labs/eigensdk-go/types" - "github.com/ethereum/go-ethereum/common" - "github.com/prometheus/client_golang/prometheus" - "github.com/NethermindEth/near-sffl/core" "github.com/NethermindEth/near-sffl/core/types/messages" + "github.com/ethereum/go-ethereum/common" + "github.com/prometheus/client_golang/prometheus" ) -const ( - ResendInterval = 2 * time.Second - MaxRetries = 10 -) - -type AggregatorRpcClienter interface { - core.Metricable - - SendSignedCheckpointTaskResponseToAggregator(signedCheckpointTaskResponse *messages.SignedCheckpointTaskResponse) - SendSignedStateRootUpdateToAggregator(signedStateRootUpdateMessage *messages.SignedStateRootUpdateMessage) - SendSignedOperatorSetUpdateToAggregator(signedOperatorSetUpdateMessage *messages.SignedOperatorSetUpdateMessage) - GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (*messages.CheckpointMessages, error) -} - -type unsentRpcMessage struct { - Message interface{} - Retries int -} - -type AggregatorRpcClient struct { - rpcClientLock sync.RWMutex - rpcClient *rpc.Client - aggregatorIpPortAddr string - registryCoordinatorAddress common.Address - - operatorId eigentypes.OperatorId - notifiedInitialized bool - - unsentMessagesLock sync.Mutex - unsentMessages []unsentRpcMessage - resendTicker *time.Ticker - - logger logging.Logger - listener RpcClientEventListener +type RpcClient interface { + Call(serviceMethod string, args any, reply any) error + // TODO: Do we also want a `Close` method? } -var _ core.Metricable = (*AggregatorRpcClient)(nil) - -func NewAggregatorRpcClient(aggregatorIpPortAddr string, operatorId eigentypes.OperatorId, registryCoordinatorAddress common.Address, logger logging.Logger) (*AggregatorRpcClient, error) { - resendTicker := time.NewTicker(ResendInterval) - - client := &AggregatorRpcClient{ - // set to nil so that we can create an rpc client even if the aggregator is not running - rpcClient: nil, - logger: logger, - aggregatorIpPortAddr: aggregatorIpPortAddr, - registryCoordinatorAddress: registryCoordinatorAddress, - unsentMessages: make([]unsentRpcMessage, 0), - resendTicker: resendTicker, - listener: &SelectiveRpcClientListener{}, - operatorId: operatorId, - } +var _ RpcClient = (*rpc.Client)(nil) - go client.onTick() - return client, nil -} - -func (c *AggregatorRpcClient) EnableMetrics(registry *prometheus.Registry) error { - listener, err := MakeRpcClientMetrics(registry) +func NewHTTPAggregatorRpcClient(aggregatorIpPortAddr string, operatorId eigentypes.OperatorId, expectedRegistryCoordinatorAddress common.Address, logger logging.Logger) (*rpc.Client, error) { + client, err := rpc.DialHTTP("tcp", aggregatorIpPortAddr) if err != nil { - return err - } - - c.listener = listener - return nil -} - -func (c *AggregatorRpcClient) dialAggregatorRpcClient() error { - c.rpcClientLock.Lock() - defer c.rpcClientLock.Unlock() - - if c.rpcClient != nil { - return nil - } - - c.logger.Info("rpc client is nil. Dialing aggregator rpc client") - - client, err := rpc.DialHTTP("tcp", c.aggregatorIpPortAddr) - if err != nil { - c.logger.Error("Error dialing aggregator rpc client", "err", err) - return err + logger.Error("Error dialing aggregator rpc client", "err", err) + return nil, err } var aggregatorRegistryCoordinatorAddress string err = client.Call("Aggregator.GetRegistryCoordinatorAddress", struct{}{}, &aggregatorRegistryCoordinatorAddress) if err != nil { - c.logger.Info("Received error when getting registry coordinator address", "err", err) - return err + logger.Error("Received error when getting registry coordinator address", "err", err) + return nil, err } - if !c.notifiedInitialized { - c.logger.Info("Notifying aggregator of initialization") + logger.Debug("Notifying aggregator of initialization") - var reply bool - err := client.Call("Aggregator.NotifyOperatorInitialization", c.operatorId, &reply) - if err != nil { - c.logger.Error("Error notifying aggregator of initialization", "err", err) - return err - } - - c.notifiedInitialized = true + var reply bool + err = client.Call("Aggregator.NotifyOperatorInitialization", operatorId, &reply) + if err != nil { + logger.Error("Error notifying aggregator of initialization", "err", err) + return nil, err } - if common.HexToAddress(aggregatorRegistryCoordinatorAddress).Cmp(c.registryCoordinatorAddress) != 0 { - c.logger.Fatal("Registry coordinator address from aggregator does not match the one in the config", "aggregator", aggregatorRegistryCoordinatorAddress, "config", c.registryCoordinatorAddress.String()) - return errors.New("mismatching registry coordinator address from aggregator") + if common.HexToAddress(aggregatorRegistryCoordinatorAddress).Cmp(expectedRegistryCoordinatorAddress) != 0 { + logger.Fatal("Registry coordinator address from aggregator does not match the one in the config", "aggregator", aggregatorRegistryCoordinatorAddress, "config", expectedRegistryCoordinatorAddress.String()) + return nil, errors.New("mismatching registry coordinator address from aggregator") } - c.rpcClient = client - - return nil + return client, nil } -func (c *AggregatorRpcClient) InitializeClientIfNotExist() error { - c.rpcClientLock.RLock() - if c.rpcClient != nil { - c.rpcClientLock.RUnlock() - return nil - } - c.rpcClientLock.RUnlock() +type RetryStrategy func(submittedAt time.Time, err error) bool - return c.dialAggregatorRpcClient() +func NeverRetry(_ time.Time, _ error) bool { + return false } -func isShutdownOrNetworkError(err error) bool { - if err == rpc.ErrShutdown { - return true - } +func AlwaysRetry(_ time.Time, _ error) bool { + return true +} - if _, ok := err.(*net.OpError); ok { +func RetryWithDelay(delay time.Duration) RetryStrategy { + return func(submittedAt time.Time, err error) bool { + time.Sleep(delay) return true } - - return false } -func (c *AggregatorRpcClient) handleRpcError(err error) { - if isShutdownOrNetworkError(err) { - go c.handleRpcShutdown() +func RetryIfRecentEnough(ttl time.Duration) RetryStrategy { + return func(submittedAt time.Time, err error) bool { + return time.Since(submittedAt) < ttl } } -func (c *AggregatorRpcClient) handleRpcShutdown() { - c.rpcClientLock.Lock() - defer c.rpcClientLock.Unlock() - - if c.rpcClient != nil { - c.logger.Info("Closing RPC client due to shutdown") - - err := c.rpcClient.Close() - if err != nil { - c.logger.Error("Error closing RPC client", "err", err) - } - - c.rpcClient = nil +func RetryAtMost(retries int) RetryStrategy { + retryCount := 0 + return func(_ time.Time, err error) bool { + result := retryCount < retries + retryCount++ + return result } } -func (c *AggregatorRpcClient) onTick() { - for { - <-c.resendTicker.C - - err := c.InitializeClientIfNotExist() - if err != nil { - c.logger.Error("Error initializing client", "err", err) - continue - } - - c.unsentMessagesLock.Lock() - if len(c.unsentMessages) == 0 { - c.unsentMessagesLock.Unlock() - continue - } - c.unsentMessagesLock.Unlock() - - c.tryResendFromDeque() +func RetryAnd(s1 RetryStrategy, s2 RetryStrategy) RetryStrategy { + return func(submittedAt time.Time, err error) bool { + return s1(submittedAt, err) && s2(submittedAt, err) } } -// Expected to be called with initialized client. -func (c *AggregatorRpcClient) tryResendFromDeque() { - c.rpcClientLock.RLock() - defer c.rpcClientLock.RUnlock() +// By defaul, retry with a delay of 2 seconds between calls, +// at most 10 times, and only if the error is recent enough (24 hours) +// TODO: Discuss the "recent enough" part +var DefaultAggregatorRpcRetryStrategy RetryStrategy = RetryAnd( + RetryWithDelay(2*time.Second), RetryAnd( + RetryAtMost(10), + RetryIfRecentEnough(24*time.Hour))) - c.unsentMessagesLock.Lock() - defer c.unsentMessagesLock.Unlock() - - if len(c.unsentMessages) != 0 { - c.logger.Info("Resending messages from queue") - } - - errorPos := 0 - for i := 0; i < len(c.unsentMessages); i++ { - entry := c.unsentMessages[i] - message := entry.Message - - // Assumes client exists - var err error - var reply bool - - switch message := message.(type) { - case *messages.SignedCheckpointTaskResponse: - err = c.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", message, &reply) - if err != nil { - c.listener.IncErroredCheckpointSubmissions(true) - } else { - c.listener.IncCheckpointTaskResponseSubmissions(true) - c.listener.ObserveLastCheckpointIdResponded(message.TaskResponse.ReferenceTaskIndex) - } - - case *messages.SignedStateRootUpdateMessage: - err = c.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", message, &reply) - if err != nil { - c.listener.IncErroredStateRootUpdateSubmissions(message.Message.RollupId, true) - } else { - c.listener.IncStateRootUpdateSubmissions(message.Message.RollupId, true) - } - - case *messages.SignedOperatorSetUpdateMessage: - err = c.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", message, &reply) - if err != nil { - c.listener.IncErroredOperatorSetUpdateSubmissions(true) - } else { - c.listener.IncOperatorSetUpdateUpdateSubmissions(true) - c.listener.ObserveLastOperatorSetUpdateIdResponded(message.Message.Id) - } - - default: - panic("unreachable") - } - - if err != nil { - c.logger.Error("Couldn't resend message", "err", err) - - if isShutdownOrNetworkError(err) { - c.logger.Error("Couldn't resend message due to shutdown or network error") - - if errorPos == 0 { - c.unsentMessages = c.unsentMessages[i:] - return - } - - for j := i; j < len(c.unsentMessages); j++ { - rpcMessage := c.unsentMessages[j] - c.unsentMessages[errorPos] = rpcMessage - errorPos++ - } - - break - } - - entry.Retries++ - if entry.Retries >= MaxRetries { - c.logger.Error("Max retries reached, dropping message", "message", fmt.Sprintf("%#v", message)) - continue - } - - c.unsentMessages[errorPos] = entry - errorPos++ - } - } +type AggregatorRpcClienter interface { + core.Metricable - c.unsentMessages = c.unsentMessages[:errorPos] - c.listener.ObserveResendQueueSize(len(c.unsentMessages)) + SendSignedCheckpointTaskResponseToAggregator(signedCheckpointTaskResponse *messages.SignedCheckpointTaskResponse) error + SendSignedStateRootUpdateToAggregator(signedStateRootUpdateMessage *messages.SignedStateRootUpdateMessage) error + SendSignedOperatorSetUpdateToAggregator(signedOperatorSetUpdateMessage *messages.SignedOperatorSetUpdateMessage) error + GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (*messages.CheckpointMessages, error) } -func (c *AggregatorRpcClient) sendOperatorMessage(sendCb func() error, message interface{}) { - c.rpcClientLock.RLock() - defer c.rpcClientLock.RUnlock() - - appendProtected := func() { - c.unsentMessagesLock.Lock() - defer c.unsentMessagesLock.Unlock() - - c.unsentMessages = append(c.unsentMessages, unsentRpcMessage{Message: message}) - c.listener.ObserveResendQueueSize(len(c.unsentMessages)) - } +type AggregatorRpcClient struct { + rpcClient RpcClient + shouldRetry RetryStrategy + listener RpcClientEventListener + logger logging.Logger +} - if c.rpcClient == nil { - appendProtected() - return - } +var _ AggregatorRpcClienter = (*AggregatorRpcClient)(nil) - c.logger.Info("Sending request to aggregator") - err := sendCb() - if err != nil { - c.handleRpcError(err) - appendProtected() - return +func NewAggregatorRpcClient(rpcClient RpcClient, retryStrategy RetryStrategy, logger logging.Logger) AggregatorRpcClient { + return AggregatorRpcClient{ + rpcClient: rpcClient, + shouldRetry: retryStrategy, + listener: &SelectiveRpcClientListener{}, + logger: logger, } } -func (c *AggregatorRpcClient) sendRequest(sendCb func() error) error { - c.rpcClientLock.RLock() - defer c.rpcClientLock.RUnlock() +func (a *AggregatorRpcClient) SendSignedCheckpointTaskResponseToAggregator(signedCheckpointTaskResponse *messages.SignedCheckpointTaskResponse) error { + a.logger.Info("Sending signed task response header to aggregator", "signedCheckpointTaskResponse", signedCheckpointTaskResponse) - if c.rpcClient == nil { - return errors.New("rpc client is nil") + submittedAt := time.Now() + var reply bool + action := func() error { + err := a.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", signedCheckpointTaskResponse, &reply) + if err != nil { + a.logger.Error("Received error from aggregator", "err", err) + } + return err } - c.logger.Info("Sending request to aggregator") + retried := false + err := action() + for err != nil && a.shouldRetry(submittedAt, err) { + a.listener.IncErroredCheckpointSubmissions(retried) + err = action() + retried = true + } - err := sendCb() if err != nil { - c.handleRpcError(err) + a.logger.Error("Dropping message after error", "err", err) return err } - c.logger.Info("Request successfully sent to aggregator") + a.logger.Info("Signed task response header accepted by aggregator", "reply", reply) + a.listener.IncCheckpointTaskResponseSubmissions(retried) + a.listener.ObserveLastCheckpointIdResponded(signedCheckpointTaskResponse.TaskResponse.ReferenceTaskIndex) + a.listener.OnMessagesReceived() return nil } -func (c *AggregatorRpcClient) SendSignedCheckpointTaskResponseToAggregator(signedCheckpointTaskResponse *messages.SignedCheckpointTaskResponse) { - c.logger.Info("Sending signed task response header to aggregator", "signedCheckpointTaskResponse", fmt.Sprintf("%#v", signedCheckpointTaskResponse)) +func (a *AggregatorRpcClient) SendSignedStateRootUpdateToAggregator(signedStateRootUpdateMessage *messages.SignedStateRootUpdateMessage) error { + a.logger.Info("Sending signed state root update message to aggregator", "signedStateRootUpdateMessage", signedStateRootUpdateMessage) - c.sendOperatorMessage(func() error { - var reply bool - err := c.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", signedCheckpointTaskResponse, &reply) + submittedAt := time.Now() + var reply bool + action := func() error { + err := a.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", signedStateRootUpdateMessage, &reply) if err != nil { - c.listener.IncErroredCheckpointSubmissions(false) - - c.logger.Info("Received error from aggregator", "err", err) - return err + a.logger.Error("Received error from aggregator", "err", err) } + return err + } - c.listener.IncCheckpointTaskResponseSubmissions(false) - c.listener.ObserveLastCheckpointIdResponded(signedCheckpointTaskResponse.TaskResponse.ReferenceTaskIndex) - - c.logger.Info("Signed task response header accepted by aggregator.", "reply", reply) - c.listener.OnMessagesReceived() - return nil - }, signedCheckpointTaskResponse) -} - -func (c *AggregatorRpcClient) SendSignedStateRootUpdateToAggregator(signedStateRootUpdateMessage *messages.SignedStateRootUpdateMessage) { - c.logger.Info("Sending signed state root update message to aggregator", "signedStateRootUpdateMessage", fmt.Sprintf("%#v", signedStateRootUpdateMessage)) - - c.sendOperatorMessage(func() error { - var reply bool - err := c.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", signedStateRootUpdateMessage, &reply) - if err != nil { - c.listener.IncErroredStateRootUpdateSubmissions(signedStateRootUpdateMessage.Message.RollupId, false) + retried := false + err := action() + for err != nil && a.shouldRetry(submittedAt, err) { + a.listener.IncErroredStateRootUpdateSubmissions(signedStateRootUpdateMessage.Message.RollupId, retried) + err = action() + retried = true + } - c.logger.Info("Received error from aggregator", "err", err) - return err - } + if err != nil { + a.logger.Error("Dropping message after error", "err", err) + return err + } - c.listener.IncStateRootUpdateSubmissions(signedStateRootUpdateMessage.Message.RollupId, false) + a.logger.Info("Signed state root update message accepted by aggregator", "reply", reply) + a.listener.IncStateRootUpdateSubmissions(signedStateRootUpdateMessage.Message.RollupId, retried) + a.listener.OnMessagesReceived() - c.logger.Info("Signed state root update message accepted by aggregator.", "reply", reply) - c.listener.OnMessagesReceived() - return nil - }, signedStateRootUpdateMessage) + return nil } -func (c *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(signedOperatorSetUpdateMessage *messages.SignedOperatorSetUpdateMessage) { - c.logger.Info("Sending operator set update message to aggregator", "signedOperatorSetUpdateMessage", fmt.Sprintf("%#v", signedOperatorSetUpdateMessage)) +func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(signedOperatorSetUpdateMessage *messages.SignedOperatorSetUpdateMessage) error { + a.logger.Info("Sending operator set update message to aggregator", "signedOperatorSetUpdateMessage", signedOperatorSetUpdateMessage) - c.sendOperatorMessage(func() error { - var reply bool - err := c.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", signedOperatorSetUpdateMessage, &reply) + submittedAt := time.Now() + var reply bool + action := func() error { + err := a.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", signedOperatorSetUpdateMessage, &reply) if err != nil { - c.listener.IncErroredOperatorSetUpdateSubmissions(false) - - c.logger.Info("Received error from aggregator", "err", err) - return err + a.logger.Error("Received error from aggregator", "err", err) } + return err + } - c.listener.IncOperatorSetUpdateUpdateSubmissions(false) - c.listener.ObserveLastOperatorSetUpdateIdResponded(signedOperatorSetUpdateMessage.Message.Id) + retried := false + err := action() + for err != nil && a.shouldRetry(submittedAt, err) { + a.listener.IncErroredOperatorSetUpdateSubmissions(retried) + err = action() + retried = true + } - c.logger.Info("Signed operator set update message accepted by aggregator.", "reply", reply) - c.listener.OnMessagesReceived() - return nil - }, signedOperatorSetUpdateMessage) -} + if err != nil { + a.logger.Error("Dropping message after error", "err", err) + return err + } -func (c *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (*messages.CheckpointMessages, error) { - c.logger.Info("Getting checkpoint messages from aggregator") + a.logger.Info("Signed operator set update message accepted by aggregator", "reply", reply) + a.listener.IncOperatorSetUpdateUpdateSubmissions(retried) + a.listener.ObserveLastOperatorSetUpdateIdResponded(signedOperatorSetUpdateMessage.Message.Id) + a.listener.OnMessagesReceived() + return nil +} - var checkpointMessages messages.CheckpointMessages +func (a *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (*messages.CheckpointMessages, error) { + a.logger.Info("Getting checkpoint messages from aggregator", "fromTimestamp", fromTimestamp, "toTimestamp", toTimestamp) type Args struct { FromTimestamp, ToTimestamp uint64 } - err := c.sendRequest(func() error { - err := c.rpcClient.Call("Aggregator.GetAggregatedCheckpointMessages", &Args{fromTimestamp, toTimestamp}, &checkpointMessages) + submittedAt := time.Now() + var checkpointMessages messages.CheckpointMessages + action := func() error { + err := a.rpcClient.Call("Aggregator.GetAggregatedCheckpointMessages", &Args{fromTimestamp, toTimestamp}, &checkpointMessages) if err != nil { - c.logger.Info("Received error from aggregator", "err", err) - return err + a.logger.Error("Received error from aggregator", "err", err) } + return err + } + + err := action() + for err != nil && a.shouldRetry(submittedAt, err) { + err = action() + } - c.logger.Info("Checkpoint messages fetched from aggregator") - return nil - }) + return &checkpointMessages, err +} + +func (c *AggregatorRpcClient) EnableMetrics(registry *prometheus.Registry) error { + listener, err := MakeRpcClientMetrics(registry) if err != nil { - return nil, err + return err } - return &checkpointMessages, nil + c.listener = listener + return nil } diff --git a/operator/v2/rpc_client_test.go b/operator/rpc_client_test.go similarity index 97% rename from operator/v2/rpc_client_test.go rename to operator/rpc_client_test.go index 35e31389..0b3f3f05 100644 --- a/operator/v2/rpc_client_test.go +++ b/operator/rpc_client_test.go @@ -1,4 +1,4 @@ -package operator2_test +package operator_test import ( "sync" @@ -7,7 +7,7 @@ import ( "github.com/Layr-Labs/eigensdk-go/logging" "github.com/NethermindEth/near-sffl/core/types/messages" - operator "github.com/NethermindEth/near-sffl/operator/v2" + "github.com/NethermindEth/near-sffl/operator" "github.com/stretchr/testify/assert" ) @@ -178,7 +178,7 @@ func TestGetAggregatedCheckpointMessages(t *testing.T) { response, err := client.GetAggregatedCheckpointMessages(0, 0) assert.NoError(t, err) - assert.Equal(t, expected, response) + assert.Equal(t, expected, *response) } func TestGetAggregatedCheckpointMessagesRetry(t *testing.T) { @@ -209,6 +209,6 @@ func TestGetAggregatedCheckpointMessagesRetry(t *testing.T) { response, err := client.GetAggregatedCheckpointMessages(0, 0) assert.NoError(t, err) - assert.Equal(t, expected, response) + assert.Equal(t, expected, *response) assert.Equal(t, 2, rpcFailCount) } diff --git a/operator/v2/rpc_client.go b/operator/v2/rpc_client.go deleted file mode 100644 index 8fde1832..00000000 --- a/operator/v2/rpc_client.go +++ /dev/null @@ -1,251 +0,0 @@ -package operator2 - -import ( - "errors" - "net/rpc" - "time" - - "github.com/Layr-Labs/eigensdk-go/logging" - eigentypes "github.com/Layr-Labs/eigensdk-go/types" - "github.com/NethermindEth/near-sffl/core" - "github.com/NethermindEth/near-sffl/core/types/messages" - v1 "github.com/NethermindEth/near-sffl/operator" - "github.com/ethereum/go-ethereum/common" - "github.com/prometheus/client_golang/prometheus" -) - -type RpcClient interface { - Call(serviceMethod string, args any, reply any) error -} - -var _ RpcClient = (*rpc.Client)(nil) - -func NewHTTPAggregatorRpcClient(aggregatorIpPortAddr string, operatorId eigentypes.OperatorId, expectedRegistryCoordinatorAddress common.Address, logger logging.Logger) (*rpc.Client, error) { - client, err := rpc.DialHTTP("tcp", aggregatorIpPortAddr) - if err != nil { - logger.Error("Error dialing aggregator rpc client", "err", err) - return nil, err - } - - var aggregatorRegistryCoordinatorAddress string - err = client.Call("Aggregator.GetRegistryCoordinatorAddress", struct{}{}, &aggregatorRegistryCoordinatorAddress) - if err != nil { - logger.Error("Received error when getting registry coordinator address", "err", err) - return nil, err - } - - logger.Debug("Notifying aggregator of initialization") - - var reply bool - err = client.Call("Aggregator.NotifyOperatorInitialization", operatorId, &reply) - if err != nil { - logger.Error("Error notifying aggregator of initialization", "err", err) - return nil, err - } - - if common.HexToAddress(aggregatorRegistryCoordinatorAddress).Cmp(expectedRegistryCoordinatorAddress) != 0 { - logger.Fatal("Registry coordinator address from aggregator does not match the one in the config", "aggregator", aggregatorRegistryCoordinatorAddress, "config", expectedRegistryCoordinatorAddress.String()) - return nil, errors.New("mismatching registry coordinator address from aggregator") - } - - return client, nil -} - -type RetryStrategy func(submittedAt time.Time, err error) bool - -func NeverRetry(_ time.Time, _ error) bool { - return false -} - -func AlwaysRetry(_ time.Time, _ error) bool { - return true -} - -func RetryWithDelay(delay time.Duration) RetryStrategy { - return func(submittedAt time.Time, err error) bool { - time.Sleep(delay) - return true - } -} - -func RetryIfRecentEnough(ttl time.Duration) RetryStrategy { - return func(submittedAt time.Time, err error) bool { - return time.Since(submittedAt) < ttl - } -} - -func RetryAtMost(retries int) RetryStrategy { - retryCount := 0 - return func(_ time.Time, err error) bool { - result := retryCount < retries - retryCount++ - return result - } -} - -func RetryAnd(s1 RetryStrategy, s2 RetryStrategy) RetryStrategy { - return func(submittedAt time.Time, err error) bool { - return s1(submittedAt, err) && s2(submittedAt, err) - } -} - -// By defaul, retry with a delay of 2 seconds between calls, -// at most 10 times, and only if the error is recent enough (24 hours) -// TODO: Discuss the "recent enough" part -var DefaultRetryStrategy RetryStrategy = RetryAnd( - RetryWithDelay(2*time.Second), RetryAnd( - RetryAtMost(10), - RetryIfRecentEnough(24*time.Hour))) - -type AggregatorRpcClient struct { - rpcClient RpcClient - shouldRetry RetryStrategy - listener v1.RpcClientEventListener - logger logging.Logger -} - -var _ core.Metricable = (*AggregatorRpcClient)(nil) - -func NewAggregatorRpcClient(rpcClient RpcClient, retryStrategy RetryStrategy, logger logging.Logger) AggregatorRpcClient { - return AggregatorRpcClient{ - rpcClient: rpcClient, - shouldRetry: retryStrategy, - listener: &v1.SelectiveRpcClientListener{}, - logger: logger, - } -} - -func (a *AggregatorRpcClient) SendSignedCheckpointTaskResponseToAggregator(signedCheckpointTaskResponse *messages.SignedCheckpointTaskResponse) error { - a.logger.Info("Sending signed task response header to aggregator", "signedCheckpointTaskResponse", signedCheckpointTaskResponse) - - submittedAt := time.Now() - var reply bool - action := func() error { - err := a.rpcClient.Call("Aggregator.ProcessSignedCheckpointTaskResponse", signedCheckpointTaskResponse, &reply) - if err != nil { - a.logger.Error("Received error from aggregator", "err", err) - } - return err - } - - retried := false - err := action() - for err != nil && a.shouldRetry(submittedAt, err) { - a.listener.IncErroredCheckpointSubmissions(retried) - err = action() - retried = true - } - - if err != nil { - a.logger.Error("Dropping message after error", "err", err) - return err - } - - a.logger.Info("Signed task response header accepted by aggregator", "reply", reply) - a.listener.IncCheckpointTaskResponseSubmissions(retried) - a.listener.ObserveLastCheckpointIdResponded(signedCheckpointTaskResponse.TaskResponse.ReferenceTaskIndex) - a.listener.OnMessagesReceived() - - return nil -} - -func (a *AggregatorRpcClient) SendSignedStateRootUpdateToAggregator(signedStateRootUpdateMessage *messages.SignedStateRootUpdateMessage) error { - a.logger.Info("Sending signed state root update message to aggregator", "signedStateRootUpdateMessage", signedStateRootUpdateMessage) - - submittedAt := time.Now() - var reply bool - action := func() error { - err := a.rpcClient.Call("Aggregator.ProcessSignedStateRootUpdateMessage", signedStateRootUpdateMessage, &reply) - if err != nil { - a.logger.Error("Received error from aggregator", "err", err) - } - return err - } - - retried := false - err := action() - for err != nil && a.shouldRetry(submittedAt, err) { - a.listener.IncErroredStateRootUpdateSubmissions(signedStateRootUpdateMessage.Message.RollupId, retried) - err = action() - retried = true - } - - if err != nil { - a.logger.Error("Dropping message after error", "err", err) - return err - } - - a.logger.Info("Signed state root update message accepted by aggregator", "reply", reply) - a.listener.IncStateRootUpdateSubmissions(signedStateRootUpdateMessage.Message.RollupId, retried) - a.listener.OnMessagesReceived() - - return nil -} - -func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(signedOperatorSetUpdateMessage *messages.SignedOperatorSetUpdateMessage) error { - a.logger.Info("Sending operator set update message to aggregator", "signedOperatorSetUpdateMessage", signedOperatorSetUpdateMessage) - - submittedAt := time.Now() - var reply bool - action := func() error { - err := a.rpcClient.Call("Aggregator.ProcessSignedOperatorSetUpdateMessage", signedOperatorSetUpdateMessage, &reply) - if err != nil { - a.logger.Error("Received error from aggregator", "err", err) - } - return err - } - - retried := false - err := action() - for err != nil && a.shouldRetry(submittedAt, err) { - a.listener.IncErroredOperatorSetUpdateSubmissions(retried) - err = action() - retried = true - } - - if err != nil { - a.logger.Error("Dropping message after error", "err", err) - return err - } - - a.logger.Info("Signed operator set update message accepted by aggregator", "reply", reply) - a.listener.IncOperatorSetUpdateUpdateSubmissions(retried) - a.listener.ObserveLastOperatorSetUpdateIdResponded(signedOperatorSetUpdateMessage.Message.Id) - a.listener.OnMessagesReceived() - return nil -} - -func (a *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toTimestamp uint64) (messages.CheckpointMessages, error) { - a.logger.Info("Getting checkpoint messages from aggregator", "fromTimestamp", fromTimestamp, "toTimestamp", toTimestamp) - - type Args struct { - FromTimestamp, ToTimestamp uint64 - } - - submittedAt := time.Now() - var checkpointMessages messages.CheckpointMessages - action := func() error { - err := a.rpcClient.Call("Aggregator.GetAggregatedCheckpointMessages", &Args{fromTimestamp, toTimestamp}, &checkpointMessages) - if err != nil { - a.logger.Error("Received error from aggregator", "err", err) - } - return err - } - - err := action() - for err != nil && a.shouldRetry(submittedAt, err) { - err = action() - } - - return checkpointMessages, err -} - -func (c *AggregatorRpcClient) EnableMetrics(registry *prometheus.Registry) error { - listener, err := v1.MakeRpcClientMetrics(registry) - if err != nil { - return err - } - - c.listener = listener - return nil -} From 7bcc7ff92a2754a54a8f1ffa62c9771b159d9dd4 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 15:19:30 -0300 Subject: [PATCH 32/34] Increase coverage by using different messages --- operator/rpc_client_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/operator/rpc_client_test.go b/operator/rpc_client_test.go index 0b3f3f05..31b87729 100644 --- a/operator/rpc_client_test.go +++ b/operator/rpc_client_test.go @@ -109,7 +109,7 @@ func TestUnboundedRetry_Delayed(t *testing.T) { client := operator.NewAggregatorRpcClient(&rpcClient, strategy, logger) startedAt := time.Now() - client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) + client.SendSignedCheckpointTaskResponseToAggregator(&messages.SignedCheckpointTaskResponse{}) execTime := time.Since(startedAt) assert.True(t, execTime > 180*time.Millisecond) @@ -131,7 +131,7 @@ func TestRetryAtMost(t *testing.T) { client := operator.NewAggregatorRpcClient(&rpcClient, operator.RetryAtMost(4), logger) - client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) + client.SendSignedOperatorSetUpdateToAggregator(&messages.SignedOperatorSetUpdateMessage{}) assert.Equal(t, 5, rpcFailCount) // 1 run, 4 retries } @@ -151,7 +151,7 @@ func TestRetryLaterIfRecentEnough(t *testing.T) { client := operator.NewAggregatorRpcClient(&rpcClient, operator.RetryIfRecentEnough(500*time.Millisecond), logger) - client.SendSignedCheckpointTaskResponseToAggregator(&messages.SignedCheckpointTaskResponse{}) + client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) assert.Equal(t, 5, rpcFailCount) } From f4ee227bcc69f52b006e6162475b0c7e7c49f420 Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 17:01:10 -0300 Subject: [PATCH 33/34] Ensure retry strategy is local to each method call --- operator/operator.go | 2 +- operator/rpc_client.go | 42 ++++++++++++++++++------------- operator/rpc_client_test.go | 50 +++++++++++++++++++++++++++++++------ 3 files changed, 68 insertions(+), 26 deletions(-) diff --git a/operator/operator.go b/operator/operator.go index db94c649..2b02ee34 100644 --- a/operator/operator.go +++ b/operator/operator.go @@ -179,7 +179,7 @@ func NewOperatorFromConfig(c optypes.NodeConfig) (*Operator, error) { logger.Error("Cannot create HTTPAggregatorRpcClient. Is aggregator running?", "err", err) return nil, err } - aggregatorRpcClient := NewAggregatorRpcClient(httpRpcClient, DefaultAggregatorRpcRetryStrategy, logger) + aggregatorRpcClient := NewAggregatorRpcClient(httpRpcClient, DefaultAggregatorRpcRetry, logger) avsManager, err := NewAvsManager(&c, ethHttpClient, ethWsClient, elChainReader, elChainWriter, txMgr, logger) if err != nil { diff --git a/operator/rpc_client.go b/operator/rpc_client.go index d5769679..8048b6f9 100644 --- a/operator/rpc_client.go +++ b/operator/rpc_client.go @@ -89,13 +89,17 @@ func RetryAnd(s1 RetryStrategy, s2 RetryStrategy) RetryStrategy { } } +type RetryFactory = func() RetryStrategy + // By defaul, retry with a delay of 2 seconds between calls, // at most 10 times, and only if the error is recent enough (24 hours) // TODO: Discuss the "recent enough" part -var DefaultAggregatorRpcRetryStrategy RetryStrategy = RetryAnd( - RetryWithDelay(2*time.Second), RetryAnd( - RetryAtMost(10), - RetryIfRecentEnough(24*time.Hour))) +func DefaultAggregatorRpcRetry() RetryStrategy { + return RetryAnd( + RetryWithDelay(2*time.Second), RetryAnd( + RetryAtMost(10), + RetryIfRecentEnough(24*time.Hour))) +} type AggregatorRpcClienter interface { core.Metricable @@ -107,20 +111,20 @@ type AggregatorRpcClienter interface { } type AggregatorRpcClient struct { - rpcClient RpcClient - shouldRetry RetryStrategy - listener RpcClientEventListener - logger logging.Logger + rpcClient RpcClient + newRetryStrategy RetryFactory + listener RpcClientEventListener + logger logging.Logger } var _ AggregatorRpcClienter = (*AggregatorRpcClient)(nil) -func NewAggregatorRpcClient(rpcClient RpcClient, retryStrategy RetryStrategy, logger logging.Logger) AggregatorRpcClient { +func NewAggregatorRpcClient(rpcClient RpcClient, retryFactory RetryFactory, logger logging.Logger) AggregatorRpcClient { return AggregatorRpcClient{ - rpcClient: rpcClient, - shouldRetry: retryStrategy, - listener: &SelectiveRpcClientListener{}, - logger: logger, + rpcClient: rpcClient, + newRetryStrategy: retryFactory, + listener: &SelectiveRpcClientListener{}, + logger: logger, } } @@ -138,8 +142,9 @@ func (a *AggregatorRpcClient) SendSignedCheckpointTaskResponseToAggregator(signe } retried := false + shouldRetry := a.newRetryStrategy() err := action() - for err != nil && a.shouldRetry(submittedAt, err) { + for err != nil && shouldRetry(submittedAt, err) { a.listener.IncErroredCheckpointSubmissions(retried) err = action() retried = true @@ -172,8 +177,9 @@ func (a *AggregatorRpcClient) SendSignedStateRootUpdateToAggregator(signedStateR } retried := false + shouldRetry := a.newRetryStrategy() err := action() - for err != nil && a.shouldRetry(submittedAt, err) { + for err != nil && shouldRetry(submittedAt, err) { a.listener.IncErroredStateRootUpdateSubmissions(signedStateRootUpdateMessage.Message.RollupId, retried) err = action() retried = true @@ -205,8 +211,9 @@ func (a *AggregatorRpcClient) SendSignedOperatorSetUpdateToAggregator(signedOper } retried := false + shouldRetry := a.newRetryStrategy() err := action() - for err != nil && a.shouldRetry(submittedAt, err) { + for err != nil && shouldRetry(submittedAt, err) { a.listener.IncErroredOperatorSetUpdateSubmissions(retried) err = action() retried = true @@ -241,8 +248,9 @@ func (a *AggregatorRpcClient) GetAggregatedCheckpointMessages(fromTimestamp, toT return err } + shouldRetry := a.newRetryStrategy() err := action() - for err != nil && a.shouldRetry(submittedAt, err) { + for err != nil && shouldRetry(submittedAt, err) { err = action() } diff --git a/operator/rpc_client_test.go b/operator/rpc_client_test.go index 31b87729..f193e7a2 100644 --- a/operator/rpc_client_test.go +++ b/operator/rpc_client_test.go @@ -44,7 +44,7 @@ func TestSendSuccessfulMessages(t *testing.T) { }, } - client := operator.NewAggregatorRpcClient(&rpcClient, operator.NeverRetry, logger) + client := operator.NewAggregatorRpcClient(&rpcClient, func() operator.RetryStrategy { return operator.NeverRetry }, logger) wg := sync.WaitGroup{} wg.Add(2) @@ -80,7 +80,7 @@ func TestUnboundedRetry(t *testing.T) { }, } - client := operator.NewAggregatorRpcClient(&rpcClient, operator.AlwaysRetry, logger) + client := operator.NewAggregatorRpcClient(&rpcClient, func() operator.RetryStrategy { return operator.AlwaysRetry }, logger) client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) @@ -105,8 +105,10 @@ func TestUnboundedRetry_Delayed(t *testing.T) { }, } - strategy := operator.RetryAnd(operator.RetryWithDelay(100*time.Millisecond), operator.AlwaysRetry) - client := operator.NewAggregatorRpcClient(&rpcClient, strategy, logger) + retryFactory := func() operator.RetryStrategy { + return operator.RetryAnd(operator.RetryWithDelay(100*time.Millisecond), operator.AlwaysRetry) + } + client := operator.NewAggregatorRpcClient(&rpcClient, retryFactory, logger) startedAt := time.Now() client.SendSignedCheckpointTaskResponseToAggregator(&messages.SignedCheckpointTaskResponse{}) @@ -129,13 +131,45 @@ func TestRetryAtMost(t *testing.T) { }, } - client := operator.NewAggregatorRpcClient(&rpcClient, operator.RetryAtMost(4), logger) + client := operator.NewAggregatorRpcClient(&rpcClient, func() operator.RetryStrategy { return operator.RetryAtMost(4) }, logger) client.SendSignedOperatorSetUpdateToAggregator(&messages.SignedOperatorSetUpdateMessage{}) assert.Equal(t, 5, rpcFailCount) // 1 run, 4 retries } +func TestRetryAtMost_Concurrent(t *testing.T) { + logger, _ := logging.NewZapLogger(logging.Development) + + rpcFailCount := 0 + rpcClient := MockRpcClient{ + call: func(serviceMethod string, args any, reply any) error { + rpcFailCount++ + return assert.AnError + }, + } + + client := operator.NewAggregatorRpcClient(&rpcClient, func() operator.RetryStrategy { return operator.RetryAtMost(4) }, logger) + + wg := sync.WaitGroup{} + wg.Add(3) + go func() { + defer wg.Done() + client.SendSignedCheckpointTaskResponseToAggregator(&messages.SignedCheckpointTaskResponse{}) + }() + go func() { + defer wg.Done() + client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) + }() + go func() { + defer wg.Done() + client.SendSignedOperatorSetUpdateToAggregator(&messages.SignedOperatorSetUpdateMessage{}) + }() + wg.Wait() + + assert.Equal(t, 15, rpcFailCount) // 1 run, 4 retries for each of the 3 calls = 15 calls in total +} + func TestRetryLaterIfRecentEnough(t *testing.T) { logger, _ := logging.NewZapLogger(logging.Development) @@ -149,7 +183,7 @@ func TestRetryLaterIfRecentEnough(t *testing.T) { }, } - client := operator.NewAggregatorRpcClient(&rpcClient, operator.RetryIfRecentEnough(500*time.Millisecond), logger) + client := operator.NewAggregatorRpcClient(&rpcClient, func() operator.RetryStrategy { return operator.RetryIfRecentEnough(500 * time.Millisecond) }, logger) client.SendSignedStateRootUpdateToAggregator(&messages.SignedStateRootUpdateMessage{}) @@ -173,7 +207,7 @@ func TestGetAggregatedCheckpointMessages(t *testing.T) { }, } - client := operator.NewAggregatorRpcClient(&rpcClient, operator.NeverRetry, logger) + client := operator.NewAggregatorRpcClient(&rpcClient, func() operator.RetryStrategy { return operator.NeverRetry }, logger) response, err := client.GetAggregatedCheckpointMessages(0, 0) @@ -204,7 +238,7 @@ func TestGetAggregatedCheckpointMessagesRetry(t *testing.T) { }, } - client := operator.NewAggregatorRpcClient(&rpcClient, operator.AlwaysRetry, logger) + client := operator.NewAggregatorRpcClient(&rpcClient, func() operator.RetryStrategy { return operator.AlwaysRetry }, logger) response, err := client.GetAggregatedCheckpointMessages(0, 0) From 9f3809e4b42dc4fe175be0c18f87b6afd090289f Mon Sep 17 00:00:00 2001 From: Lautaro Emanuel Date: Thu, 20 Jun 2024 17:04:52 -0300 Subject: [PATCH 34/34] Typo --- operator/rpc_client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/rpc_client.go b/operator/rpc_client.go index 8048b6f9..da171533 100644 --- a/operator/rpc_client.go +++ b/operator/rpc_client.go @@ -91,7 +91,7 @@ func RetryAnd(s1 RetryStrategy, s2 RetryStrategy) RetryStrategy { type RetryFactory = func() RetryStrategy -// By defaul, retry with a delay of 2 seconds between calls, +// By default, retry with a delay of 2 seconds between calls, // at most 10 times, and only if the error is recent enough (24 hours) // TODO: Discuss the "recent enough" part func DefaultAggregatorRpcRetry() RetryStrategy {