From 30fa3332cf15e8740790f7fce71b689ac51ff66a Mon Sep 17 00:00:00 2001 From: wjrjerome Date: Tue, 18 Jun 2024 21:49:21 +1000 Subject: [PATCH 1/4] chore: fix local dev setup on max_staking_time --- config/global-params.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/global-params.json b/config/global-params.json index d0017923..a3fd0868 100644 --- a/config/global-params.json +++ b/config/global-params.json @@ -17,9 +17,9 @@ "unbonding_fee": 20000, "max_staking_amount": 1000000000, "min_staking_amount": 1000000, - "max_staking_time": 84000, + "max_staking_time": 65535, "min_staking_time": 64000, "confirmation_depth": 10 } ] -} +} \ No newline at end of file From 514821b45c8e4a6b8596d7f20f6b8571ff3981d4 Mon Sep 17 00:00:00 2001 From: Jeremy <168515712+jeremy-babylonchain@users.noreply.github.com> Date: Thu, 20 Jun 2024 07:50:26 +0800 Subject: [PATCH 2/4] add pendingTvl and update activeTvl (#187) * add pendingTvl and update activeTvl --- internal/services/stats.go | 10 +++++++++- tests/stats_test.go | 9 ++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/internal/services/stats.go b/internal/services/stats.go index 5d2b1709..02fa5fb6 100644 --- a/internal/services/stats.go +++ b/internal/services/stats.go @@ -17,6 +17,7 @@ type OverallStatsPublic struct { TotalDelegations int64 `json:"total_delegations"` TotalStakers uint64 `json:"total_stakers"` UnconfirmedTvl uint64 `json:"unconfirmed_tvl"` + PendingTvl uint64 `json:"pending_tvl"` } type StakerStatsPublic struct { @@ -131,7 +132,11 @@ func (s *Services) GetOverallStats(ctx context.Context) (*OverallStatsPublic, *t log.Ctx(ctx).Error().Err(err).Msg("error while fetching overall stats") return nil, types.NewInternalServiceError(err) } + unconfirmedTvl := uint64(0) + confirmedTvl := uint64(0) + pendingTvl := uint64(0) + btcInfo, err := s.DbClient.GetLatestBtcInfo(ctx) if err != nil { // Handle missing BTC information, which may occur during initial setup. @@ -145,15 +150,18 @@ func (s *Services) GetOverallStats(ctx context.Context) (*OverallStatsPublic, *t } } else { unconfirmedTvl = btcInfo.UnconfirmedTvl + confirmedTvl = btcInfo.ConfirmedTvl + pendingTvl = unconfirmedTvl - confirmedTvl } return &OverallStatsPublic{ - ActiveTvl: stats.ActiveTvl, + ActiveTvl: int64(confirmedTvl), TotalTvl: stats.TotalTvl, ActiveDelegations: stats.ActiveDelegations, TotalDelegations: stats.TotalDelegations, TotalStakers: stats.TotalStakers, UnconfirmedTvl: unconfirmedTvl, + PendingTvl: pendingTvl, }, nil } diff --git a/tests/stats_test.go b/tests/stats_test.go index 54acdc5a..468b9de3 100644 --- a/tests/stats_test.go +++ b/tests/stats_test.go @@ -204,13 +204,14 @@ func TestStatsEndpoints(t *testing.T) { // Test the overall stats endpoint overallStats := fetchOverallStatsEndpoint(t, testServer) - assert.Equal(t, int64(activeStakingEvent.StakingValue), overallStats.ActiveTvl) assert.Equal(t, int64(activeStakingEvent.StakingValue), overallStats.TotalTvl) assert.Equal(t, int64(1), overallStats.ActiveDelegations) assert.Equal(t, int64(1), overallStats.TotalDelegations) assert.Equal(t, uint64(1), overallStats.TotalStakers) - // We have not yet sent any UnconfirmedInfoEvent, hence no recrod in db + // We have not yet sent any ConfirmedInfoEvent and UnconfirmedInfoEvent, hence no recrod in db + assert.Equal(t, int64(0), overallStats.ActiveTvl) assert.Equal(t, uint64(0), overallStats.UnconfirmedTvl) + assert.Equal(t, uint64(0), overallStats.PendingTvl) // Test the top staker stats endpoint stakerStats, _ := fetchStakerStatsEndpoint(t, testServer) @@ -283,7 +284,6 @@ func TestStatsEndpoints(t *testing.T) { expectedTvl := int64(activeEvents[0].StakingValue + activeEvents[1].StakingValue) expectedTotalTvl := int64(expectedTvl) + int64(activeStakingEvent.StakingValue) - assert.Equal(t, expectedTvl, overallStats.ActiveTvl) assert.Equal(t, expectedTotalTvl, overallStats.TotalTvl) assert.Equal(t, int64(2), overallStats.ActiveDelegations) assert.Equal(t, int64(3), overallStats.TotalDelegations) @@ -310,6 +310,7 @@ func TestStatsEndpoints(t *testing.T) { overallStats = fetchOverallStatsEndpoint(t, testServer) assert.Equal(t, uint64(100), overallStats.UnconfirmedTvl) + assert.Equal(t, int64(90), overallStats.ActiveTvl) } func FuzzStatsEndpointReturnHighestUnconfirmedTvlFromEvents(f *testing.F) { @@ -347,6 +348,8 @@ func FuzzStatsEndpointReturnHighestUnconfirmedTvlFromEvents(f *testing.F) { overallStats = fetchOverallStatsEndpoint(t, testServer) assert.Equal(t, &highestHeightEvent.UnconfirmedTvl, &overallStats.UnconfirmedTvl) + pendingTvl := int64(highestHeightEvent.UnconfirmedTvl) - int64(highestHeightEvent.ConfirmedTvl) + assert.Equal(t, pendingTvl, overallStats.PendingTvl) }) } From 65545af206e51b05757c42e02ef1d87c518fffe1 Mon Sep 17 00:00:00 2001 From: Jeremy <168515712+jeremy-babylonchain@users.noreply.github.com> Date: Fri, 28 Jun 2024 07:10:06 -0700 Subject: [PATCH 3/4] add queue health check and test (#196) * add queue health check and test * add configs * add terminate service function * fix * adjust health check interval to 5 minutes * staking queue client version bump --- cmd/staking-api-service/main.go | 3 + config/config-docker.yml | 1 + config/config-local.yml | 1 + go.mod | 3 +- go.sum | 6 +- internal/config/server.go | 23 ++++--- .../observability/healthcheck/healthcheck.go | 60 +++++++++++++++++++ internal/observability/metrics/metrics.go | 15 +++++ internal/queue/queue.go | 27 +++++++++ tests/config/config-test.yml | 1 + tests/healthcheck_test.go | 2 +- 11 files changed, 129 insertions(+), 13 deletions(-) create mode 100644 internal/observability/healthcheck/healthcheck.go diff --git a/cmd/staking-api-service/main.go b/cmd/staking-api-service/main.go index 5134bb4b..31d5b9bd 100644 --- a/cmd/staking-api-service/main.go +++ b/cmd/staking-api-service/main.go @@ -9,6 +9,7 @@ import ( "github.com/babylonchain/staking-api-service/internal/api" "github.com/babylonchain/staking-api-service/internal/config" "github.com/babylonchain/staking-api-service/internal/db/model" + "github.com/babylonchain/staking-api-service/internal/observability/healthcheck" "github.com/babylonchain/staking-api-service/internal/observability/metrics" "github.com/babylonchain/staking-api-service/internal/queue" "github.com/babylonchain/staking-api-service/internal/services" @@ -77,6 +78,8 @@ func main() { queues.StartReceivingMessages() + healthcheck.StartHealthCheckCron(ctx, queues, cfg.Server.HealthCheckInterval) + apiServer, err := api.New(ctx, cfg, services) if err != nil { log.Fatal().Err(err).Msg("error while setting up staking api service") diff --git a/config/config-docker.yml b/config/config-docker.yml index 58ebeab8..1edb4a29 100644 --- a/config/config-docker.yml +++ b/config/config-docker.yml @@ -8,6 +8,7 @@ server: log-level: debug btc-net: "mainnet" max-content-length: 4096 + health-check-interval: 300 # 5 minutes interval db: address: "mongodb://mongodb:27017" db-name: staking-api-service diff --git a/config/config-local.yml b/config/config-local.yml index 8d8310f2..d9b04236 100644 --- a/config/config-local.yml +++ b/config/config-local.yml @@ -8,6 +8,7 @@ server: log-level: debug btc-net: "signet" max-content-length: 4096 + health-check-interval: 300 # 5 minutes interval db: address: "mongodb://localhost:27017/?directConnection=true" db-name: staking-api-service diff --git a/go.mod b/go.mod index 3c73467e..3b4f3a23 100644 --- a/go.mod +++ b/go.mod @@ -7,12 +7,13 @@ toolchain go1.22.4 require ( github.com/babylonchain/babylon v0.9.0-rc.1 github.com/babylonchain/networks/parameters v0.2.1 - github.com/babylonchain/staking-queue-client v0.2.1 + github.com/babylonchain/staking-queue-client v0.3.1 github.com/btcsuite/btcd v0.24.0 github.com/btcsuite/btcd/btcec/v2 v2.3.2 github.com/btcsuite/btcd/btcutil v1.1.5 github.com/btcsuite/btcd/chaincfg/chainhash v1.1.0 github.com/rabbitmq/amqp091-go v1.9.0 + github.com/robfig/cron/v3 v3.0.1 github.com/spf13/viper v1.18.2 github.com/swaggo/swag v1.16.3 github.com/unrolled/secure v1.14.0 diff --git a/go.sum b/go.sum index ecfdba9a..0d3132e1 100644 --- a/go.sum +++ b/go.sum @@ -274,8 +274,8 @@ github.com/babylonchain/babylon v0.9.0-rc.1 h1:mZYKQVHVKFUA2xaEAzJloB1kyePHvZECJ github.com/babylonchain/babylon v0.9.0-rc.1/go.mod h1:YFALTW+Kp/b5jSDoA7Z70RggJjAedlmQTrpdeU8c3hY= github.com/babylonchain/networks/parameters v0.2.1 h1:OKHiCnwL/UdVN17cMwCrHz/bAjO/USauLiPyNlnVl6E= github.com/babylonchain/networks/parameters v0.2.1/go.mod h1:nejhvrL7Iwh5Vunvkg7pnomQZlHnyNzOY9lQaDp6tOA= -github.com/babylonchain/staking-queue-client v0.2.1 h1:FKbxUUOoCydAsUoj9XQMIQT1S79ThX9p7vVc5yjWm8c= -github.com/babylonchain/staking-queue-client v0.2.1/go.mod h1:mEgA6N3SnwFwGEOsUYr/HdjpKa13Wck08MS7VY/Icvo= +github.com/babylonchain/staking-queue-client v0.3.1 h1:lbGG/J+2dEVtDCku8MDmisVWpZYst0uyxjPAxF2Pr88= +github.com/babylonchain/staking-queue-client v0.3.1/go.mod h1:mEgA6N3SnwFwGEOsUYr/HdjpKa13Wck08MS7VY/Icvo= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= @@ -1000,6 +1000,8 @@ github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:N/ElC8H3+5X github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/regen-network/protobuf v1.3.3-alpha.regen.1 h1:OHEc+q5iIAXpqiqFKeLpu5NwTIkVXUs48vFMwzqpqY4= github.com/regen-network/protobuf v1.3.3-alpha.regen.1/go.mod h1:2DjTFR1HhMQhiWC5sZ4OhQ3+NtdbZ6oBDKQwq5Ou+FI= +github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= +github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= diff --git a/internal/config/server.go b/internal/config/server.go index e7f70d0a..af66876c 100644 --- a/internal/config/server.go +++ b/internal/config/server.go @@ -13,15 +13,16 @@ import ( ) type ServerConfig struct { - Host string `mapstructure:"host"` - Port int `mapstructure:"port"` - WriteTimeout time.Duration `mapstructure:"write-timeout"` - ReadTimeout time.Duration `mapstructure:"read-timeout"` - IdleTimeout time.Duration `mapstructure:"idle-timeout"` - AllowedOrigins []string `mapstructure:"allowed-origins"` - BTCNet string `mapstructure:"btc-net"` - LogLevel string `mapstructure:"log-level"` - MaxContentLength int64 `mapstructure:"max-content-length"` + Host string `mapstructure:"host"` + Port int `mapstructure:"port"` + WriteTimeout time.Duration `mapstructure:"write-timeout"` + ReadTimeout time.Duration `mapstructure:"read-timeout"` + IdleTimeout time.Duration `mapstructure:"idle-timeout"` + AllowedOrigins []string `mapstructure:"allowed-origins"` + BTCNet string `mapstructure:"btc-net"` + LogLevel string `mapstructure:"log-level"` + MaxContentLength int64 `mapstructure:"max-content-length"` + HealthCheckInterval int `mapstructure:"health-check-interval"` BTCNetParam *chaincfg.Params } @@ -52,6 +53,10 @@ func (cfg *ServerConfig) Validate() error { return fmt.Errorf("MaxContentLength must be a positive integer") } + if cfg.HealthCheckInterval <= 0 { + return fmt.Errorf("HealthCheckInterval must be a positive integer") + } + btcNet, err := utils.GetBtcNetParamesFromString(cfg.BTCNet) if err != nil { return errors.New("invalid btc-net") diff --git a/internal/observability/healthcheck/healthcheck.go b/internal/observability/healthcheck/healthcheck.go new file mode 100644 index 00000000..6601e95a --- /dev/null +++ b/internal/observability/healthcheck/healthcheck.go @@ -0,0 +1,60 @@ +package healthcheck + +import ( + "context" + "fmt" + + "github.com/babylonchain/staking-api-service/internal/observability/metrics" + "github.com/babylonchain/staking-api-service/internal/queue" + "github.com/robfig/cron/v3" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" +) + +var logger zerolog.Logger = log.Logger + +func SetLogger(customLogger zerolog.Logger) { + logger = customLogger +} + +func StartHealthCheckCron(ctx context.Context, queues *queue.Queues, cronTime int) error { + c := cron.New() + logger.Info().Msg("Initiated Health Check Cron") + + if cronTime == 0 { + cronTime = 60 + } + + cronSpec := fmt.Sprintf("@every %ds", cronTime) + + _, err := c.AddFunc(cronSpec, func() { + queueHealthCheck(queues) + }) + + if err != nil { + return err + } + + c.Start() + + go func() { + <-ctx.Done() + logger.Info().Msg("Stopping Health Check Cron") + c.Stop() + }() + + return nil +} + +func queueHealthCheck(queues *queue.Queues) { + if err := queues.IsConnectionHealthy(); err != nil { + logger.Error().Err(err).Msg("One or more queue connections are not healthy.") + // Record service unavailable in metrics + metrics.RecordServiceCrash("queue") + terminateService() + } +} + +func terminateService() { + logger.Fatal().Msg("Terminating service due to health check failure.") +} \ No newline at end of file diff --git a/internal/observability/metrics/metrics.go b/internal/observability/metrics/metrics.go index 00777d95..14e1ea70 100644 --- a/internal/observability/metrics/metrics.go +++ b/internal/observability/metrics/metrics.go @@ -31,6 +31,7 @@ var ( unprocessableEntityCounter *prometheus.CounterVec queueOperationFailureCounter *prometheus.CounterVec httpResponseWriteFailureCounter *prometheus.CounterVec + serviceCrashCounter *prometheus.CounterVec ) // Init initializes the metrics package. @@ -103,12 +104,21 @@ func registerMetrics() { []string{"status"}, ) + serviceCrashCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "service_crash_total", + Help: "", + }, + []string{"type"}, + ) + prometheus.MustRegister( httpRequestDurationHistogram, eventProcessingDurationHistogram, unprocessableEntityCounter, queueOperationFailureCounter, httpResponseWriteFailureCounter, + serviceCrashCounter, ) } @@ -151,3 +161,8 @@ func RecordQueueOperationFailure(operation, queuename string) { func RecordHttpResponseWriteFailure(statusCode int) { httpResponseWriteFailureCounter.WithLabelValues(fmt.Sprintf("%d", statusCode)).Inc() } + +// RecordServiceCrash increments the service crash counter. +func RecordServiceCrash(service string) { + serviceCrashCounter.WithLabelValues(service).Inc() +} diff --git a/internal/queue/queue.go b/internal/queue/queue.go index 333f2c20..709f7a2d 100644 --- a/internal/queue/queue.go +++ b/internal/queue/queue.go @@ -2,7 +2,9 @@ package queue import ( "context" + "fmt" "net/http" + "strings" "time" "github.com/babylonchain/staking-api-service/internal/observability/metrics" @@ -257,3 +259,28 @@ func recordErrorLog(err *types.Error) { log.Warn().Err(err).Msg("event processing failed with 4xx error") } } + +func (q *Queues) IsConnectionHealthy() error { + var errorMessages []string + + ctx, cancel := context.WithTimeout(context.Background(), 5 * time.Second) + defer cancel() + + checkQueue := func(name string, client client.QueueClient) { + if err := client.Ping(ctx); err != nil { + errorMessages = append(errorMessages, fmt.Sprintf("%s is not healthy: %v", name, err)) + } + } + + checkQueue("ActiveStakingQueueClient", q.ActiveStakingQueueClient) + checkQueue("ExpiredStakingQueueClient", q.ExpiredStakingQueueClient) + checkQueue("UnbondingStakingQueueClient", q.UnbondingStakingQueueClient) + checkQueue("WithdrawStakingQueueClient", q.WithdrawStakingQueueClient) + checkQueue("StatsQueueClient", q.StatsQueueClient) + checkQueue("BtcInfoQueueClient", q.BtcInfoQueueClient) + + if len(errorMessages) > 0 { + return fmt.Errorf(strings.Join(errorMessages, "; ")) + } + return nil +} diff --git a/tests/config/config-test.yml b/tests/config/config-test.yml index ed49ce42..0757697b 100644 --- a/tests/config/config-test.yml +++ b/tests/config/config-test.yml @@ -8,6 +8,7 @@ server: log-level: error btc-net: "signet" max-content-length: 4096 + health-check-interval: 2 db: address: "mongodb://localhost:27017" db-name: staking-api-service diff --git a/tests/healthcheck_test.go b/tests/healthcheck_test.go index 4fcef7bd..aed73084 100644 --- a/tests/healthcheck_test.go +++ b/tests/healthcheck_test.go @@ -124,4 +124,4 @@ func TestSecurityHeaders(t *testing.T) { assert.Equal(t, "DENY", resp.Header.Get("X-Frame-Options"), "expected X-Frame-Options to be DENY") assert.Equal(t, "default-src 'self'; script-src 'self' 'unsafe-inline' https://cdnjs.cloudflare.com https://stackpath.bootstrap.com; style-src 'self' 'unsafe-inline' https://cdnjs.cloudflare.com https://stackpath.bootstrap.com; img-src 'self' data: https://cdnjs.cloudflare.com https://stackpath.bootstrap.com; font-src 'self' https://cdnjs.cloudflare.com https://stackpath.bootstrap.com; object-src 'none'; frame-ancestors 'self'; form-action 'self'; block-all-mixed-content; base-uri 'self';", resp.Header.Get("Content-Security-Policy"), "expected Swagger Content-Security-Policy") assert.Equal(t, "strict-origin-when-cross-origin", resp.Header.Get("Referrer-Policy"), "expected Referrer-Policy to be strict-origin-when-cross-origin") -} +} \ No newline at end of file From 33aff03122c0e2873efff4b38185f05a07d81688 Mon Sep 17 00:00:00 2001 From: wjrjerome Date: Mon, 8 Jul 2024 16:39:24 +1000 Subject: [PATCH 4/4] chore: fix broken stats test --- tests/stats_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/stats_test.go b/tests/stats_test.go index 468b9de3..607c2872 100644 --- a/tests/stats_test.go +++ b/tests/stats_test.go @@ -348,8 +348,8 @@ func FuzzStatsEndpointReturnHighestUnconfirmedTvlFromEvents(f *testing.F) { overallStats = fetchOverallStatsEndpoint(t, testServer) assert.Equal(t, &highestHeightEvent.UnconfirmedTvl, &overallStats.UnconfirmedTvl) - pendingTvl := int64(highestHeightEvent.UnconfirmedTvl) - int64(highestHeightEvent.ConfirmedTvl) - assert.Equal(t, pendingTvl, overallStats.PendingTvl) + pendingTvl := highestHeightEvent.UnconfirmedTvl - highestHeightEvent.ConfirmedTvl + assert.Equal(t, pendingTvl, overallStats.PendingTvl) }) }