From ed2352f0e5e3bc1cb7cfe6395264fa18069b164c Mon Sep 17 00:00:00 2001 From: Alex Gartner Date: Tue, 26 Nov 2024 08:42:33 -0800 Subject: [PATCH] fix(zetaclient): distinguish between known and connected peers (#3208) --- zetaclient/metrics/telemetry.go | 29 ++++++++++++++++++++++++++++- zetaclient/tss/healthcheck.go | 23 +++++++++++++++++++---- zetaclient/tss/service.go | 1 + 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/zetaclient/metrics/telemetry.go b/zetaclient/metrics/telemetry.go index 506945859c..7c50e9f248 100644 --- a/zetaclient/metrics/telemetry.go +++ b/zetaclient/metrics/telemetry.go @@ -31,6 +31,7 @@ type TelemetryServer struct { status types.Status ipAddress string HotKeyBurnRate *BurnRate + knownPeers []peer.AddrInfo connectedPeers []peer.AddrInfo rtt map[peer.ID]int64 } @@ -42,6 +43,7 @@ func NewTelemetryServer() *TelemetryServer { lastScannedBlockNumber: make(map[int64]uint64), lastStartTimestamp: time.Now(), HotKeyBurnRate: NewBurnRate(100), + knownPeers: make([]peer.AddrInfo, 0), connectedPeers: make([]peer.AddrInfo, 0), rtt: make(map[peer.ID]int64), } @@ -67,6 +69,18 @@ func (t *TelemetryServer) GetPingRTT() map[peer.ID]int64 { return t.rtt } +func (t *TelemetryServer) SetKnownPeers(peers []peer.AddrInfo) { + t.mu.Lock() + defer t.mu.Unlock() + t.knownPeers = peers +} + +func (t *TelemetryServer) GetKnownPeers() []peer.AddrInfo { + t.mu.Lock() + defer t.mu.Unlock() + return t.knownPeers +} + func (t *TelemetryServer) SetConnectedPeers(peers []peer.AddrInfo) { t.mu.Lock() defer t.mu.Unlock() @@ -175,6 +189,7 @@ func (t *TelemetryServer) Handlers() http.Handler { router.Handle("/ip", http.HandlerFunc(t.ipHandler)).Methods(http.MethodGet) router.Handle("/hotkeyburnrate", http.HandlerFunc(t.hotKeyFeeBurnRate)).Methods(http.MethodGet) router.Handle("/connectedpeers", http.HandlerFunc(t.connectedPeersHandler)).Methods(http.MethodGet) + router.Handle("/knownpeers", http.HandlerFunc(t.knownPeersHandler)).Methods(http.MethodGet) 
router.Handle("/pingrtt", http.HandlerFunc(t.pingRTTHandler)).Methods(http.MethodGet) router.Use(logMiddleware()) @@ -283,7 +298,19 @@ func (t *TelemetryServer) connectedPeersHandler(w http.ResponseWriter, _ *http.R peers := t.GetConnectedPeers() data, err := json.Marshal(peers) if err != nil { - t.logger.Error().Err(err).Msg("Failed to marshal connected peers") + t.logger.Error().Err(err).Msg("Failed to marshal connected peers") + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + fmt.Fprintf(w, "%s", string(data)) +} + +func (t *TelemetryServer) knownPeersHandler(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + peers := t.GetKnownPeers() + data, err := json.Marshal(peers) + if err != nil { + t.logger.Error().Err(err).Msg("Failed to marshal known peers") http.Error(w, err.Error(), http.StatusInternalServerError) return } diff --git a/zetaclient/tss/healthcheck.go b/zetaclient/tss/healthcheck.go index ea23b17de3..69f8c76b7c 100644 --- a/zetaclient/tss/healthcheck.go +++ b/zetaclient/tss/healthcheck.go @@ -7,10 +7,13 @@ import ( "sync" "time" + libp2p_network "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/p2p/protocol/ping" + maddr "github.com/multiformats/go-multiaddr" "github.com/prometheus/client_golang/prometheus" "github.com/rs/zerolog" + "github.com/samber/lo" "gitlab.com/thorchain/tss/go-tss/tss" "github.com/zeta-chain/node/pkg/bg" @@ -89,16 +92,28 @@ func HealthcheckWorker(ctx context.Context, server *tss.TssServer, p Healthcheck return nil } - peersCounter := func(_ context.Context, _ *ticker.Ticker) error { + knownPeersCounter := func(_ context.Context, _ *ticker.Ticker) error { peers := server.GetKnownPeers() - p.NumConnectedPeersMetric.Set(float64(len(peers))) - p.Telemetry.SetConnectedPeers(peers) + p.Telemetry.SetKnownPeers(peers) + return nil + } + connectedPeersCounter := func(_ context.Context, _ *ticker.Ticker) error { + p2pHost := 
server.GetP2PHost() + connectedPeers := lo.Map(p2pHost.Network().Conns(), func(conn libp2p_network.Conn, _ int) peer.AddrInfo { + return peer.AddrInfo{ + ID: conn.RemotePeer(), + Addrs: []maddr.Multiaddr{conn.RemoteMultiaddr()}, + } + }) + p.Telemetry.SetConnectedPeers(connectedPeers) + p.NumConnectedPeersMetric.Set(float64(len(connectedPeers))) return nil } runBackgroundTicker(ctx, pinger, p.Interval, "TSSHealthcheckPeersPing", logger) - runBackgroundTicker(ctx, peersCounter, p.Interval, "TSSHealthcheckPeersCounter", logger) + runBackgroundTicker(ctx, knownPeersCounter, p.Interval, "TSSHealthcheckKnownPeersCounter", logger) + runBackgroundTicker(ctx, connectedPeersCounter, p.Interval, "TSSHealthcheckConnectedPeersCounter", logger) return nil } diff --git a/zetaclient/tss/service.go b/zetaclient/tss/service.go index 7a8391ff89..ed8a034eee 100644 --- a/zetaclient/tss/service.go +++ b/zetaclient/tss/service.go @@ -48,6 +48,7 @@ type Zetacore interface { type Telemetry interface { SetP2PID(id string) SetConnectedPeers(peers []peer.AddrInfo) + SetKnownPeers(peers []peer.AddrInfo) SetPingRTT(peers map[peer.ID]int64) }