From 5544239bd39210e21c49fd533a9e0f66c4fdf2a5 Mon Sep 17 00:00:00 2001 From: djkazic Date: Fri, 12 Jul 2024 15:03:34 -0400 Subject: [PATCH] collectors: add time_to_unlock_secs --- collectors/prometheus.go | 6 ++- collectors/state_collector.go | 86 +++++++++++++++++++++++------------ lndmon.go | 4 ++ 3 files changed, 65 insertions(+), 31 deletions(-) diff --git a/collectors/prometheus.go b/collectors/prometheus.go index 5377c08..9781404 100644 --- a/collectors/prometheus.go +++ b/collectors/prometheus.go @@ -6,6 +6,7 @@ import ( "net/http" "os" "path/filepath" + "time" "github.com/btcsuite/btcd/btcutil" "github.com/lightninglabs/lndclient" @@ -70,6 +71,9 @@ type MonitoringConfig struct { // DisableHtlc disables collection of HTLCs metrics DisableHtlc bool + + // ProgramStartTime stores a best-effort estimate of when lnd/lndmon was started. + ProgramStartTime time.Time } func DefaultConfig() *PrometheusConfig { @@ -104,7 +108,7 @@ func NewPrometheusExporter(cfg *PrometheusConfig, lnd *lndclient.LndServices, NewWalletCollector(lnd, errChan), NewPeerCollector(lnd.Client, errChan), NewInfoCollector(lnd.Client, errChan), - NewStateCollector(lnd, errChan), + NewStateCollector(lnd, errChan, monitoringCfg.ProgramStartTime), } if !monitoringCfg.DisableHtlc { diff --git a/collectors/state_collector.go b/collectors/state_collector.go index ed486a3..8f7728b 100644 --- a/collectors/state_collector.go +++ b/collectors/state_collector.go @@ -10,22 +10,29 @@ import ( "github.com/prometheus/client_golang/prometheus" ) +// StateCollector is a collector that keeps track of LND's state. type StateCollector struct { lnd *lndclient.LndServices - // Use one gauge to track the starting time of LND. + // timeToStartDesc is a gauge to track time from unlocked to started of LND. timeToStartDesc *prometheus.Desc - // startTime records a best-effort timestamp of when LND was started. - startTime time.Time + // timeToUnlockDesc is a gauge to track the time to unlock of LND. + timeToUnlockDesc *prometheus.Desc - // endTime records when LND makes a transition from RPC_ACTIVE to + // programStartTime records a best-effort timestamp of when lndmon was started. + programStartTime time.Time + + // unlockTime records a best-effort timestamp of when LND was unlocked. + unlockTime time.Time + + // endTime records when LND makes a transition from UNLOCKED to // SERVER_ACTIVE. // If lndmon starts after LND has already reached SERVER_ACTIVE, no // startup time metric will be emitted. endTime time.Time - // mutex is a lock for preventing concurrent writes to startTime or + // mutex is a lock for preventing concurrent writes to unlockTime or // endTime. mutex sync.RWMutex @@ -36,17 +43,23 @@ type StateCollector struct { // NewStateCollector returns a new instance of the StateCollector. func NewStateCollector(lnd *lndclient.LndServices, - errChan chan<- error) *StateCollector { + errChan chan<- error, programStartTime time.Time) *StateCollector { sc := &StateCollector{ lnd: lnd, timeToStartDesc: prometheus.NewDesc( - "lnd_time_to_start_secs", - "time to start in seconds", + "lnd_time_to_start_millisecs", + "time to start in milliseconds", + nil, nil, + ), + timeToUnlockDesc: prometheus.NewDesc( + "lnd_time_to_unlock_millisecs", + "time to unlocked in milliseconds", nil, nil, ), - startTime: time.Now(), - errChan: errChan, + programStartTime: programStartTime, + unlockTime: time.Now(), + errChan: errChan, } go sc.monitorStateChanges() @@ -57,24 +70,30 @@ func NewStateCollector(lnd *lndclient.LndServices, func (s *StateCollector) monitorStateChanges() { var serverActiveReached bool - for { - state, err := s.lnd.State.GetState(context.Background()) - if err != nil { - s.errChan <- fmt.Errorf("StateCollector GetState failed with: %v", err) - continue - } - - s.mutex.Lock() - if state == lndclient.WalletStateRPCActive && !s.startTime.IsZero() { - s.endTime = time.Now() - serverActiveReached = true - } - s.mutex.Unlock() + stateUpdates, errChan, err := s.lnd.State.SubscribeState(context.Background()) + if err != nil { + s.errChan <- fmt.Errorf("StateCollector SubscribeState failed with: %v", err) + return + } - if serverActiveReached { - break + for { + select { + case state := <-stateUpdates: + s.mutex.Lock() + if state == lndclient.WalletStateServerActive && !s.unlockTime.IsZero() { + s.endTime = time.Now() + serverActiveReached = true + } + s.mutex.Unlock() + + if serverActiveReached { + return + } + + case err := <-errChan: + s.errChan <- fmt.Errorf("StateCollector state update failed with: %v", err) + return } - time.Sleep(1 * time.Second) } } @@ -85,6 +104,7 @@ func (s *StateCollector) monitorStateChanges() { // NOTE: Part of the prometheus.Collector interface. func (s *StateCollector) Describe(ch chan<- *prometheus.Desc) { ch <- s.timeToStartDesc + ch <- s.timeToUnlockDesc } // Collect is called by the Prometheus registry when collecting metrics. @@ -95,11 +115,17 @@ func (s *StateCollector) Collect(ch chan<- prometheus.Metric) { s.mutex.RLock() defer s.mutex.RUnlock() - // We have set both a startTime and endTime, calculate the difference and emit a metric. - if !s.startTime.IsZero() && !s.endTime.IsZero() { - timeToStartInSecs := s.endTime.Sub(s.startTime).Seconds() + // We have set unlockTime and endTime. + // Calculate the differences and emit a metric. + if !s.unlockTime.IsZero() && !s.endTime.IsZero() { + timeToUnlockInMSecs := s.unlockTime.Sub(s.programStartTime).Milliseconds() + timeToStartInMSecs := s.endTime.Sub(s.unlockTime).Milliseconds() + ch <- prometheus.MustNewConstMetric( + s.timeToStartDesc, prometheus.GaugeValue, float64(timeToStartInMSecs), + ) + ch <- prometheus.MustNewConstMetric( - s.timeToStartDesc, prometheus.GaugeValue, timeToStartInSecs, + s.timeToUnlockDesc, prometheus.GaugeValue, float64(timeToUnlockInMSecs), ) } } diff --git a/lndmon.go b/lndmon.go index 6b1374f..6dd362c 100755 --- a/lndmon.go +++ b/lndmon.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "path/filepath" + "time" flags "github.com/jessevdk/go-flags" "github.com/lightninglabs/lndclient" @@ -37,6 +38,8 @@ func start() error { return fmt.Errorf("could not intercept signal: %v", err) } + programStartTime := time.Now() + // Initialize our lnd client, requiring at least lnd v0.11. lnd, err := lndclient.NewLndServices( &lndclient.LndServicesConfig{ @@ -70,6 +73,7 @@ func start() error { } monitoringCfg.PrimaryNode = &primaryNode } + monitoringCfg.ProgramStartTime = programStartTime // Start our Prometheus exporter. This exporter spawns a goroutine // that pulls metrics from our lnd client on a set interval.