Skip to content

Commit

Permalink
add isTerminating prometheus metric (#622)
Browse files Browse the repository at this point in the history
* add isTerminating prometheus metric

* terminating -> disabled, lower shutdown check timer
  • Loading branch information
frostbyte73 authored Feb 29, 2024
1 parent 6bf97e0 commit bdbbd99
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 5 deletions.
5 changes: 2 additions & 3 deletions pkg/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ import (
"github.com/livekit/protocol/rpc"
)

const shutdownTimer = time.Second * 30

type Service struct {
ipc.UnimplementedEgressServiceServer

Expand Down Expand Up @@ -89,6 +87,7 @@ func NewService(conf *config.ServiceConfig, ioClient rpc.IOInfoClient) (*Service
if err := s.Start(s.conf,
s.promIsIdle,
s.promCanAcceptRequest,
s.promIsDisabled,
s.killProcess,
); err != nil {
return nil, err
Expand Down Expand Up @@ -206,7 +205,7 @@ func (s *Service) killProcess(egressID string, maxUsage float64) {

func (s *Service) Close() {
for s.GetRequestCount() > 0 {
time.Sleep(shutdownTimer)
time.Sleep(time.Second)
}
logger.Infow("closing server")
s.psrpcServer.Shutdown()
Expand Down
9 changes: 8 additions & 1 deletion pkg/service/service_prom.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (s *Service) PromHandler() http.Handler {
}

func (s *Service) promIsIdle() float64 {
if s.GetRequestCount() == 0 {
if !s.shutdown.IsBroken() && s.GetRequestCount() == 0 {
return 1
}
return 0
Expand All @@ -89,6 +89,13 @@ func (s *Service) promCanAcceptRequest() float64 {
return 0
}

// promIsDisabled is the value callback for a Prometheus gauge: it returns 1
// when s.shutdown.IsBroken() reports true and 0 otherwise.
// NOTE(review): shutdown is presumably broken once the service begins
// shutting down — confirm against where the field is set.
func (s *Service) promIsDisabled() float64 {
	if !s.shutdown.IsBroken() {
		return 0
	}
	return 1
}

func (s *Service) storeProcessEndedMetrics(egressID string, metrics string) error {
m, err := deserializeMetrics(egressID, metrics)
if err != nil {
Expand Down
10 changes: 9 additions & 1 deletion pkg/stats/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ func (m *Monitor) Start(
conf *config.ServiceConfig,
isIdle func() float64,
canAcceptRequest func() float64,
isDisabled func() float64,
killProcess func(string, float64),
) error {
m.killProcess = killProcess
Expand Down Expand Up @@ -105,6 +106,13 @@ func (m *Monitor) Start(
ConstLabels: prometheus.Labels{"node_id": conf.NodeID, "cluster_id": conf.ClusterID},
}, canAcceptRequest)

promIsDisabled := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Namespace: "livekit",
Subsystem: "egress",
Name: "is_disabled",
ConstLabels: prometheus.Labels{"node_id": conf.NodeID, "cluster_id": conf.ClusterID},
}, isDisabled)

m.promCPULoad = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "livekit",
Subsystem: "node",
Expand All @@ -119,7 +127,7 @@ func (m *Monitor) Start(
ConstLabels: prometheus.Labels{"node_id": conf.NodeID, "cluster_id": conf.ClusterID},
}, []string{"type"})

prometheus.MustRegister(promNodeAvailable, promCanAcceptRequest, m.promCPULoad, m.requestGauge)
prometheus.MustRegister(promNodeAvailable, promCanAcceptRequest, promIsDisabled, m.promCPULoad, m.requestGauge)

return nil
}
Expand Down

0 comments on commit bdbbd99

Please sign in to comment.