Skip to content

Commit

Permalink
[supervisor] add ssh tunnel metrics (#18795)
Browse files Browse the repository at this point in the history
  • Loading branch information
akosyakov authored Sep 25, 2023
1 parent 7416680 commit 2565a1c
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 5 deletions.
12 changes: 12 additions & 0 deletions components/supervisor/pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import (
// SupervisorMetrics groups the Prometheus collectors that NewMetrics builds
// and Register adds to a registry.
type SupervisorMetrics struct {
// IDEReadyDurationTotal is a histogram vector.
// NOTE(review): its name, help text and labels are not visible here — confirm in NewMetrics.
IDEReadyDurationTotal *prometheus.HistogramVec
// InitializerHistogram is a histogram vector.
// NOTE(review): presumably the "initializer speed in bytes per second" histogram labelled by "kind" — confirm in NewMetrics.
InitializerHistogram *prometheus.HistogramVec
// SSHTunnelOpenedTotal counts SSH tunnels opened by the supervisor
// (metric "supervisor_ssh_tunnel_opened_total", declared without labels).
SSHTunnelOpenedTotal *prometheus.CounterVec
// SSHTunnelClosedTotal counts SSH tunnels closed by the supervisor
// (metric "supervisor_ssh_tunnel_closed_total", labelled by "code").
SSHTunnelClosedTotal *prometheus.CounterVec
}

func NewMetrics() *SupervisorMetrics {
Expand All @@ -27,13 +29,23 @@ func NewMetrics() *SupervisorMetrics {
Help: "initializer speed in bytes per second",
Buckets: prometheus.ExponentialBuckets(1024*1024, 2, 12),
}, []string{"kind"}),
SSHTunnelOpenedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "supervisor_ssh_tunnel_opened_total",
Help: "Total number of SSH tunnels opened by the supervisor",
}, []string{}),
SSHTunnelClosedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "supervisor_ssh_tunnel_closed_total",
Help: "Total number of SSH tunnels closed by the supervisor",
}, []string{"code"}),
}
}

func (m *SupervisorMetrics) Register(registry *prometheus.Registry) error {
metrics := []prometheus.Collector{
m.IDEReadyDurationTotal,
m.InitializerHistogram,
m.SSHTunnelOpenedTotal,
m.SSHTunnelClosedTotal,
}

for _, metric := range metrics {
Expand Down
2 changes: 2 additions & 0 deletions components/supervisor/pkg/metrics/reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ func NewGrpcMetricsReporter(gitpodHost string) *GrpcMetricsReporter {
"supervisor_initializer_bytes_second": true,
"supervisor_client_handled_total": true,
"supervisor_client_handling_seconds": true,
"supervisor_ssh_tunnel_opened_total": true,
"supervisor_ssh_tunnel_closed_total": true,
},
values: make(map[string]float64),
addCounter: func(name string, labels map[string]string, value uint64) {
Expand Down
51 changes: 46 additions & 5 deletions components/supervisor/pkg/supervisor/supervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"os/exec"
"os/signal"
"path/filepath"
"regexp"
"runtime"
"runtime/debug"
"strconv"
Expand Down Expand Up @@ -418,7 +419,7 @@ func Run(options ...RunOption) {
}

wg.Add(1)
go startAPIEndpoint(ctx, cfg, &wg, apiServices, tunneledPortsService, metricsReporter, apiEndpointOpts...)
go startAPIEndpoint(ctx, cfg, &wg, apiServices, tunneledPortsService, metricsReporter, supervisorMetrics, topService, apiEndpointOpts...)

wg.Add(1)
go startSSHServer(ctx, cfg, &wg)
Expand Down Expand Up @@ -1187,7 +1188,28 @@ func isBlacklistedEnvvar(name string) bool {
return false
}

func startAPIEndpoint(ctx context.Context, cfg *Config, wg *sync.WaitGroup, services []RegisterableService, tunneled *ports.TunneledPortsService, metricsReporter *metrics.GrpcMetricsReporter, opts ...grpc.ServerOption) {
// websocketCloseErrorPattern locates the text "websocket: close " followed by
// one or more digits, capturing the digits as group 1.
var websocketCloseErrorPattern = regexp.MustCompile(`websocket: close (\d+)`)

// extractCloseErrorCode returns the run of digits that follows the first
// "websocket: close " occurrence in errStr. When errStr contains no such
// occurrence, it returns "unknown".
func extractCloseErrorCode(errStr string) string {
	// The index slice is [matchStart, matchEnd, group1Start, group1End], or nil
	// when nothing matches. Group 1 is mandatory in the pattern, so its bounds
	// are always valid whenever the slice is non-nil.
	bounds := websocketCloseErrorPattern.FindStringSubmatchIndex(errStr)
	if bounds == nil {
		return "unknown"
	}

	codeStart, codeEnd := bounds[2], bounds[3]
	return errStr[codeStart:codeEnd]
}

func startAPIEndpoint(
ctx context.Context,
cfg *Config,
wg *sync.WaitGroup,
services []RegisterableService,
tunneled *ports.TunneledPortsService,
metricsReporter *metrics.GrpcMetricsReporter,
supervisorMetrics *metrics.SupervisorMetrics,
topService *TopService,
opts ...grpc.ServerOption,
) {
defer wg.Done()
defer log.Debug("startAPIEndpoint shutdown")

Expand Down Expand Up @@ -1308,6 +1330,17 @@ func startAPIEndpoint(ctx context.Context, cfg *Config, wg *sync.WaitGroup, serv
tunnelOverWebSocket(tunneled, conn)
}))
routes.Handle("/_supervisor/tunnel/ssh", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
var err error
supervisorMetrics.SSHTunnelOpenedTotal.WithLabelValues().Inc()
defer func() {
code := "unknown"
if err != nil {
code = extractCloseErrorCode(err.Error())
}
supervisorMetrics.SSHTunnelClosedTotal.WithLabelValues(code).Inc()
}()
startTime := time.Now()
log := log.WithField("userAgent", r.Header.Get("user-agent")).WithField("remoteAddr", r.RemoteAddr)
wsConn, err := upgrader.Upgrade(rw, r, nil)
if err != nil {
log.WithError(err).Error("tunnel ssh: upgrade to the WebSocket protocol failed")
Expand All @@ -1331,13 +1364,21 @@ func startAPIEndpoint(ctx context.Context, cfg *Config, wg *sync.WaitGroup, serv

go io.Copy(conn, conn2)
_, err = io.Copy(conn2, conn)
if err != nil && !websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure) {
log.WithError(err).Error("tunnel ssh: error returned from io.copy")
if err != nil {
var usedCpu, usedMemory int64
data := topService.data
if data != nil && data.Cpu != nil {
usedCpu = data.Cpu.Used
}
if data != nil && data.Memory != nil {
usedMemory = data.Memory.Used
}
log.WithField("usedCpu", usedCpu).WithField("usedMemory", usedMemory).WithError(err).Error("tunnel ssh: error returned from io.copy")
}

conn.Close()
conn2.Close()
log.Infof("tunnel ssh: Disconnect from %s", conn.RemoteAddr())
log.WithField("duration", time.Since(startTime).Seconds()).Infof("tunnel ssh: Disconnect from %s", conn.RemoteAddr())
}))
if cfg.DebugEnable {
routes.Handle("/_supervisor/debug/tunnels", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
Expand Down
16 changes: 16 additions & 0 deletions install/installer/pkg/components/ide-metrics/configmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,22 @@ func configmap(ctx *common.RenderContext) ([]runtime.Object, error) {
},
},
},
{
Name: "supervisor_ssh_tunnel_opened_total",
Help: "Total number of SSH tunnels opened by the supervisor",
Labels: []config.LabelAllowList{},
},
{
Name: "supervisor_ssh_tunnel_closed_total",
Help: "Total number of SSH tunnels closed by the supervisor",
Labels: []config.LabelAllowList{
{
Name: "code",
AllowValues: []string{"*"},
DefaultValue: "unknown",
},
},
},
}

histogramMetrics := []config.HistogramMetricsConfiguration{
Expand Down

0 comments on commit 2565a1c

Please sign in to comment.