Skip to content

Commit

Permalink
Do not error when facing duplicate metrics + log with component
Browse files Browse the repository at this point in the history
  • Loading branch information
hugoShaka committed Jan 9, 2025
1 parent 342a025 commit d153fda
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 15 deletions.
22 changes: 20 additions & 2 deletions lib/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -3416,6 +3416,16 @@ func (process *TeleportProcess) initUploaderService() error {
return nil
}

// promHTTPLogAdapter adapts a slog.Logger into a promhttp.Logger.
// Everything printed through the adapter is emitted by the embedded logger
// at error level.
type promHTTPLogAdapter struct {
	*slog.Logger
}

// Println implements the promhttp.Logger interface.
//
// The operands are formatted with Println semantics: they are always
// separated by a single space. The resulting message is logged at error
// level without a trailing newline.
func (l promHTTPLogAdapter) Println(v ...any) {
	// fmt.Sprint only inserts a space between two operands when neither of
	// them is a string, so Println("some prefix:", err) would glue the prefix
	// to the error text. fmt.Sprintln always separates operands and always
	// appends exactly one newline, which is dropped here.
	msg := fmt.Sprintln(v...)
	l.Error(msg[:len(msg)-1])
}

// initMetricsService starts the metrics service currently serving metrics for
// prometheus consumption
func (process *TeleportProcess) initMetricsService() error {
Expand All @@ -3424,12 +3434,20 @@ func (process *TeleportProcess) initMetricsService() error {
// We gather metrics both from the in-process registry (preferred metrics registration method)
// and the global registry (used by some Teleport services and many dependencies).
gatherers := prometheus.Gatherers{
prometheus.DefaultGatherer,
process.metricsRegistry,
prometheus.DefaultGatherer,
}

metricsHandler := promhttp.InstrumentMetricHandler(
process.metricsRegistry, promhttp.HandlerFor(gatherers, promhttp.HandlerOpts{}),
process.metricsRegistry, promhttp.HandlerFor(gatherers, promhttp.HandlerOpts{
// Errors can happen if metrics are registered with identical names in both the local and the global registry.
// In this case, we log the error but continue collecting metrics. The first collected metric will win
// (the one from the local metrics registry takes precedence).
// As we move more things to the local registry, especially in other tools like tbot, we will have fewer
// conflicts in tests.
ErrorHandling: promhttp.ContinueOnError,
ErrorLog: promHTTPLogAdapter{process.logger.With(teleport.ComponentKey, teleport.ComponentMetrics)},
}),
)

mux.Handle("/metrics", metricsHandler)
Expand Down
13 changes: 0 additions & 13 deletions lib/service/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2020,19 +2020,6 @@ func TestMetricsService(t *testing.T) {
metricsURL.Path = "/metrics"
resp, err := http.Get(metricsURL.String())
require.NoError(t, err)
// WIP: the test is failing in the CI only. This piece of code can be
// ignored and will be removed from the PR once I know why the HTTP status
// code is 500.
if resp.StatusCode != 200 {
t.Log("Non 200 status code received")
body, err := io.ReadAll(resp.Body)
if err != nil {
t.Logf("Cannot read body: %s", err.Error())
} else {
t.Logf("Body: %s", string(body))
}
t.Logf("Header: %#v", resp.Header)
}
require.Equal(t, http.StatusOK, resp.StatusCode)

body, err := io.ReadAll(resp.Body)
Expand Down

0 comments on commit d153fda

Please sign in to comment.