Skip to content

Commit

Permalink
Add an endpoint for monitoring health
Browse files Browse the repository at this point in the history
  • Loading branch information
ptodev committed Nov 29, 2024
1 parent d82f44b commit c698048
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ Main (unreleased)

- (_Experimental_) Add a `database_observability.mysql` component to collect mysql performance data.

- Add a new `/-/healthy` endpoint which returns HTTP 500 if one or more components are unhealthy. (@ptodev)

### Enhancements

- Add second metrics sample to the support bundle to provide delta information (@dehaansa)
Expand Down
2 changes: 1 addition & 1 deletion docs/sources/reference/_index.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
canonical: https://grafana.com/docs/alloy/latest/reference/
description: The reference-level documentaiton for Grafana Aloy
description: The reference-level documentation for Grafana Alloy
menuTitle: Reference
title: Grafana Alloy Reference
weight: 600
Expand Down
65 changes: 65 additions & 0 deletions docs/sources/reference/http/_index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
---
canonical: https://grafana.com/docs/alloy/latest/reference/http/
description: Learn about HTTP endpoints exposed by Grafana Alloy
title: HTTP endpoints
weight: 700
---

# The {{% param "FULL_PRODUCT_NAME" %}} HTTP endpoints

There are HTTP endpoints which are enabled by default on every instance of {{% param "FULL_PRODUCT_NAME" %}},
regardless of which components are configured.
They can be used for monitoring, health checking, and troubleshooting.

The HTTP server which exposes them is configured via the [http block](../config-blocks/http)
and the `--server.` [command line arguments](../cli/run).
For example, if the `--server.http.listen-addr` command line argument is set to `127.0.0.1:12345`,
you can query the `127.0.0.1:12345/metrics` endpoint to see the internal metrics of {{% param "FULL_PRODUCT_NAME" %}}.

### /metrics

Displays the internal metrics of {{% param "FULL_PRODUCT_NAME" %}} in the Prometheus exposition format.

### /-/ready

A {{% param "FULL_PRODUCT_NAME" %}} instance is "ready" once it has loaded its initial configuration.
If it is ready, HTTP 200 and the message `Alloy is ready.` are returned.
Otherwise, HTTP 503 and the message `Alloy is not ready.` are returned.

### /-/healthy

If all components are healthy, HTTP 200 and the message `Alloy is healthy.` are returned.
Otherwise, {{% param "FULL_PRODUCT_NAME" %}} returns HTTP 500 and an error message.
You can also monitor component health through the [UI](../../troubleshoot/debug#alloy-ui).

```
$ curl localhost:12345/-/healthy
Alloy is healthy.
```

```
$ curl localhost:12345/-/healthy
unhealthy components: math.add
```

### /-/reload

Reloads the {{% param "FULL_PRODUCT_NAME" %}} configuration file. Returns HTTP 400 and an error message if an issue with the reload was encountered.

```
$ curl localhost:12345/-/reload
config reloaded
```

```
$ curl localhost:12345/-/reload
error during the initial load: /Users/user1/Desktop/git.alloy:13:1: Failed to build component: loading custom component controller: custom component config not found in the registry, namespace: "math", componentName: "add"
```

### /-/support

Generates a [support bundle](../../troubleshoot/support_bundle).

### /debug/pprof

Generates a [profile](../../troubleshoot/profile).
26 changes: 26 additions & 0 deletions internal/service/http/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,32 @@ func (s *Service) Run(ctx context.Context, host service.Host) error {
otelmux.WithTracerProvider(s.tracer),
))

// The implementation for "/-/healthy" is inspired by
// the "/components" web API endpoint in /internal/web/api/api.go
r.HandleFunc("/-/healthy", func(w http.ResponseWriter, r *http.Request) {
components, err := host.ListComponents("", component.InfoOptions{
GetHealth: true,
})
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}

unhealthyComponents := []string{}
for _, c := range components {
if c.Health.Health == component.HealthTypeUnhealthy {
unhealthyComponents = append(unhealthyComponents, c.ComponentName)
}
}
if len(unhealthyComponents) > 0 {
http.Error(w, "unhealthy components: "+strings.Join(unhealthyComponents, ", "), http.StatusInternalServerError)
return
}

fmt.Fprintln(w, "Alloy is healthy.")
w.WriteHeader(http.StatusOK)
})

r.Handle(
"/metrics",
promhttp.HandlerFor(s.gatherer, promhttp.HandlerOpts{}),
Expand Down
82 changes: 77 additions & 5 deletions internal/service/http/http_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package http
import (
"context"
"fmt"
"io"
"net/http"
"testing"

Expand Down Expand Up @@ -43,6 +44,26 @@ func TestHTTP(t *testing.T) {
require.NoError(t, err)
defer resp.Body.Close()

buf, err := io.ReadAll(resp.Body)
require.Equal(t, "Alloy is ready.\n", string(buf))

require.Equal(t, http.StatusOK, resp.StatusCode)
})

util.Eventually(t, func(t require.TestingT) {
cli, err := config.NewClientFromConfig(config.HTTPClientConfig{}, "test")
require.NoError(t, err)

req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("http://%s/-/healthy", env.ListenAddr()), nil)
require.NoError(t, err)

resp, err := cli.Do(req)
require.NoError(t, err)
defer resp.Body.Close()

buf, err := io.ReadAll(resp.Body)
require.Equal(t, "Alloy is healthy.\n", string(buf))

require.Equal(t, http.StatusOK, resp.StatusCode)
})
}
Expand Down Expand Up @@ -157,9 +178,53 @@ func Test_Toggle_TLS(t *testing.T) {
}
}

// TestUnhealthy verifies that the "/-/healthy" endpoint answers with HTTP 500
// and lists the component name when the host reports an unhealthy component.
func TestUnhealthy(t *testing.T) {
	ctx := componenttest.TestContext(t)

	env, err := newTestEnvironment(t)
	require.NoError(t, err)

	// A single unhealthy component is enough to make the endpoint fail.
	env.components = []*component.Info{
		{
			ID: component.ID{
				ModuleID: "",
				LocalID:  "testCompId",
			},
			Label:         "testCompLabel",
			ComponentName: "testCompName",
			Health: component.Health{
				Health: component.HealthTypeUnhealthy,
			},
		},
	}
	require.NoError(t, env.ApplyConfig(""))

	go func() {
		require.NoError(t, env.Run(ctx))
	}()

	// Retry until the HTTP server is up and serving the expected response.
	util.Eventually(t, func(t require.TestingT) {
		cli, err := config.NewClientFromConfig(config.HTTPClientConfig{}, "test")
		require.NoError(t, err)

		req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("http://%s/-/healthy", env.ListenAddr()), nil)
		require.NoError(t, err)

		resp, err := cli.Do(req)
		require.NoError(t, err)
		defer resp.Body.Close()

		// Check the read error explicitly so that a failed read is not
		// reported as a misleading body mismatch.
		buf, err := io.ReadAll(resp.Body)
		require.NoError(t, err)
		require.Equal(t, "unhealthy components: testCompName\n", string(buf))

		require.Equal(t, http.StatusInternalServerError, resp.StatusCode)
	})
}

type testEnvironment struct {
svc *Service
addr string
svc *Service
addr string
components []*component.Info
}

func newTestEnvironment(t *testing.T) (*testEnvironment, error) {
Expand Down Expand Up @@ -196,20 +261,27 @@ func (env *testEnvironment) ApplyConfig(config string) error {
}

func (env *testEnvironment) Run(ctx context.Context) error {
return env.svc.Run(ctx, fakeHost{})
return env.svc.Run(ctx, fakeHost{
components: env.components,
})
}

// ListenAddr returns the address held in the environment's addr field.
func (env *testEnvironment) ListenAddr() string {
	return env.addr
}

type fakeHost struct{}
type fakeHost struct {
components []*component.Info
}

var _ service.Host = (fakeHost{})

// GetComponent never finds a component: it always returns a nil Info together
// with an error naming the requested id.
func (fakeHost) GetComponent(id component.ID, opts component.InfoOptions) (*component.Info, error) {
	notFound := fmt.Errorf("no such component %s", id)
	return nil, notFound
}

func (fakeHost) ListComponents(moduleID string, opts component.InfoOptions) ([]*component.Info, error) {
func (f fakeHost) ListComponents(moduleID string, opts component.InfoOptions) ([]*component.Info, error) {
if f.components != nil {
return f.components, nil
}
if moduleID == "" {
return nil, nil
}
Expand Down

0 comments on commit c698048

Please sign in to comment.