Skip to content

Commit

Permalink
Capture second metrics sample in support bundle to provide metrics delta for investigating issues (#2085)
Browse files Browse the repository at this point in the history

* Capture second metrics sample to provide metrics delta for investigating issues

* Update names of metrics samples
  • Loading branch information
dehaansa authored Nov 15, 2024
1 parent a37e481 commit c01bfa8
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 50 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ Main (unreleased)

- Add `otelcol.receiver.solace` component to receive traces from a Solace broker. (@wildum)

### Enhancements

- Add a second metrics sample to the support bundle so that metric deltas between the start and end of bundle collection can be inspected. (@dehaansa)

### Bugfixes

- Fixed an issue in the `prometheus.exporter.postgres` component that would leak goroutines when the target was not reachable (@dehaansa)
Expand Down
3 changes: 2 additions & 1 deletion docs/sources/troubleshoot/support_bundle.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ A support bundle contains the following data:
`/api/v0/web/components` endpoint.
* `alloy-logs.txt` contains the logs during the bundle generation.
* `alloy-metadata.yaml` contains the {{< param "PRODUCT_NAME" >}} build version and the installation's operating system, architecture, and uptime.
* `alloy-metrics.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}}.
* `alloy-metrics-sample-start.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}} at the start of the bundle collection.
* `alloy-metrics-sample-end.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}} at the end of the bundle collection.
* `alloy-peers.json` contains information about the identified cluster peers of this {{< param "PRODUCT_NAME" >}} instance, generated by the
`/api/v0/web/peers` endpoint.
* `alloy-runtime-flags.txt` contains the values of the runtime flags available in {{< param "PRODUCT_NAME" >}}.
Expand Down
109 changes: 60 additions & 49 deletions internal/service/http/supportbundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,17 @@ type SupportBundleContext struct {

// Bundle collects all the data that is exposed as a support bundle.
type Bundle struct {
meta []byte
alloyMetrics []byte
components []byte
peers []byte
runtimeFlags []byte
heapBuf *bytes.Buffer
goroutineBuf *bytes.Buffer
blockBuf *bytes.Buffer
mutexBuf *bytes.Buffer
cpuBuf *bytes.Buffer
meta []byte
alloyMetricsStart []byte
alloyMetricsEnd []byte
components []byte
peers []byte
runtimeFlags []byte
heapBuf *bytes.Buffer
goroutineBuf *bytes.Buffer
blockBuf *bytes.Buffer
mutexBuf *bytes.Buffer
cpuBuf *bytes.Buffer
}

// Metadata contains general runtime information about the current Alloy environment.
Expand All @@ -50,6 +51,26 @@ type Metadata struct {

// ExportSupportBundle gathers the information required for the support bundle.
func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress string, dialContext server.DialContextFunc) (*Bundle, error) {
var httpClient http.Client
httpClient.Transport = &http.Transport{DialContext: dialContext}

// Gather Alloy's own metrics.
alloyMetricsStart, err := retrieveAPIEndpoint(httpClient, srvAddress, "metrics")
if err != nil {
return nil, fmt.Errorf("failed to get internal Alloy metrics: %s", err)
}

// Gather running component configuration
components, err := retrieveAPIEndpoint(httpClient, srvAddress, "api/v0/web/components")
if err != nil {
return nil, fmt.Errorf("failed to get component details: %s", err)
}
// Gather cluster peers information
peers, err := retrieveAPIEndpoint(httpClient, srvAddress, "api/v0/web/peers")
if err != nil {
return nil, fmt.Errorf("failed to get peer details: %s", err)
}

// The block profiler is disabled by default. Temporarily enable recording
// of all blocking events. Also, temporarily record all mutex contentions,
// and defer restoring of earlier mutex profiling fraction.
Expand All @@ -76,24 +97,6 @@ func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress
return nil, fmt.Errorf("failed to marshal support bundle metadata: %s", err)
}

var httpClient http.Client
httpClient.Transport = &http.Transport{DialContext: dialContext}
// Gather Alloy's own metrics.
alloyMetrics, err := retrieveAPIEndpoint(httpClient, srvAddress, "metrics")
if err != nil {
return nil, fmt.Errorf("failed to get internal Alloy metrics: %s", err)
}
// Gather running component configuration
components, err := retrieveAPIEndpoint(httpClient, srvAddress, "api/v0/web/components")
if err != nil {
return nil, fmt.Errorf("failed to get component details: %s", err)
}
// Gather cluster peers information
peers, err := retrieveAPIEndpoint(httpClient, srvAddress, "api/v0/web/peers")
if err != nil {
return nil, fmt.Errorf("failed to get peer details: %s", err)
}

// Export pprof data.
var (
cpuBuf bytes.Buffer
Expand Down Expand Up @@ -129,19 +132,26 @@ func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress
return nil, err
}

// Gather Alloy's own metrics after the profile completes
alloyMetricsEnd, err := retrieveAPIEndpoint(httpClient, srvAddress, "metrics")
if err != nil {
return nil, fmt.Errorf("failed to get internal Alloy metrics: %s", err)
}

// Finally, bundle everything up to be served, either as a zip from
// memory, or exported to a directory.
bundle := &Bundle{
meta: meta,
alloyMetrics: alloyMetrics,
components: components,
peers: peers,
runtimeFlags: []byte(strings.Join(runtimeFlags, "\n")),
heapBuf: &heapBuf,
goroutineBuf: &goroutineBuf,
blockBuf: &blockBuf,
mutexBuf: &mutexBuf,
cpuBuf: &cpuBuf,
meta: meta,
alloyMetricsStart: alloyMetricsStart,
alloyMetricsEnd: alloyMetricsEnd,
components: components,
peers: peers,
runtimeFlags: []byte(strings.Join(runtimeFlags, "\n")),
heapBuf: &heapBuf,
goroutineBuf: &goroutineBuf,
blockBuf: &blockBuf,
mutexBuf: &mutexBuf,
cpuBuf: &cpuBuf,
}

return bundle, nil
Expand Down Expand Up @@ -169,17 +179,18 @@ func ServeSupportBundle(rw http.ResponseWriter, b *Bundle, logsBuf *bytes.Buffer
rw.Header().Set("Content-Disposition", "attachment; filename=\"alloy-support-bundle.zip\"")

zipStructure := map[string][]byte{
"alloy-metadata.yaml": b.meta,
"alloy-components.json": b.components,
"alloy-peers.json": b.peers,
"alloy-metrics.txt": b.alloyMetrics,
"alloy-runtime-flags.txt": b.runtimeFlags,
"alloy-logs.txt": logsBuf.Bytes(),
"pprof/cpu.pprof": b.cpuBuf.Bytes(),
"pprof/heap.pprof": b.heapBuf.Bytes(),
"pprof/goroutine.pprof": b.goroutineBuf.Bytes(),
"pprof/mutex.pprof": b.mutexBuf.Bytes(),
"pprof/block.pprof": b.blockBuf.Bytes(),
"alloy-metadata.yaml": b.meta,
"alloy-components.json": b.components,
"alloy-peers.json": b.peers,
"alloy-metrics-sample-start.txt": b.alloyMetricsStart,
"alloy-metrics-sample-end.txt": b.alloyMetricsEnd,
"alloy-runtime-flags.txt": b.runtimeFlags,
"alloy-logs.txt": logsBuf.Bytes(),
"pprof/cpu.pprof": b.cpuBuf.Bytes(),
"pprof/heap.pprof": b.heapBuf.Bytes(),
"pprof/goroutine.pprof": b.goroutineBuf.Bytes(),
"pprof/mutex.pprof": b.mutexBuf.Bytes(),
"pprof/block.pprof": b.blockBuf.Bytes(),
}

for fn, b := range zipStructure {
Expand Down

0 comments on commit c01bfa8

Please sign in to comment.