diff --git a/CHANGELOG.md b/CHANGELOG.md index 28fc106bc5..be1e851989 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,10 @@ Main (unreleased) - Add `otelcol.exporter.splunkhec` allowing to export otel data to Splunk HEC (@adlotsof) +### Enhancements + +- Add all raw configuration files & a copy of the latest remote config to the support bundle (@dehaansa) + ### Bugfixes - Fixed an issue in the `prometheus.exporter.postgres` component that would leak goroutines when the target was not reachable (@dehaansa) diff --git a/docs/sources/troubleshoot/support_bundle.md b/docs/sources/troubleshoot/support_bundle.md index 2bb870bc5b..722a8ce405 100644 --- a/docs/sources/troubleshoot/support_bundle.md +++ b/docs/sources/troubleshoot/support_bundle.md @@ -44,8 +44,11 @@ A support bundle contains the following data: * `alloy-runtime-flags.txt` contains the values of the runtime flags available in {{< param "PRODUCT_NAME" >}}. * The `pprof/` directory contains Go runtime profiling data (CPU, heap, goroutine, mutex, block profiles) as exported by the pprof package. Refer to the [profile][profile] documentation for more details on how to use this information. +* The `sources/` directory contains copies of the local configuration files being used to configure {{< param "PRODUCT_NAME" >}}. +* `sources/remote-config/remote.alloy` contains a copy of the last received [remote configuration][remotecfg]. 
[profile]: ../profile [components]: ../../get-started/components/ [alloy-repo]: https://github.com/grafana/alloy/issues -[backward-compatibility]: ../../introduction/backward-compatibility \ No newline at end of file +[backward-compatibility]: ../../introduction/backward-compatibility +[remotecfg]: ../../reference/config-blocks/remotecfg/ \ No newline at end of file diff --git a/internal/alloycli/cmd_run.go b/internal/alloycli/cmd_run.go index 306a030c55..3574266d43 100644 --- a/internal/alloycli/cmd_run.go +++ b/internal/alloycli/cmd_run.go @@ -356,6 +356,7 @@ func (fr *alloyRun) Run(cmd *cobra.Command, configPath string) error { if err != nil { return nil, fmt.Errorf("reading config path %q: %w", configPath, err) } + httpService.SetSources(alloySource.RawConfigs()) if err := f.LoadSource(alloySource, nil, configPath); err != nil { return alloySource, fmt.Errorf("error during the initial load: %w", err) } diff --git a/internal/service/http/http.go b/internal/service/http/http.go index 590802b9b4..4cdbcb2fae 100644 --- a/internal/service/http/http.go +++ b/internal/service/http/http.go @@ -78,6 +78,9 @@ type Service struct { // Used to enforce single-flight requests to supportHandler supportBundleMut sync.Mutex + // Track the raw config for use with the support bundle + sources map[string][]byte + // publicLis and tcpLis are used to lazily enable TLS, since TLS is // optionally configurable at runtime. // @@ -225,7 +228,7 @@ func (s *Service) Run(ctx context.Context, host service.Host) error { } // Wire in support bundle generator - r.HandleFunc("/-/support", s.supportHandler).Methods("GET") + r.HandleFunc("/-/support", s.generateSupportBundleHandler(host)).Methods("GET") // Wire custom service handlers for services which depend on the http // service. 
@@ -259,60 +262,93 @@ func (s *Service) Run(ctx context.Context, host service.Host) error {
 	return nil
 }
 
-func (s *Service) supportHandler(rw http.ResponseWriter, r *http.Request) {
-	s.supportBundleMut.Lock()
-	defer s.supportBundleMut.Unlock()
-
-	// TODO(dehaansa) remove this check once the support bundle is generally available
-	if !s.opts.MinStability.Permits(featuregate.StabilityPublicPreview) {
-		rw.WriteHeader(http.StatusForbidden)
-		_, _ = rw.Write([]byte("support bundle generation is only available in public preview. Use" +
-			" --stability.level command-line flag to enable public-preview features"))
-		return
-	}
-
-	if s.opts.BundleContext.DisableSupportBundle {
-		rw.WriteHeader(http.StatusForbidden)
-		_, _ = rw.Write([]byte("support bundle generation is disabled; it can be re-enabled by removing the --disable-support-bundle flag"))
-		return
-	}
-
-	duration := getServerWriteTimeout(r)
-	if r.URL.Query().Has("duration") {
-		d, err := strconv.Atoi(r.URL.Query().Get("duration"))
-		if err != nil {
-			http.Error(rw, fmt.Sprintf("duration value (in seconds) should be a positive integer: %s", err), http.StatusBadRequest)
-			return
-		}
-		if d < 1 {
-			http.Error(rw, "duration value (in seconds) should be larger than 1", http.StatusBadRequest)
-			return
-		}
-		if float64(d) > duration.Seconds() {
-			http.Error(rw, "duration value exceeds the server's write timeout", http.StatusBadRequest)
-			return
-		}
-		duration = time.Duration(d) * time.Second
-	}
-	ctx, cancel := context.WithTimeout(context.Background(), duration)
-	defer cancel()
-
-	var logsBuffer bytes.Buffer
-	syncBuff := log.NewSyncWriter(&logsBuffer)
-	s.globalLogger.SetTemporaryWriter(syncBuff)
-	defer func() {
-		s.globalLogger.RemoveTemporaryWriter()
-	}()
-
-	bundle, err := ExportSupportBundle(ctx, s.opts.BundleContext.RuntimeFlags, s.opts.HTTPListenAddr, s.Data().(Data).DialFunc)
-	if err != nil {
-		http.Error(rw, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	if err := ServeSupportBundle(rw, bundle, &logsBuffer); err != nil {
-		http.Error(rw, err.Error(), http.StatusInternalServerError)
-		return
-	}
+// generateSupportBundleHandler returns the handler for the support bundle
+// endpoint. host is used to look up the remotecfg service so that its cached
+// configuration can be added to the bundle next to the sources recorded by
+// SetSources. Requests are serialized with supportBundleMut and rejected with
+// 403 when the stability level or the disable flag forbids bundle generation.
+func (s *Service) generateSupportBundleHandler(host service.Host) func(rw http.ResponseWriter, r *http.Request) {
+	return func(rw http.ResponseWriter, r *http.Request) {
+		s.supportBundleMut.Lock()
+		defer s.supportBundleMut.Unlock()
+
+		// TODO(dehaansa) remove this check once the support bundle is generally available
+		if !s.opts.MinStability.Permits(featuregate.StabilityPublicPreview) {
+			rw.WriteHeader(http.StatusForbidden)
+			_, _ = rw.Write([]byte("support bundle generation is only available in public preview. Use" +
+				" --stability.level command-line flag to enable public-preview features"))
+			return
+		}
+
+		if s.opts.BundleContext.DisableSupportBundle {
+			rw.WriteHeader(http.StatusForbidden)
+			_, _ = rw.Write([]byte("support bundle generation is disabled; it can be re-enabled by removing the --disable-support-bundle flag"))
+			return
+		}
+
+		// Default to the server's write timeout; the duration query parameter
+		// (seconds) may shorten it but never exceed it.
+		duration := getServerWriteTimeout(r)
+		if r.URL.Query().Has("duration") {
+			d, err := strconv.Atoi(r.URL.Query().Get("duration"))
+			if err != nil {
+				http.Error(rw, fmt.Sprintf("duration value (in seconds) should be a positive integer: %s", err), http.StatusBadRequest)
+				return
+			}
+			if d < 1 {
+				http.Error(rw, "duration value (in seconds) should be at least 1", http.StatusBadRequest)
+				return
+			}
+			if float64(d) > duration.Seconds() {
+				http.Error(rw, "duration value exceeds the server's write timeout", http.StatusBadRequest)
+				return
+			}
+			duration = time.Duration(d) * time.Second
+		}
+		// Derive the deadline from the request context so that collection is
+		// cancelled when the client disconnects instead of holding
+		// supportBundleMut for the full duration.
+		ctx, cancel := context.WithTimeout(r.Context(), duration)
+		defer cancel()
+
+		// logsBuffer is handed to ServeSupportBundle below; the temporary writer
+		// is removed again when the handler returns.
+		var logsBuffer bytes.Buffer
+		syncBuff := log.NewSyncWriter(&logsBuffer)
+		s.globalLogger.SetTemporaryWriter(syncBuff)
+		defer func() {
+			s.globalLogger.RemoveTemporaryWriter()
+		}()
+
+		// The cached remote config is best-effort: a lookup failure is logged at
+		// debug level and does not fail the request.
+		cachedConfig, err := remoteCfgCachedConfig(host)
+		if err != nil {
+			level.Debug(s.log).Log("msg", "failed to get cached remote config", "err", err)
+		}
+
+		bundle, err := ExportSupportBundle(ctx, s.opts.BundleContext.RuntimeFlags, s.opts.HTTPListenAddr, s.sources, cachedConfig, s.Data().(Data).DialFunc)
+		if err != nil {
+			http.Error(rw, err.Error(), http.StatusInternalServerError)
+			return
+		}
+		if err := ServeSupportBundle(rw, bundle, &logsBuffer); err != nil {
+			http.Error(rw, err.Error(), http.StatusInternalServerError)
+			return
+		}
+	}
+}
+
+// SetSources sets the sources on reload to be delivered
+// with the support bundle.
+//
+// NOTE(review): SetSources takes supportBundleMut, which the support bundle
+// handler holds while a bundle is generated, so a call made during
+// generation blocks until that request finishes.
+func (s *Service) SetSources(sources map[string][]byte) {
+	s.supportBundleMut.Lock()
+	defer s.supportBundleMut.Unlock()
+	s.sources = sources
 }
 
 func getServerWriteTimeout(r *http.Request) time.Duration {
@@ -582,6 +600,23 @@ func (lis *lazyListener) Addr() net.Addr {
 	return lis.inner.Addr()
 }
 
+// remoteCfgCachedConfig returns the result of GetCachedConfig on the
+// remotecfg service registered on host. It returns an error when that
+// service is not registered or is not a *remotecfg.Service.
+func remoteCfgCachedConfig(host service.Host) ([]byte, error) {
+	svc, ok := host.GetService(remotecfg.ServiceName)
+	if !ok {
+		return nil, fmt.Errorf("failed to get the remotecfg service")
+	}
+	// Checked assertion: an unexpected implementation must surface as an
+	// error to the caller rather than panic inside the HTTP handler.
+	rc, ok := svc.(*remotecfg.Service)
+	if !ok {
+		return nil, fmt.Errorf("unexpected remotecfg service type %T", svc)
+	}
+	return rc.GetCachedConfig()
+}
+
 func remoteCfgHostProvider(host service.Host) func() (service.Host, error) {
 	return func() (service.Host, error) {
 		svc, ok := host.GetService(remotecfg.ServiceName)
diff --git a/internal/service/http/supportbundle.go b/internal/service/http/supportbundle.go
index 3c75c35150..ef83bf5f18 100644
--- a/internal/service/http/supportbundle.go
+++ b/internal/service/http/supportbundle.go
@@ -33,6 +33,8 @@ type Bundle struct {
 	components   []byte
 	peers        []byte
 	runtimeFlags []byte
+	sources      map[string][]byte
+	remoteCfg    []byte
 	heapBuf      *bytes.Buffer
 	goroutineBuf *bytes.Buffer
 	blockBuf     *bytes.Buffer
@@ -49,7 +51,7 @@ type Metadata struct {
 }
 
 // ExportSupportBundle gathers the information required for the support bundle.
-func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress string, dialContext server.DialContextFunc) (*Bundle, error) {
+func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress string, sources map[string][]byte, remoteCfg []byte, dialContext server.DialContextFunc) (*Bundle, error) {
 	// The block profiler is disabled by default. Temporarily enable recording
 	// of all blocking events. Also, temporarily record all mutex contentions,
 	// and defer restoring of earlier mutex profiling fraction.
@@ -136,6 +138,8 @@ func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress
 		alloyMetrics: alloyMetrics,
 		components:   components,
 		peers:        peers,
+		sources:      sources,
+		remoteCfg:    remoteCfg,
 		runtimeFlags: []byte(strings.Join(runtimeFlags, "\n")),
 		heapBuf:      &heapBuf,
 		goroutineBuf: &goroutineBuf,
@@ -169,17 +173,26 @@ func ServeSupportBundle(rw http.ResponseWriter, b *Bundle, logsBuf *bytes.Buffer
 	rw.Header().Set("Content-Disposition", "attachment; filename=\"alloy-support-bundle.zip\"")
 
 	zipStructure := map[string][]byte{
-		"alloy-metadata.yaml":     b.meta,
-		"alloy-components.json":   b.components,
-		"alloy-peers.json":        b.peers,
-		"alloy-metrics.txt":       b.alloyMetrics,
-		"alloy-runtime-flags.txt": b.runtimeFlags,
-		"alloy-logs.txt":          logsBuf.Bytes(),
-		"pprof/cpu.pprof":         b.cpuBuf.Bytes(),
-		"pprof/heap.pprof":        b.heapBuf.Bytes(),
-		"pprof/goroutine.pprof":   b.goroutineBuf.Bytes(),
-		"pprof/mutex.pprof":       b.mutexBuf.Bytes(),
-		"pprof/block.pprof":       b.blockBuf.Bytes(),
+		"alloy-metadata.yaml":                b.meta,
+		"alloy-components.json":              b.components,
+		"alloy-peers.json":                   b.peers,
+		"alloy-metrics.txt":                  b.alloyMetrics,
+		"alloy-runtime-flags.txt":            b.runtimeFlags,
+		"alloy-logs.txt":                     logsBuf.Bytes(),
+		"sources/remote-config/remote.alloy": b.remoteCfg,
+		"pprof/cpu.pprof":                    b.cpuBuf.Bytes(),
+		"pprof/heap.pprof":                   b.heapBuf.Bytes(),
+		"pprof/goroutine.pprof":              b.goroutineBuf.Bytes(),
+		"pprof/mutex.pprof":                  b.mutexBuf.Bytes(),
+		"pprof/block.pprof":                  b.blockBuf.Bytes(),
+	}
+
+	// Zip entry names must use forward slashes; filepath.Join yields
+	// backslashes on Windows, so the name is normalized with filepath.ToSlash.
+	// NOTE(review): only the base name of each source is kept, so two sources
+	// sharing a base name overwrite each other - confirm keys are unique.
+	for p, s := range b.sources {
+		zipStructure[filepath.ToSlash(filepath.Join("sources", filepath.Base(p)))] = s
 	}
 
 	for fn, b := range zipStructure {
diff --git a/internal/service/remotecfg/remotecfg.go b/internal/service/remotecfg/remotecfg.go
index 50ef83aee0..fdb10f2f8e 100644
--- a/internal/service/remotecfg/remotecfg.go
+++ b/internal/service/remotecfg/remotecfg.go
@@ -401,7 +401,7 @@ func (s *Service) fetchRemote() error {
 }
 
 func (s *Service) fetchLocal() {
-	b, err := s.getCachedConfig()
+	b, err := s.GetCachedConfig()
 	if err != nil {
 		level.Error(s.opts.Logger).Log("msg", "failed to read from cache", "err", err)
 		return
@@ -440,7 +440,9 @@ func (s *Service) getAPIConfig() ([]byte, error) {
 	return []byte(gcr.Msg.GetContent()), nil
 }
 
-func (s *Service) getCachedConfig() ([]byte, error) {
+// GetCachedConfig returns the cached copy of the remote configuration. It is
+// used by fetchLocal and by the HTTP service when building a support bundle.
+func (s *Service) GetCachedConfig() ([]byte, error) {
 	s.mut.RLock()
 	p := s.dataPath
 	s.mut.RUnlock()