Skip to content

Commit

Permalink
Capture configuration files & runtime config in support bundle
Browse files Browse the repository at this point in the history
  • Loading branch information
dehaansa committed Nov 14, 2024
1 parent 9b73e94 commit 828e652
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 59 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ Main (unreleased)

- Add `otelcol.exporter.splunkhec`, which allows exporting OTel data to Splunk HEC (@adlotsof)

### Enhancements

- Add all raw configuration files & a copy of the latest remote config to the support bundle (@dehaansa)

### Bugfixes

- Fixed an issue in the `prometheus.exporter.postgres` component that would leak goroutines when the target was not reachable (@dehaansa)
Expand Down
5 changes: 4 additions & 1 deletion docs/sources/troubleshoot/support_bundle.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,11 @@ A support bundle contains the following data:
* `alloy-runtime-flags.txt` contains the values of the runtime flags available in {{< param "PRODUCT_NAME" >}}.
* The `pprof/` directory contains Go runtime profiling data (CPU, heap, goroutine, mutex, block profiles) as exported by the pprof package.
Refer to the [profile][profile] documentation for more details on how to use this information.
* The `sources/` directory contains copies of the local configuration files being used to configure {{< param "PRODUCT_NAME" >}}.
* `sources/remote-config/remote.alloy` contains a copy of the last received [remote configuration][remotecfg].

[profile]: ../profile
[components]: ../../get-started/components/
[alloy-repo]: https://github.com/grafana/alloy/issues
[backward-compatibility]: ../../introduction/backward-compatibility
[backward-compatibility]: ../../introduction/backward-compatibility
[remotecfg]: ../../reference/config-blocks/remotecfg/
1 change: 1 addition & 0 deletions internal/alloycli/cmd_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ func (fr *alloyRun) Run(cmd *cobra.Command, configPath string) error {
if err != nil {
return nil, fmt.Errorf("reading config path %q: %w", configPath, err)
}
httpService.SetSources(alloySource.RawConfigs())
if err := f.LoadSource(alloySource, nil, configPath); err != nil {
return alloySource, fmt.Errorf("error during the initial load: %w", err)
}
Expand Down
114 changes: 70 additions & 44 deletions internal/service/http/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ type Service struct {
// Used to enforce single-flight requests to supportHandler
supportBundleMut sync.Mutex

// Track the raw config for use with the support bundle
sources map[string][]byte

// publicLis and tcpLis are used to lazily enable TLS, since TLS is
// optionally configurable at runtime.
//
Expand Down Expand Up @@ -225,7 +228,7 @@ func (s *Service) Run(ctx context.Context, host service.Host) error {
}

// Wire in support bundle generator
r.HandleFunc("/-/support", s.supportHandler).Methods("GET")
r.HandleFunc("/-/support", s.generateSupportBundleHandler(host)).Methods("GET")

// Wire custom service handlers for services which depend on the http
// service.
Expand Down Expand Up @@ -259,60 +262,75 @@ func (s *Service) Run(ctx context.Context, host service.Host) error {
return nil
}

func (s *Service) supportHandler(rw http.ResponseWriter, r *http.Request) {
s.supportBundleMut.Lock()
defer s.supportBundleMut.Unlock()
func (s *Service) generateSupportBundleHandler(host service.Host) func(rw http.ResponseWriter, r *http.Request) {
return func(rw http.ResponseWriter, r *http.Request) {
s.supportBundleMut.Lock()
defer s.supportBundleMut.Unlock()

// TODO(dehaansa) remove this check once the support bundle is generally available
if !s.opts.MinStability.Permits(featuregate.StabilityPublicPreview) {
rw.WriteHeader(http.StatusForbidden)
_, _ = rw.Write([]byte("support bundle generation is only available in public preview. Use" +
" --stability.level command-line flag to enable public-preview features"))
return
}
// TODO(dehaansa) remove this check once the support bundle is generally available
if !s.opts.MinStability.Permits(featuregate.StabilityPublicPreview) {
rw.WriteHeader(http.StatusForbidden)
_, _ = rw.Write([]byte("support bundle generation is only available in public preview. Use" +
" --stability.level command-line flag to enable public-preview features"))
return
}

if s.opts.BundleContext.DisableSupportBundle {
rw.WriteHeader(http.StatusForbidden)
_, _ = rw.Write([]byte("support bundle generation is disabled; it can be re-enabled by removing the --disable-support-bundle flag"))
return
}
if s.opts.BundleContext.DisableSupportBundle {
rw.WriteHeader(http.StatusForbidden)
_, _ = rw.Write([]byte("support bundle generation is disabled; it can be re-enabled by removing the --disable-support-bundle flag"))
return
}

duration := getServerWriteTimeout(r)
if r.URL.Query().Has("duration") {
d, err := strconv.Atoi(r.URL.Query().Get("duration"))
if err != nil {
http.Error(rw, fmt.Sprintf("duration value (in seconds) should be a positive integer: %s", err), http.StatusBadRequest)
return
}
if d < 1 {
http.Error(rw, "duration value (in seconds) should be larger than 1", http.StatusBadRequest)
return
}
if float64(d) > duration.Seconds() {
http.Error(rw, "duration value exceeds the server's write timeout", http.StatusBadRequest)
return
}
duration = time.Duration(d) * time.Second
}
ctx, cancel := context.WithTimeout(context.Background(), duration)
defer cancel()

duration := getServerWriteTimeout(r)
if r.URL.Query().Has("duration") {
d, err := strconv.Atoi(r.URL.Query().Get("duration"))
var logsBuffer bytes.Buffer
syncBuff := log.NewSyncWriter(&logsBuffer)
s.globalLogger.SetTemporaryWriter(syncBuff)
defer func() {
s.globalLogger.RemoveTemporaryWriter()
}()

cachedConfig, err := remoteCfgCachedConfig(host)
if err != nil {
http.Error(rw, fmt.Sprintf("duration value (in seconds) should be a positive integer: %s", err), http.StatusBadRequest)
return
level.Debug(s.log).Log("msg", "failed to get cached remote config", "err", err)
}
if d < 1 {
http.Error(rw, "duration value (in seconds) should be larger than 1", http.StatusBadRequest)

bundle, err := ExportSupportBundle(ctx, s.opts.BundleContext.RuntimeFlags, s.opts.HTTPListenAddr, s.sources, cachedConfig, s.Data().(Data).DialFunc)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
if float64(d) > duration.Seconds() {
http.Error(rw, "duration value exceeds the server's write timeout", http.StatusBadRequest)
if err := ServeSupportBundle(rw, bundle, &logsBuffer); err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
duration = time.Duration(d) * time.Second
}
ctx, cancel := context.WithTimeout(context.Background(), duration)
defer cancel()

var logsBuffer bytes.Buffer
syncBuff := log.NewSyncWriter(&logsBuffer)
s.globalLogger.SetTemporaryWriter(syncBuff)
defer func() {
s.globalLogger.RemoveTemporaryWriter()
}()
}

bundle, err := ExportSupportBundle(ctx, s.opts.BundleContext.RuntimeFlags, s.opts.HTTPListenAddr, s.Data().(Data).DialFunc)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
if err := ServeSupportBundle(rw, bundle, &logsBuffer); err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
// SetSources records the raw configuration sources that are handed to the
// support bundle exporter on the next bundle request.
//
// The map is stored as given, without copying. The assignment is guarded by
// supportBundleMut, the same mutex the support bundle handler holds for the
// whole of a request, so this call waits for any in-flight bundle generation
// to finish before replacing the sources.
func (s *Service) SetSources(sources map[string][]byte) {
	s.supportBundleMut.Lock()
	s.sources = sources
	s.supportBundleMut.Unlock()
}

func getServerWriteTimeout(r *http.Request) time.Duration {
Expand Down Expand Up @@ -582,6 +600,14 @@ func (lis *lazyListener) Addr() net.Addr {
return lis.inner.Addr()
}

// remoteCfgCachedConfig looks up the service registered on host under
// remotecfg.ServiceName and returns the result of its GetCachedConfig method.
//
// It returns an error when no such service is registered, when the registered
// service is not a *remotecfg.Service, or when GetCachedConfig itself fails.
func remoteCfgCachedConfig(host service.Host) ([]byte, error) {
	svc, ok := host.GetService(remotecfg.ServiceName)
	if !ok {
		return nil, fmt.Errorf("failed to get the remotecfg service")
	}
	// Use the checked form of the type assertion so that a mismatched service
	// type is reported through the error return instead of panicking the
	// caller.
	rc, ok := svc.(*remotecfg.Service)
	if !ok {
		return nil, fmt.Errorf("remotecfg service has unexpected type %T", svc)
	}
	return rc.GetCachedConfig()
}

func remoteCfgHostProvider(host service.Host) func() (service.Host, error) {
return func() (service.Host, error) {
svc, ok := host.GetService(remotecfg.ServiceName)
Expand Down
33 changes: 21 additions & 12 deletions internal/service/http/supportbundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ type Bundle struct {
components []byte
peers []byte
runtimeFlags []byte
sources map[string][]byte
remoteCfg []byte
heapBuf *bytes.Buffer
goroutineBuf *bytes.Buffer
blockBuf *bytes.Buffer
Expand All @@ -49,7 +51,7 @@ type Metadata struct {
}

// ExportSupportBundle gathers the information required for the support bundle.
func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress string, dialContext server.DialContextFunc) (*Bundle, error) {
func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress string, sources map[string][]byte, remoteCfg []byte, dialContext server.DialContextFunc) (*Bundle, error) {
// The block profiler is disabled by default. Temporarily enable recording
// of all blocking events. Also, temporarily record all mutex contentions,
// and defer restoring of earlier mutex profiling fraction.
Expand Down Expand Up @@ -136,6 +138,8 @@ func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress
alloyMetrics: alloyMetrics,
components: components,
peers: peers,
sources: sources,
remoteCfg: remoteCfg,
runtimeFlags: []byte(strings.Join(runtimeFlags, "\n")),
heapBuf: &heapBuf,
goroutineBuf: &goroutineBuf,
Expand Down Expand Up @@ -169,17 +173,22 @@ func ServeSupportBundle(rw http.ResponseWriter, b *Bundle, logsBuf *bytes.Buffer
rw.Header().Set("Content-Disposition", "attachment; filename=\"alloy-support-bundle.zip\"")

zipStructure := map[string][]byte{
"alloy-metadata.yaml": b.meta,
"alloy-components.json": b.components,
"alloy-peers.json": b.peers,
"alloy-metrics.txt": b.alloyMetrics,
"alloy-runtime-flags.txt": b.runtimeFlags,
"alloy-logs.txt": logsBuf.Bytes(),
"pprof/cpu.pprof": b.cpuBuf.Bytes(),
"pprof/heap.pprof": b.heapBuf.Bytes(),
"pprof/goroutine.pprof": b.goroutineBuf.Bytes(),
"pprof/mutex.pprof": b.mutexBuf.Bytes(),
"pprof/block.pprof": b.blockBuf.Bytes(),
"alloy-metadata.yaml": b.meta,
"alloy-components.json": b.components,
"alloy-peers.json": b.peers,
"alloy-metrics.txt": b.alloyMetrics,
"alloy-runtime-flags.txt": b.runtimeFlags,
"alloy-logs.txt": logsBuf.Bytes(),
"sources/remote-config/remote.alloy": b.remoteCfg,
"pprof/cpu.pprof": b.cpuBuf.Bytes(),
"pprof/heap.pprof": b.heapBuf.Bytes(),
"pprof/goroutine.pprof": b.goroutineBuf.Bytes(),
"pprof/mutex.pprof": b.mutexBuf.Bytes(),
"pprof/block.pprof": b.blockBuf.Bytes(),
}

for p, s := range b.sources {
zipStructure[filepath.Join("sources", filepath.Base(p))] = s
}

for fn, b := range zipStructure {
Expand Down
4 changes: 2 additions & 2 deletions internal/service/remotecfg/remotecfg.go
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ func (s *Service) fetchRemote() error {
}

func (s *Service) fetchLocal() {
b, err := s.getCachedConfig()
b, err := s.GetCachedConfig()
if err != nil {
level.Error(s.opts.Logger).Log("msg", "failed to read from cache", "err", err)
return
Expand Down Expand Up @@ -440,7 +440,7 @@ func (s *Service) getAPIConfig() ([]byte, error) {
return []byte(gcr.Msg.GetContent()), nil
}

func (s *Service) getCachedConfig() ([]byte, error) {
func (s *Service) GetCachedConfig() ([]byte, error) {
s.mut.RLock()
p := s.dataPath
s.mut.RUnlock()
Expand Down

0 comments on commit 828e652

Please sign in to comment.