Skip to content

Commit

Permalink
Merge pull request #24778 from r-vasquez/save-crash-bundle
Browse files Browse the repository at this point in the history
[UX-66] rpk: save crash information in rpk debug bundle
  • Loading branch information
r-vasquez authored Jan 14, 2025
2 parents 055e1e4 + 4d54c8a commit 964cc35
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/go/rpk/pkg/cli/debug/bundle/bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,9 @@ COMMON FILES
- Broker metrics: The broker's Prometheus metrics, fetched through its
admin API (/metrics and /public_metrics).
- Crash information: Both startup_log and crash_reports will be collected if
present in the configured data directory.
BARE-METAL
- Kernel: The kernel logs ring buffer (syslog) and parameters (sysctl).
Expand Down
2 changes: 2 additions & 0 deletions src/go/rpk/pkg/cli/debug/bundle/bundle_k8s_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ func executeK8SBundle(ctx context.Context, bp bundleParams) error {
saveCmdLine(ps),
saveConfig(ps, bp.yActual),
saveControllerLogDir(ps, bp.y, bp.controllerLogLimitBytes),
saveCrashReports(ps, bp.y),
saveDataDirStructure(ps, bp.y),
saveDiskUsage(ctx, ps, bp.y),
saveInterrupts(ps),
Expand All @@ -79,6 +80,7 @@ func executeK8SBundle(ctx context.Context, bp bundleParams) error {
saveMountedFilesystems(ps),
saveNTPDrift(ps),
saveResourceUsageData(ps, bp.y),
saveStartupLog(ps, bp.y),
saveSlabInfo(ps),
saveUname(ctx, ps),
}
Expand Down
48 changes: 48 additions & 0 deletions src/go/rpk/pkg/cli/debug/bundle/bundle_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ func executeBundle(ctx context.Context, bp bundleParams) error {
saveCmdLine(ps),
saveConfig(ps, bp.y),
saveControllerLogDir(ps, bp.y, bp.controllerLogLimitBytes),
saveCrashReports(ps, bp.y),
saveDNSData(ctx, ps),
saveDataDirStructure(ps, bp.y),
saveDiskUsage(ctx, ps, bp.y),
Expand All @@ -152,6 +153,7 @@ func executeBundle(ctx context.Context, bp bundleParams) error {
saveResourceUsageData(ps, bp.y),
saveSingleAdminAPICalls(ctx, ps, bp.fs, bp.p, addrs, bp.cpuProfilerWait),
saveMetricsAPICalls(ctx, ps, bp.fs, bp.p, addrs, bp.metricsInterval, bp.metricsSampleCount),
saveStartupLog(ps, bp.y),
saveSlabInfo(ps),
saveSocketData(ctx, ps),
saveSysctl(ctx, ps),
Expand Down Expand Up @@ -1021,6 +1023,52 @@ func saveControllerLogDir(ps *stepParams, y *config.RedpandaYaml, logLimitBytes
}
}

// saveStartupLog returns a step that copies the "startup_log" file found in
// the configured Redpanda data directory into the bundle under the name
// "startup_log".
//
// The step returns an error when the data directory is not set in y, when the
// file's existence cannot be determined, when the file is absent (reported
// with a "skipping startup_log collection" message), when it cannot be read,
// or when writing it to the zip fails.
func saveStartupLog(ps *stepParams, y *config.RedpandaYaml) step {
	return func() error {
		dataDir := y.Redpanda.Directory
		if dataDir == "" {
			return fmt.Errorf("failed to save startup_log: 'redpanda.data_directory' is empty on the provided configuration file")
		}

		logPath := filepath.Join(dataDir, "startup_log")
		found, err := afero.Exists(ps.fs, logPath)
		switch {
		case err != nil:
			return fmt.Errorf("failed to save startup_log: unable to check existence of startup_log: %v", err)
		case !found:
			return fmt.Errorf("skipping startup_log collection: unable to find file %q", logPath)
		}

		data, err := afero.ReadFile(ps.fs, logPath)
		if err != nil {
			return fmt.Errorf("failed to save startup_log: unable to read startup_log: %v", err)
		}
		if err := writeFileToZip(ps, "startup_log", data); err != nil {
			return fmt.Errorf("failed to save startup_log: %v", err)
		}
		return nil
	}
}

// saveCrashReports returns a step that copies the "crash_reports" directory
// found in the configured Redpanda data directory into the bundle under
// "crash_reports".
//
// The step returns an error when the data directory is not set in y, when the
// directory's existence cannot be determined (the underlying cause is included
// in the message), when the directory is absent (reported with a
// "skipping crash_reports collection" message), or when writing it to the zip
// fails.
func saveCrashReports(ps *stepParams, y *config.RedpandaYaml) step {
	return func() error {
		if y.Redpanda.Directory == "" {
			return fmt.Errorf("failed to save crash_reports: 'redpanda.data_directory' is empty on the provided configuration file")
		}
		crashReportDir := filepath.Join(y.Redpanda.Directory, "crash_reports")
		exists, err := afero.Exists(ps.fs, crashReportDir)
		if err != nil {
			// Surface the underlying cause (e.g. a permission error) instead
			// of dropping it, matching how saveStartupLog reports this case.
			return fmt.Errorf("failed to save crash_reports: unable to check existence of the crash_reports directory: %v", err)
		}
		if !exists {
			return fmt.Errorf("skipping crash_reports collection: directory %q does not exists", crashReportDir)
		}
		if err := writeDirToZip(ps, crashReportDir, "crash_reports", nil); err != nil {
			return fmt.Errorf("failed to save crash_reports: %v", err)
		}
		return nil
	}
}

func walkDir(root string, files map[string]*fileInfo) error {
return filepath.WalkDir(
root,
Expand Down
7 changes: 7 additions & 0 deletions tests/rptest/tests/rpk_debug_bundle_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ def test_debug_bundle(self):
continue
if re.match(r".* error querying .*\.ntp\..* i\/o timeout", l):
self.logger.error(f"Non-fatal transitory NTP error: {l}")
if re.match(
r".*skipping\s+(startup_log collection|crash_reports collection):\s*(unable to find file|directory).*",
l):
# This test runs in a development container, so it will not have a
# startup_log, and we don't expect a crash_reports dir to be in the
# data_directory because the container is new.
continue
else:
self.logger.error(f"Bad output line: {l}")
filtered_errors.append(l)
Expand Down

0 comments on commit 964cc35

Please sign in to comment.