Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v24.3.x] rpk: add rpk debug remote-bundle; collect a cluster-wide bundle #24325

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ use_repo(
"com_github_docker_go_connections",
"com_github_docker_go_units",
"com_github_fatih_color",
"com_github_google_uuid",
"com_github_hamba_avro",
"com_github_hamba_avro_v2",
"com_github_hashicorp_go_multierror",
Expand Down
2 changes: 1 addition & 1 deletion src/go/rpk/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ require (
github.com/docker/go-connections v0.5.0
github.com/docker/go-units v0.5.0
github.com/fatih/color v1.18.0
github.com/google/uuid v1.6.0
github.com/hamba/avro/v2 v2.27.0
github.com/hashicorp/go-multierror v1.1.1
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
Expand Down Expand Up @@ -94,7 +95,6 @@ require (
github.com/google/gnostic-models v0.6.9-0.20230804172637-c7be7c783f49 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
Expand Down
4 changes: 1 addition & 3 deletions src/go/rpk/pkg/adminapi/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,12 @@ import (

"github.com/kr/text"
mTerm "github.com/moby/term"

"go.uber.org/zap"

"github.com/redpanda-data/common-go/rpadmin"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/oauth"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/oauth/providers/auth0"
"github.com/spf13/afero"
"go.uber.org/zap"
)

// GenericErrorBody is the JSON decodable body that is produced by generic error
Expand Down
1 change: 1 addition & 0 deletions src/go/rpk/pkg/cli/debug/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//src/go/rpk/pkg/cli/debug/bundle",
"//src/go/rpk/pkg/cli/debug/remotebundle",
"//src/go/rpk/pkg/config",
"@com_github_spf13_afero//:afero",
"@com_github_spf13_cobra//:cobra",
Expand Down
1 change: 1 addition & 0 deletions src/go/rpk/pkg/cli/debug/bundle/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ go_library(
importpath = "github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/debug/bundle",
visibility = ["//visibility:public"],
deps = [
"//src/go/rpk/pkg/cli/debug/common",
"//src/go/rpk/pkg/config",
"//src/go/rpk/pkg/httpapi",
"//src/go/rpk/pkg/kafka",
Expand Down
59 changes: 20 additions & 39 deletions src/go/rpk/pkg/cli/debug/bundle/bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"time"

"github.com/docker/go-units"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/debug/common"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/httpapi"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/kafka"
Expand Down Expand Up @@ -59,20 +60,8 @@ func NewCommand(fs afero.Fs, p *config.Params) *cobra.Command {
var (
outFile string
uploadURL string

logsSince string
logsUntil string
logsSizeLimit string

controllerLogsSizeLimit string
namespace string
labelSelector []string
partitionFlag []string

timeout time.Duration
metricsInterval time.Duration
metricsSampleCount int
cpuProfilerWait time.Duration
timeout time.Duration
opts common.DebugBundleSharedOptions
)
cmd := &cobra.Command{
Use: "bundle",
Expand All @@ -83,11 +72,11 @@ func NewCommand(fs afero.Fs, p *config.Params) *cobra.Command {
// Redpanda queries for samples from Seastar every ~13 seconds by
// default. Setting wait_ms to anything less than 13 seconds will
// result in no samples being returned.
if cpuProfilerWait < 15*time.Second {
if opts.CPUProfilerWait < 15*time.Second {
out.Die("--cpu-profiler-wait must be higher than 15 seconds")
}

if metricsSampleCount < 2 {
if opts.MetricsSampleCount < 2 {
out.Die("--metrics-samples must be 2 or higher")
}

Expand All @@ -108,38 +97,38 @@ func NewCommand(fs afero.Fs, p *config.Params) *cobra.Command {
path, err := determineFilepath(fs, yActual, outFile, cmd.Flags().Changed(outputFlag))
out.MaybeDie(err, "unable to determine filepath %q: %v", outFile, err)

partitions, err := parsePartitionFlag(partitionFlag)
out.MaybeDie(err, "unable to parse partition flag %v: %v", partitionFlag, err)
partitions, err := parsePartitionFlag(opts.PartitionFlag)
out.MaybeDie(err, "unable to parse partition flag %v: %v", opts.PartitionFlag, err)

cl, err := kafka.NewFranzClient(fs, p)
out.MaybeDie(err, "unable to initialize kafka client: %v", err)
defer cl.Close()

logsLimit, err := units.FromHumanSize(logsSizeLimit)
logsLimit, err := units.FromHumanSize(opts.LogsSizeLimit)
out.MaybeDie(err, "unable to parse --logs-size-limit: %v", err)

controllerLogsLimit, err := units.FromHumanSize(controllerLogsSizeLimit)
controllerLogsLimit, err := units.FromHumanSize(opts.ControllerLogsSizeLimit)
out.MaybeDie(err, "unable to parse --controller-logs-size-limit: %v", err)
bp := bundleParams{
fs: fs,
p: p,
y: y,
yActual: yActual,
cl: cl,
logsSince: logsSince,
logsUntil: logsUntil,
path: path,
namespace: namespace,
logsLimitBytes: int(logsLimit),
controllerLogLimitBytes: int(controllerLogsLimit),
timeout: timeout,
metricsInterval: metricsInterval,
metricsSampleCount: metricsSampleCount,
partitions: partitions,
cpuProfilerWait: cpuProfilerWait,
namespace: opts.Namespace,
logsSince: opts.LogsSince,
logsUntil: opts.LogsUntil,
metricsInterval: opts.MetricsInterval,
metricsSampleCount: opts.MetricsSampleCount,
cpuProfilerWait: opts.CPUProfilerWait,
}
if len(labelSelector) > 0 {
labelsMap, err := labels.ConvertSelectorToLabelsMap(strings.Join(labelSelector, ","))
if len(opts.LabelSelector) > 0 {
labelsMap, err := labels.ConvertSelectorToLabelsMap(strings.Join(opts.LabelSelector, ","))
out.MaybeDie(err, "unable to parse label-selector flag: %v", err)
bp.labelSelector = labelsMap
}
Expand All @@ -166,18 +155,10 @@ func NewCommand(fs afero.Fs, p *config.Params) *cobra.Command {

f := cmd.Flags()
f.StringVarP(&outFile, outputFlag, "o", "", "The file path where the debug file will be written (default ./<timestamp>-bundle.zip)")
f.DurationVar(&timeout, "timeout", 31*time.Second, "How long to wait for child commands to execute (e.g. 30s, 1.5m)")
f.DurationVar(&metricsInterval, "metrics-interval", 10*time.Second, "Interval between metrics snapshots (e.g. 30s, 1.5m)")
f.IntVar(&metricsSampleCount, "metrics-samples", 2, "Number of metrics samples to take (at the interval of --metrics-interval). Must be >= 2")
f.StringVar(&logsSince, "logs-since", "yesterday", "Include logs dated from specified date onward; (journalctl date format: YYYY-MM-DD, 'yesterday', or 'today'). Refer to journalctl documentation for more options")
f.StringVar(&logsUntil, "logs-until", "", "Include logs older than the specified date; (journalctl date format: YYYY-MM-DD, 'yesterday', or 'today'). Refer to journalctl documentation for more options")
f.StringVar(&logsSizeLimit, "logs-size-limit", "100MiB", "Read the logs until the given size is reached (e.g. 3MB, 1GiB)")
f.StringVar(&controllerLogsSizeLimit, "controller-logs-size-limit", "132MB", "The size limit of the controller logs that can be stored in the bundle (e.g. 3MB, 1GiB)")
f.DurationVar(&timeout, "timeout", 31*time.Second, "How long to wait for child commands to execute. For example: 30s, 1.5m")
f.StringVar(&uploadURL, "upload-url", "", "If provided, where to upload the bundle in addition to creating a copy on disk")
f.StringVarP(&namespace, "namespace", "n", "redpanda", "The namespace to use to collect the resources from (k8s only)")
f.StringArrayVarP(&labelSelector, "label-selector", "l", []string{"app.kubernetes.io/name=redpanda"}, "Comma-separated label selectors to filter your resources. e.g: <label>=<value>,<label>=<value> (k8s only)")
f.StringArrayVarP(&partitionFlag, "partition", "p", nil, "Comma-separated partition IDs; when provided, rpk saves extra admin API requests for those partitions. Check help for extended usage")
f.DurationVar(&cpuProfilerWait, "cpu-profiler-wait", 30*time.Second, "For how long to collect samples for the CPU profiler (e.g. 30s, 1.5m). Must be higher than 15s")
// Debug bundle options.
opts.InstallFlags(f)

return cmd
}
Expand Down
23 changes: 5 additions & 18 deletions src/go/rpk/pkg/cli/debug/bundle/bundle_k8s_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,14 @@ import (
"strings"
"time"

"github.com/redpanda-data/common-go/rpadmin"

authorizationv1 "k8s.io/api/authorization/v1"

"github.com/hashicorp/go-multierror"
"github.com/redpanda-data/common-go/rpadmin"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/adminapi"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/debug/common"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
"github.com/spf13/afero"
"go.uber.org/zap"
authorizationv1 "k8s.io/api/authorization/v1"
k8score "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
Expand Down Expand Up @@ -354,7 +353,7 @@ func saveSingleAdminAPICalls(ctx context.Context, ps *stepParams, fs afero.Fs, p
continue
}

aName := sanitizeName(a)
aName := common.SanitizeName(a)
r := []func() error{
func() error {
return requestAndSave(ctx, ps, fmt.Sprintf("admin/node_config_%v.json", aName), cl.RawNodeConfig)
Expand Down Expand Up @@ -427,7 +426,7 @@ func saveMetricsAPICalls(ctx context.Context, ps *stepParams, fs afero.Fs, p *co
}

endpoints := map[string]func(context.Context) ([]byte, error){"metrics": cl.PrometheusMetrics, "public_metrics": cl.PublicMetrics}
aName := sanitizeName(a)
aName := common.SanitizeName(a)
for endpointName, endpoint := range endpoints {
endpointPoller := func() error {
err := requestAndSave(ctx, ps, fmt.Sprintf("metrics/%v/t0_%s.txt", aName, endpointName), endpoint)
Expand Down Expand Up @@ -639,18 +638,6 @@ func parseJournalTime(str string, now time.Time) (time.Time, error) {
}
}

// sanitizeName replaces each of the following characters with "-": "<", ">",
// ":", `"`, "/", `\`, "|", "?", "*". This is to avoid having forbidden names
// in Windows environments.
//
// Every other byte of name is returned unchanged, so the result always has
// the same length as the input. An empty name yields an empty string.
func sanitizeName(name string) string {
	const forbidden = `<>:"/\|?*`
	// Fast path: nothing to replace, so avoid allocating a copy.
	if !strings.ContainsAny(name, forbidden) {
		return name
	}
	// All forbidden characters are single-byte ASCII, so a byte-wise pass is
	// safe: bytes belonging to multi-byte UTF-8 sequences never match.
	b := []byte(name)
	for i, c := range b {
		if strings.IndexByte(forbidden, c) >= 0 {
			b[i] = '-'
		}
	}
	return string(b)
}

func saveExtraFuncs(ctx context.Context, ps *stepParams, cl *rpadmin.AdminAPI, partitionFilters []topicPartitionFilter) (funcs []func() error) {
for _, tpf := range partitionFilters {
tpf := tpf
Expand Down
3 changes: 2 additions & 1 deletion src/go/rpk/pkg/cli/debug/bundle/bundle_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/beevik/ntp"
"github.com/docker/go-units"
"github.com/hashicorp/go-multierror"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/debug/common"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
osutil "github.com/redpanda-data/redpanda/src/go/rpk/pkg/os"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/out"
Expand All @@ -61,7 +62,7 @@ func determineFilepath(fs afero.Fs, rp *config.RedpandaYaml, path string, isFlag
if path == "" {
timestamp := time.Now().Unix()
if rp.Redpanda.AdvertisedRPCAPI != nil {
path = fmt.Sprintf("%v-%d-bundle.zip", sanitizeName(rp.Redpanda.AdvertisedRPCAPI.Address), timestamp)
path = fmt.Sprintf("%v-%d-bundle.zip", common.SanitizeName(rp.Redpanda.AdvertisedRPCAPI.Address), timestamp)
} else {
path = fmt.Sprintf("%d-bundle.zip", timestamp)
}
Expand Down
16 changes: 16 additions & 0 deletions src/go/rpk/pkg/cli/debug/common/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
load("@rules_go//go:def.bzl", "go_library", "go_test")

# Go library for the package at src/go/rpk/pkg/cli/debug/common, built from
# common.go. Its only external dependency is spf13/pflag.
go_library(
name = "common",
srcs = ["common.go"],
importpath = "github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/debug/common",
visibility = ["//visibility:public"],
deps = ["@com_github_spf13_pflag//:pflag"],
)

# Unit tests for the :common library, built from common_test.go. The test
# embeds :common and additionally depends on testify's require package.
go_test(
name = "common_test",
srcs = ["common_test.go"],
embed = [":common"],
deps = ["@com_github_stretchr_testify//require"],
)
50 changes: 50 additions & 0 deletions src/go/rpk/pkg/cli/debug/common/common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package common

import (
"strings"
"time"

"github.com/spf13/pflag"
)

// DebugBundleSharedOptions are the options of a debug bundle that can be
// shared between a normal and a remote bundle. The fields are bound to
// command-line flags by InstallFlags.
type DebugBundleSharedOptions struct {
// CPUProfilerWait is how long to collect samples for the CPU profiler
// (--cpu-profiler-wait).
CPUProfilerWait time.Duration
// LogsSince includes logs dated from this date onward, in journalctl date
// format (--logs-since).
LogsSince string
// LogsUntil includes logs older than this date, in journalctl date format
// (--logs-until).
LogsUntil string
// MetricsInterval is the interval between metrics snapshots
// (--metrics-interval).
MetricsInterval time.Duration
// MetricsSampleCount is the number of metrics samples to take
// (--metrics-samples).
MetricsSampleCount int
// PartitionFlag holds the raw, unparsed values passed to --partition.
PartitionFlag []string
// Namespace is the namespace to collect resources from; K8s only
// (--namespace).
Namespace string
// LabelSelector holds the label selectors used to filter resources; K8s
// only (--label-selector).
LabelSelector []string
// LogsSizeLimit is the human-readable size limit for reading logs, for
// example "100MiB" (--logs-size-limit). It is stored unparsed.
LogsSizeLimit string
// ControllerLogsSizeLimit is the human-readable size limit of the
// controller logs stored in the bundle, for example "132MB"
// (--controller-logs-size-limit). It is stored unparsed.
ControllerLogsSizeLimit string
}

// InstallFlags installs the debug bundle flags on f, binding each flag to the
// matching field of o so that the flags fill the debug bundle options.
//
// This method only registers flags and their defaults. It does not validate
// the values; constraints mentioned in the help text (such as the minimum
// --cpu-profiler-wait or --metrics-samples) must be enforced by the caller.
func (o *DebugBundleSharedOptions) InstallFlags(f *pflag.FlagSet) {
f.StringVar(&o.ControllerLogsSizeLimit, "controller-logs-size-limit", "132MB", "The size limit of the controller logs that can be stored in the bundle. For example: 3MB, 1GiB")
f.DurationVar(&o.CPUProfilerWait, "cpu-profiler-wait", 30*time.Second, "How long to collect samples for the CPU profiler. For example: 30s, 1.5m. Must be higher than 15s")
f.StringVar(&o.LogsSizeLimit, "logs-size-limit", "100MiB", "Read the logs until the given size is reached. For example: 3MB, 1GiB")
f.StringVar(&o.LogsSince, "logs-since", "yesterday", "Include logs dated from specified date onward; (journalctl date format: YYYY-MM-DD, 'yesterday', or 'today'). See the journalctl documentation for more options")
f.StringVar(&o.LogsUntil, "logs-until", "", "Include logs older than the specified date; (journalctl date format: YYYY-MM-DD, 'yesterday', or 'today'). See the journalctl documentation for more options")
f.DurationVar(&o.MetricsInterval, "metrics-interval", 10*time.Second, "Interval between metrics snapshots. For example: 30s, 1.5m")
f.IntVar(&o.MetricsSampleCount, "metrics-samples", 2, "Number of metrics samples to take (at the interval of --metrics-interval). Must be >= 2")
f.StringArrayVarP(&o.PartitionFlag, "partition", "p", nil, "Comma-separated partition IDs. When provided, rpk saves extra Admin API requests for those partitions. See the help for extended usage")
// The remaining flags only apply when collecting a bundle in Kubernetes.
f.StringVarP(&o.Namespace, "namespace", "n", "redpanda", "The namespace to use to collect the resources from (K8s only)")
f.StringArrayVarP(&o.LabelSelector, "label-selector", "l", []string{"app.kubernetes.io/name=redpanda"}, "Comma-separated label selectors to filter your resources. For example: <label>=<value>,<label>=<value> (K8s only)")
}

// SanitizeName replaces each of the following characters with "-": "<", ">",
// ":", `"`, "/", `\`, "|", "?", "*". This is to avoid having forbidden names
// in Windows environments.
//
// Every other byte of name is returned unchanged, so the result always has
// the same length as the input. An empty name yields an empty string.
func SanitizeName(name string) string {
	const forbidden = `<>:"/\|?*`
	// Fast path: nothing to replace, so avoid allocating a copy.
	if !strings.ContainsAny(name, forbidden) {
		return name
	}
	// All forbidden characters are single-byte ASCII, so a byte-wise pass is
	// safe: bytes belonging to multi-byte UTF-8 sequences never match.
	b := []byte(name)
	for i, c := range b {
		if strings.IndexByte(forbidden, c) >= 0 {
			b[i] = '-'
		}
	}
	return string(b)
}
58 changes: 58 additions & 0 deletions src/go/rpk/pkg/cli/debug/common/common_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package common

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestSanitizeName(t *testing.T) {
tests := []struct {
name string
input string
exp string
}{
{
name: "No forbidden characters",
input: "validName",
exp: "validName",
},
{
name: "Single forbidden character",
input: "invalid:8083",
exp: "invalid-8083",
},
{
name: "Multiple forbidden characters",
input: "name/with|forbidden?chars",
exp: "name-with-forbidden-chars",
},
{
name: "Only forbidden characters",
input: `<>:\"/\\|?*`,
exp: "-----------",
},
{
name: "Empty string",
input: "",
exp: "",
},
{
name: "No change with already sanitized name",
input: "cleanName123",
exp: "cleanName123",
},
{
name: "Name with numbers and special characters",
input: "name123|test*<>",
exp: "name123-test---",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actual := SanitizeName(tt.input)
require.Equal(t, tt.exp, actual)
})
}
}
2 changes: 2 additions & 0 deletions src/go/rpk/pkg/cli/debug/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package debug

import (
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/debug/bundle"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/debug/remotebundle"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
"github.com/spf13/afero"
"github.com/spf13/cobra"
Expand All @@ -25,6 +26,7 @@ func NewCommand(fs afero.Fs, p *config.Params) *cobra.Command {
cmd.AddCommand(
bundle.NewCommand(fs, p),
NewInfoCommand(),
remotebundle.NewCommand(fs, p),
)

return cmd
Expand Down
Loading