Skip to content

Commit

Permalink
rpk: add rpk debug remote bundle
Browse files Browse the repository at this point in the history
Fixes DEVEX-44

This commit introduces the rpk debug remote bundle
command, which allows the user to request a debug
bundle using the Admin API.
  • Loading branch information
r-vasquez committed Nov 22, 2024
1 parent 3004b8f commit b88a857
Show file tree
Hide file tree
Showing 17 changed files with 1,104 additions and 89 deletions.
1 change: 1 addition & 0 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ use_repo(
"com_github_docker_go_connections",
"com_github_docker_go_units",
"com_github_fatih_color",
"com_github_google_uuid",
"com_github_hamba_avro",
"com_github_hamba_avro_v2",
"com_github_hashicorp_go_multierror",
Expand Down
2 changes: 1 addition & 1 deletion src/go/rpk/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ require (
github.com/docker/go-connections v0.5.0
github.com/docker/go-units v0.5.0
github.com/fatih/color v1.18.0
github.com/google/uuid v1.6.0
github.com/hamba/avro/v2 v2.27.0
github.com/hashicorp/go-multierror v1.1.1
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
Expand Down Expand Up @@ -94,7 +95,6 @@ require (
github.com/google/gnostic-models v0.6.9-0.20230804172637-c7be7c783f49 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
Expand Down
1 change: 1 addition & 0 deletions src/go/rpk/pkg/cli/debug/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//src/go/rpk/pkg/cli/debug/bundle",
"//src/go/rpk/pkg/cli/debug/remotebundle",
"//src/go/rpk/pkg/config",
"@com_github_spf13_afero//:afero",
"@com_github_spf13_cobra//:cobra",
Expand Down
2 changes: 1 addition & 1 deletion src/go/rpk/pkg/cli/debug/bundle/bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ func NewCommand(fs afero.Fs, p *config.Params) *cobra.Command {

f := cmd.Flags()
f.StringVarP(&outFile, outputFlag, "o", "", "The file path where the debug file will be written (default ./<timestamp>-bundle.zip)")
f.DurationVar(&timeout, "timeout", 31*time.Second, "How long to wait for child commands to execute (e.g. 30s, 1.5m)")
f.DurationVar(&timeout, "timeout", 31*time.Second, "How long to wait for child commands to execute. For example: 30s, 1.5m")
f.StringVar(&uploadURL, "upload-url", "", "If provided, where to upload the bundle in addition to creating a copy on disk")
// Debug bundle options.
opts.InstallFlags(f)
Expand Down
9 changes: 8 additions & 1 deletion src/go/rpk/pkg/cli/debug/common/BUILD
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
load("@rules_go//go:def.bzl", "go_library")
load("@rules_go//go:def.bzl", "go_library", "go_test")

go_library(
name = "common",
Expand All @@ -7,3 +7,10 @@ go_library(
visibility = ["//visibility:public"],
deps = ["@com_github_spf13_pflag//:pflag"],
)

go_test(
name = "common_test",
srcs = ["common_test.go"],
embed = [":common"],
deps = ["@com_github_stretchr_testify//require"],
)
18 changes: 9 additions & 9 deletions src/go/rpk/pkg/cli/debug/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,16 @@ type DebugBundleSharedOptions struct {
// InstallFlags installs the debug bundle flags that fills the debug bundle
// options.
func (o *DebugBundleSharedOptions) InstallFlags(f *pflag.FlagSet) {
f.StringVar(&o.ControllerLogsSizeLimit, "controller-logs-size-limit", "132MB", "The size limit of the controller logs that can be stored in the bundle (e.g. 3MB, 1GiB)")
f.DurationVar(&o.CPUProfilerWait, "cpu-profiler-wait", 30*time.Second, "For how long to collect samples for the CPU profiler (e.g. 30s, 1.5m). Must be higher than 15s")
f.StringVar(&o.LogsSizeLimit, "logs-size-limit", "100MiB", "Read the logs until the given size is reached (e.g. 3MB, 1GiB)")
f.StringVar(&o.LogsSince, "logs-since", "yesterday", "Include logs dated from specified date onward; (journalctl date format: YYYY-MM-DD, 'yesterday', or 'today'). Refer to journalctl documentation for more options")
f.StringVar(&o.LogsUntil, "logs-until", "", "Include logs older than the specified date; (journalctl date format: YYYY-MM-DD, 'yesterday', or 'today'). Refer to journalctl documentation for more options")
f.DurationVar(&o.MetricsInterval, "metrics-interval", 10*time.Second, "Interval between metrics snapshots (e.g. 30s, 1.5m)")
f.StringVar(&o.ControllerLogsSizeLimit, "controller-logs-size-limit", "132MB", "The size limit of the controller logs that can be stored in the bundle. For example: 3MB, 1GiB")
f.DurationVar(&o.CPUProfilerWait, "cpu-profiler-wait", 30*time.Second, "How long to collect samples for the CPU profiler. For example: 30s, 1.5m. Must be higher than 15s")
f.StringVar(&o.LogsSizeLimit, "logs-size-limit", "100MiB", "Read the logs until the given size is reached. For example: 3MB, 1GiB")
f.StringVar(&o.LogsSince, "logs-since", "yesterday", "Include logs dated from specified date onward; (journalctl date format: YYYY-MM-DD, 'yesterday', or 'today'). See the journalctl documentation for more options")
f.StringVar(&o.LogsUntil, "logs-until", "", "Include logs older than the specified date; (journalctl date format: YYYY-MM-DD, 'yesterday', or 'today'). See the journalctl documentation for more options")
f.DurationVar(&o.MetricsInterval, "metrics-interval", 10*time.Second, "Interval between metrics snapshots. For example: 30s, 1.5m")
f.IntVar(&o.MetricsSampleCount, "metrics-samples", 2, "Number of metrics samples to take (at the interval of --metrics-interval). Must be >= 2")
f.StringArrayVarP(&o.PartitionFlag, "partition", "p", nil, "Comma-separated partition IDs; when provided, rpk saves extra admin API requests for those partitions. Check help for extended usage")
f.StringVarP(&o.Namespace, "namespace", "n", "redpanda", "The namespace to use to collect the resources from (k8s only)")
f.StringArrayVarP(&o.LabelSelector, "label-selector", "l", []string{"app.kubernetes.io/name=redpanda"}, "Comma-separated label selectors to filter your resources. e.g: <label>=<value>,<label>=<value> (k8s only)")
f.StringArrayVarP(&o.PartitionFlag, "partition", "p", nil, "Comma-separated partition IDs. When provided, rpk saves extra Admin API requests for those partitions. See the help for extended usage")
f.StringVarP(&o.Namespace, "namespace", "n", "redpanda", "The namespace to use to collect the resources from (K8s only)")
f.StringArrayVarP(&o.LabelSelector, "label-selector", "l", []string{"app.kubernetes.io/name=redpanda"}, "Comma-separated label selectors to filter your resources. For example: <label>=<value>,<label>=<value> (K8s only)")
}

// SanitizeName replace any of the following characters with "-": "<", ">", ":",
Expand Down
58 changes: 58 additions & 0 deletions src/go/rpk/pkg/cli/debug/common/common_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package common

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestSanitizeName(t *testing.T) {
tests := []struct {
name string
input string
exp string
}{
{
name: "No forbidden characters",
input: "validName",
exp: "validName",
},
{
name: "Single forbidden character",
input: "invalid:8083",
exp: "invalid-8083",
},
{
name: "Multiple forbidden characters",
input: "name/with|forbidden?chars",
exp: "name-with-forbidden-chars",
},
{
name: "Only forbidden characters",
input: `<>:\"/\\|?*`,
exp: "-----------",
},
{
name: "Empty string",
input: "",
exp: "",
},
{
name: "No change with already sanitized name",
input: "cleanName123",
exp: "cleanName123",
},
{
name: "Name with numbers and special characters",
input: "name123|test*<>",
exp: "name123-test---",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actual := SanitizeName(tt.input)
require.Equal(t, tt.exp, actual)
})
}
}
2 changes: 2 additions & 0 deletions src/go/rpk/pkg/cli/debug/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package debug

import (
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/debug/bundle"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/debug/remotebundle"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
"github.com/spf13/afero"
"github.com/spf13/cobra"
Expand All @@ -25,6 +26,7 @@ func NewCommand(fs afero.Fs, p *config.Params) *cobra.Command {
cmd.AddCommand(
bundle.NewCommand(fs, p),
NewInfoCommand(),
remotebundle.NewCommand(fs, p),
)

return cmd
Expand Down
27 changes: 27 additions & 0 deletions src/go/rpk/pkg/cli/debug/remotebundle/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
load("@rules_go//go:def.bzl", "go_library")

go_library(
name = "remotebundle",
srcs = [
"cancel.go",
"download.go",
"remote.go",
"start.go",
"status.go",
],
importpath = "github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/debug/remotebundle",
visibility = ["//visibility:public"],
deps = [
"//src/go/rpk/pkg/adminapi",
"//src/go/rpk/pkg/cli/debug/common",
"//src/go/rpk/pkg/config",
"//src/go/rpk/pkg/out",
"@com_github_docker_go_units//:go-units",
"@com_github_google_uuid//:uuid",
"@com_github_redpanda_data_common_go_rpadmin//:rpadmin",
"@com_github_spf13_afero//:afero",
"@com_github_spf13_cobra//:cobra",
"@io_k8s_apimachinery//pkg/labels",
"@org_golang_x_sync//errgroup",
],
)
121 changes: 121 additions & 0 deletions src/go/rpk/pkg/cli/debug/remotebundle/cancel.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Copyright 2024 Redpanda Data, Inc.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.md
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0

package remotebundle

import (
"fmt"
"os"
"sync"

"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/out"
"github.com/spf13/afero"
"github.com/spf13/cobra"
)

type cancelResponse struct {
Broker string
Canceled bool
Error string
}

func newCancelCommand(fs afero.Fs, p *config.Params) *cobra.Command {
var noConfirm bool
var jobID string
cmd := &cobra.Command{
Use: "cancel",
Short: "Cancel a remote bundle execution",
Long: `Cancel a remote bundle execution.
This command cancels the debug collection process in a remote cluster that
you configured in flags, environment variables, or your rpk profile.
Use the flag '--job-id' to only cancel the debug bundle with
the given job ID.
Use the flag '--no-confirm' to avoid the confirmation prompt.
`,
Args: cobra.NoArgs,
Run: func(cmd *cobra.Command, _ []string) {
p, err := p.LoadVirtualProfile(fs)
out.MaybeDie(err, "rpk unable to load config: %v", err)
config.CheckExitCloudAdmin(p)

status, anyErr, _, cache := executeBundleStatus(cmd.Context(), fs, p)
if jobID != "" {
status = filterStatusByJobID(status, jobID)
}
if !noConfirm {
printTextBundleStatus(status, anyErr)
confirmed, err := out.Confirm("Confirm debug bundle cancel from these brokers?")
out.MaybeDie(err, "unable to confirm cancel: %v; if you want to select a single broker, use -X admin.hosts=<brokers,to,cancel>", err)
if !confirmed {
out.Exit("operation canceled; if you want to select a single broker, use -X admin.hosts=<brokers,to,cancel>")
}
}
var (
wg sync.WaitGroup
rwMu sync.RWMutex // read from cache.
mu sync.Mutex // write to status.
response []cancelResponse
anyCancelErr bool
)
updateStatus := func(resp cancelResponse, err error) {
mu.Lock()
defer mu.Unlock()
anyCancelErr = anyCancelErr || err != nil
response = append(response, resp)
}
for _, s := range status {
wg.Add(1)
go func(addr, jobID string) {
defer wg.Done()
rwMu.RLock()
cl := cache[addr]
rwMu.RUnlock()
resp := cancelResponse{Broker: addr}
err := cl.CancelDebugBundleProcess(cmd.Context(), jobID)
if err != nil {
resp.Error = fmt.Sprintf("unable to cancel debug bundle: %s", tryMessageFromErr(err))
updateStatus(resp, err)
return
}
resp.Canceled = true
updateStatus(resp, nil)
}(s.Broker, s.JobID)
}
wg.Wait()
headers := []string{"broker", "canceled"}
if anyCancelErr {
headers = append(headers, "error")
defer os.Exit(1)
}

tw := out.NewTable(headers...)
defer tw.Flush()
for _, row := range response {
tw.PrintStructFields(row)
}
},
}
cmd.Flags().StringVar(&jobID, "job-id", "", "ID of the job to cancel the debug bundle")
cmd.Flags().BoolVar(&noConfirm, "no-confirm", false, "Disable confirmation prompt")
return cmd
}

func filterStatusByJobID(status []statusResponse, jobID string) []statusResponse {
var filtered []statusResponse
for _, s := range status {
if s.JobID == jobID {
filtered = append(filtered, s)
}
}
return filtered
}
Loading

0 comments on commit b88a857

Please sign in to comment.