From 2ac78f07e62e6b52b75f6de0b4144cef8f06bcdd Mon Sep 17 00:00:00 2001 From: Brennan Vincent Date: Mon, 7 Oct 2024 20:37:28 -0400 Subject: [PATCH 1/2] Allow disabling tracepoints Currently we fail to run if tracepoints can't be set on various sub-events of /sys/kernel/debug/tracing/events/syscalls/ . Currently these are only used to detect process exit, so with --disable-tracepoints we will not detect that as quickly/precisely, leading to bloat in the BPF maps. Without tracepoints, we will only detect exited processes every pidCleanupInterval (by default 5 minutes), through a scan of /proc. Thus setting this flag is not recommended unless necessary. It might be necessary because certain obscure distributions apparently run with a configuration that doesn't expose these syscall tracepoint events. I have confirmed that with this patch (and setting the flag), the agent starts up and seems to work normally on a kernel without CONFIG_FTRACE_SYSCALLS, which fails without this patch. --- flags/flags.go | 1 + main.go | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/flags/flags.go b/flags/flags.go index 94bf2a4b3a..671010b0d3 100644 --- a/flags/flags.go +++ b/flags/flags.go @@ -123,6 +123,7 @@ type Flags struct { Hidden FlagsHidden `embed:"" hidden:"" prefix:""` BPF FlagsBPF `embed:"" prefix:"bpf-"` + DisableTracepoints bool `default:"false" help:"Disable kernel tracepoints. 
Not recommended unless the agent fails to start otherwise."` } type ExitCode int diff --git a/main.go b/main.go index ccb819517e..b0324c991d 100644 --- a/main.go +++ b/main.go @@ -248,8 +248,12 @@ func mainWithExitCode() flags.ExitCode { return flags.Failure(fmt.Sprintf("Failed to probe eBPF syscall: %v", err)) } - if err = tracer.ProbeTracepoint(); err != nil { - return flags.Failure("Failed to probe tracepoint: %v", err) + if !f.DisableTracepoints { + log.Info("Probing tracepoint...") + if err = tracer.ProbeTracepoint(); err != nil { + return flags.Failure("Failed to probe tracepoint: %v", err) + } + log.Info("Success") } externalLabels := reporter.Labels{} @@ -361,8 +365,10 @@ func mainWithExitCode() flags.ExitCode { } } - if err := trc.AttachSchedMonitor(); err != nil { - return flags.Failure("Failed to attach scheduler monitor: %v", err) + if !f.DisableTracepoints { + if err := trc.AttachSchedMonitor(); err != nil { + return flags.Failure("Failed to attach scheduler monitor: %v", err) + } } // This log line is used in our system tests to verify if that the agent has started. So if you From 4d3ee1ea03125a7c83ecabda171478d436be2723 Mon Sep 17 00:00:00 2001 From: Brennan Vincent Date: Tue, 8 Oct 2024 09:15:01 -0400 Subject: [PATCH 2/2] Do it another way: 1. Get rid of the flag; unconditionally continue attempting to run even if ProbeTracepoint fails. 2. Don't gate attaching the scheduling monitor; this is available even when syscall probes aren't. --- flags/flags.go | 1 - main.go | 14 ++++---------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/flags/flags.go b/flags/flags.go index 671010b0d3..94bf2a4b3a 100644 --- a/flags/flags.go +++ b/flags/flags.go @@ -123,7 +123,6 @@ type Flags struct { Hidden FlagsHidden `embed:"" hidden:"" prefix:""` BPF FlagsBPF `embed:"" prefix:"bpf-"` - DisableTracepoints bool `default:"false" help:"Disable kernel tracepoints. 
Not recommended unless the agent fails to start otherwise."` } type ExitCode int diff --git a/main.go b/main.go index b0324c991d..1968838488 100644 --- a/main.go +++ b/main.go @@ -248,12 +248,8 @@ func mainWithExitCode() flags.ExitCode { return flags.Failure(fmt.Sprintf("Failed to probe eBPF syscall: %v", err)) } - if !f.DisableTracepoints { - log.Info("Probing tracepoint...") - if err = tracer.ProbeTracepoint(); err != nil { - return flags.Failure("Failed to probe tracepoint: %v", err) - } - log.Info("Success") + if err = tracer.ProbeTracepoint(); err != nil { + log.Warnf("Failed to probe tracepoint: %v. Parca-agent may fail to run on some kernel versions.", err) } externalLabels := reporter.Labels{} @@ -365,10 +361,8 @@ func mainWithExitCode() flags.ExitCode { } } - if !f.DisableTracepoints { - if err := trc.AttachSchedMonitor(); err != nil { - return flags.Failure("Failed to attach scheduler monitor: %v", err) - } + if err := trc.AttachSchedMonitor(); err != nil { + return flags.Failure("Failed to attach scheduler monitor: %v", err) } // This log line is used in our system tests to verify if that the agent has started. So if you