-
Notifications
You must be signed in to change notification settings - Fork 77
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add capable and tcpconnlat examples (#88)
* add capable and tcpconnlat examples * address review comments
- Loading branch information
1 parent
39dc423
commit db2c8fe
Showing
5 changed files
with
331 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Overview | ||
|
||
The capable example is heavily based on the [capable program in BCC repository](https://github.com/iovisor/bcc/blob/master/tools/capable.py), which was created by Brendan Gregg. | ||
This eBPF program will trace all security capability checks (cap_capable() calls) in your system. | ||
|
||
# Usage | ||
|
||
To see all the capability checks in your environment, you can run your image without filters: | ||
|
||
```console | ||
bee run ghcr.io/solo-io/bumblebee/capable:$(bee version) | ||
``` | ||
|
||
You can also try out the filtering capability by referencing fields in your BPF map: | ||
|
||
```c | ||
struct cap_event { | ||
__u64 mntnsid; | ||
__u32 pid; | ||
int cap; | ||
__u32 tgid; | ||
__u32 uid; | ||
int cap_opt; | ||
char task[TASK_COMM_LEN]; | ||
}; | ||
``` | ||
|
||
For example, to filter for all the capability checks where the `task` is `ping`, you can use: | ||
|
||
```console | ||
bee run -f="events,task,ping" ghcr.io/solo-io/bumblebee/capable:$(bee version) | ||
``` | ||
|
||
# Prometheus integration | ||
|
||
Let's say you want to visualize the rate of such capability checks in your Prometheus stack, or want to alert on certain capability checks. | ||
|
||
You can modify your `events` map to generate a `counter` from your cap_capable() calls: | ||
|
||
> Note: you can rename `events` to `cap_events` to illustrate the goal of the exposed events better. | ||
```c | ||
struct { | ||
__uint(type, BPF_MAP_TYPE_RINGBUF); | ||
__uint(max_entries, 1 << 24); | ||
__type(value, struct cap_event); | ||
} events SEC(".maps.counter"); | ||
``` | ||
You should consider removing high cardinality fields from your map to avoid overloading your Prometheus instance, e.g. `mntnsid`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// Based on: https://github.com/kinvolk/inspektor-gadget/blob/main/pkg/gadgets/capabilities/tracer/core/bpf/capable.bpf.c | ||
// Copyright 2022 Sony Group Corporation | ||
|
||
#include <vmlinux.h> | ||
#include <bpf/bpf_core_read.h> | ||
#include <bpf/bpf_helpers.h> | ||
#include <bpf/bpf_tracing.h> | ||
|
||
/* Size in bytes of the command-name buffer carried in struct cap_event. */
#define TASK_COMM_LEN 16

/* Capacity (number of entries) of the `info` hash map. */
#define MAX_ENTRIES 10240
|
||
struct cap_event { | ||
__u64 mntnsid; | ||
__u32 pid; | ||
int cap; | ||
__u32 tgid; | ||
__u32 uid; | ||
int cap_opt; | ||
char task[TASK_COMM_LEN]; | ||
}; | ||
|
||
struct key_t { | ||
__u32 pid; | ||
__u32 tgid; | ||
int user_stack_id; | ||
int kern_stack_id; | ||
}; | ||
|
||
struct { | ||
__uint(type, BPF_MAP_TYPE_RINGBUF); | ||
__uint(max_entries, 1 << 24); | ||
__type(value, struct cap_event); | ||
} events SEC(".maps.print"); | ||
|
||
struct { | ||
__uint(type, BPF_MAP_TYPE_HASH); | ||
__type(key, struct key_t); | ||
__type(value, struct cap_event); | ||
__uint(max_entries, MAX_ENTRIES); | ||
} info SEC(".maps"); | ||
|
||
struct { | ||
__uint(type, BPF_MAP_TYPE_HASH); | ||
__uint(max_entries, 1024); | ||
__uint(key_size, sizeof(u64)); | ||
__uint(value_size, sizeof(u32)); | ||
} mount_ns_set SEC(".maps"); | ||
|
||
/*
 * kprobe on cap_capable(): emits one struct cap_event to the `events` ring
 * buffer for every capability check made by the current task.
 *
 * cred, targ_ns: received from the probed function; not used here.
 * cap, cap_opt:  copied into the event unchanged.
 *
 * Always returns 0. If the ring buffer has no free space the event is
 * dropped silently (best effort).
 */
SEC("kprobe/cap_capable")
int BPF_KPROBE(kprobe__cap_capable, const struct cred *cred, struct user_namespace *targ_ns, int cap, int cap_opt)
{
    struct cap_event *event;
    struct task_struct *task;
    __u64 pid_tgid;

    /* Reserve first so no task data is read when the buffer is full. */
    event = bpf_ringbuf_reserve(&events, sizeof(*event), 0);
    if (!event)
        return 0;

    task = (struct task_struct *)bpf_get_current_task();
    pid_tgid = bpf_get_current_pid_tgid();

    /*
     * NOTE(review): the helper is documented to return (tgid << 32 | pid),
     * so `pid` below receives the kernel tgid (process id) and `tgid`
     * receives the kernel pid (thread id). The field names look swapped,
     * but the stored values are kept as they were because consumers
     * filter on these names -- confirm before renaming or swapping.
     */
    event->pid = (__u32)(pid_tgid >> 32);
    event->tgid = (__u32)pid_tgid;
    event->cap = cap;
    /* Lower 32 bits of the uid/gid pair; the upper half is discarded. */
    event->uid = (__u32)bpf_get_current_uid_gid();
    event->mntnsid = (__u64)BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum);
    event->cap_opt = cap_opt;
    bpf_get_current_comm(&event->task, sizeof(event->task));

    bpf_ringbuf_submit(event, 0);

    return 0;
}
|
||
/* License string for this BPF object, placed in the "license" section. */
char LICENSE[] SEC("license") = "GPL";
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# Overview | ||
|
||
The tcpconnlat example is heavily based on the [tcpconnlat program in BCC's libbpf-tools](https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c), which is itself based on the original [BCC tcpconnlat](https://github.com/iovisor/bcc/blob/master/tools/tcpconnlat.py). | ||
|
||
This eBPF program will trace all TCP active connection latencies. | ||
|
||
# Usage | ||
|
||
To see all the connections in your environment, you can run your image without filters: | ||
|
||
```console | ||
bee run ghcr.io/solo-io/bumblebee/tcpconnlat:$(bee version) | ||
``` | ||
|
||
You can also try out the filtering capability by referencing fields in your BPF map: | ||
|
||
```c | ||
struct event { | ||
ipv4_addr saddr_v4; | ||
ipv4_addr daddr_v4; | ||
char comm[TASK_COMM_LEN]; | ||
__u64 delta_us; | ||
__u64 ts_us; | ||
__u32 tgid; | ||
int af; | ||
}; | ||
``` | ||
|
||
For example, to filter for all active connections where the `daddr_v4` is `8.8.8.8`, you can use: | ||
|
||
```console | ||
bee run -f="events,daddr_v4,8.8.8.8" ghcr.io/solo-io/bumblebee/tcpconnlat:$(bee version) | ||
``` | ||
|
||
Result: | ||
|
||
```console | ||
af comm daddr_v4 delta_us saddr_v4 tgid ts_us | ||
2 telnet 8.8.8.8 1220 10.132.0.48 15173 5149693413 | ||
``` | ||
|
||
# Prometheus integration | ||
|
||
Let's say, you want to visualize the latencies in your Prometheus stack, or want to alert on certain limits. | ||
|
||
> Note that BumbleBee currently only supports counter and gauge metric types, so as of now, you cannot expose latency metrics as histograms. The support for histogram is on our [roadmap](https://github.com/solo-io/bumblebee/blob/main/ROADMAP.md). | ||
> Also note that currently BumbleBee is exposing metrics for all the members of the struct describing the map as labels. As `ts_us` is there as a timestamp, the cardinality will explode quite soon, so **generate Prometheus metrics only in a lab or a very low traffic environment**. | ||
You can modify your `events` map to generate a `counter` from your active connections: | ||
|
||
```c | ||
struct { | ||
__uint(type, BPF_MAP_TYPE_RINGBUF); | ||
__uint(max_entries, 1 << 24); | ||
__type(value, struct event); | ||
} events SEC(".maps.counter"); | ||
``` | ||
This will generate Prometheus metrics like this: | ||
```console | ||
# HELP ebpf_solo_io_events | ||
# TYPE ebpf_solo_io_events counter | ||
ebpf_solo_io_events{af="2",comm="coredns",daddr_v4="127.0.0.1",delta_us="44",saddr_v4="127.0.0.1",tgid="4508",ts_us="5914339221"} 1 | ||
ebpf_solo_io_events{af="2",comm="coredns",daddr_v4="127.0.0.1",delta_us="46",saddr_v4="127.0.0.1",tgid="4508",ts_us="5910339887"} 1 | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
// Based on: https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c | ||
// SPDX-License-Identifier: GPL-2.0 | ||
// Copyright (c) 2020 Wenbo Zhang | ||
|
||
#include <vmlinux.h> | ||
#include <bpf/bpf_helpers.h> | ||
#include <bpf/bpf_core_read.h> | ||
#include <bpf/bpf_tracing.h> | ||
#include "solo_types.h" | ||
|
||
/* Size in bytes of the command-name buffers in struct event and struct piddata. */
#define TASK_COMM_LEN 16
|
||
struct event { | ||
ipv4_addr saddr_v4; | ||
ipv4_addr daddr_v4; | ||
char comm[TASK_COMM_LEN]; | ||
__u64 delta_us; | ||
__u64 ts_us; | ||
__u32 tgid; | ||
int af; | ||
}; | ||
|
||
struct piddata { | ||
char comm[TASK_COMM_LEN]; | ||
u64 ts; | ||
u32 tgid; | ||
}; | ||
|
||
//Commenting these and the corresponding logic out until we have support for kernel-side filtering. | ||
//const volatile __u64 targ_min_us = 0; | ||
//const volatile pid_t targ_tgid = 0; | ||
|
||
struct { | ||
__uint(type, BPF_MAP_TYPE_HASH); | ||
__uint(max_entries, 4096); | ||
__type(key, struct sock *); | ||
__type(value, struct piddata); | ||
} start SEC(".maps"); | ||
|
||
struct { | ||
__uint(type, BPF_MAP_TYPE_RINGBUF); | ||
__uint(max_entries, 1 << 24); | ||
__type(value, struct event); | ||
} events SEC(".maps.print"); | ||
|
||
static int trace_connect(struct sock *sk) | ||
{ | ||
u32 tgid = bpf_get_current_pid_tgid() >> 32; | ||
struct piddata piddata = {}; | ||
|
||
// if (targ_tgid && targ_tgid != tgid) | ||
// return 0; | ||
|
||
bpf_get_current_comm(&piddata.comm, sizeof(piddata.comm)); | ||
piddata.ts = bpf_ktime_get_ns(); | ||
piddata.tgid = tgid; | ||
bpf_map_update_elem(&start, &sk, &piddata, 0); | ||
return 0; | ||
} | ||
|
||
static int handle_tcp_rcv_state_process(void *ctx, struct sock *sk) | ||
{ | ||
struct piddata *piddatap; | ||
struct event event = {}; | ||
s64 delta; | ||
u64 ts; | ||
|
||
if (BPF_CORE_READ(sk, __sk_common.skc_state) != TCP_SYN_SENT) | ||
return 0; | ||
|
||
piddatap = bpf_map_lookup_elem(&start, &sk); | ||
if (!piddatap) | ||
return 0; | ||
|
||
ts = bpf_ktime_get_ns(); | ||
delta = (s64)(ts - piddatap->ts); | ||
if (delta < 0) { | ||
bpf_map_delete_elem(&start, &sk); | ||
return 0; | ||
} | ||
|
||
event.delta_us = delta / 1000U; | ||
|
||
// if (targ_min_us && event.delta_us < targ_min_us) { | ||
// bpf_map_delete_elem(&start, &sk); | ||
// return 0; | ||
// } | ||
|
||
__builtin_memcpy(&event.comm, piddatap->comm, | ||
sizeof(event.comm)); | ||
event.ts_us = ts / 1000; | ||
event.tgid = piddatap->tgid; | ||
event.af = BPF_CORE_READ(sk, __sk_common.skc_family); | ||
event.saddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); | ||
event.daddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_daddr); | ||
|
||
struct event *ring_val; | ||
|
||
ring_val = bpf_ringbuf_reserve(&events, sizeof(struct event), 0); | ||
if (!ring_val) { | ||
return 0; | ||
} | ||
|
||
memcpy(ring_val, &event, sizeof(struct event)); | ||
|
||
bpf_ringbuf_submit(ring_val, 0); | ||
|
||
return 0; | ||
} | ||
|
||
/*
 * kprobe on tcp_v4_connect(): hands the socket to trace_connect() and
 * returns its result.
 */
SEC("kprobe/tcp_v4_connect")
int BPF_KPROBE(tcp_v4_connect, struct sock *sk)
{
    int rc = trace_connect(sk);

    return rc;
}
|
||
/*
 * kprobe on tcp_rcv_state_process(): forwards the probe context and socket
 * to handle_tcp_rcv_state_process() and returns its result.
 */
SEC("kprobe/tcp_rcv_state_process")
int BPF_KPROBE(tcp_rcv_state_process, struct sock *sk)
{
    int rc = handle_tcp_rcv_state_process(ctx, sk);

    return rc;
}
|
||
/* License string for this BPF object, placed in the "license" section. */
char LICENSE[] SEC("license") = "GPL";