From db2c8fe697266f1f92dc329d277f3e99214b17f0 Mon Sep 17 00:00:00 2001 From: krisztianfekete <103492698+krisztianfekete@users.noreply.github.com> Date: Tue, 19 Jul 2022 17:34:03 +0200 Subject: [PATCH] Add capable and tcpconnlat examples (#88) * add capable and tcpconnlat examples * address review comments --- Makefile | 10 ++- examples/capable/README.md | 50 +++++++++++++ examples/capable/capable.c | 84 +++++++++++++++++++++ examples/tcpconnlat/README.md | 67 +++++++++++++++++ examples/tcpconnlat/tcpconnlat.c | 123 +++++++++++++++++++++++++++++++ 5 files changed, 331 insertions(+), 3 deletions(-) create mode 100644 examples/capable/README.md create mode 100644 examples/capable/capable.c create mode 100644 examples/tcpconnlat/README.md create mode 100644 examples/tcpconnlat/tcpconnlat.c diff --git a/Makefile b/Makefile index 011e5a7..c1f8798 100644 --- a/Makefile +++ b/Makefile @@ -50,9 +50,13 @@ activeconn: $(EXAMPLES_DIR)/activeconn .PHONY: tcpconnect tcpconnect: $(EXAMPLES_DIR)/tcpconnect .PHONY: exitsnoop -tcpconnect: $(EXAMPLES_DIR)/exitsnoop +exitsnoop: $(EXAMPLES_DIR)/exitsnoop .PHONY: oomkill -tcpconnect: $(EXAMPLES_DIR)/oomkill +oomkill: $(EXAMPLES_DIR)/oomkill +.PHONY: capable +capable: $(EXAMPLES_DIR)/capable +.PHONY: tcpconnlat +tcpconnlat: $(EXAMPLES_DIR)/tcpconnlat $(EXAMPLES_DIR)/%: @@ -60,7 +64,7 @@ $(EXAMPLES_DIR)/%: $(OUTDIR)/bee-linux-amd64 push $(HUB)/$(REPO_NAME)/$*:$(VERSION) .PHONY: release-examples -release-examples: activeconn tcpconnect exitsnoop oomkill +release-examples: activeconn tcpconnect exitsnoop oomkill capable tcpconnlat #---------------------------------------------------------------------------------- # CLI diff --git a/examples/capable/README.md b/examples/capable/README.md new file mode 100644 index 0000000..10074a6 --- /dev/null +++ b/examples/capable/README.md @@ -0,0 +1,50 @@ +# Overview + +The capable example is heavily based on the [capable program in BCC 
repository](https://github.com/iovisor/bcc/blob/master/tools/capable.py), which was created by Brendan Gregg. +This eBPF program will trace all security capability checks (cap_capable() calls) in your system. + +# Usage + +To see all the capability checks in your environment, you can run your image without filters: + +```console +bee run ghcr.io/solo-io/bumblebee/capable:$(bee version) +``` + +You can also try out the filtering capability by referencing fields in your BPF map: + +```c +struct cap_event { + __u64 mntnsid; + __u32 pid; + int cap; + __u32 tgid; + __u32 uid; + int cap_opt; + char task[TASK_COMM_LEN]; +}; +``` + +For example, to filter for all the capability checks where the `task` is `ping`, you can use: + +```console +bee run -f="events,task,ping" ghcr.io/solo-io/bumblebee/capable:$(bee version) +``` + +# Prometheus integration + +Let's say you want to visualize the rate of such capability checks in your Prometheus stack, or want to alert on certain capability checks. + +You can modify your `events` map to generate a `counter` from your cap_capable() calls: + +> Note: you can rename `events` to `cap_events` to illustrate the goal of the exposed events better. + +```c +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 24); + __type(value, struct cap_event); +} events SEC(".maps.print"); +``` + +You should consider removing high cardinality fields from your map to avoid overloading your Prometheus instance, e.g. `mntnsid`. 
diff --git a/examples/capable/capable.c b/examples/capable/capable.c new file mode 100644 index 0000000..16919ec --- /dev/null +++ b/examples/capable/capable.c @@ -0,0 +1,84 @@ +// Based on: https://github.com/kinvolk/inspektor-gadget/blob/main/pkg/gadgets/capabilities/tracer/core/bpf/capable.bpf.c +// Copyright 2022 Sony Group Corporation + +#include +#include +#include +#include + +#define TASK_COMM_LEN 16 +#define MAX_ENTRIES 10240 + +struct cap_event { + __u64 mntnsid; + __u32 pid; + int cap; + __u32 tgid; + __u32 uid; + int cap_opt; + char task[TASK_COMM_LEN]; +}; + +struct key_t { + __u32 pid; + __u32 tgid; + int user_stack_id; + int kern_stack_id; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 24); + __type(value, struct cap_event); +} events SEC(".maps.print"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct key_t); + __type(value, struct cap_event); + __uint(max_entries, MAX_ENTRIES); +} info SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1024); + __uint(key_size, sizeof(u64)); + __uint(value_size, sizeof(u32)); +} mount_ns_set SEC(".maps"); + +SEC("kprobe/cap_capable") +int BPF_KPROBE(kprobe__cap_capable, const struct cred *cred, struct user_namespace *targ_ns, int cap, int cap_opt) +{ + __u32 pid; + u64 mntns_id; + __u64 pid_tgid; + struct key_t i_key; + struct task_struct *task; + + task = (struct task_struct*) bpf_get_current_task(); + mntns_id = (u64) BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum); + + pid_tgid = bpf_get_current_pid_tgid(); + pid = pid_tgid >> 32; + + struct cap_event *event; + + event = bpf_ringbuf_reserve(&events, sizeof(struct cap_event), 0); + if (!event) { + return 0; + } + + event->pid = pid; + event->tgid = pid_tgid; + event->cap = cap; + event->uid = bpf_get_current_uid_gid(); + event->mntnsid = mntns_id; + event->cap_opt = cap_opt; + bpf_get_current_comm(&event->task, sizeof(event->task)); + + bpf_ringbuf_submit(event, 0); + + return 0; 
+} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/examples/tcpconnlat/README.md b/examples/tcpconnlat/README.md new file mode 100644 index 0000000..8c37e09 --- /dev/null +++ b/examples/tcpconnlat/README.md @@ -0,0 +1,67 @@ +# Overview + +The tcpconnlat example is heavily based on the [tcpconnlat program in BCC's libbpf-tools](https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c), which is itself based on the original [BCC tcpconnlat](https://github.com/iovisor/bcc/blob/master/tools/tcpconnlat.py). + +This eBPF program will trace all TCP active connection latencies. + +# Usage + +To see all the connections in your environment, you can run your image without filters: + +```console +bee run ghcr.io/solo-io/bumblebee/tcpconnlat:$(bee version) +``` + +You can also try out the filtering capability by referencing fields in your BPF map: + +```c +struct event { + ipv4_addr saddr_v4; + ipv4_addr daddr_v4; + char comm[TASK_COMM_LEN]; + __u64 delta_us; + __u64 ts_us; + __u32 tgid; + int af; +}; +``` + +For example, to filter for all active connections where the `daddr_v4` is `8.8.8.8`, you can use: + +```console +bee run -f="events,daddr_v4,8.8.8.8" ghcr.io/solo-io/bumblebee/tcpconnlat:$(bee version) +``` + +Result: + +```console +af comm daddr_v4 delta_us saddr_v4 tgid ts_us +2 telnet 8.8.8.8 1220 10.132.0.48 15173 5149693413 +``` + +# Prometheus integration + +Let's say you want to visualize the latencies in your Prometheus stack, or want to alert on certain limits. + +> Note that BumbleBee currently only supports counter and gauge metric types, so as of now, you cannot expose latency metrics as histograms. Support for histograms is on our [roadmap](https://github.com/solo-io/bumblebee/blob/main/ROADMAP.md). + +> Also note that currently BumbleBee is exposing metrics for all the members of the struct describing the map as labels. 
As `ts_us` is there as a timestamp, the cardinality will explode quite soon, so **generate Prometheus metrics only in a lab or a very low traffic environment**. + +You can modify your `events` map to generate a `counter` from your active connections: + +```c +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 24); + __type(value, struct event); +} events SEC(".maps.print"); +``` + +This will generate Prometheus metrics like this: + +```console +# HELP ebpf_solo_io_events +# TYPE ebpf_solo_io_events counter +ebpf_solo_io_events{af="2",comm="coredns",daddr_v4="127.0.0.1",delta_us="44",saddr_v4="127.0.0.1",tgid="4508",ts_us="5914339221"} 1 +ebpf_solo_io_events{af="2",comm="coredns",daddr_v4="127.0.0.1",delta_us="46",saddr_v4="127.0.0.1",tgid="4508",ts_us="5910339887"} 1 +``` diff --git a/examples/tcpconnlat/tcpconnlat.c b/examples/tcpconnlat/tcpconnlat.c new file mode 100644 index 0000000..5f8680c --- /dev/null +++ b/examples/tcpconnlat/tcpconnlat.c @@ -0,0 +1,123 @@ +// Based on: https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Wenbo Zhang + +#include +#include +#include +#include +#include "solo_types.h" + +#define TASK_COMM_LEN 16 + +struct event { + ipv4_addr saddr_v4; + ipv4_addr daddr_v4; + char comm[TASK_COMM_LEN]; + __u64 delta_us; + __u64 ts_us; + __u32 tgid; + int af; +}; + +struct piddata { + char comm[TASK_COMM_LEN]; + u64 ts; + u32 tgid; +}; + +//Commenting these and the corresponding logic out until we have support for kernel-side filtering. 
+//const volatile __u64 targ_min_us = 0; +//const volatile pid_t targ_tgid = 0; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 4096); + __type(key, struct sock *); + __type(value, struct piddata); +} start SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 24); + __type(value, struct event); +} events SEC(".maps.print"); + +static int trace_connect(struct sock *sk) +{ + u32 tgid = bpf_get_current_pid_tgid() >> 32; + struct piddata piddata = {}; + +// if (targ_tgid && targ_tgid != tgid) +// return 0; + + bpf_get_current_comm(&piddata.comm, sizeof(piddata.comm)); + piddata.ts = bpf_ktime_get_ns(); + piddata.tgid = tgid; + bpf_map_update_elem(&start, &sk, &piddata, 0); + return 0; +} + +static int handle_tcp_rcv_state_process(void *ctx, struct sock *sk) +{ + struct piddata *piddatap; + struct event event = {}; + s64 delta; + u64 ts; + + if (BPF_CORE_READ(sk, __sk_common.skc_state) != TCP_SYN_SENT) + return 0; + + piddatap = bpf_map_lookup_elem(&start, &sk); + if (!piddatap) + return 0; + + ts = bpf_ktime_get_ns(); + delta = (s64)(ts - piddatap->ts); + if (delta < 0) { + bpf_map_delete_elem(&start, &sk); + return 0; + } + + event.delta_us = delta / 1000U; + +// if (targ_min_us && event.delta_us < targ_min_us) { +// bpf_map_delete_elem(&start, &sk); +// return 0; +// } + + __builtin_memcpy(&event.comm, piddatap->comm, + sizeof(event.comm)); + event.ts_us = ts / 1000; + event.tgid = piddatap->tgid; + event.af = BPF_CORE_READ(sk, __sk_common.skc_family); + event.saddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); + event.daddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_daddr); + + struct event *ring_val; + + ring_val = bpf_ringbuf_reserve(&events, sizeof(struct event), 0); + if (!ring_val) { + return 0; + } + + memcpy(ring_val, &event, sizeof(struct event)); + + bpf_ringbuf_submit(ring_val, 0); + + return 0; +} + +SEC("kprobe/tcp_v4_connect") +int BPF_KPROBE(tcp_v4_connect, struct sock *sk) +{ + return 
trace_connect(sk); +} + +SEC("kprobe/tcp_rcv_state_process") +int BPF_KPROBE(tcp_rcv_state_process, struct sock *sk) +{ + return handle_tcp_rcv_state_process(ctx, sk); +} + +char LICENSE[] SEC("license") = "GPL";