-
Notifications
You must be signed in to change notification settings - Fork 26
verifier: xdp7.p4 hit the kernel max BPF stack size #22
Comments
This one won't be easy to fix. But 528 < 512 + 32 + 8.
Table keys are never all live at the same time, so we should only need the largest of them on the stack. Can you please add xdp6 and 7 to the repo? |
thanks, I've pushed xdp6 and xdp7 |
All headers in xdp7.c require a bit more than 74 bytes, a little more than on the wire, since they are unpacked (I didn't count padding, maybe that adds up to something as well). There are a few more local variables which add up to about 24 bytes. The key requires 2 bytes. So we should be pretty far from 528. |
there are two MAX_BPF_STACK, #define MAX_BPF_STACK 512 |
Even that should be large enough. I wonder why we use 528 bytes of stack. |
right... that's weird... let me dig deeper |
have some discussion in iovisor |
this part looks not right. so I guess the ethernet header takes 8 * (6+6) + 2 byte? 18: r3 = *(u8 *)(r1 + 6)
; hd.ethernet.source[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
19: *(u64 *)(r10 - 72) = r3
20: r3 = *(u8 *)(r1 + 5)
; hd.ethernet.source[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
21: *(u64 *)(r10 - 80) = r3
22: r3 = *(u8 *)(r1 + 4)
; hd.ethernet.source[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
23: *(u64 *)(r10 - 88) = r3
24: r3 = *(u8 *)(r1 + 3)
; hd.ethernet.source[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
25: *(u64 *)(r10 - 96) = r3
26: r3 = *(u8 *)(r1 + 2)
; hd.ethernet.source[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
27: *(u64 *)(r10 - 104) = r3
28: r3 = *(u8 *)(r1 + 1)
; hd.ethernet.source[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
29: *(u64 *)(r10 - 112) = r3
30: r3 = *(u8 *)(r1 + 0) |
Can you try to replace char source[6] with u8 source[6]? |
The bug is in the load_byte macro - the cast should be applied to data before adding |
still the same, with the following 2 changes. #define load_byte(data, b) (*((u8 *)data + (b)))
struct Ethernet {
u8 source[6]; /* bit<48> */
u8 destination[6]; /* bit<48> */
u16 protocol; /* bit<16> */
u8 ebpf_valid;
}; |
can you just print sizeof(headers) and sizeof(headers.ethernet)? |
another solution is to avoid using local stack, but put the struct Headers in a BPF MAP. I tried it a bit and seems ok. It's s.t lik --- a/tmp/xdp1.c
+++ b/tmp/xdp1.c.usemap
@@ -1,5 +1,6 @@
#define KBUILD_MODNAME "xdptest"
#include <linux/bpf.h>
#include "bpf_helpers.h"
#define load_byte(data, b) (*(u8 *)(data + (b)))
@@ -36,7 +37,7 @@ struct Ethernet {
char destination[6]; /* bit<48> */
u16 protocol; /* bit<16> */
u8 ebpf_valid;
};
struct IPv4 {
u8 version; /* bit<4> */
@@ -67,16 +68,18 @@ struct bpf_map_def SEC("maps") ebpf_outTable = {
.max_entries = 1 /* No multicast support */
};
+struct bpf_map_def SEC("maps") Headers = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct Headers),
+ .pinning = 2, /* PIN_GLOBAL_NS */
+ .max_entries = 1 /* No multicast support */
+};
+
+
SEC("prog")
int ebpf_filter(struct xdp_md* skb){
- struct Headers hd = {
- .ethernet = {
- .ebpf_valid = 0
- },
- .ipv4 = {
- .ebpf_valid = 0
- },
- };
+ struct Headers hd_zero, *hd;
unsigned ebpf_packetOffsetInBits = 0;
enum ebpf_errorCodes ebpf_errorCode = NoError;
void* ebpf_packetStart = ((void*)(long)skb->data);
@@ -88,82 +91,88 @@ int ebpf_filter(struct xdp_md* skb){
/* TODO: this should be initialized by the environment. HOW? */
struct xdp_input xin;
+ memset(&hd_zero, 0, sizeof(hd_zero));
+ bpf_map_update_elem(&Headers, &ebpf_zero, &hd_zero, BPF_ANY);
+ hd = bpf_map_lookup_elem(&Headers, &ebpf_zero);
+ if (!hd)
+ return XDP_ABORTED;
+
goto start;
start: {
- /* extract(hd.ethernet)*/
+ /* extract(hd->ethernet)*/ |
This should be much slower. We should be able to figure out what the problem with the stack is. |
adding printk printk("headers %d ethernet %d\n", sizeof(struct Headers), sizeof(hd.ethernet));
goto start;
start: { it prints out <idle>-0 [001] ..s. 27808.060250: : headers 44 ethernet 16 here I'm using xdp1, headers has struct Ethernet {
u8 source[6]; /* bit<48> */
u8 destination[6]; /* bit<48> */
u16 protocol; /* bit<16> */
u8 ebpf_valid;
};
struct IPv4 {
u8 version; /* bit<4> */
u8 ihl; /* bit<4> */
u8 diffserv; /* bit<8> */
u16 totalLen; /* bit<16> */
u16 identification; /* bit<16> */
u8 flags; /* bit<3> */
u16 fragOffset; /* bit<13> */
u8 ttl; /* bit<8> */
u8 protocol; /* bit<8> */
u16 hdrChecksum; /* bit<16> */
u32 srcAddr; /* bit<32> */
u32 dstAddr; /* bit<32> */
u8 ebpf_valid;
};
struct Headers {
struct Ethernet ethernet; /* Ethernet */
struct IPv4 ipv4; /* IPv4 */
}; |
So we know that stack usage is not the problem. |
How about you just run the preprocessor and look at the generated C? |
need to go out now, will work on it in the afternoon.
…On Fri, Feb 10, 2017 at 9:37 AM, Mihai Budiu ***@***.***> wrote:
How about you just run the preprocessor and look at the generated C?
—
You are receiving this because you authored the thread.
Reply to this email directly, view it on GitHub
<https://github.com/williamtu/p4c-xdp/issues/22#issuecomment-279010739>,
or mute the thread
<https://github.com/notifications/unsubscribe-auth/AE7w-4KKqMwTv7Q_i00kWq3GXFzuTYAWks5rbKB0gaJpZM4L7AyM>
.
|
Actually it can't be the load_byte - that's the load. The store is the problem. |
I did another experiment using kernel's sample code |
This looks like a bug in the bpf llvm back-end bug. |
For example, I changed the source to be just an u8, and the code generated is this:
As far as I can tell, ldb is load byte, and std is store double. |
I found a workaround this problem: if you take the address of the headers struct the compiler has to put it on the stack. So I just added this:
I will add this to the generated code. |
yes, it will put it on stack, but still uses 8 bytes to store an u8
hd still uses 8 bytes for each u8 |
from Alexei's reply |
I force llvm to allocate struct Headers on stack using alloca clang -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign -Wno-compare-distinct-pointer-types -Wno-gnu-variable-sized-type-not-at-end -Wno-tautological-compare -O2 -emit-llvm -g -c xdp9.c
llvm-dis xdp9.bc -o /tmp/xdp9.ll make the change in LLVM IR, xdp9.ll %ebpf_zero = alloca i32, align 4
%xout = alloca %struct.xdp_output, align 4
%key = alloca %struct.dstmactable_key, align 2
+ %hd = alloca %struct.Headers, align 4
+ %hd.ethernet = alloca %struct.Ethernet, align 2
+ %hd.ipv4 = alloca %struct.IPv4, align 4
assembly back from LLVM IR to bitcode llvm-as /tmp/xdp9.ll -o xdp9.bc
llc -march=bpf -filetype=obj -o xdp9.o xdp9.bc
llvm-objdump -S -no-show-raw-insn xdp9.o result of objdump Disassembly of section prog:
ebpf_filter:
; int ebpf_filter(struct xdp_md* skb){
0: r6 = r1
; void* ebpf_packetEnd = ((void*)(long)skb->data_end);
1: r3 = *(u32 *)(r6 + 4)
; void* ebpf_packetStart = ((void*)(long)skb->data);
2: r2 = *(u32 *)(r6 + 0)
3: r8 = 0
; u32 ebpf_zero = 0;
4: *(u32 *)(r10 - 92) = r8
; if (ebpf_packetEnd < ebpf_packetStart + BYTES(ebpf_packetOffsetInBits + 112)) {
5: r1 = r2
6: r1 += 14
7: if r1 > r3 goto 891
8: r1 = 14
9: *(u64 *)(r10 - 136) = r1
10: r5 = 0
; hd.ethernet.destination[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
11: r1 = *(u8 *)(r2 + 11)
; hd.ethernet.destination[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
12: *(u64 *)(r10 - 544) = r1
13: r1 = *(u8 *)(r2 + 10)
; hd.ethernet.destination[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
14: *(u64 *)(r10 - 552) = r1
15: r1 = *(u8 *)(r2 + 9)
; hd.ethernet.destination[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
16: *(u64 *)(r10 - 560) = r1
17: r1 = *(u8 *)(r2 + 8)
; hd.ethernet.destination[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
18: *(u64 *)(r10 - 568) = r1
19: r1 = *(u8 *)(r2 + 7)
; hd.ethernet.destination[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
20: *(u64 *)(r10 - 576) = r1
21: r1 = *(u8 *)(r2 + 6)
; hd.ethernet.source[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
22: *(u64 *)(r10 - 584) = r1
23: r1 = *(u8 *)(r2 + 5)
; hd.ethernet.source[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
24: *(u64 *)(r10 - 600) = r1
25: r1 = *(u8 *)(r2 + 4)
; hd.ethernet.source[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
26: *(u64 *)(r10 - 608) = r1
27: r1 = *(u8 *)(r2 + 3) unfortunately it's still the same, the hd is using r10 - off, where off is 8 byte offset |
I don't understand why a 'switch' statement need to clear the stack ; int ebpf_filter(struct xdp_md* skb){
0: r6 = r1
; void* ebpf_packetEnd = ((void*)(long)skb->data_end);
1: r4 = *(u32 *)(r6 + 4)
; void* ebpf_packetStart = ((void*)(long)skb->data);
2: r3 = *(u32 *)(r6 + 0)
3: r7 = 0
; u32 ebpf_zero = 0;
4: *(u32 *)(r10 - 4) = r7
; if (ebpf_packetEnd < ebpf_packetStart + BYTES(ebpf_packetOffsetInBits + 112)) {
5: r1 = r3
6: r1 += 14
7: if r1 > r4 goto 245
8: r2 = 14
9: r1 = 0
; hd.ethernet.destination[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
10: *(u64 *)(r10 - 120) = r1
11: r1 = *(u8 *)(r3 + 11)
; hd.ethernet.destination[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
12: *(u64 *)(r10 - 136) = r1
13: r1 = *(u8 *)(r3 + 10)
; hd.ethernet.destination[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
14: *(u64 *)(r10 - 144) = r1
15: r1 = *(u8 *)(r3 + 9)
; hd.ethernet.destination[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
16: *(u64 *)(r10 - 152) = r1
17: r1 = *(u8 *)(r3 + 8)
; hd.ethernet.destination[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
18: *(u64 *)(r10 - 160) = r1
19: r1 = *(u8 *)(r3 + 7)
; hd.ethernet.destination[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
20: *(u64 *)(r10 - 168) = r1
21: r1 = *(u8 *)(r3 + 6)
; hd.ethernet.source[5] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 5) >> 0));
22: *(u64 *)(r10 - 176) = r1
23: r1 = *(u8 *)(r3 + 5)
; hd.ethernet.source[4] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 4) >> 0));
24: *(u64 *)(r10 - 184) = r1
25: r1 = *(u8 *)(r3 + 4)
; hd.ethernet.source[3] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 3) >> 0));
26: *(u64 *)(r10 - 192) = r1
27: r1 = *(u8 *)(r3 + 3)
; hd.ethernet.source[2] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 2) >> 0));
28: *(u64 *)(r10 - 200) = r1
29: r1 = *(u8 *)(r3 + 2)
; hd.ethernet.source[1] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 1) >> 0));
30: *(u64 *)(r10 - 208) = r1
31: r1 = *(u8 *)(r3 + 1)
; hd.ethernet.source[0] = (u8)((load_byte(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits) + 0) >> 0));
32: *(u64 *)(r10 - 216) = r1
33: r1 = *(u8 *)(r3 + 0)
; hd.ethernet.protocol = (u16)((load_half(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits))));
34: *(u64 *)(r10 - 224) = r1
35: r9 = *(u16 *)(r3 + 12)
; switch (hd.ethernet.protocol) {
36: r5 = r9
37: r5 <<= 8
; hd.ethernet.protocol = (u16)((load_half(ebpf_packetStart, BYTES(ebpf_packetOffsetInBits))));
38: r8 = r9
39: r8 >>= 8
; switch (hd.ethernet.protocol) {
40: r1 = r8
41: r1 |= r5
42: r1 &= 65535
43: r5 = 0
44: *(u64 *)(r10 - 296) = r5
45: r5 = 0
46: *(u64 *)(r10 - 232) = r5
47: r5 = 0
48: *(u64 *)(r10 - 240) = r5
49: r5 = 0
50: *(u64 *)(r10 - 280) = r5
51: r5 = 0
52: *(u64 *)(r10 - 272) = r5
53: r5 = 0
54: r7 = 0
55: r0 = 0
56: *(u64 *)(r10 - 256) = r0
57: r0 = 0
58: *(u64 *)(r10 - 248) = r0
59: r0 = 0
60: *(u64 *)(r10 - 128) = r0
61: r0 = 0
62: *(u64 *)(r10 - 264) = r0
63: r0 = 0
64: *(u64 *)(r10 - 288) = r0
65: r0 = 0
66: *(u64 *)(r10 - 312) = r0
67: r0 = 0
68: *(u64 *)(r10 - 328) = r0
69: r0 = 0
70: *(u64 *)(r10 - 320) = r0
71: r0 = 0
72: *(u64 *)(r10 - 304) = r0
73: r0 = 0
74: *(u64 *)(r10 - 376) = r0
75: r0 = 0
76: *(u64 *)(r10 - 368) = r0
77: r0 = 0
78: *(u64 *)(r10 - 336) = r0
79: r0 = 0
80: *(u64 *)(r10 - 344) = r0
81: r0 = 0
82: *(u64 *)(r10 - 352) = r0
83: r0 = 0
84: *(u64 *)(r10 - 360) = r0
85: if r1 != 2048 goto 60
86: r7 = 0
; if (ebpf_packetEnd < ebpf_packetStart + BYTES(ebpf_packetOffsetInBits + 160)) {
87: r2 = r3
88: r2 += 34
89: if r2 > r4 goto 163
90: r2 = 34 |
xdp7.p4 show the error message
looking at the source code
we only allow stack size of 512 + 32 + 8
need to discuss with kernel bpf maintainer...
The text was updated successfully, but these errors were encountered: