Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

l4 port redirect & fix loopback for ip6 packets #112

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
522 changes: 243 additions & 279 deletions docs/graph.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions modules/infra/control/loopback.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ static void iface_loopback_poll(evutil_socket_t, short reason, void *ev_iface) {

// packet sent from linux tun iface, no need to compute checksum;
mbuf->ol_flags = RTE_MBUF_F_RX_IP_CKSUM_GOOD;
mbuf->packet_type = data[0] == 6 ? RTE_PTYPE_L3_IPV6 : RTE_PTYPE_L3_IPV4;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ugh, ok.
I guess the previous patch relies on this.

This must be documented, or a different mechanism is needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wished to avoid setting unnecessary data if the packet type was already defined by the drivers.
The other option is to add a family field in the buffer metadata for both ip_local_mbuf_data and ip6_local_mbuf_data to define which kind of header we should fill in.
I found it uglier as the 2 priv data types are defined in different modules.

GR_MBUF_PRIV_DATA_TYPE(ip_local_mbuf_data, {
+     uint8_t family;
        ip4_addr_t src;
        ip4_addr_t dst;
        uint16_t len;
        uint16_t vrf_id;
        uint8_t proto;
        uint8_t ttl;
});


GR_MBUF_PRIV_DATA_TYPE(ip6_local_mbuf_data, {
+     uint8_t family;
        struct rte_ipv6_addr src;
        struct rte_ipv6_addr dst;
        uint16_t len;
        uint8_t hop_limit;
        uint8_t proto;
});

@rjarry Any opinion about that?
Should we instead define a common header for both local_mbuf_data types?

GR_MBUF_PRIV_DATA_TYPE(ip46_local_mbuf_data, {
        uint8_t family;
        union {
            struct rte_ipv6_addr v6;
            ip4_addr_t v4;  
        } src;
        union {
            struct rte_ipv6_addr v6;
            ip4_addr_t v4;  
        } dst;
        uint16_t len;
        union {
            uint8_t hop_limit;
            uint8_t ttl;
        };
        uint8_t proto;
});

// Emulate ethernet input, required by ip(6)_input
e = eth_input_mbuf_data(mbuf);
e->iface = iface;
Expand Down
11 changes: 11 additions & 0 deletions modules/l4/gr_l4.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// SPDX-License-Identifier: BSD-3-Clause
// Copyright (c) 2024 Christophe Fontaine

#ifndef _GR_L4_H
#define _GR_L4_H

#include <stdint.h>

// Register next_node as the graph node that receives locally delivered
// packets matching (proto, port).
//
// proto:     L4 protocol number. Only IPPROTO_UDP is handled; any other
//            value aborts.
// port:      L4 destination port. The existing entry for this port is
//            checked and a conflicting registration aborts.
//            NOTE(review): the datapath looks ports up using the raw
//            dst_port field of the UDP header (network byte order) --
//            confirm which byte order callers are expected to pass.
// next_node: name of the graph node to attach.
void l4_input_register_port(uint8_t proto, uint16_t port, const char *next_node);

#endif
96 changes: 96 additions & 0 deletions modules/l4/l4_input_local.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// SPDX-License-Identifier: BSD-3-Clause
// Copyright (c) 2024 Christophe Fontaine

#include "gr_l4.h"

#include <gr_datapath.h>
#include <gr_graph.h>
#include <gr_ip4_datapath.h>
#include <gr_ip6_datapath.h>
#include <gr_log.h>
#include <gr_port.h>
#include <gr_trace.h>

#include <rte_graph_worker.h>
#include <rte_ip.h>
#include <rte_mbuf.h>

// Static edges of the l4_input_local node. The values index .next_nodes in
// the node registration.
enum edges {
// Default destination: next node is "l4_redirect_loopback".
MANAGEMENT = 0,
// Packet is neither flagged IPv4 nor IPv6: next node is "l4_bad_proto".
BAD_PROTO,
EDGE_COUNT,
};

// Per-port edge lookup table for UDP, indexed by the 16-bit destination port.
// Only element 0 is explicitly initialized; the remaining entries are
// zero-initialized, which equals MANAGEMENT only because MANAGEMENT == 0.
static rte_edge_t udp_edges[65536] = {MANAGEMENT};

// Register next_node as the destination for locally delivered packets
// matching (proto, port).
//
// Only IPPROTO_UDP is supported; any other proto aborts. Registering a port
// that already has a non-default edge aborts as well.
//
// NOTE(review): l4_input_local_process() indexes udp_edges[] with the raw
// dst_port of the UDP header (network byte order) and port is used here
// without conversion -- confirm that callers pass it in network byte order.
void l4_input_register_port(uint8_t proto, uint16_t port, const char *next_node) {
	LOG(DEBUG, "l4_input_register_port: proto=%hhu port=%hu-> %s", proto, port, next_node);
	switch (proto) {
	case IPPROTO_UDP:
		if (udp_edges[port] != MANAGEMENT)
			ABORT("next node already registered for udp port=%hu", port);
		// The stored edge is consumed by l4_input_local_process(), which
		// enqueues from the "l4_input_local" node. The edge must therefore
		// be allocated on that node and stored at the slot of the port
		// (not of the protocol number).
		udp_edges[port] = gr_node_attach_parent("l4_input_local", next_node);
		break;
	default:
		ABORT("proto not supported %hhu", proto);
	}
}

// Dispatch locally delivered L4 packets to their next node.
//
// For each mbuf:
//  - packet_type flagged neither IPv4 nor IPv6 -> BAD_PROTO
//  - L4 protocol other than UDP                -> MANAGEMENT
//  - UDP                                       -> udp_edges[] entry selected
//    by the raw dst_port field of the UDP header
//
// Returns the number of objects processed (always nb_objs).
static uint16_t l4_input_local_process(
	struct rte_graph *graph,
	struct rte_node *node,
	void **objs,
	uint16_t nb_objs
) {
	for (uint16_t idx = 0; idx < nb_objs; idx++) {
		struct rte_mbuf *pkt = objs[idx];
		rte_edge_t next;
		uint8_t l4_proto;

		// The L4 protocol lives in family-specific private data, so the
		// address family has to be known before it can be read.
		if (pkt->packet_type & RTE_PTYPE_L3_IPV4) {
			l4_proto = ip_local_mbuf_data(pkt)->proto;
		} else if (pkt->packet_type & RTE_PTYPE_L3_IPV6) {
			l4_proto = ip6_local_mbuf_data(pkt)->proto;
		} else {
			rte_node_enqueue_x1(graph, node, BAD_PROTO, pkt);
			continue;
		}

		if (l4_proto == IPPROTO_UDP) {
			struct rte_udp_hdr *udp = rte_pktmbuf_mtod(pkt, struct rte_udp_hdr *);
			next = udp_edges[udp->dst_port];
		} else {
			next = MANAGEMENT;
		}

		rte_node_enqueue_x1(graph, node, next, pkt);
	}

	return nb_objs;
}

// Hook l4_input_local into local delivery for both address families.
//
// Review note from the PR author: the node must be registered for ip4/udp,
// ip6/udp, ip4/tcp and ip6/tcp. The earlier revision only registered
// ip4/udp and ip6/tcp.
//
// NOTE(review): the per-protocol redirect nodes that this node replaces also
// registered IPPROTO_SCTP -- confirm whether SCTP must be registered here too.
static void l4_input_local_register(void) {
	ip_input_local_add_proto(IPPROTO_UDP, "l4_input_local");
	ip_input_local_add_proto(IPPROTO_TCP, "l4_input_local");
	ip6_input_local_add_proto(IPPROTO_UDP, "l4_input_local");
	ip6_input_local_add_proto(IPPROTO_TCP, "l4_input_local");
}
// Graph node descriptor for "l4_input_local". Packets are handled by
// l4_input_local_process(); the two static edges lead to
// "l4_redirect_loopback" (MANAGEMENT) and "l4_bad_proto" (BAD_PROTO).
static struct rte_node_register input_node = {
.name = "l4_input_local",
.process = l4_input_local_process,
.nb_edges = EDGE_COUNT,
.next_nodes = {
[MANAGEMENT] = "l4_redirect_loopback",
[BAD_PROTO] = "l4_bad_proto",
},
};

// Associates the node descriptor with its registration callback.
// NOTE(review): when register_callback is invoked is decided by the
// GR_NODE_REGISTER machinery, which is not visible here.
static struct gr_node_info info = {
.node = &input_node,
.register_callback = l4_input_local_register,
};

GR_NODE_REGISTER(info);

// Presumably declares the "l4_bad_proto" drop node targeted by the BAD_PROTO
// edge -- confirm against the GR_DROP_REGISTER macro definition.
GR_DROP_REGISTER(l4_bad_proto);
98 changes: 38 additions & 60 deletions modules/l4/l4_redirect_loopback.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
enum edges {
REDIRECT = 0,
NO_IFACE,
BAD_PROTO,
EDGE_COUNT,
};

Expand All @@ -25,89 +26,66 @@ static uint16_t redirect_loopback_process(
void **objs,
uint16_t nb_objs
) {
struct rte_ipv4_hdr *ip;
struct rte_mbuf *mbuf;
struct mbuf_data *d;
int edge = NO_IFACE;
rte_edge_t edge;

for (uint16_t i = 0; i < nb_objs; i++) {
mbuf = objs[i];
edge = REDIRECT;

d = mbuf_data(mbuf);
d->iface = get_vrf_iface(d->iface->vrf_id);
if (d->iface)
edge = REDIRECT;
rte_pktmbuf_prepend(mbuf, sizeof(*ip));
if (!d->iface) {
edge = NO_IFACE;
goto next;
}

if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Relying on RTE_PTYPE_L3_IPV4 is risky (is the tun driver really setting this correctly?).
You'll need something else.

struct ip_local_mbuf_data *d = ip_local_mbuf_data(mbuf);
struct rte_ipv4_hdr *ip;
ip = (struct rte_ipv4_hdr *)rte_pktmbuf_prepend(mbuf, sizeof(*ip));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where do you make sure there is enough headroom?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indeed, will fix it

ip->src_addr = d->src;
ip->dst_addr = d->dst;
ip->total_length = rte_cpu_to_be_16(d->len) + sizeof(*ip);
ip->next_proto_id = d->proto;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some fields don't seem initialized, so you are keeping some stale values from memory.

struct rte_ipv4_hdr {
	union {
		uint8_t            version_ihl;          /*     0     1 */
		struct {
			uint8_t    ihl:4;                /*     0: 0  1 */
			uint8_t    version:4;            /*     0: 4  1 */
		};                                       /*     0     1 */
	};                                               /*     0     1 */
	uint8_t                    type_of_service;      /*     1     1 */
	rte_be16_t                 total_length;         /*     2     2 */
	rte_be16_t                 packet_id;            /*     4     2 */
	rte_be16_t                 fragment_offset;      /*     6     2 */
	uint8_t                    time_to_live;         /*     8     1 */
	uint8_t                    next_proto_id;        /*     9     1 */
	rte_be16_t                 hdr_checksum;         /*    10     2 */
	rte_be32_t                 src_addr;             /*    12     4 */
	rte_be32_t                 dst_addr;             /*    16     4 */

	/* size: 20, cachelines: 1, members: 10 */
	/* last cacheline: 20 bytes */
} __attribute__((__aligned__(2)));

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will also fix for ipv6 hdr.

} else if (mbuf->packet_type & RTE_PTYPE_L3_IPV6) {
struct ip6_local_mbuf_data *d = ip6_local_mbuf_data(mbuf);
struct rte_ipv6_hdr *ip;
ip = (struct rte_ipv6_hdr *)rte_pktmbuf_prepend(mbuf, sizeof(*ip));
ip->src_addr = d->src;
ip->dst_addr = d->dst;
ip->payload_len = rte_cpu_to_be_16(d->len);
ip->hop_limits = d->hop_limit;
ip->proto = d->proto;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Idem, not initialized fields.

} else {
edge = BAD_PROTO;
}

next:
if (gr_mbuf_is_traced(mbuf)) {
gr_mbuf_trace_add(mbuf, node, 0);
}
rte_node_enqueue_x1(graph, node, edge, mbuf);
}

rte_node_enqueue(graph, node, edge, objs, nb_objs);
return nb_objs;
}

static struct rte_node_register tcp_redirect_loopback_node = {
.name = "tcp_redirect_loopback",
.process = redirect_loopback_process,
.nb_edges = EDGE_COUNT,
.next_nodes = {
[REDIRECT] = "loopback_output",
[NO_IFACE] = "no_loop_iface",
},
};

static struct rte_node_register udp_redirect_loopback_node = {
.name = "udp_redirect_loopback",
.process = redirect_loopback_process,
.nb_edges = EDGE_COUNT,
.next_nodes = {
[REDIRECT] = "loopback_output",
[NO_IFACE] = "no_loop_iface",
},
};

static struct rte_node_register sctp_redirect_loopback_node = {
.name = "sctp_redirect_loopback",
static struct rte_node_register redirect_loopback_node = {
.name = "l4_redirect_loopback",
.process = redirect_loopback_process,
.nb_edges = EDGE_COUNT,
.next_nodes = {
[REDIRECT] = "loopback_output",
[NO_IFACE] = "no_loop_iface",
[BAD_PROTO] = "l4_bad_proto",
},
};

static void tcp_redirect_loopback_register(void) {
ip_input_local_add_proto(IPPROTO_TCP, "tcp_redirect_loopback");
ip6_input_local_add_proto(IPPROTO_TCP, "tcp_redirect_loopback");
}

static void udp_redirect_loopback_register(void) {
ip_input_local_add_proto(IPPROTO_UDP, "udp_redirect_loopback");
ip6_input_local_add_proto(IPPROTO_UDP, "udp_redirect_loopback");
}

static void sctp_redirect_loopback_register(void) {
ip_input_local_add_proto(IPPROTO_SCTP, "sctp_redirect_loopback");
ip6_input_local_add_proto(IPPROTO_SCTP, "sctp_redirect_loopback");
}

static struct gr_node_info info_tcp_redirect = {
.node = &tcp_redirect_loopback_node,
.register_callback = tcp_redirect_loopback_register,
};

static struct gr_node_info info_udp_redirect = {
.node = &udp_redirect_loopback_node,
.register_callback = udp_redirect_loopback_register,
};

static struct gr_node_info info_sctp_redirect = {
.node = &sctp_redirect_loopback_node,
.register_callback = sctp_redirect_loopback_register,
static struct gr_node_info info = {
.node = &redirect_loopback_node,
};

GR_NODE_REGISTER(info_tcp_redirect);
GR_NODE_REGISTER(info_udp_redirect);
GR_NODE_REGISTER(info_sctp_redirect);
GR_NODE_REGISTER(info);

GR_DROP_REGISTER(no_loop_iface);
1 change: 1 addition & 0 deletions modules/l4/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) 2024 Christophe Fontaine

src += files(
'l4_input_local.c',
'l4_redirect_loopback.c',
)
inc += include_directories('.')
Loading