selftests/bpf: Add bpf_burst scheduler & test
This patch implements the burst BPF MPTCP scheduler, named bpf_burst,
modeled after the default in-kernel scheduler in protocol.c.
bpf_burst_get_send() uses the same logic as mptcp_subflow_get_send()
and bpf_burst_get_retrans() uses the same logic as
mptcp_subflow_get_retrans().

Use the MPTCP_SCHED_TEST macro to add a new test for this bpf_burst
scheduler; the arguments "1 1" mean data has been sent on both net
devices. Run this test with the RUN_MPTCP_TEST macro.

Signed-off-by: Geliang Tang <[email protected]>
Reviewed-by: Mat Martineau <[email protected]>
Reviewed-by: Matthieu Baerts (NGI0) <[email protected]>
Geliang Tang authored and matttbe committed May 15, 2024
1 parent f09219e commit c8558ee
Showing 2 changed files with 217 additions and 0 deletions.
15 changes: 15 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -14,6 +14,7 @@
#include "mptcp_bpf_bkup.skel.h"
#include "mptcp_bpf_rr.skel.h"
#include "mptcp_bpf_red.skel.h"
#include "mptcp_bpf_burst.skel.h"

#define NS_TEST "mptcp_ns"
#define ADDR_1 "10.0.1.1"
@@ -607,6 +608,18 @@ static void test_red(void)
        mptcp_bpf_red__destroy(skel);
}

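/* data is expected on both net devices: WITH_DATA, WITH_DATA (the "1 1" case from the commit message) */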
static void test_burst(void)
{
        struct mptcp_bpf_burst *skel;

        skel = mptcp_bpf_burst__open_and_load();
        if (!ASSERT_OK_PTR(skel, "open_and_load: burst"))
                return;

        test_bpf_sched(skel->obj, "burst", WITH_DATA, WITH_DATA);
        mptcp_bpf_burst__destroy(skel);
}

void test_mptcp(void)
{
        if (test__start_subtest("base"))
@@ -625,4 +638,6 @@ void test_mptcp(void)
                test_rr();
        if (test__start_subtest("red"))
                test_red();
        if (test__start_subtest("burst"))
                test_burst();
}
202 changes: 202 additions & 0 deletions tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c
@@ -0,0 +1,202 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023, SUSE. */

#include "mptcp_bpf.h"
#include <bpf/bpf_tracing.h>
#include <limits.h>

char _license[] SEC("license") = "GPL";

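/* 64 KiB minus TCP/IPv6 headers and option space; matches the in-kernel MPTCP_SEND_BURST_SIZE */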
#define MPTCP_SEND_BURST_SIZE 65428

#define min(a, b) ((a) < (b) ? (a) : (b))

struct bpf_subflow_send_info {
        __u8 subflow_id;
        __u64 linger_time;
};

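/* in-kernel MPTCP/TCP helpers exposed to BPF as ksyms */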
extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) __ksym;
extern void mptcp_set_timeout(struct sock *sk) __ksym;
extern __u64 mptcp_wnd_end(const struct mptcp_sock *msk) __ksym;
extern bool tcp_stream_memory_free(const struct sock *sk, int wake) __ksym;
extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym;
extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym;

#define SSK_MODE_ACTIVE 0
#define SSK_MODE_BACKUP 1
#define SSK_MODE_MAX 2

static __always_inline __u64 div_u64(__u64 dividend, __u32 divisor)
{
        return dividend / divisor;
}

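/* the write queue is empty when everything written (write_seq) has been pushed to the network (snd_nxt) */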
static __always_inline bool tcp_write_queue_empty(struct sock *sk)
{
        const struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk);

        return tp ? tp->write_seq == tp->snd_nxt : true;
}

static __always_inline bool tcp_rtx_and_write_queues_empty(struct sock *sk)
{
        return bpf_mptcp_subflow_queues_empty(sk) && tcp_write_queue_empty(sk);
}

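/* BPF reimplementation of the in-kernel sk_stream_memory_free() checks */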
static __always_inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
        if (sk->sk_wmem_queued >= sk->sk_sndbuf)
                return false;

        return tcp_stream_memory_free(sk, wake);
}

static __always_inline bool sk_stream_memory_free(const struct sock *sk)
{
        return __sk_stream_memory_free(sk, 0);
}

SEC("struct_ops")
void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk)
{
}

SEC("struct_ops")
void BPF_PROG(mptcp_sched_burst_release, struct mptcp_sock *msk)
{
}

static int bpf_burst_get_send(struct mptcp_sock *msk,
                              struct mptcp_sched_data *data)
{
        struct bpf_subflow_send_info send_info[SSK_MODE_MAX];
        struct mptcp_subflow_context *subflow;
        struct sock *sk = (struct sock *)msk;
        __u32 pace, burst, wmem;
        __u64 linger_time;
        struct sock *ssk;
        int i;

        /* pick the subflow with the lower wmem/wspace ratio */
        for (i = 0; i < SSK_MODE_MAX; ++i) {
                send_info[i].subflow_id = MPTCP_SUBFLOWS_MAX;
                send_info[i].linger_time = -1;
        }

        for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
                subflow = bpf_mptcp_subflow_ctx_by_pos(data, i);
                if (!subflow)
                        break;

                ssk = mptcp_subflow_tcp_sock(subflow);
                if (!mptcp_subflow_active(subflow))
                        continue;

                pace = subflow->avg_pacing_rate;
                if (!pace) {
                        /* init pacing rate from socket */
                        subflow->avg_pacing_rate = ssk->sk_pacing_rate;
                        pace = subflow->avg_pacing_rate;
                        if (!pace)
                                continue;
                }

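                /* linger_time = queued bytes / pacing rate, scaled by 2^32 to stay in integer math */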
                linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace);
                if (linger_time < send_info[subflow->backup].linger_time) {
                        send_info[subflow->backup].subflow_id = i;
                        send_info[subflow->backup].linger_time = linger_time;
                }
        }
        mptcp_set_timeout(sk);

        /* pick the best backup if no other subflow is active */
        if (send_info[SSK_MODE_ACTIVE].subflow_id == MPTCP_SUBFLOWS_MAX)
                send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id;

        subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id);
        if (!subflow)
                return -1;
        ssk = mptcp_subflow_tcp_sock(subflow);
        if (!ssk || !sk_stream_memory_free(ssk))
                return -1;

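        /* cap the burst by both MPTCP_SEND_BURST_SIZE and the space left in the send window */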
        burst = min(MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
        wmem = ssk->sk_wmem_queued;
        if (!burst)
                goto out;

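        /* blend the cached pacing rate with the current TCP rate, weighted by queued bytes vs. burst size */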
        subflow->avg_pacing_rate = div_u64((__u64)subflow->avg_pacing_rate * wmem +
                                           ssk->sk_pacing_rate * burst,
                                           burst + wmem);
        msk->snd_burst = burst;

out:
        mptcp_subflow_set_scheduled(subflow, true);
        return 0;
}

static int bpf_burst_get_retrans(struct mptcp_sock *msk,
                                 struct mptcp_sched_data *data)
{
        int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, subflow_id;
        struct mptcp_subflow_context *subflow;
        int min_stale_count = INT_MAX;
        struct sock *ssk;

        for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
                subflow = bpf_mptcp_subflow_ctx_by_pos(data, i);
                if (!subflow)
                        break;

                if (!mptcp_subflow_active(subflow))
                        continue;

                ssk = mptcp_subflow_tcp_sock(subflow);
                /* still data outstanding at TCP level? skip this */
                if (!tcp_rtx_and_write_queues_empty(ssk)) {
                        mptcp_pm_subflow_chk_stale(msk, ssk);
                        min_stale_count = min(min_stale_count, subflow->stale_count);
                        continue;
                }

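                /* remember the first backup subflow, but keep looking for an active one */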
                if (subflow->backup) {
                        if (backup == MPTCP_SUBFLOWS_MAX)
                                backup = i;
                        continue;
                }

                if (pick == MPTCP_SUBFLOWS_MAX)
                        pick = i;
        }

        if (pick < MPTCP_SUBFLOWS_MAX) {
                subflow_id = pick;
                goto out;
        }
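        /* no active subflow is usable: retransmit on a backup only if every busy subflow already looks stale */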
        subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX;

out:
        subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id);
        if (!subflow)
                return -1;
        mptcp_subflow_set_scheduled(subflow, true);
        return 0;
}

SEC("struct_ops")
int BPF_PROG(bpf_burst_get_subflow, struct mptcp_sock *msk,
             struct mptcp_sched_data *data)
{
        if (data->reinject)
                return bpf_burst_get_retrans(msk, data);
        return bpf_burst_get_send(msk, data);
}

SEC(".struct_ops")
struct mptcp_sched_ops burst = {
        .init           = (void *)mptcp_sched_burst_init,
        .release        = (void *)mptcp_sched_burst_release,
        .get_subflow    = (void *)bpf_burst_get_subflow,
        .name           = "bpf_burst",
};
