-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Custom Scheduling * Capturing cgroup stats on per invoke level * BPFLibrary to create a pinned BPFMap of function characteristics * used to share data with scx scheduler
- Loading branch information
1 parent
ba8fb72
commit ee4014c
Showing
77 changed files
with
4,197 additions
and
93 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,4 +4,5 @@ __pycache__/*.lock | |
**.lock | ||
**__pycache__** | ||
.idea/ | ||
.vscode/ | ||
.vscode/ | ||
**.swp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
[package] | ||
name = "fs_policy_tsksz" | ||
version = "0.0.3" | ||
authors = ["Abdul Rehman <[email protected]>"] | ||
edition = "2021" | ||
description = "A simple scheduler that preserves locality for a function cgroup" | ||
license = "GPL-2.0-only" | ||
|
||
[dependencies] | ||
anyhow = "1.0.65" | ||
plain = "0.2.3" | ||
ctrlc = { version = "3.1", features = ["termination"] } | ||
libbpf-rs = "0.24.1" | ||
libc = "0.2.137" | ||
scx_utils = { version = "1.0.3" } | ||
scx_rustland_core = { version = "1.0.3" } | ||
|
||
# Specific to iluvatar | ||
iluvatar_library = { path = "../iluvatar_library" } | ||
iluvatar_worker_library = { path = "../iluvatar_worker_library" } | ||
clap = { version = "4.5.4", features = ["derive"] } | ||
ipc-channel = { version = "0.18.1", features = ["memfd"] } | ||
serde = { version = "1.0" } | ||
|
||
[build-dependencies] | ||
scx_utils = { version = "1.0.3" } | ||
scx_rustland_core = { version = "1.0.3" } | ||
|
||
[features] | ||
enable_backtrace = [] | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../LICENSE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# scx_rlfifo | ||
|
||
This is a single user-defined scheduler used within [sched_ext](https://github.com/sched-ext/scx/tree/main), a Linux kernel feature that enables implementing kernel thread schedulers in BPF and loading them dynamically. [Read more about sched_ext](https://github.com/sched-ext/scx/tree/main).
|
||
## Overview | ||
|
||
scx_rlfifo is a simple FIFO scheduler that runs in user-space, based on the
scx_rustland_core framework.
|
||
## Typical Use Case | ||
|
||
This scheduler is provided as a simple template that can be used as a baseline | ||
to test more complex scheduling policies. | ||
|
||
## Production Ready? | ||
|
||
Definitely not. Using this scheduler in a production environment is not | ||
recommended, unless there are specific requirements that necessitate a basic | ||
FIFO scheduling approach. Even then, it's still recommended to use the kernel's | ||
SCHED_FIFO real-time class. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
// This software may be used and distributed according to the terms of the | ||
// GNU General Public License version 2. | ||
|
||
fn main() { | ||
scx_utils::BpfBuilder::new() | ||
.unwrap() | ||
.enable_intf("src/bpf/intf.h", "bpf_intf.rs") | ||
.enable_skel("src/bpf/main.bpf.c", "bpf") | ||
.build() | ||
.unwrap(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Dependencies of the cargo target declared below. When `serialize` is set, the
# list additionally contains `sched` as it was defined before this file
# reassigns it.
# NOTE(review): presumably that is the previously declared scheduler target, so
# that cargo builds run one after another — confirm against the parent build file.
if serialize
  sched_deps = [libbpf, bpftool_target, sched]
else
  sched_deps = [libbpf, bpftool_target]
endif

# Runs `cargo build` on the Cargo.toml next to this file. The declared output is
# only a phony marker, and `build_always_stale: true` makes the command run on
# every build.
sched = custom_target('scx_rlfifo',
              output: '@PLAINNAME@.__PHONY__',
              input: 'Cargo.toml',
              command: [cargo, 'build', '--manifest-path=@INPUT@', '--target-dir=@OUTDIR@',
                        cargo_build_args],
              env: cargo_env,
              depends: sched_deps,
              build_always_stale: true)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Get help on options with `rustfmt --help=config` | ||
# Please keep these in alphabetical order. | ||
edition = "2021" | ||
group_imports = "StdExternalCrate" | ||
imports_granularity = "Item" | ||
merge_derives = false | ||
use_field_init_shorthand = true | ||
version = "Two" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,219 @@ | ||
// Copyright (c) Andrea Righi <[email protected]> | ||
|
||
// This software may be used and distributed according to the terms of the | ||
// GNU General Public License version 2. | ||
|
||
use std::mem::MaybeUninit; | ||
|
||
use crate::bpf_intf; | ||
use crate::bpf_skel::*; | ||
|
||
use std::io::Read; | ||
|
||
use anyhow::Context; | ||
use anyhow::Result; | ||
|
||
use libbpf_rs::skel::OpenSkel; | ||
use libbpf_rs::skel::Skel; | ||
use libbpf_rs::skel::SkelBuilder; | ||
use libbpf_rs::OpenObject; | ||
|
||
use libc::{pthread_self, pthread_setschedparam, sched_param}; | ||
|
||
#[cfg(target_env = "musl")] | ||
use libc::timespec; | ||
|
||
use scx_utils::scx_ops_attach; | ||
use scx_utils::scx_ops_load; | ||
use scx_utils::scx_ops_open; | ||
use scx_utils::uei_exited; | ||
use scx_utils::uei_report; | ||
use scx_utils::UserExitInfo; | ||
|
||
use scx_rustland_core::ALLOCATOR; | ||
|
||
// Scheduling policy number of the sched_ext class, as defined in UAPI. It is the
// policy passed to pthread_setschedparam() when the scheduler moves its own
// thread to SCHED_EXT.
const SCHED_EXT: i32 = 7;
|
||
pub struct BpfScheduler<'cb> { | ||
pub skel: BpfSkel<'cb>, // Low-level BPF connector | ||
struct_ops: Option<libbpf_rs::Link>, // Low-level BPF methods | ||
queued_pids: libbpf_rs::RingBuffer<'cb>, // ring buffer of tasks pids to be switched to schedext | ||
queued_stats: libbpf_rs::RingBuffer<'cb>, // ring buffer of tasks pids to be switched to schedext | ||
} | ||
|
||
#[derive(Clone, Copy, Debug)] | ||
pub struct lpolicy_stats(bpf_intf::policy_stats); | ||
|
||
/// Generates the staging area and the ring-buffer callback for one kind of
/// sample.
///
/// Parameters:
/// * `$bufname`  - name of the generated `usize` constant holding
///   `size_of::<$tdst>()`.
/// * `$abufname` - name of the generated 8-byte aligned byte-array wrapper type.
/// * `$abuf`     - name of the generated `static mut` staging buffer.
/// * `$callback` - name of the generated callback `fn(&[u8]) -> i32`.
/// * `$tdst`     - the type whose size determines the buffer length.
///
/// The callback copies exactly one sample into the staging buffer and returns
/// `LIBBPF_STOP` (resolved at the invocation site). A sample whose length
/// differs from the buffer size is rejected with a different negative value
/// instead of panicking inside the callback, and leaves the buffer untouched.
macro_rules! define_buffer {
    ( $bufname: ident, $abufname: ident, $abuf: ident, $callback: ident, $tdst: ty ) => {
        const $bufname: usize = std::mem::size_of::<$tdst>();

        #[repr(align(8))]
        struct $abufname([u8; $bufname]);

        static mut $abuf: $abufname = $abufname([0; $bufname]);

        fn $callback(data: &[u8]) -> i32 {
            // Negated EINVAL; any negative value other than LIBBPF_STOP is
            // reported as an error by the dequeue methods.
            const SIZE_MISMATCH: i32 = -22;

            if data.len() != $bufname {
                return SIZE_MISMATCH;
            }
            // SAFETY: the length was checked above, so the copy stays within
            // the staging buffer. The buffer is reached through a raw pointer
            // so that no reference to the `static mut` itself is created.
            // NOTE(review): assumes the callback is never run concurrently
            // with a reader of the staging buffer — confirm.
            unsafe {
                let dst: &mut [u8; $bufname] = &mut (*std::ptr::addr_of_mut!($abuf)).0;
                dst.copy_from_slice(data);
            }
            LIBBPF_STOP
        }
    };
}
|
||
define_buffer!(BUFSIZE_PID, AlignedBufferPid, BUF_PID, callback_pid, i32); | ||
fn fetch_pid(bytes: &[u8]) -> i32 { | ||
let ps = unsafe { *(bytes.as_ptr() as *const bpf_intf::packet_pid) }; | ||
ps.pid | ||
} | ||
|
||
define_buffer!( | ||
BUFSIZE_STATS, | ||
AlignedBufferstats, | ||
BUF_STATS, | ||
callback_stats, | ||
bpf_intf::policy_stats | ||
); | ||
fn fetch_stats(bytes: &[u8]) -> lpolicy_stats { | ||
let ps = unsafe { *(bytes.as_ptr() as *const bpf_intf::policy_stats) }; | ||
lpolicy_stats(ps.clone()) | ||
} | ||
|
||
// Special negative error code for libbpf to stop after consuming just one item from a BPF
// ring buffer. The callbacks generated by `define_buffer!` return it, and the dequeue
// methods match on it to recognize that one sample has been copied out.
const LIBBPF_STOP: i32 = -255;
|
||
impl<'cb> BpfScheduler<'cb> { | ||
pub fn init( | ||
open_object: &'cb mut MaybeUninit<OpenObject>, | ||
slice_us: u64, | ||
exit_dump_len: u32, | ||
verbose: bool, | ||
) -> Result<Self> { | ||
// Open the BPF prog first for verification. | ||
let mut skel_builder = BpfSkelBuilder::default(); | ||
skel_builder.obj_builder.debug(verbose); | ||
let mut skel = scx_ops_open!(skel_builder, open_object, tsksz_ops)?; | ||
|
||
// Lock all the memory to prevent page faults that could trigger potential deadlocks during | ||
// scheduling. | ||
ALLOCATOR.lock_memory(); | ||
|
||
skel.struct_ops.tsksz_ops_mut().exit_dump_len = exit_dump_len; | ||
skel.maps.bss_data.usersched_pid = std::process::id(); | ||
skel.maps.rodata_data.effective_slice_ns = slice_us * 1000; | ||
|
||
let path = "/sys/fs/bpf/func_metadata"; | ||
let func_metadata = &mut skel.maps.func_metadata; | ||
assert!(func_metadata.reuse_pinned_map("/asdf").is_err()); | ||
func_metadata | ||
.reuse_pinned_map(path) | ||
.expect("failed to reuse map"); | ||
|
||
// Attach BPF scheduler. | ||
let mut skel = scx_ops_load!(skel, tsksz_ops, uei)?; | ||
let struct_ops = Some(scx_ops_attach!(skel, tsksz_ops)?); | ||
|
||
// Build the ring buffer of queued tasks. | ||
let rb_map = &mut skel.maps.queued_pids; | ||
let mut builder = libbpf_rs::RingBufferBuilder::new(); | ||
builder.add(rb_map, callback_pid).unwrap(); | ||
let queued_pids = builder.build().unwrap(); | ||
|
||
// Build the ring buffer of queued tasks. | ||
let rb_map = &mut skel.maps.queued_stats; | ||
let mut builder = libbpf_rs::RingBufferBuilder::new(); | ||
builder.add(rb_map, callback_stats).unwrap(); | ||
let queued_stats = builder.build().unwrap(); | ||
|
||
// Make sure to use the SCHED_EXT class at least for the scheduler itself. | ||
match Self::use_sched_ext() { | ||
0 => Ok(Self { | ||
skel, | ||
struct_ops, | ||
queued_pids, | ||
queued_stats, | ||
}), | ||
err => Err(anyhow::Error::msg(format!( | ||
"sched_setscheduler error: {}", | ||
err | ||
))), | ||
} | ||
} | ||
|
||
// Receive a task pid from the BPF scheduler to switch to schedext policy. | ||
pub fn dequeue_pid(&mut self) -> Result<Option<i32>, i32> { | ||
match self.queued_pids.consume_raw() { | ||
0 => Ok(None), | ||
LIBBPF_STOP => { | ||
// A valid pid is received, convert data to a proper pid. | ||
let pid = unsafe { fetch_pid(&BUF_PID.0) }; | ||
Ok(Some(pid)) | ||
} | ||
res if res < 0 => Err(res), | ||
res => panic!( | ||
"Unexpected return value from libbpf-rs::consume_raw(): {}", | ||
res | ||
), | ||
} | ||
} | ||
|
||
// Receive stats from the BPF scheduler to switch to schedext policy. | ||
pub fn dequeue_stats(&mut self) -> Result<Option<lpolicy_stats>, i32> { | ||
match self.queued_stats.consume_raw() { | ||
0 => Ok(None), | ||
LIBBPF_STOP => { | ||
// A valid pid is received, convert data to a proper pid. | ||
let stats = unsafe { fetch_stats(&BUF_STATS.0) }; | ||
Ok(Some(stats)) | ||
} | ||
res if res < 0 => Err(res), | ||
res => panic!( | ||
"Unexpected return value from libbpf-rs::consume_raw(): {}", | ||
res | ||
), | ||
} | ||
} | ||
|
||
// Set scheduling class for the scheduler itself to SCHED_EXT | ||
fn use_sched_ext() -> i32 { | ||
#[cfg(target_env = "gnu")] | ||
let param: sched_param = sched_param { sched_priority: 0 }; | ||
#[cfg(target_env = "musl")] | ||
let param: sched_param = sched_param { | ||
sched_priority: 0, | ||
sched_ss_low_priority: 0, | ||
sched_ss_repl_period: timespec { | ||
tv_sec: 0, | ||
tv_nsec: 0, | ||
}, | ||
sched_ss_init_budget: timespec { | ||
tv_sec: 0, | ||
tv_nsec: 0, | ||
}, | ||
sched_ss_max_repl: 0, | ||
}; | ||
|
||
unsafe { pthread_setschedparam(pthread_self(), SCHED_EXT, ¶m as *const sched_param) } | ||
} | ||
|
||
// Read exit code from the BPF part. | ||
pub fn exited(&mut self) -> bool { | ||
uei_exited!(&self.skel, uei) | ||
} | ||
|
||
// Called on exit to shutdown and report exit message from the BPF part. | ||
pub fn shutdown_and_report(&mut self) -> Result<UserExitInfo> { | ||
self.struct_ops.take(); | ||
uei_report!(&self.skel, uei) | ||
} | ||
} | ||
|
||
// Disconnect the low-level BPF scheduler. | ||
impl<'a> Drop for BpfScheduler<'a> { | ||
fn drop(&mut self) { | ||
if let Some(struct_ops) = self.struct_ops.take() { | ||
drop(struct_ops); | ||
} | ||
ALLOCATOR.unlock_memory(); | ||
} | ||
} |
Oops, something went wrong.