Skip to content

Commit

Permalink
feat: initial pest grammar & code
Browse files Browse the repository at this point in the history
  • Loading branch information
desbma-s1n committed May 27, 2024
1 parent 91f1634 commit a5db4a1
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ lazy_static = { version = "1.4.0", default-features = false }
log = { version = "0.4.19", default-features = false, features = ["max_level_trace", "release_max_level_info"] }
nix = { version = "0.26.2", default-features = false, features = ["fs"] }
pest = { version = "2.7.10", default-features = false, features = ["std", "memchr"], optional = true }
# `grammar-extras` enables the `#tag = rule` node-tag syntax used by the pest grammar.
pest_derive = { version = "2.7.10", default-features = false, features = ["std", "grammar-extras"], optional = true}
rand = { version = "0.8.5", default-features = false, features = ["std", "std_rng"] }
regex = { version = "1.9.1", default-features = false, features = ["std", "perf"] }
serde = { version = "1.0.193", default-features = false, features = ["std", "derive"] }
Expand Down
32 changes: 27 additions & 5 deletions src/strace/parser/peg.pest
Original file line number Diff line number Diff line change
@@ -1,13 +1,35 @@
// "382944 0.000054 mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f52a332e000"

// Top-level rule: one complete strace line, laid out as
// `<pid> <relative timestamp> <name>(<arguments>) = <return value>`.
// Anchored with SOI/EOI so the whole line must match.
syscall_line = { SOI ~ pid ~ " "+ ~ rel_ts ~ " " ~ name ~ arguments ~ " = " ~ ret_val ~ EOI }

// Process id column: one or more decimal digits.
pid = { ASCII_DIGIT+ }

// Decimal number with a mandatory fractional part (digits, ".", digits).
timestamp = { ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
// Relative timestamp column (e.g. `0.000054`): digits, ".", digits.
rel_ts = { ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }

// Syscall name: one or more lowercase letters, digits or underscores, in any order
// (e.g. `mmap`). The characters are alternatives (`|`), not a fixed sequence (`~`).
name = { (ASCII_ALPHA_LOWER | ASCII_DIGIT | "_")+ }

// Parenthesised argument list: zero or more `argument`s separated by ", ".
// Each argument is parsed into its own child pair instead of being swallowed as raw text.
arguments = { "(" ~ (argument ~ (", " ~ argument)*)? ~ ")" }

// Matches one or more characters of any kind, i.e. the rest of the input verbatim.
ret = { ANY+ }
// A single syscall argument: either an integer expression or a quoted buffer.
// The `#expr_int` / `#buf` node tags let the Rust side tell which alternative matched.
argument = { #expr_int = expression_int | #buf = buffer }

// Return value: an integer literal, optionally followed by a space and arbitrary trailing text.
ret_val = { expression_int_literal ~ (" " ~ ANY*)? }


// Integer-valued expression. Alternatives are tried in order (PEG ordered choice):
// an OR-chain first, then a literal, then a lone named constant. Each alternative is tagged
// (`#or` / `#lit` / `#named`) so the Rust side can dispatch on the node tag.
expression_int = { #or = expression_int_or | #lit = expression_int_literal | #named = expression_named_constant }
// Integer literal; the hexadecimal form is tried before the decimal one.
expression_int_literal = { #hex = literal_int_hex | #int = literal_int }
// A named constant followed by one or more `|`-separated integer expressions,
// e.g. `PROT_READ|PROT_WRITE`. The right-hand side recurses into `expression_int`.
expression_int_or = { expression_named_constant ~ ("|" ~ expression_int)+ }
// One or more uppercase letters, digits or underscores.
// NOTE(review): this also accepts digit-only text such as `8192` — confirm that is intended
// when it appears as the first operand of `expression_int_or`.
expression_named_constant = { (ASCII_ALPHA_UPPER | ASCII_DIGIT | "_")+ }

// Hexadecimal literal: `0x` followed by one or more hex digits.
literal_int_hex = { "0x" ~ ASCII_HEX_DIGIT+ }
// Decimal literal with an optional leading minus sign.
literal_int = { "-"? ~ ASCII_DIGIT+ }

// Double-quoted buffer: zero or more `buffer_byte`s between two `"` characters.
// NOTE(review): the first `"` after the opening quote always ends the buffer; backslash-escaped
// quotes are not special-cased here — confirm against the strace output being parsed.
buffer = { "\"" ~ buffer_byte* ~ "\"" }
// Any single character except `"`.
buffer_byte = { !"\"" ~ ANY }
121 changes: 117 additions & 4 deletions src/strace/parser/peg.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
//! PEG based strace output parser
use pest::iterators::Pair;
use pest::Parser as _;

use crate::strace::Syscall;
use crate::strace::{BufferType, IntegerExpression, Syscall, SyscallArg};

use super::ParseResult;

Expand All @@ -11,7 +12,119 @@ use super::ParseResult;
struct PegParser;

pub fn parse_line(line: &str, unfinished_syscalls: &[Syscall]) -> anyhow::Result<ParseResult> {
let res = PegParser::parse(Rule::syscall_line, line);
dbg!(&res);
todo!();
let pair = match PegParser::parse(Rule::syscall_line, line) {
Err(_) => return Ok(ParseResult::IgnoredLine),
Ok(mut p) => p.next().unwrap(),
};
dbg!(&pair);
Ok(ParseResult::Syscall(pair.into()))
}

/// Walks `levels` steps down the parse tree, following the first child at each step.
///
/// Returns the pair reached after `levels` descents (`pair` itself when `levels` is 0), or
/// `None` as soon as a pair on the way has no children.
fn pair_descend(pair: Pair<'_, Rule>, levels: usize) -> Option<Pair<'_, Rule>> {
    // Each fold step replaces the current pair by its first inner pair; `try_fold` stops at
    // the first `None`.
    (0..levels).try_fold(pair, |current, _| current.into_inner().next())
}

/// Converts an integer literal pair into a numeric value.
///
/// The pair must carry the `hex` node tag (text of the form `0x…`, parsed as base 16) or the
/// `int` node tag (parsed as decimal). Returns `None` when the text cannot be parsed into an
/// `i128` (or, for `hex`, lacks the `0x` prefix).
///
/// # Panics
///
/// Panics if the pair has any other node tag, or none.
fn int_literal_pair_val<T>(pair: Pair<Rule>) -> Option<T>
where
    T: From<i128>,
{
    let text = pair.as_str();
    let parsed = match pair.as_node_tag() {
        Some("int") => text.parse::<i128>().ok(),
        Some("hex") => {
            let digits = text.strip_prefix("0x")?;
            i128::from_str_radix(digits, 16).ok()
        }
        _ => unreachable!(),
    };
    parsed.map(T::from)
}

/// Collects the raw bytes of a buffer pair.
///
/// Concatenates the UTF-8 bytes of every inner pair's text, in order. Always returns `Some`.
fn buf_val(pair: Pair<Rule>) -> Option<Vec<u8>> {
    let mut bytes = Vec::new();
    for byte_pair in pair.into_inner() {
        bytes.extend_from_slice(byte_pair.as_str().as_bytes());
    }
    Some(bytes)
}

/// Builds an [`IntegerExpression`] from a pair tagged `lit`, `named` or `or`.
///
/// # Panics
///
/// Panics if the pair carries any other node tag, or if the parse tree does not have the
/// expected children.
impl From<Pair<'_, Rule>> for IntegerExpression {
    fn from(pair: Pair<Rule>) -> Self {
        match pair.as_node_tag() {
            Some("named") => Self::NamedConst(pair.as_str().to_owned()),
            Some("lit") => {
                // The literal pair wraps the actual hex/decimal pair one level down.
                let literal = pair_descend(pair, 1).unwrap();
                Self::Literal(int_literal_pair_val(literal).unwrap())
            }
            Some("or") => {
                let mut operands = pair.into_inner();
                let mut flattened = Vec::with_capacity(operands.len());
                // The first operand is always a named constant.
                let head = operands.next().unwrap();
                flattened.push(Self::NamedConst(head.as_str().to_owned()));
                for operand in operands {
                    // Nested OR expressions are spliced in so the result is a single flat list.
                    match Self::from(pair_descend(operand, 1).unwrap()) {
                        Self::BinaryOr(nested) => flattened.extend(nested),
                        other => flattened.push(other),
                    }
                }
                Self::BinaryOr(flattened)
            }
            _ => unreachable!("{pair:?}"),
        }
    }
}

impl From<Pair<'_, Rule>> for Syscall {
fn from(pair: Pair<Rule>) -> Self {
let mut subpairs = pair.into_inner();
// Note if the grammar is correct, we should *never* panic below
let pid = subpairs.next().unwrap().as_str().parse().unwrap();
let rel_ts = subpairs.next().unwrap().as_str().parse().unwrap();
let name = subpairs.next().unwrap().as_str().to_owned();
let args = subpairs
.next()
.unwrap()
.into_inner()
.map(|p| {
let pair = pair_descend(p, 1).unwrap();
match pair.as_node_tag() {
Some("expr_int") => SyscallArg::Integer {
value: pair_descend(pair, 1).unwrap().into(),
metadata: None,
},
Some("buf") => SyscallArg::Buffer {
value: buf_val(pair).unwrap(),
type_: BufferType::Unknown,
},
_ => unreachable!("{pair:?}"),
}
})
.collect();
let ret_val_pair = pair_descend(subpairs.next().unwrap(), 2).unwrap();
let ret_val = int_literal_pair_val(ret_val_pair).unwrap();
Syscall {
pid,
rel_ts,
name,
args,
ret_val,
}
}
}

0 comments on commit a5db4a1

Please sign in to comment.