From b0710fa2da5020492422cbe553457a4682a3a548 Mon Sep 17 00:00:00 2001 From: Nathan Date: Thu, 7 Nov 2024 02:09:02 -0700 Subject: [PATCH] Use Arson for DTA parsing --- crates/dtacheck/Cargo.toml | 3 +- crates/dtacheck/src/lexer.rs | 97 -------- crates/dtacheck/src/lib.rs | 2 - crates/dtacheck/src/linter.rs | 130 ++++------- crates/dtacheck/src/main.rs | 13 +- crates/dtacheck/src/parser.rs | 405 ---------------------------------- 6 files changed, 53 insertions(+), 597 deletions(-) delete mode 100644 crates/dtacheck/src/lexer.rs delete mode 100644 crates/dtacheck/src/parser.rs diff --git a/crates/dtacheck/Cargo.toml b/crates/dtacheck/Cargo.toml index 7eddcf9..9824f9a 100644 --- a/crates/dtacheck/Cargo.toml +++ b/crates/dtacheck/Cargo.toml @@ -7,7 +7,6 @@ publish = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +arson = { git = "https://github.com/hmxmilohax/arson", version = "0.1.0", rev = "c0c102c86f3734d41623b2fd4bfc78bf74302500" } clap = { version = "4.4.12", features = ["derive"] } codespan-reporting = "0.11.1" -derive_more = "0.99.17" -logos = "0.13.0" diff --git a/crates/dtacheck/src/lexer.rs b/crates/dtacheck/src/lexer.rs deleted file mode 100644 index 041cdf6..0000000 --- a/crates/dtacheck/src/lexer.rs +++ /dev/null @@ -1,97 +0,0 @@ -use std::ops::Range; - -use derive_more::IsVariant; -use derive_more::Unwrap; -use logos::Lexer; -use logos::Logos; - -// do not try to understand the regex here -#[derive(Logos, Debug, PartialEq, IsVariant, Unwrap, Clone)] -pub enum TokenKind { - #[token("kDataUnhandled")] - Unhandled, - #[token("#ifdef")] - IfDef, - #[token("#else")] - Else, - #[token("#endif")] - EndIf, - #[token("(")] - LParen, - #[token(")")] - RParen, - #[token("{")] - LBrace, - #[token("}")] - RBrace, - #[token("[")] - LBracket, - #[token("]")] - RBracket, - #[token("#define")] - Define, - #[token("#include")] - Include, - #[token("#merge")] - Merge, - #[token("#ifndef")] - IfNDef, - #[token("#autorun")] - Autorun, - #[token("#undef")] - UnDef, - #[regex(r#"[\-\+]?[0-9]+"#, |lex| lex.slice().parse().ok(), priority=2)] - Int(i32), - #[regex(r#"[\-\+]?[0-9]+\.[0-9]+"#, |lex| lex.slice().parse().ok(), priority=2)] - Float(f32), - #[regex(r#"\$[0-9a-zA-Z_]+"#, |lex| lex.slice().parse().ok())] - Var(String), - #[regex(r#"[^ \t\n\r\f\(\[\{\}\]\)]+"#, |lex| lex.slice().parse().ok())] - #[regex(r#"'(?:\.|[^'])+'"#, trim_delimiters)] - Sym(String), - #[regex(r#""(?:\.|[^"])+""#, trim_delimiters)] - String(String), - #[regex(r"(;[^\n]*|[ \t\s\f\n\r])", priority = 2, callback = logos::skip)] - Invalid, - Eof, -} - -#[derive(Debug, Clone)] -pub struct Token { - pub kind: TokenKind, - pub span: Range, -} - -fn trim_delimiters(lexer: &mut Lexer) -> Option { - let slice = lexer.slice(); - let length = slice.len(); - slice[1..length - 1].parse().ok() -} - -pub fn lex(data: &str) -> Vec { - let mut tokens: Vec<_> = TokenKind::lexer(data) - .spanned() - .map(|(tok, span)| match tok { - Ok(tok) => Token { kind: tok, span }, - Err(()) => Token { - kind: TokenKind::Invalid, - span, - }, - }) - .collect(); - - if tokens.is_empty() { - tokens.push(Token { - kind: TokenKind::Eof, - span: 0..0, - }); - } else { - let last = tokens.last().unwrap().span.end; - tokens.push(Token { - kind: TokenKind::Eof, - span: last..last, - }); - } - - tokens -} diff --git a/crates/dtacheck/src/lib.rs b/crates/dtacheck/src/lib.rs index e4f1122..c786047 100644 --- a/crates/dtacheck/src/lib.rs +++ b/crates/dtacheck/src/lib.rs @@ -1,3 +1 @@ -pub mod lexer; pub mod linter; -pub mod parser; diff --git a/crates/dtacheck/src/linter.rs b/crates/dtacheck/src/linter.rs index 91dbdeb..03b3410 100644 --- a/crates/dtacheck/src/linter.rs +++ b/crates/dtacheck/src/linter.rs @@ -1,44 +1,54 @@ use std::collections::HashMap; use std::ops::Range; +use arson::parse::parser as arson_parse; +use arson_parse::{Expression, ExpressionKind}; use codespan_reporting::diagnostic::Diagnostic; use codespan_reporting::diagnostic::Label; -use crate::lexer::Token; -use crate::lexer::TokenKind; -use crate::parser::Node; -use crate::parser::NodeKind; - pub trait Lint { fn to_codespan(&self, id: usize) -> Diagnostic; } -pub fn lint_file( - ast: &[Node], - tokens: &[Token], - funcs: &Function, -) -> Vec> { +impl<'src> Lint for arson_parse::ParseError<'src> { + fn to_codespan(&self, id: usize) -> Diagnostic { + self.to_diagnostic(id) + } +} + +pub fn lint_file(ast: &[Expression], funcs: &Function) -> Vec> { let mut lints = Vec::new(); lint_node(&mut lints, ast, funcs); - lint_preprocs(&mut lints, tokens); lints } -fn lint_node(lints: &mut Vec>, ast: &[Node], funcs: &Function) { +fn lint_node( + lints: &mut Vec>, + ast: &[Expression], + funcs: &Function, +) { for node in ast { match &node.kind { - NodeKind::Array(array) - | NodeKind::Prop(array) - | NodeKind::Define(_, array) => lint_node(lints, array, funcs), - NodeKind::Stmt(array) => { + ExpressionKind::Array(array) | ExpressionKind::Property(array) => { + lint_node(lints, &array, funcs) + } + ExpressionKind::Define(_, array) => { + lint_node(lints, &array.exprs, funcs) + } + ExpressionKind::Command(array) => { lint_node(lints, array, funcs); - let has_preprocessor_directive = - array.iter().any(Node::is_preproc); + let has_preprocessor_directive = array.iter().any(|e| { + matches!(e.kind, ExpressionKind::Conditional { .. }) + }); if !has_preprocessor_directive { - lint_fn_args(lints, array, node.span.clone(), funcs); - lint_switch_fallthrough(lints, array, node.span.clone()); + lint_fn_args(lints, array, node.location.clone(), funcs); + lint_switch_fallthrough( + lints, + array, + node.location.clone(), + ); } } _ => (), @@ -89,11 +99,15 @@ impl Default for Function { } impl Function { - pub fn lookup(&self, stmt: &[Node]) -> (&Function, usize) { + pub fn lookup(&self, stmt: &[Expression]) -> (&Function, usize) { self.lookup_inner(stmt, 0) } - fn lookup_inner(&self, stmt: &[Node], depth: usize) -> (&Function, usize) { + fn lookup_inner( + &self, + stmt: &[Expression], + depth: usize, + ) -> (&Function, usize) { if self.children.is_empty() { return (self, depth); }; @@ -102,7 +116,7 @@ impl Function { return (self, depth); }; - let NodeKind::Symbol(ref sym) = node.kind else { + let ExpressionKind::Symbol(sym) = node.kind else { return (self, depth); }; @@ -132,7 +146,7 @@ impl Function { fn lint_fn_args( lints: &mut Vec>, - stmt: &[Node], + stmt: &[Expression], span: Range, funcs: &Function, ) { @@ -145,74 +159,20 @@ fn lint_fn_args( } } -fn generate_function_name(stmt: &[Node]) -> String { +fn generate_function_name(stmt: &[Expression]) -> String { let list: Vec<&str> = stmt .iter() - .map(|x| match &x.kind { - NodeKind::Symbol(sym) => Some(sym), + .map(|x| match x.kind { + ExpressionKind::Symbol(sym) => Some(sym), _ => None, }) .take_while(Option::is_some) - .map(|x| x.unwrap().as_str()) + .map(|x| x.unwrap()) .collect(); list.join(" ") } -// preprocesor directives -enum PreProcLint { - Unmatched(Range), - Extra(Range), -} - -impl Lint for PreProcLint { - fn to_codespan(&self, id: usize) -> Diagnostic { - match self { - Self::Unmatched(s) => Diagnostic::error() - .with_message("unmatched preprocessing directive") - .with_labels(vec![Label::primary(id, s.clone())]), - Self::Extra(s) => Diagnostic::error() - .with_message("extraneous preprocessing directive") - .with_labels(vec![Label::primary(id, s.clone())]), - } - } -} - -fn lint_preprocs(lints: &mut Vec>, tokens: &[Token]) { - let mut directive_stack: Vec<(Range, bool)> = Vec::new(); - for token in tokens { - match token.kind { - TokenKind::IfNDef | TokenKind::IfDef => { - directive_stack.push((token.span.clone(), false)); - } - TokenKind::Else => { - if let Some(entry) = directive_stack.pop() { - if entry.1 { - lints.push(Box::new(PreProcLint::Extra( - token.span.clone(), - ))); - } - directive_stack.push((token.span.clone(), true)); - } else { - lints - .push(Box::new(PreProcLint::Extra(token.span.clone()))); - } - } - TokenKind::EndIf => { - if directive_stack.pop().is_none() { - lints - .push(Box::new(PreProcLint::Extra(token.span.clone()))); - } - } - _ => (), - } - } - - for lint in directive_stack { - lints.push(Box::new(PreProcLint::Unmatched(lint.0))); - } -} - // switch fallthough struct SwitchFallthroughLint(Range, Range); @@ -231,14 +191,14 @@ impl Lint for SwitchFallthroughLint { fn lint_switch_fallthrough( lints: &mut Vec>, - stmt: &[Node], + stmt: &[Expression], span: Range, ) { if stmt.is_empty() { return; } - let NodeKind::Symbol(ref sym) = stmt[0].kind else { + let ExpressionKind::Symbol(sym) = stmt[0].kind else { return; }; @@ -250,7 +210,7 @@ fn lint_switch_fallthrough( return; }; - if last_node.kind.is_array() { + if matches!(last_node.kind, ExpressionKind::Array(_)) { let pos = span.end - 1; lints.push(Box::new(SwitchFallthroughLint(span, pos..pos))) } diff --git a/crates/dtacheck/src/main.rs b/crates/dtacheck/src/main.rs index 185a358..c52beaa 100644 --- a/crates/dtacheck/src/main.rs +++ b/crates/dtacheck/src/main.rs @@ -2,17 +2,17 @@ use std::fs; use std::path::Path; use std::path::PathBuf; +use arson::parse::lexer; +use arson::parse::parser; use clap::Parser as ClapParser; use codespan_reporting::files::SimpleFiles; use codespan_reporting::term; use codespan_reporting::term::termcolor::ColorChoice; use codespan_reporting::term::termcolor::StandardStream; use codespan_reporting::term::Chars; -use dtacheck::lexer; use dtacheck::linter::lint_file; use dtacheck::linter::Function; use dtacheck::linter::Lint; -use dtacheck::parser; #[derive(ClapParser)] struct Args { @@ -54,7 +54,10 @@ fn main() { let file_id = files.add(args.file.to_str().unwrap(), &data); let tokens = lexer::lex(&data); - let (ast, diagnostics) = parser::parse(&tokens); + let (ast, diagnostics) = match parser::parse(tokens) { + Ok(ast) => (ast, Vec::new()), + Err(errors) => (Vec::new(), errors), + }; let writer = StandardStream::stderr(ColorChoice::Auto); let config = codespan_reporting::term::Config { @@ -71,9 +74,7 @@ fn main() { ); } - let Ok(ast) = ast else { return }; - - for diag in lint_file(&ast, &tokens, &funcs) { + for diag in lint_file(&ast, &funcs) { let _ = term::emit( &mut writer.lock(), &config, diff --git a/crates/dtacheck/src/parser.rs b/crates/dtacheck/src/parser.rs deleted file mode 100644 index 985582c..0000000 --- a/crates/dtacheck/src/parser.rs +++ /dev/null @@ -1,405 +0,0 @@ -use std::ops::Range; - -use codespan_reporting::diagnostic::Diagnostic; -use codespan_reporting::diagnostic::Label; -use derive_more::IsVariant; -use derive_more::Unwrap; - -use crate::lexer::Token; -use crate::lexer::TokenKind; -use crate::linter::Lint; - -#[derive(Default)] -struct Parser<'a> { - cursor: usize, - brace_stack: Vec, - tokens: &'a [Token], - diagnostics: Vec, -} - -#[derive(Debug)] -pub enum ParseLint { - UnmatchedBrace(Range, Range), - GenericError(Range), -} - -impl Lint for ParseLint { - fn to_codespan(&self, id: usize) -> Diagnostic { - match self { - Self::UnmatchedBrace(opening, closing) => Diagnostic::error() - .with_message("unmatched delimiter") - .with_labels(vec![ - Label::primary(id, closing.clone()) - .with_message("unexpected token"), - Label::primary(id, opening.clone()) - .with_message("unmatched delimiter"), - ]), - Self::GenericError(span) => Diagnostic::error() - .with_message("unexpected token") - .with_labels(vec![Label::primary(id, span.clone()) - .with_message("unexpected token")]), - } - } -} - -type ParseResult = Result; - -impl<'a> Parser<'a> { - fn new(tokens: &'a [Token]) -> Self { - Self { - tokens, - ..Default::default() - } - } - - fn bump(&mut self, amount: usize) { - self.cursor += amount; - } - - fn lookahead(&self, amount: usize) -> Token { - self.tokens[self.cursor + amount].clone() - } - - fn previous(&self) -> Token { - self.tokens[self.cursor - 1].clone() - } - - fn eat(&mut self, f: fn(&TokenKind) -> bool) -> ParseResult { - let token = self.lookahead(0); - - if f(&token.kind) { - self.bump(1); - Ok(token) - } else { - Err(ParseLint::GenericError(token.span)) - } - } - - fn eat_open_brace( - &mut self, - f: fn(&TokenKind) -> bool, - ) -> ParseResult { - let token = self.lookahead(0); - - if f(&token.kind) { - self.brace_stack.push(token.clone()); - self.bump(1); - Ok(token) - } else { - Err(ParseLint::GenericError(token.span)) - } - } - - fn eat_if(&mut self, f: fn(&TokenKind) -> bool) -> bool { - let token = self.lookahead(0); - - if f(&token.kind) { - self.bump(1); - true - } else { - false - } - } - - fn eat_if_open_brace(&mut self, f: fn(&TokenKind) -> bool) -> bool { - let token = self.lookahead(0); - - if f(&token.kind) { - self.brace_stack.push(token.clone()); - self.bump(1); - true - } else { - false - } - } - - // i seriously can not think of a better way to write this - // - // PRs welcome - #[allow(clippy::if_same_then_else)] - fn parse_node(&mut self) -> ParseResult { - if self.eat_if(TokenKind::is_int) { - Ok(Node::from(self.previous())) - } else if self.eat_if(TokenKind::is_float) { - Ok(Node::from(self.previous())) - } else if self.eat_if(TokenKind::is_var) { - Ok(Node::from(self.previous())) - } else if self.eat_if(TokenKind::is_sym) { - Ok(Node::from(self.previous())) - } else if self.eat_if(TokenKind::is_unhandled) { - Ok(Node::from(self.previous())) - } else if self.eat_if(TokenKind::is_if_def) { - let span = self.previous().span; - let sym = self.eat(TokenKind::is_sym)?; - Ok(Node::new_ifdef(span, sym)) - } else if self.eat_if(TokenKind::is_else) { - Ok(Node::from(self.previous())) - } else if self.eat_if(TokenKind::is_end_if) { - Ok(Node::from(self.previous())) - } else if self.eat_if_open_brace(TokenKind::is_l_paren) { - let lower_span = self.previous().span; - let array = self.parse_list(TokenKind::is_r_paren)?; - let upper_span = self.previous().span; - Ok(Node::new_array(array, lower_span.start..upper_span.end)) - } else if self.eat_if_open_brace(TokenKind::is_l_bracket) { - let lower_span = self.previous().span; - let array = self.parse_list(TokenKind::is_r_bracket)?; - let upper_span = self.previous().span; - Ok(Node::new_prop(array, lower_span.start..upper_span.end)) - } else if self.eat_if(TokenKind::is_string) { - Ok(Node::from(self.previous())) - } else if self.eat_if_open_brace(TokenKind::is_l_brace) { - let lower_span = self.previous().span; - let array = self.parse_list(TokenKind::is_r_brace)?; - let upper_span = self.previous().span; - Ok(Node::new_stmt(array, lower_span.start..upper_span.end)) - } else if self.eat_if(TokenKind::is_define) { - let lower_span = self.previous().span; - let sym = self.eat(TokenKind::is_sym)?; - self.eat_open_brace(TokenKind::is_l_paren)?; - let array = self.parse_list(TokenKind::is_r_paren)?; - let upper_span = self.previous().span; - let lower = lower_span.start; - let upper = upper_span.end; - Ok(Node::new_define(lower..upper, sym, array)) - } else if self.eat_if(TokenKind::is_include) { - let span = self.previous().span; - let sym = self.eat(TokenKind::is_sym)?; - Ok(Node::new_include(span, sym)) - } else if self.eat_if(TokenKind::is_merge) { - let span = self.previous().span; - let sym = self.eat(TokenKind::is_sym)?; - Ok(Node::new_merge(span, sym)) - } else if self.eat_if(TokenKind::is_if_n_def) { - let span = self.previous().span; - let sym = self.eat(TokenKind::is_sym)?; - Ok(Node::new_ifndef(span, sym)) - } else if self.eat_if(TokenKind::is_autorun) { - Ok(Node::from(self.previous())) - } else if self.eat_if(TokenKind::is_un_def) { - let span = self.previous().span; - let sym = self.eat(TokenKind::is_sym)?; - Ok(Node::new_undef(span, sym)) - } else { - Err(ParseLint::GenericError(self.lookahead(0).span)) - } - } - - fn recover_mismatched_braces(&mut self) -> bool { - if !self.brace_stack.is_empty() { - let token = self.lookahead(0); - let unmatched = self.brace_stack.last().unwrap().span.clone(); - let current = token.span.clone(); - let diag = ParseLint::UnmatchedBrace(unmatched, current); - - if token.kind.is_r_bracket() - || token.kind.is_r_paren() - || token.kind.is_r_brace() - { - self.diagnostics.push(diag); - self.bump(1); - self.brace_stack.pop().unwrap(); - return true; - } - - if token.kind.is_eof() { - self.diagnostics.push(diag); - self.brace_stack.pop().unwrap(); - return true; - } - } - - false - } - - fn parse_list( - &mut self, - stop: fn(&TokenKind) -> bool, - ) -> ParseResult> { - let mut nodes = Vec::new(); - loop { - if self.eat_if(stop) { - if self.previous().kind != TokenKind::Eof { - self.brace_stack.pop().unwrap(); - } - break; - } - match self.parse_node() { - Ok(x) => nodes.push(x), - Err(e) => { - if self.recover_mismatched_braces() { - break; - } - return Err(e); - } - } - } - Ok(nodes) - } -} - -pub fn parse(tokens: &[Token]) -> (Result, ()>, Vec) { - let mut parser = Parser::new(tokens); - let parse_result = parser.parse_list(TokenKind::is_eof); - let mut diagnostics = parser.diagnostics; - - let res = match parse_result { - Ok(r) => Ok(r), - Err(e) => { - diagnostics.push(e); - Err(()) - } - }; - - (res, diagnostics) -} - -#[derive(Debug, IsVariant, Unwrap)] -pub enum NodeKind { - Int(i32), - Float(f32), - Var(String), - Symbol(String), - Unhandled, - IfDef(String), - Else, - EndIf, - Array(Vec), - Stmt(Vec), - String(String), - Prop(Vec), - Define(String, Vec), - Include(String), - Merge(String), - IfNDef(String), - Autorun, - Undef(String), -} - -#[derive(Debug)] -pub struct Node { - pub kind: NodeKind, - pub span: Range, -} - -impl Node { - fn new_array(list: Vec, span: Range) -> Node { - Node { - kind: NodeKind::Array(list), - span, - } - } - - fn new_stmt(list: Vec, span: Range) -> Node { - Node { - kind: NodeKind::Stmt(list), - span, - } - } - - fn new_prop(list: Vec, span: Range) -> Node { - Node { - kind: NodeKind::Prop(list), - span, - } - } - - fn new_define(span: Range, sym: Token, array: Vec) -> Node { - Node { - kind: NodeKind::Define(sym.kind.unwrap_sym(), array), - span, - } - } - - fn new_ifdef(span: Range, sym: Token) -> Node { - let span = span.start..sym.span.end; - Node { - kind: NodeKind::IfDef(sym.kind.unwrap_sym()), - span, - } - } - fn new_ifndef(span: Range, sym: Token) -> Node { - let span = span.start..sym.span.end; - Node { - kind: NodeKind::IfNDef(sym.kind.unwrap_sym()), - span, - } - } - fn new_include(span: Range, sym: Token) -> Node { - let span = span.start..sym.span.end; - Node { - kind: NodeKind::Include(sym.kind.unwrap_sym()), - span, - } - } - fn new_merge(span: Range, sym: Token) -> Node { - let span = span.start..sym.span.end; - Node { - kind: NodeKind::Merge(sym.kind.unwrap_sym()), - span, - } - } - fn new_undef(span: Range, sym: Token) -> Node { - let span = span.start..sym.span.end; - Node { - kind: NodeKind::Undef(sym.kind.unwrap_sym()), - span, - } - } - - pub fn is_preproc(&self) -> bool { - self.kind.is_if_def() - || self.kind.is_else() - || self.kind.is_end_if() - || self.kind.is_define() - || self.kind.is_include() - || self.kind.is_merge() - || self.kind.is_if_n_def() - || self.kind.is_autorun() - || self.kind.is_undef() - } -} - -impl From for Node { - fn from(value: Token) -> Self { - match value.kind { - TokenKind::Sym(s) => Node { - kind: NodeKind::Symbol(s), - span: value.span, - }, - TokenKind::Int(s) => Node { - kind: NodeKind::Int(s), - span: value.span, - }, - TokenKind::Float(s) => Node { - kind: NodeKind::Float(s), - span: value.span, - }, - TokenKind::String(s) => Node { - kind: NodeKind::String(s), - span: value.span, - }, - TokenKind::Var(s) => Node { - kind: NodeKind::Var(s), - span: value.span, - }, - TokenKind::Else => Node { - kind: NodeKind::Else, - span: value.span, - }, - TokenKind::EndIf => Node { - kind: NodeKind::EndIf, - span: value.span, - }, - TokenKind::Unhandled => Node { - kind: NodeKind::Unhandled, - span: value.span, - }, - TokenKind::Autorun => Node { - kind: NodeKind::Autorun, - span: value.span, - }, - _ => unreachable!(), - } - } -}