From a4cb7aee3fece87405b7e565360e6e121ffeac16 Mon Sep 17 00:00:00 2001 From: m Date: Thu, 25 Jul 2024 18:54:26 -0700 Subject: [PATCH] remove lifetime from ParseError (#12) --- src/frontend/parser.rs | 159 ++++++++++++++++++++++------------------ src/frontend/scanner.rs | 144 ++++++++++++++++++++---------------- src/main.rs | 7 +- 3 files changed, 173 insertions(+), 137 deletions(-) diff --git a/src/frontend/parser.rs b/src/frontend/parser.rs index c833fd7..f7c8ec8 100644 --- a/src/frontend/parser.rs +++ b/src/frontend/parser.rs @@ -7,7 +7,7 @@ use super::{ ArgList, Ast, AstRef, Element, FilterList, Inline, Leaf, Qualifier, RValue, Selector, SelectorCombinator, SelectorList, Statement, StatementList, }, - scanner::{Lexeme, Scanner, Token}, + scanner::{Lexeme, Scanner, Span, Token}, }; #[derive(Debug)] @@ -18,26 +18,50 @@ pub struct Parser<'a> { #[derive(Debug, Clone)] #[non_exhaustive] -pub enum ParseError<'a> { +pub enum ParseError { UnexpectedToken { expected: Vec, - got: Lexeme<'a>, + got: Token, + value: String, + span: Span, }, } -impl<'a> fmt::Display for ParseError<'a> { +impl fmt::Display for ParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::UnexpectedToken { expected, got } => { - write!(f, "Expected one of {expected:?}, got {got:?}") + Self::UnexpectedToken { + expected, + got, + span, + value, + } => { + write!( + f, + "Expected one of {expected:?}, got {got:?} '{value}' on line {}", + span.line + ) } } } } -impl std::error::Error for ParseError<'_> {} +impl std::error::Error for ParseError {} + +impl ParseError { + /// Helper function to construct the `ParseError::UnexpectedToken` variant + /// from a [`Lexeme`] and a [`Span`] and expected values. + pub fn unexpected(expected: Vec, lx: Lexeme<'_>, span: Span) -> Self { + Self::UnexpectedToken { + expected, + got: lx.token, + value: lx.value.to_owned(), + span, + } + } +} -type Result<'a, T> = std::result::Result>; +type Result = std::result::Result; impl<'a> Parser<'a> { #[must_use] @@ -48,7 +72,7 @@ impl<'a> Parser<'a> { } } - pub fn parse(mut self) -> Result<'a, (Arena>, Option>>)> { + pub fn parse(mut self) -> Result<(Arena>, Option>>)> { let r = match self.parse_statement_list() { Ok(r) => r, Err(e) => { @@ -59,8 +83,8 @@ impl<'a> Parser<'a> { Ok((self.arena, r)) } - pub fn parse_statement_list(&mut self) -> Result<'a, Option>>> { - let lx = self.scanner.peek_non_whitespace(); + pub fn parse_statement_list(&mut self) -> Result>>> { + let (_, lx) = self.scanner.peek_non_whitespace(); if lx.token == Token::Id { let statement = self.parse_statement()?; @@ -75,7 +99,7 @@ impl<'a> Parser<'a> { } } - fn parse_statement(&mut self) -> Result<'a, Statement<'a>> { + fn parse_statement(&mut self) -> Result> { let id = self.try_eat(Token::Id)?.value; self.try_eat(Token::Colon)?; let value = self.parse_rvalue()?; @@ -84,8 +108,8 @@ impl<'a> Parser<'a> { Ok(Statement { id, value, filters }) } - fn parse_rvalue(&mut self) -> Result<'a, RValue<'a>> { - let lx = self.scanner.peek_non_whitespace(); + fn parse_rvalue(&mut self) -> Result> { + let (_, lx) = self.scanner.peek_non_whitespace(); match lx.token { Token::Id | Token::Less | Token::Dot | Token::Hash => { @@ -95,9 +119,9 @@ impl<'a> Parser<'a> { } } - fn parse_leaf(&mut self) -> Result<'a, Leaf<'a>> { + fn parse_leaf(&mut self) -> Result> { self.scanner.peek_non_whitespace(); - let lx = self.scanner.eat_token(); + let (span, lx) = self.scanner.eat_token(); match lx.token { Token::String => Ok(Leaf::String(parse_string_literal(lx.value))), Token::Float => Ok(Leaf::Float( @@ -108,29 +132,27 @@ impl<'a> Parser<'a> { let id = self.try_eat(Token::Id)?.value; Ok(Leaf::Var(id)) } - _ => Err(ParseError::UnexpectedToken { - expected: vec![Token::String, Token::Float, Token::Int, Token::Dollar], - got: lx, - }), + _ => Err(ParseError::unexpected( + vec![Token::String, Token::Float, Token::Int, Token::Dollar], + lx, + span, + )), } } #[inline] - fn try_eat(&mut self, tk: Token) -> Result<'a, Lexeme<'a>> { - let lx = self.scanner.peek_non_whitespace(); + fn try_eat(&mut self, tk: Token) -> Result> { + let (span, lx) = self.scanner.peek_non_whitespace(); self.scanner.eat_token(); if lx.token == tk { Ok(lx) } else { - Err(ParseError::UnexpectedToken { - expected: vec![tk], - got: lx, - }) + Err(ParseError::unexpected(vec![tk], lx, span)) } } - fn parse_element(&mut self) -> Result<'a, Element<'a>> { + fn parse_element(&mut self) -> Result> { let url = self.parse_maybe_url()?; let selector_head = self.parse_selector()?; let selectors = self.parse_selector_list()?; @@ -152,8 +174,8 @@ impl<'a> Parser<'a> { }) } - fn parse_maybe_url(&mut self) -> Result<'a, Option>> { - let lx = self.scanner.peek_non_whitespace(); + fn parse_maybe_url(&mut self) -> Result>> { + let (_, lx) = self.scanner.peek_non_whitespace(); if lx.token == Token::Less { self.parse_inline().map(Some) } else { @@ -161,35 +183,29 @@ impl<'a> Parser<'a> { } } - fn parse_inline(&mut self) -> Result<'a, Inline<'a>> { - let lx = self.scanner.peek_non_whitespace(); - if lx.token == Token::Less { - self.scanner.eat_token(); - let value = self.parse_leaf()?; - let filters = self.parse_filter_list()?; - self.try_eat(Token::Greater)?; - Ok(Inline { value, filters }) - } else { - Err(ParseError::UnexpectedToken { - expected: vec![Token::Less], - got: lx, - }) - } + fn parse_inline(&mut self) -> Result> { + self.try_eat(Token::Less)?; + let value = self.parse_leaf()?; + let filters = self.parse_filter_list()?; + self.try_eat(Token::Greater)?; + Ok(Inline { value, filters }) } - fn parse_selector_list(&mut self) -> Result<'a, Option>>> { - let mut lx = self.scanner.peek_token(); - if lx.token == Token::Whitespace { + fn parse_selector_list(&mut self) -> Result>>> { + let mut item = self.scanner.peek_token(); + if item.1.token == Token::Whitespace { self.scanner.eat_token(); - let next_lx = self.scanner.peek_non_whitespace(); + let next = self.scanner.peek_non_whitespace(); // if the next lexeme after the whitespace doesn't signify a selector, // the whitespace is not significant. - match next_lx.token { + match next.1.token { Token::Id | Token::Hash | Token::Dot | Token::Star => (), - _ => lx = next_lx, + _ => item = next, }; } + let (span, lx) = item; + let sel = match lx.token { Token::BraceOpen | Token::ParenOpen => return Ok(None), // invariant: peek_next_whitespace is one of Id | Hash | Dot | Star @@ -211,8 +227,8 @@ impl<'a> Parser<'a> { SelectorCombinator::And(self.parse_selector()?) } _ => { - return Err(ParseError::UnexpectedToken { - expected: vec![ + return Err(ParseError::unexpected( + vec![ Token::Whitespace, Token::Greater, Token::Plus, @@ -222,8 +238,9 @@ impl<'a> Parser<'a> { Token::Id, Token::Star, ], - got: lx, - }) + lx, + span, + )) } }; @@ -232,8 +249,8 @@ impl<'a> Parser<'a> { Ok(Some(self.arena.insert_variant(itm))) } - fn parse_selector(&mut self) -> Result<'a, Selector<'a>> { - let lx = self.scanner.peek_non_whitespace(); + fn parse_selector(&mut self) -> Result> { + let (span, lx) = self.scanner.peek_non_whitespace(); match lx.token { Token::Dot => { self.scanner.eat_token(); @@ -251,15 +268,16 @@ impl<'a> Parser<'a> { self.scanner.eat_token(); Ok(Selector::Any) } - _ => Err(ParseError::UnexpectedToken { - expected: vec![Token::Dot, Token::Hash, Token::Id, Token::Star], - got: lx, - }), + _ => Err(ParseError::unexpected( + vec![Token::Dot, Token::Hash, Token::Id, Token::Star], + lx, + span, + )), } } - fn parse_filter_list(&mut self) -> Result<'a, Option>>> { - let lx = self.scanner.peek_non_whitespace(); + fn parse_filter_list(&mut self) -> Result>>> { + let (_, lx) = self.scanner.peek_non_whitespace(); if lx.token == Token::Pipe { self.scanner.eat_token(); let id = self.try_eat(Token::Id)?.value; @@ -277,8 +295,8 @@ impl<'a> Parser<'a> { } } - fn parse_arg_list(&mut self) -> Result<'a, Option>>> { - let lx = self.scanner.peek_non_whitespace(); + fn parse_arg_list(&mut self) -> Result>>> { + let (span, lx) = self.scanner.peek_non_whitespace(); match lx.token { Token::ParenClose => Ok(None), Token::Id => { @@ -286,7 +304,7 @@ impl<'a> Parser<'a> { self.scanner.eat_token(); self.try_eat(Token::Colon)?; let value = self.parse_leaf()?; - let next = match self.scanner.peek_non_whitespace().token { + let next = match self.scanner.peek_non_whitespace().1.token { Token::Comma => { self.scanner.eat_token(); self.parse_arg_list()? @@ -297,15 +315,16 @@ impl<'a> Parser<'a> { let r = self.arena.insert_variant(ArgList::new(id, value, next)); Ok(Some(r)) } - _ => Err(ParseError::UnexpectedToken { - expected: vec![Token::ParenClose, Token::Id], - got: lx, - }), + _ => Err(ParseError::unexpected( + vec![Token::ParenClose, Token::Id], + lx, + span, + )), } } - fn parse_qualifier(&mut self) -> Result<'a, Qualifier> { - let lx = self.scanner.peek_non_whitespace(); + fn parse_qualifier(&mut self) -> Result { + let (_, lx) = self.scanner.peek_non_whitespace(); Ok(match lx.token { Token::Question => { self.scanner.eat_token(); diff --git a/src/frontend/scanner.rs b/src/frontend/scanner.rs index 4054e66..9a49b28 100644 --- a/src/frontend/scanner.rs +++ b/src/frontend/scanner.rs @@ -53,23 +53,7 @@ pub enum Token { mod statics { use super::Token; use regex::{Regex, RegexSet}; - use std::sync::OnceLock; - - pub struct Lazy T>(OnceLock, F); - - impl T> Lazy { - pub const fn new(f: F) -> Self { - Self(OnceLock::new(), f) - } - } - - impl T> std::ops::Deref for Lazy { - type Target = T; - - fn deref(&self) -> &Self::Target { - self.0.get_or_init(&self.1) - } - } + use std::sync::LazyLock; macro_rules! make_regex_set { {$vis: vis ($tokens: ident, $re_set: ident, $re_compiled: ident) = {$($tk: ident <- $pat: literal)*};} => { @@ -77,13 +61,13 @@ mod statics { $(Token::$tk, )* ]; - $vis static $re_set: Lazy = Lazy::new(|| RegexSet::new(&[ + $vis static $re_set: LazyLock = LazyLock::new(|| RegexSet::new(&[ $( concat!("^", $pat), )* ]).expect("error building RegexSet")); - $vis static $re_compiled: Lazy> = Lazy::new(|| vec![ + $vis static $re_compiled: LazyLock> = LazyLock::new(|| vec![ $( Regex::new(concat!("^", $pat)).expect(concat!("Error building Regex `", $pat, "`")), )* @@ -124,6 +108,15 @@ mod statics { pub struct Scanner<'a> { slice: &'a str, idx: usize, + line: usize, +} + +#[derive(Debug, Clone, Copy, Default)] +#[non_exhaustive] +pub struct Span { + pub line: usize, + pub start: usize, + pub end: usize, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -140,13 +133,17 @@ const EOF: Lexeme = Lexeme { impl<'a> Scanner<'a> { #[must_use] pub const fn new(slice: &'a str) -> Self { - Self { slice, idx: 0 } + Self { + slice, + idx: 0, + line: 1, + } } #[must_use] - pub fn peek_token(&self) -> Lexeme<'a> { + pub fn peek_token(&self) -> (Span, Lexeme<'a>) { if self.idx >= self.slice.len() { - return EOF; + return (Span::default(), EOF); } // note to self: we can't use find_at because it still considers the @@ -162,23 +159,44 @@ impl<'a> Scanner<'a> { .as_str(), }) .max_by_key(|x| x.value.len()) - .unwrap_or(Lexeme { - token: Token::Unknown, - value: &self.slice[self.idx..=self.idx], + .map(|lx| { + ( + Span { + line: self.line, + start: self.idx, + end: self.idx + lx.value.len(), + }, + lx, + ) }) + .unwrap_or(( + Span { + line: self.line, + start: self.idx, + end: self.idx + 1, + }, + Lexeme { + token: Token::Unknown, + value: &self.slice[self.idx..=self.idx], + }, + )) } - pub fn eat_token(&mut self) -> Lexeme<'a> { - let lexeme = self.peek_token(); + pub fn eat_token(&mut self) -> (Span, Lexeme<'a>) { + let (span, lexeme) = self.peek_token(); self.idx += lexeme.value.len(); - lexeme + self.line += lexeme.value.chars().filter(|&x| x == '\n').count(); + (span, lexeme) } - pub fn peek_non_whitespace(&mut self) -> Lexeme<'a> { - while let Lexeme { - token: Token::Whitespace, - .. - } = self.peek_token() + pub fn peek_non_whitespace(&mut self) -> (Span, Lexeme<'a>) { + while let ( + _, + Lexeme { + token: Token::Whitespace, + .. + }, + ) = self.peek_token() { self.eat_token(); } @@ -193,14 +211,14 @@ mod tests { #[test] fn test_tokens() { let scanner = Scanner::new(""); - assert_eq!(scanner.peek_token(), EOF); + assert_eq!(scanner.peek_token().1, EOF); macro_rules! test_matches { {$($tk: ident => $($pat: literal)+ $(!($($npat: literal)+))?)* } => { $( $( assert_eq!( - Scanner::new($pat).peek_token(), + Scanner::new($pat).peek_token().1, Lexeme { token: Token::$tk, value: $pat } ); )+ @@ -208,7 +226,7 @@ mod tests { $( $( assert_ne!( - Scanner::new($npat).peek_token(), + Scanner::new($npat).peek_token().1, Lexeme { token: Token::$tk, value: $npat } ); )* @@ -243,51 +261,51 @@ mod tests { #[test] fn test_eat() { let mut sc = Scanner::new("h3 h4#h5.h6 {}"); - assert_eq!(sc.eat_token(), lx!(Id, "h3")); - assert_eq!(sc.eat_token(), lx!(Whitespace, " ")); - assert_eq!(sc.eat_token(), lx!(Id, "h4")); - assert_eq!(sc.eat_token(), lx!(Hash, "#")); - assert_eq!(sc.eat_token(), lx!(Id, "h5")); - assert_eq!(sc.eat_token(), lx!(Dot, ".")); - assert_eq!(sc.eat_token(), lx!(Id, "h6")); - assert_eq!(sc.eat_token(), lx!(Whitespace, " ")); - assert_eq!(sc.eat_token(), lx!(BraceOpen, "{")); - assert_eq!(sc.eat_token(), lx!(BraceClose, "}")); + assert_eq!(sc.eat_token().1, lx!(Id, "h3")); + assert_eq!(sc.eat_token().1, lx!(Whitespace, " ")); + assert_eq!(sc.eat_token().1, lx!(Id, "h4")); + assert_eq!(sc.eat_token().1, lx!(Hash, "#")); + assert_eq!(sc.eat_token().1, lx!(Id, "h5")); + assert_eq!(sc.eat_token().1, lx!(Dot, ".")); + assert_eq!(sc.eat_token().1, lx!(Id, "h6")); + assert_eq!(sc.eat_token().1, lx!(Whitespace, " ")); + assert_eq!(sc.eat_token().1, lx!(BraceOpen, "{")); + assert_eq!(sc.eat_token().1, lx!(BraceClose, "}")); } #[test] fn test_peek_whitespace() { let mut sc = Scanner::new("h3 h4#h5.h6 {}"); sc.peek_non_whitespace(); - assert_eq!(sc.eat_token(), lx!(Id, "h3")); + assert_eq!(sc.eat_token().1, lx!(Id, "h3")); sc.peek_non_whitespace(); - assert_eq!(sc.eat_token(), lx!(Id, "h4")); + assert_eq!(sc.eat_token().1, lx!(Id, "h4")); sc.peek_non_whitespace(); - assert_eq!(sc.eat_token(), lx!(Hash, "#")); + assert_eq!(sc.eat_token().1, lx!(Hash, "#")); sc.peek_non_whitespace(); - assert_eq!(sc.eat_token(), lx!(Id, "h5")); + assert_eq!(sc.eat_token().1, lx!(Id, "h5")); sc.peek_non_whitespace(); - assert_eq!(sc.eat_token(), lx!(Dot, ".")); + assert_eq!(sc.eat_token().1, lx!(Dot, ".")); sc.peek_non_whitespace(); - assert_eq!(sc.eat_token(), lx!(Id, "h6")); + assert_eq!(sc.eat_token().1, lx!(Id, "h6")); sc.peek_non_whitespace(); - assert_eq!(sc.eat_token(), lx!(BraceOpen, "{")); + assert_eq!(sc.eat_token().1, lx!(BraceOpen, "{")); sc.peek_non_whitespace(); - assert_eq!(sc.eat_token(), lx!(BraceClose, "}")); + assert_eq!(sc.eat_token().1, lx!(BraceClose, "}")); } #[test] fn test_whitespace_mix() { let mut sc = Scanner::new("h3 h4#h5.h6 {}"); - assert_eq!(sc.eat_token(), lx!(Id, "h3")); - assert_eq!(sc.eat_token(), lx!(Whitespace, " ")); - assert_eq!(sc.eat_token(), lx!(Id, "h4")); - assert_eq!(sc.eat_token(), lx!(Hash, "#")); - assert_eq!(sc.eat_token(), lx!(Id, "h5")); - assert_eq!(sc.eat_token(), lx!(Dot, ".")); - assert_eq!(sc.eat_token(), lx!(Id, "h6")); + assert_eq!(sc.eat_token().1, lx!(Id, "h3")); + assert_eq!(sc.eat_token().1, lx!(Whitespace, " ")); + assert_eq!(sc.eat_token().1, lx!(Id, "h4")); + assert_eq!(sc.eat_token().1, lx!(Hash, "#")); + assert_eq!(sc.eat_token().1, lx!(Id, "h5")); + assert_eq!(sc.eat_token().1, lx!(Dot, ".")); + assert_eq!(sc.eat_token().1, lx!(Id, "h6")); sc.peek_non_whitespace(); - assert_eq!(sc.eat_token(), lx!(BraceOpen, "{")); - assert_eq!(sc.eat_token(), lx!(BraceClose, "}")); + assert_eq!(sc.eat_token().1, lx!(BraceOpen, "{")); + assert_eq!(sc.eat_token().1, lx!(BraceClose, "}")); } } diff --git a/src/main.rs b/src/main.rs index f850491..94b0423 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,10 +27,9 @@ async fn main() -> anyhow::Result<()> { let parser = Parser::new(&pgm); - let (ast, head) = match parser.parse() { - Ok(x) => x, - Err(e) => anyhow::bail!("Parse Error: {e}"), - }; + let (ast, head) = parser + .parse() + .with_context(|| format!("parse error in {filename}:"))?; let interpreter = Interpreter::new(&ast);