From df44df2b49dbf3eec683389b4e86166bae413a87 Mon Sep 17 00:00:00 2001 From: m Date: Sat, 17 Aug 2024 10:16:28 -0700 Subject: [PATCH] flatten ast, remove arena - also fix selector parsing --- src/frontend/arena.rs | 98 ------------ src/frontend/ast.rs | 338 +++++++++++++++++------------------------ src/frontend/mod.rs | 2 - src/frontend/parser.rs | 283 ++++++++++++++++------------------ src/interpreter/mod.rs | 84 ++++------ src/main.rs | 3 +- 6 files changed, 305 insertions(+), 503 deletions(-) delete mode 100644 src/frontend/arena.rs diff --git a/src/frontend/arena.rs b/src/frontend/arena.rs deleted file mode 100644 index 2979598..0000000 --- a/src/frontend/arena.rs +++ /dev/null @@ -1,98 +0,0 @@ -use std::{fmt::Debug, marker::PhantomData, num::NonZeroUsize}; - -#[derive(Debug, Clone)] -pub struct ArenaInner { - inner: Vec, - /// we use 1-indexed refs for null-pointer optimization - next_ref: NonZeroUsize, -} - -#[derive(Debug)] -pub struct Arena(ArenaInner); - -#[derive(Debug)] -pub struct Ref<'a, T>(NonZeroUsize, PhantomData<&'a T>); - -/// explicit impl because we want it even when `T` is not Clone -impl<'a, T> Clone for Ref<'a, T> { - fn clone(&self) -> Self { - *self - } -} - -impl<'a, T> Copy for Ref<'a, T> {} - -impl Default for Arena { - fn default() -> Self { - Self::new() - } -} - -/// Hack to get a constant NonZeroUsize(1). Uses out-of-bounds -/// array access to simulate panicking in a const declaration. -const ONE: NonZeroUsize = match NonZeroUsize::new(1) { - Some(x) => x, - #[allow(clippy::out_of_bounds_indexing)] - None => [][0], -}; - -impl Arena { - #[inline] - #[must_use] - pub const fn new() -> Self { - Self(ArenaInner { - inner: Vec::new(), - next_ref: ONE, - }) - } - - #[inline] - #[must_use] - pub fn get<'a, 's>(&'s self, r: Ref<'a, T>) -> &'s T - where - 'a: 's, - { - &self.0.inner[r.0.get() - 1] - } - - pub fn insert<'a, 's>(&'s mut self, item: T) -> Ref<'a, T> - where - 'a: 's, - { - let r = Ref(self.0.next_ref, PhantomData); - self.0.next_ref = self - .0 - .next_ref - .checked_add(1) - .expect("maximum elements in arena."); - self.0.inner.push(item); - r - } -} - -#[cfg(test)] -mod tests { - use super::Arena; - - #[test] - fn test_compiles() { - let mut arena = Arena::::new(); - let r1 = arena.insert(1); - let r2 = arena.insert(2); - assert_eq!(*arena.get(r1), 1); - assert_eq!(*arena.get(r2), 2); - } - - #[test] - fn test_no_copy() { - #[derive(Debug, PartialEq, Eq)] - struct NoCopy(u8); - - let mut arena = Arena::new(); - let r1 = arena.insert(NoCopy(1)); - let r2 = arena.insert(NoCopy(2)); - - assert_eq!(&NoCopy(1), arena.get(r1)); - assert_eq!(&NoCopy(2), arena.get(r2)); - } -} diff --git a/src/frontend/ast.rs b/src/frontend/ast.rs index d9c05b6..ddefda9 100644 --- a/src/frontend/ast.rs +++ b/src/frontend/ast.rs @@ -1,102 +1,88 @@ -use core::{fmt, marker::PhantomData}; use std::borrow::Cow; -use super::arena::{Arena, Ref}; +/// Represents a named selector fragment, like `h2` or `#id`. +#[derive(Debug, Clone)] +pub enum NamedSelector<'a> { + /// Select by HTML tag name, such as `a` or `h2`. + Tag(&'a str), + /// Select by HTML class, such as `.kitty` or `.item`. + Class(&'a str), + /// Select by HTML id, such as `#unique` or `#main`. + Id(&'a str), +} -pub type AstArena<'a> = Arena>; +/// A selector fragment, combinable with [`SelectorCombinator`]. +#[derive(Debug, Clone)] +pub enum SelectorFragment<'a> { + /// An AND of named selector fragments. The string + /// `a#b.c` would be `[Tag("a"), Id("b"), Class("c")]`. + Named(NonEmpty>), + /// The special wildcard selector `*`. + Any, +} -#[derive(Debug)] -pub struct AstRef<'a, T>(Ref<'a, Ast<'a>>, PhantomData<&'a T>); +/// A [`Vec`] that is guaranteed to have at least one element. +#[derive(Debug, Clone)] +pub struct NonEmpty(Vec); -pub trait AstType<'a>: Sized { +impl NonEmpty { + #[inline] #[must_use] - fn unwrap_ref<'b>(node: &'b Ast<'a>) -> &'b Self; + pub fn from_one(item: T) -> Self { + Self(vec![item]) + } #[must_use] - fn wrap(self) -> Ast<'a>; -} - -pub trait AstArenaFlatten<'a>: AstType<'a> { - fn flatten<'s: 'o, 'o>(&'s self, arena: &'s AstArena<'a>, out: &mut Vec<&'o Self>); -} - -/// explicit impl because we want it even when `T` is not Clone -impl<'a, T> Clone for AstRef<'a, T> { - fn clone(&self) -> Self { - *self + pub fn from_vec(vec: Vec) -> Option { + if vec.is_empty() { + None + } else { + Some(Self(vec)) + } } -} -impl<'a, T> Copy for AstRef<'a, T> {} - -/// specialization for typed references -impl<'a> AstArena<'a> { #[inline] #[must_use] - pub fn get_variant<'s, T>(&'s self, r: AstRef<'a, T>) -> &'s T - where - 'a: 's, - T: AstType<'a>, - { - T::unwrap_ref(self.get(r.0)) + pub fn first(&self) -> &T { + assert!(self.0.len() > 0); + &self[0] } - pub fn insert_variant<'s, T: AstType<'a>>(&'s mut self, variant: T) -> AstRef<'a, T> - where - 'a: 's, - { - let r = self.insert(variant.wrap()); - AstRef(r, PhantomData) + #[inline] + #[must_use] + pub fn last(&self) -> &T { + assert!(self.0.len() > 0); + &self[self.0.len() - 1] } - #[must_use] - pub fn flatten>(&self, r: Option>) -> Vec<&T> { - let mut out = Vec::new(); - if let Some(r) = r { - self.get_variant(r).flatten(self, &mut out); - } - out + #[inline] + pub fn push(&mut self, item: T) { + self.0.push(item); } } -#[derive(Debug, Clone)] -pub enum Selector<'a> { - Any, - Tag(&'a str), - Class(&'a str), - Id(&'a str), -} +impl std::ops::Deref for NonEmpty { + type Target = [T]; -impl fmt::Display for Selector<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Any => write!(f, "*"), - Self::Tag(id) => write!(f, "{id}"), - Self::Class(id) => write!(f, ".{id}"), - Self::Id(id) => write!(f, "#{id}"), - } + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 } } -#[derive(Debug, Clone)] -pub enum SelectorCombinator<'a> { - NextSibling(Selector<'a>), - Child(Selector<'a>), - SubsequentSibling(Selector<'a>), - Descendent(Selector<'a>), - And(Selector<'a>), +#[derive(Debug, Clone, Copy)] +pub enum SelectorCombinator { + NextSibling, + Child, + SubsequentSibling, + Descendent, } -impl fmt::Display for SelectorCombinator<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::NextSibling(s) => write!(f, " + {s}"), - Self::Child(s) => write!(f, " > {s}"), - Self::SubsequentSibling(s) => write!(f, " ~ {s}"), - Self::Descendent(s) => write!(f, " {s}"), - Self::And(s) => write!(f, "{s}"), - } - } +/// A full CSS selector, with a nonempty head and a list of selectors. +#[derive(Debug, Clone)] +pub struct Selector<'a> { + pub head: SelectorFragment<'a>, + pub combinators: Vec<(SelectorCombinator, SelectorFragment<'a>)>, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] @@ -107,87 +93,6 @@ pub enum Qualifier { Collection, } -macro_rules! ast_enum { - { - // macro limitations can only have nonrepeating metavariable at this scope - // see https://github.com/rust-lang/rust/issues/96184 - #![$allmeta: meta] - $(#[$astmeta: meta])* - pub enum Ast<'a> { - $( - $(#[$indivmeta: meta])* - $(@flatten[$(.$preorder: ident, )* self $(, .$postorder: ident)*])? - $variant: ident { - $( - $(#[$membermeta: meta])* - $member: ident: $ty: ty, - )* - }, - )* - } - } => { - $( - #[$allmeta] - #[non_exhaustive] - $(#[$indivmeta])* - pub struct $variant<'a> { - $( - $(#[$membermeta])* - pub $member: $ty, - )* - lt: PhantomData<&'a ()>, - } - - impl<'a> $variant<'a> { - #[must_use] - #[inline] - pub const fn new($($member: $ty, )*) -> Self { - Self { lt: PhantomData, $($member, )* } - } - } - - impl<'a> AstType<'a> for $variant<'a> { - #[inline] - #[must_use] - fn unwrap_ref<'b>(node: &'b Ast<'a>) -> &'b Self { - let Ast::$variant(x) = node else { unreachable!(concat!("expected a `Ast::", stringify!($variant), "` variant."))}; - x - } - - #[inline] - #[must_use] - fn wrap(self) -> Ast<'a> { - Ast::$variant(self) - } - } - - $( - impl<'a> AstArenaFlatten<'a> for $variant<'a> { - fn flatten<'s: 'o, 'o>(&'s self, arena: &'s AstArena<'a>, out: &mut Vec<&'o Self>) { - $( - if let Some(pre) = self.$preorder { - arena.get_variant(pre).flatten(arena, out); - } - )* - out.push(self); - $( - if let Some(post) = self.$postorder { - arena.get_variant(post).flatten(arena, out); - } - )* - } - } - )? - )* - - #[$allmeta] - $(#[$astmeta])* - pub enum Ast<'a> { - $($variant($variant<'a>),)* - } - }; - } - #[derive(Debug, Clone)] pub enum Leaf<'a> { Var(&'a str), @@ -205,14 +110,14 @@ pub enum RValue<'a> { #[derive(Debug, Clone)] pub struct Inline<'a> { pub value: Leaf<'a>, - pub filters: Option>>, + pub filters: Vec>, } impl<'a> From> for Inline<'a> { fn from(value: Leaf<'a>) -> Self { Self { value, - filters: None, + filters: vec![], } } } @@ -221,56 +126,99 @@ impl<'a> From> for Inline<'a> { pub struct Statement<'a> { pub id: &'a str, pub value: RValue<'a>, - pub filters: Option>>, + pub filters: Vec>, } #[derive(Debug, Clone)] pub struct Element<'a> { pub url: Option>, - pub selector_head: Selector<'a>, - pub selectors: Option>>, + pub selector: Selector<'a>, pub qualifier: Qualifier, - pub statements: Option>>, + pub statements: Vec>, } #[derive(Debug, Clone)] -pub enum Filter<'a> { +pub enum FilterType<'a> { Call(FilterCall<'a>), Select(FilterSelect<'a>), } -ast_enum! { -#![derive(Debug, Clone)] -pub enum Ast<'a> { - @flatten[self, .next] - SelectorList { - sel: SelectorCombinator<'a>, - next: Option>>, - }, - @flatten[self, .next] - ArgList { - id: &'a str, - value: Inline<'a>, - next: Option>>, - }, - FilterCall { - id: &'a str, - args: Option>>, - }, - FilterSelect { - name: &'a str, - value: Inline<'a>, - }, - @flatten[self, .next] - FilterList { - filter: Filter<'a>, - qualifier: Qualifier, - next: Option>>, - }, - @flatten[self, .next] - StatementList { - value: Statement<'a>, - next: Option>>, - }, +#[derive(Debug, Clone)] +pub struct Filter<'a> { + pub filter: FilterType<'a>, + pub qualifier: Qualifier, +} + +#[derive(Debug, Clone)] +pub struct FilterCall<'a> { + pub id: &'a str, + pub args: Vec>, +} + +#[derive(Debug, Clone)] +pub struct FilterSelect<'a> { + pub name: &'a str, + pub value: Inline<'a>, } + +#[derive(Debug, Clone)] +pub struct Arg<'a> { + pub id: &'a str, + pub value: Inline<'a>, +} + +mod selector_display { + use core::fmt; + + use super::{NamedSelector, Selector, SelectorCombinator, SelectorFragment}; + + impl fmt::Display for NamedSelector<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + NamedSelector::Tag(s) => write!(f, "{s}"), + NamedSelector::Class(s) => write!(f, ".{s}"), + NamedSelector::Id(s) => write!(f, "#{s}"), + } + } + } + + impl fmt::Display for SelectorFragment<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SelectorFragment::Any => f.write_str("*")?, + SelectorFragment::Named(names) => { + // Technically unsound for a {tag}{tag} combo, but this is also + // not parsable currently. + for name in names.iter() { + write!(f, "{name}")?; + } + } + } + + Ok(()) + } + } + + impl fmt::Display for SelectorCombinator { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SelectorCombinator::Child => f.write_str(" > "), + SelectorCombinator::Descendent => f.write_str(" "), + SelectorCombinator::NextSibling => f.write_str(" + "), + SelectorCombinator::SubsequentSibling => f.write_str(" ~ "), + } + } + } + + impl fmt::Display for Selector<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.head)?; + + for (comb, frag) in self.combinators.iter() { + write!(f, "{comb}{frag}")?; + } + + Ok(()) + } + } } diff --git a/src/frontend/mod.rs b/src/frontend/mod.rs index 0baa54d..f145d76 100644 --- a/src/frontend/mod.rs +++ b/src/frontend/mod.rs @@ -1,8 +1,6 @@ -mod arena; pub mod ast; mod parser; mod scanner; -pub use ast::AstArena; pub use parser::{ParseError, Parser}; pub use scanner::{Lexeme, Span, Token}; diff --git a/src/frontend/parser.rs b/src/frontend/parser.rs index fd55697..f680df8 100644 --- a/src/frontend/parser.rs +++ b/src/frontend/parser.rs @@ -2,10 +2,9 @@ use core::fmt; use std::borrow::Cow; use super::{ - arena::Arena, ast::{ - ArgList, Ast, AstRef, Element, Filter, FilterCall, FilterList, FilterSelect, Inline, Leaf, - Qualifier, RValue, Selector, SelectorCombinator, SelectorList, Statement, StatementList, + Arg, Element, Filter, FilterCall, FilterSelect, FilterType, Inline, Leaf, NamedSelector, + NonEmpty, Qualifier, RValue, Selector, SelectorCombinator, SelectorFragment, Statement, }, scanner::{Lexeme, Scanner, Span, Token}, }; @@ -13,7 +12,6 @@ use super::{ #[derive(Debug)] pub struct Parser<'a> { scanner: Scanner<'a>, - arena: Arena>, } #[derive(Debug, Clone)] @@ -68,35 +66,27 @@ impl<'a> Parser<'a> { pub const fn new(input: &'a str) -> Self { Self { scanner: Scanner::new(input), - arena: Arena::new(), } } - pub fn parse(mut self) -> Result<(Arena>, Option>>)> { - let r = match self.parse_statement_list() { - Ok(r) => r, - Err(e) => { - return Err(e); - } - }; + pub fn parse(mut self) -> Result>> { + let statements = self.parse_statement_list()?; self.try_eat(Token::Eof)?; - Ok((self.arena, r)) + Ok(statements) } - pub fn parse_statement_list(&mut self) -> Result>>> { - let (_, lx) = self.scanner.peek_non_whitespace(); - - if lx.token == Token::Id { - let statement = self.parse_statement()?; - let next = self.parse_statement_list()?; - - Ok(Some( - self.arena - .insert_variant(StatementList::new(statement, next)), - )) - } else { - Ok(None) + pub fn parse_statement_list(&mut self) -> Result>> { + let mut vec = Vec::new(); + while let ( + _, + Lexeme { + token: Token::Id, .. + }, + ) = self.scanner.peek_non_whitespace() + { + vec.push(self.parse_statement()?); } + Ok(vec) } fn parse_statement(&mut self) -> Result> { @@ -154,8 +144,7 @@ impl<'a> Parser<'a> { fn parse_element(&mut self) -> Result> { let url = self.parse_maybe_url()?; - let selector_head = self.parse_selector()?; - let selectors = self.parse_selector_list()?; + let selector = self.parse_selector()?; self.try_eat(Token::BraceOpen)?; @@ -167,8 +156,7 @@ impl<'a> Parser<'a> { Ok(Element { url, - selector_head, - selectors, + selector, qualifier, statements, }) @@ -196,10 +184,7 @@ impl<'a> Parser<'a> { match lx.token { Token::Less => self.parse_inline(), Token::Dollar | Token::Int | Token::Float | Token::String => { - self.parse_leaf().map(|value| Inline { - value, - filters: None, - }) + self.parse_leaf().map(Inline::from) } _ => Err(ParseError::unexpected( vec![ @@ -215,7 +200,21 @@ impl<'a> Parser<'a> { } } - fn parse_selector_list(&mut self) -> Result>>> { + fn parse_selector(&mut self) -> Result> { + let head = self.parse_selector_fragment()?; + + let mut combinators = Vec::new(); + + while let Some(combinator) = self.parse_selector_combinator()? { + let fragment = self.parse_selector_fragment()?; + + combinators.push((combinator, fragment)); + } + + Ok(Selector { head, combinators }) + } + + fn parse_selector_combinator(&mut self) -> Result> { let mut item = self.scanner.peek_non_comment(); if item.1.token == Token::Whitespace { self.scanner.eat_token(); @@ -230,93 +229,95 @@ impl<'a> Parser<'a> { let (span, lx) = item; - let sel = match lx.token { - Token::BraceOpen | Token::ParenOpen => return Ok(None), + match lx.token { + Token::BraceOpen | Token::ParenOpen => Ok(None), // invariant: peek_next_whitespace is one of Id | Hash | Dot | Star // whitespace is eaten in the above block. - Token::Whitespace => SelectorCombinator::Descendent(self.parse_selector()?), + Token::Whitespace => Ok(Some(SelectorCombinator::Descendent)), Token::Greater => { self.scanner.eat_token(); - SelectorCombinator::Child(self.parse_selector()?) + Ok(Some(SelectorCombinator::Child)) } Token::Plus => { self.scanner.eat_token(); - SelectorCombinator::NextSibling(self.parse_selector()?) + Ok(Some(SelectorCombinator::NextSibling)) } Token::Tilde => { self.scanner.eat_token(); - SelectorCombinator::SubsequentSibling(self.parse_selector()?) + Ok(Some(SelectorCombinator::SubsequentSibling)) } - Token::Hash | Token::Dot | Token::Id | Token::Star => { - SelectorCombinator::And(self.parse_selector()?) - } - _ => { - return Err(ParseError::unexpected( - vec![ - Token::Whitespace, - Token::Greater, - Token::Plus, - Token::Tilde, - Token::Hash, - Token::Dot, - Token::Id, - Token::Star, - ], - lx, - span, - )) - } - }; + _ => Err(ParseError::unexpected( + vec![Token::Whitespace, Token::Greater, Token::Plus, Token::Tilde], + lx, + span, + )), + } + } - let itm = SelectorList::new(sel, self.parse_selector_list()?); + fn parse_selector_fragment(&mut self) -> Result> { + let (span, lx) = self.scanner.peek_non_whitespace(); - Ok(Some(self.arena.insert_variant(itm))) + match lx.token { + Token::Star => Ok(SelectorFragment::Any), + Token::Dot | Token::Id | Token::Hash => { + let named_selector = self.parse_named_selector()?; + let mut selector = NonEmpty::from_one(named_selector); + + while let Token::Dot | Token::Id | Token::Hash = + self.scanner.peek_non_whitespace().1.token + { + selector.push(self.parse_named_selector()?); + } + + Ok(SelectorFragment::Named(selector)) + } + _ => Err(ParseError::unexpected( + vec![Token::Star, Token::Dot, Token::Id, Token::Hash], + lx, + span, + )), + } } - fn parse_selector(&mut self) -> Result> { + fn parse_named_selector(&mut self) -> Result> { let (span, lx) = self.scanner.peek_non_whitespace(); match lx.token { Token::Dot => { self.scanner.eat_token(); - self.try_eat(Token::Id).map(|lx| Selector::Class(lx.value)) + self.try_eat(Token::Id) + .map(|lx| NamedSelector::Class(lx.value)) } Token::Hash => { self.scanner.eat_token(); - self.try_eat(Token::Id).map(|lx| Selector::Id(lx.value)) + self.try_eat(Token::Id) + .map(|lx| NamedSelector::Id(lx.value)) } Token::Id => { self.scanner.eat_token(); - Ok(Selector::Tag(lx.value)) - } - Token::Star => { - self.scanner.eat_token(); - Ok(Selector::Any) + Ok(NamedSelector::Tag(lx.value)) } _ => Err(ParseError::unexpected( - vec![Token::Dot, Token::Hash, Token::Id, Token::Star], + vec![Token::Dot, Token::Hash, Token::Id], lx, span, )), } } - fn parse_filter_list(&mut self) -> Result>>> { - let (_, lx) = self.scanner.peek_non_whitespace(); - if lx.token == Token::Pipe { + fn parse_filter_list(&mut self) -> Result>> { + let mut vec = Vec::new(); + + while let Token::Pipe = self.scanner.peek_non_whitespace().1.token { self.scanner.eat_token(); let filter = self.parse_filter()?; - let next = self.parse_filter_list()?; let qualifier = self.parse_qualifier()?; - let r = self - .arena - .insert_variant(FilterList::new(filter, qualifier, next)); - Ok(Some(r)) - } else { - Ok(None) + vec.push(Filter { filter, qualifier }) } + + Ok(vec) } - fn parse_filter(&mut self) -> Result> { + fn parse_filter(&mut self) -> Result> { let (span, lx) = self.scanner.peek_non_whitespace(); self.scanner.eat_token(); @@ -326,7 +327,7 @@ impl<'a> Parser<'a> { self.try_eat(Token::ParenOpen)?; let args = self.parse_arg_list()?; self.try_eat(Token::ParenClose)?; - Ok(Filter::Call(FilterCall::new(id, args))) + Ok(FilterType::Call(FilterCall { id, args })) } Token::BracketOpen => { let name = self.try_eat(Token::Id)?.value; @@ -334,13 +335,13 @@ impl<'a> Parser<'a> { let leaf = self.parse_leaf()?; let filters = self.parse_filter_list()?; self.try_eat(Token::BracketClose)?; - Ok(Filter::Select(FilterSelect::new( + Ok(FilterType::Select(FilterSelect { name, - Inline { + value: Inline { value: leaf, filters, }, - ))) + })) } _ => Err(ParseError::unexpected( vec![Token::Id, Token::BracketOpen], @@ -350,32 +351,39 @@ impl<'a> Parser<'a> { } } - fn parse_arg_list(&mut self) -> Result>>> { - let (span, lx) = self.scanner.peek_non_whitespace(); - match lx.token { - Token::ParenClose => Ok(None), - Token::Id => { - let id = lx.value; - self.scanner.eat_token(); - self.try_eat(Token::Colon)?; - let value = self.parse_value()?; - let next = match self.scanner.peek_non_whitespace().1.token { - Token::Comma => { + fn parse_arg_list(&mut self) -> Result>> { + let mut vec = Vec::new(); + loop { + let (span, lx) = self.scanner.peek_non_whitespace(); + match lx.token { + Token::ParenClose => break, + Token::Id => { + let id = lx.value; + self.scanner.eat_token(); + self.try_eat(Token::Colon)?; + let value = self.parse_value()?; + + vec.push(Arg { id, value }); + + // eat trailing comma. if there is no comma, there can be + // no subsequent args. + if let Token::Comma = self.scanner.peek_non_whitespace().1.token { self.scanner.eat_token(); - self.parse_arg_list()? + } else { + break; } - _ => None, - }; - - let r = self.arena.insert_variant(ArgList::new(id, value, next)); - Ok(Some(r)) + } + _ => { + return Err(ParseError::unexpected( + vec![Token::ParenClose, Token::Id], + lx, + span, + )) + } } - _ => Err(ParseError::unexpected( - vec![Token::ParenClose, Token::Id], - lx, - span, - )), } + + Ok(vec) } fn parse_qualifier(&mut self) -> Result { @@ -453,23 +461,6 @@ mod tests { use super::{parse_string_literal, Parser}; use crate::frontend::ast::*; - fn fmt_selector<'a>(head: &Selector<'a>, list: &[&SelectorList<'a>]) -> String { - use std::fmt::Write as _; - let mut out = String::new(); - write!(&mut out, "{head}").expect("fmt error"); - for node in list { - let _ = match &node.sel { - SelectorCombinator::And(s) => write!(&mut out, "{s}"), - SelectorCombinator::Child(s) => write!(&mut out, " > {s}"), - SelectorCombinator::Descendent(s) => write!(&mut out, " {s}"), - SelectorCombinator::NextSibling(s) => write!(&mut out, " + {s}"), - SelectorCombinator::SubsequentSibling(s) => write!(&mut out, " ~ {s}"), - }; - } - - out - } - #[test] fn test_parse() { let string = r#"a: h1 { @@ -480,25 +471,20 @@ mod tests { }; };"#; let parser = Parser::new(string); - let (arena, r) = parser.parse().expect("parsing failed"); + let stmts = parser.parse().expect("parsing failed"); - let stmts = arena.flatten(r); - let stmt = &stmts[0].value; + let stmt = &stmts[0]; assert_eq!(stmt.id, "a"); let RValue::Element(element) = &stmt.value else { panic!("expected element"); }; - assert_eq!( - fmt_selector(&element.selector_head, &arena.flatten(element.selectors)), - "h1" - ); + assert_eq!(element.selector.to_string(), "h1"); assert_eq!(element.qualifier, Qualifier::One); - let statements = arena.flatten(element.statements); - let stmt = &statements[0].value; + let stmt = &element.statements[0]; assert!( matches!( @@ -512,17 +498,17 @@ mod tests { "found {stmt:?}", ); - let filters = arena.flatten(stmt.filters); + let filters = &stmt.filters; assert!( matches!( &filters[..], [ - FilterList { - filter: Filter::Call(FilterCall { id: "cat", .. }), + Filter { + filter: FilterType::Call(FilterCall { id: "cat", .. }), .. }, - FilterList { - filter: Filter::Call(FilterCall { id: "meow", .. }), + Filter { + filter: FilterType::Call(FilterCall { id: "meow", .. }), .. } ] @@ -530,18 +516,18 @@ mod tests { "found {filters:?}" ); - let Filter::Call(filter) = &filters[0].filter else { + let FilterType::Call(filter) = &filters[0].filter else { unreachable!("Validated as Filter::Call above"); }; - let args = arena.flatten(filter.args); + let args = &filter.args; assert!( matches!( &args[..], - [ArgList { + [Arg { id: "i", value: Inline { value: Leaf::String(Cow::Borrowed("x")), - filters: None, + filters: _, }, .. }] @@ -550,17 +536,14 @@ mod tests { &args[..] ); - let stmt = &statements[1].value; + let stmt = &element.statements[1]; let RValue::Element(element) = &stmt.value else { panic!("Expected element"); }; - assert!(element.statements.is_none()); - assert_eq!( - fmt_selector(&element.selector_head, &arena.flatten(element.selectors)), - "h2#x > .cat" - ); + assert!(element.statements.is_empty()); + assert_eq!(element.selector.to_string(), "h2#x > .cat"); } #[test] diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs index c07bdf0..65570f1 100644 --- a/src/interpreter/mod.rs +++ b/src/interpreter/mod.rs @@ -6,45 +6,23 @@ use scrapelect_filter_types::{ bail, other, Bindings, EValue, ElementContext, ListIter, PValue, Value, }; -use crate::frontend::{ - ast::{ - self, AstRef, Element, FilterList, Inline, Leaf, Qualifier, RValue, Statement, - StatementList, - }, - AstArena, -}; +use crate::frontend::ast::{self, Element, Inline, Leaf, Qualifier, RValue, Statement}; mod execution_mode; pub mod filter; pub use scrapelect_filter_types::{Error, MessageExt, Result, WrapExt}; -impl<'ast> Element<'ast> { - #[must_use] - pub fn to_selector_str(&self, ast: &AstArena<'ast>) -> String { - use std::fmt::Write as _; - - let mut buf = String::new(); - let _ = write!(&mut buf, "{}", self.selector_head); - - for selector in ast.flatten(self.selectors) { - let _ = write!(&mut buf, "{}", selector.sel); - } - - buf - } -} - #[derive(Debug)] pub struct Interpreter<'ast> { client: reqwest::Client, - ast: &'ast AstArena<'ast>, + ast: &'ast [Statement<'ast>], } impl<'ast> Interpreter<'ast> { #[must_use] #[inline] - pub fn new(ast: &'ast AstArena<'ast>) -> Self { + pub fn new(ast: &'ast [Statement<'ast>]) -> Self { Self::with_client( ast, reqwest::Client::builder() @@ -60,18 +38,14 @@ impl<'ast> Interpreter<'ast> { #[must_use] #[inline] - pub const fn with_client(ast: &'ast AstArena<'ast>, client: reqwest::Client) -> Self { + pub const fn with_client(ast: &'ast [Statement<'ast>], client: reqwest::Client) -> Self { Self { ast, client } } #[inline] - pub async fn interpret( - &self, - root_url: Url, - head: Option>>, - ) -> Result> { + pub async fn interpret(&self, root_url: Url) -> Result> { let html = self.get_html(&root_url).await?; - self.interpret_block(html.root_element(), head, None, root_url) + self.interpret_block(html.root_element(), self.ast, None, root_url) .await } @@ -98,14 +72,14 @@ impl<'ast> Interpreter<'ast> { async fn interpret_block( &self, element: scraper::ElementRef<'_>, - statements: Option>>, + statements: &'ast [Statement<'ast>], parent: Option<&ElementContext<'ast, '_>>, url: Url, ) -> Result> { let mut ctx = ElementContext::new(element, parent, url); - for statement in self.ast.flatten(statements) { - self.interpret_statement(&statement.value, &mut ctx).await?; + for statement in statements { + self.interpret_statement(statement, &mut ctx).await?; } Ok(ctx.bindings.into_data()) @@ -113,7 +87,7 @@ impl<'ast> Interpreter<'ast> { async fn interpret_statement( &self, - statement: &Statement<'ast>, + statement: &'ast Statement<'ast>, ctx: &mut ElementContext<'ast, '_>, ) -> Result<()> { let inner = || async move { @@ -122,8 +96,7 @@ impl<'ast> Interpreter<'ast> { RValue::Element(e) => Value::from_data(self.interpret_element(e, ctx).await?), }; - let value = - self.apply_filters(value, self.ast.flatten(statement.filters).into_iter(), ctx)?; + let value = self.apply_filters(value, statement.filters.iter(), ctx)?; ctx.set(Cow::Borrowed(statement.id), value)?; Ok(()) @@ -139,11 +112,10 @@ impl<'ast> Interpreter<'ast> { async fn interpret_element( &self, - element: &Element<'ast>, + element: &'ast Element<'ast>, ctx: &mut ElementContext<'ast, '_>, ) -> Result { - let selector_str = element.to_selector_str(self.ast); - let selector_str = &selector_str; + let selector_str = &element.selector.to_string(); let inner = || async move { let html; @@ -177,7 +149,7 @@ impl<'ast> Interpreter<'ast> { let values = futures::future::try_join_all(element_refs.into_iter().map(|element_ref| { - self.interpret_block(element_ref, element.statements, Some(ctx), url.clone()) + self.interpret_block(element_ref, &element.statements, Some(ctx), url.clone()) })) .await?; @@ -196,23 +168,22 @@ impl<'ast> Interpreter<'ast> { fn apply_filters<'ctx>( &self, value: EValue<'ctx>, - mut filters: impl Iterator>, + mut filters: impl Iterator>, ctx: &mut ElementContext<'ast, 'ctx>, ) -> Result> { filters .try_fold(value.into(), |value, filter| match &filter.filter { - ast::Filter::Call(call) => { - let args = self - .ast - .flatten(call.args) - .into_iter() + ast::FilterType::Call(call) => { + let args = call + .args + .iter() .map(|arg| Ok((arg.id, self.eval_inline(&arg.value, ctx)?))) .collect::>>()?; qualify(filter.qualifier, value, |value| { filter::dispatch_filter(call.id, value, args.clone(), ctx) }) } - ast::Filter::Select(select) => qualify(filter.qualifier, value, |value| { + ast::FilterType::Select(select) => qualify(filter.qualifier, value, |value| { let ls: ListIter = value.try_unwrap()?; let mut inner_scope = @@ -239,12 +210,12 @@ impl<'ast> Interpreter<'ast> { fn eval_inline<'ctx>( &self, - inline: &Inline<'ast>, + inline: &'ast Inline<'ast>, ctx: &mut ElementContext<'ast, 'ctx>, ) -> Result> { self.apply_filters( leaf_to_value(ctx, &inline.value)?, - self.ast.flatten(inline.filters).into_iter(), + inline.filters.iter(), ctx, ) } @@ -289,14 +260,15 @@ pub async fn interpret_string_harness( ) -> anyhow::Result> { use anyhow::Context; - let (ast, head) = crate::frontend::Parser::new(program).parse()?; + let statements = crate::frontend::Parser::new(program).parse()?; let html = scraper::Html::parse_document(html); - let interpreter = Interpreter::new(Box::leak(Box::new(ast))); + let statements = Box::leak(Box::new(statements)); + let interpreter = Interpreter::new(statements); interpreter // TODO: url hack .interpret_block( html.root_element(), - head, + statements, None, "file:///tmp/inmemory.html".parse().expect("URL parse"), ) @@ -312,7 +284,7 @@ mod tests { let input = std::fs::read_to_string(format!("examples/inputs/{filename}.html"))?; let script = std::fs::read_to_string(format!("examples/scrps/{filename}.scrp"))?; - let (ast, head) = crate::frontend::Parser::new(&script) + let ast = crate::frontend::Parser::new(&script) .parse() .expect("parse error"); @@ -321,7 +293,7 @@ mod tests { let result = super::Interpreter::new(&ast) .interpret_block( html.root_element(), - head, + &ast, None, format!( "file://{}/examples/inputs/{}", diff --git a/src/main.rs b/src/main.rs index a2e1d76..4808715 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,7 +23,7 @@ async fn main() -> anyhow::Result<()> { let parser = Parser::new(&pgm); - let (ast, head) = parser + let ast = parser .parse() .with_context(|| format!("parse error in {filename}:"))?; @@ -33,7 +33,6 @@ async fn main() -> anyhow::Result<()> { .interpret( url.parse() .with_context(|| format!("Couldn't parse `{url}` into a URL"))?, - head, ) .await?;