Skip to content

Commit

Permalink
Remove EndOfFileTrivia and always collect trivia after top-level pa…
Browse files Browse the repository at this point in the history
…rsing (#733)

Checks boxes in #640 #638

This adapts `ParserFunction` to always attempt parsing the leading
trivia after we had a succesful match. This has a nice side benefit of
not overwriting tokens that would allow for more progress when
collecting the final trivia (unlike previous [AttemptedParseRule,
EndOfFileTrivia] sequence).

I'm not so happy with the resulting tree shape, however; IMO we should
either:

1. return a "tree" that has both parents - the first one being the
expected parsed rule node but also the second parent being the "leading
trivia"
2. or at least introduce a notion of a virtual EndOfFile token that this
final end-of-file trivia can belong to.

Otherwise, a lone, final LeadingTrivia child belonging to the rule node
seems misplaced and it's not intuitive for me to find the remaining
trivia in that place. I'm open for other suggestions!
  • Loading branch information
Xanewok authored Jan 12, 2024
1 parent 330700c commit 954a236
Show file tree
Hide file tree
Showing 70 changed files with 2,126 additions and 613 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -278,10 +278,7 @@ impl Selector {
RustNamedNode {
name: _,
node: RustNode::Rule(rule),
} if matches!(
rule.kind,
RuleKind::LeadingTrivia | RuleKind::TrailingTrivia | RuleKind::EndOfFileTrivia
) => {
} if rule.kind.is_trivia() => {
// skip trivia, since it's not part of the AST
self.index += 1;
continue;
Expand Down
40 changes: 35 additions & 5 deletions crates/codegen/parser/runtime/src/support/parser_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::rc::Rc;

use crate::cst::{self, NamedNode};
use crate::kinds::TokenKind;
use crate::lexer::Lexer;
use crate::parse_error::ParseError;
use crate::parse_output::ParseOutput;
use crate::support::context::ParserContext;
Expand All @@ -12,16 +13,43 @@ pub trait ParserFunction<L>
where
Self: Fn(&L, &mut ParserContext<'_>) -> ParserResult,
{
fn parse(&self, language: &L, input: &str) -> ParseOutput;
fn parse(&self, language: &L, input: &str, collect_trivia: bool) -> ParseOutput;
}

impl<L, F> ParserFunction<L> for F
where
L: Lexer,
F: Fn(&L, &mut ParserContext<'_>) -> ParserResult,
{
fn parse(&self, language: &L, input: &str) -> ParseOutput {
fn parse(&self, language: &L, input: &str, collect_trivia: bool) -> ParseOutput {
let mut stream = ParserContext::new(input);
let result = self(language, &mut stream);
let mut result = self(language, &mut stream);

// For a succesful/recovered parse, collect any remaining trivia as part of the parse result
// TODO(#737): Remove this once we unconditionally collect trivia
if collect_trivia {
if let ParserResult::Match(r#match) = &mut result {
let [topmost] = r#match.nodes.as_mut_slice() else {
unreachable!(
"Match at the top level of a parse does not have exactly one Rule node"
)
};

let eof_trivia = match Lexer::leading_trivia(language, &mut stream) {
ParserResult::Match(eof_trivia) if !eof_trivia.nodes.is_empty() => {
Some(eof_trivia.nodes)
}
_ => None,
};

if let (cst::Node::Rule(rule), Some(eof_trivia)) = (&mut topmost.node, eof_trivia) {
let mut new_children = rule.children.clone();
new_children.extend(eof_trivia);

topmost.node = cst::Node::rule(rule.kind, new_children);
}
}
}

let is_incomplete = matches!(result, ParserResult::IncompleteMatch(_));
let is_recovering = matches!(result, ParserResult::SkippedUntil(_));
Expand Down Expand Up @@ -67,7 +95,6 @@ where

let start = stream.position();

let errors = stream.into_errors();
// Mark the rest of the unconsumed stream as skipped and report an error
// NOTE: IncompleteMatch internally consumes the stream when picked via choice,
// so needs a separate check here.
Expand All @@ -81,7 +108,8 @@ where
cst::Node::token(TokenKind::SKIPPED, input[start.utf8..].to_string());
let mut new_children = topmost_rule.children.clone();
new_children.push(NamedNode::anonymous(skipped_node));
let mut errors = errors;

let mut errors = stream.into_errors();
errors.push(ParseError::new_covering_range(
start..input.into(),
expected_tokens,
Expand All @@ -93,6 +121,8 @@ where
}
} else {
let parse_tree = cst::Node::Rule(topmost_rule);
let errors = stream.into_errors();

// Sanity check: Make sure that succesful parse is equivalent to not having any SKIPPED nodes
debug_assert_eq!(
errors.is_empty(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,12 @@ impl Language {
pub fn parse(&self, kind: RuleKind, input: &str) -> ParseOutput {
match kind {
{%- for parser_name, _code in code.parser_functions -%}
RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input),
{# TODO(#737): Remove the special case once we stop generating RuleKind for trivia #}
{%- if parser_name is ending_with("Trivia") -%}
RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input, false),
{%- else -%}
RuleKind::{{ parser_name }} => Self::{{ parser_name | snake_case }}.parse(self, input, true),
{%- endif -%}
{%- endfor -%}
}
}
Expand Down
52 changes: 2 additions & 50 deletions crates/solidity/inputs/language/src/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,9 @@ pub trait GrammarConstructorDslV2 {
}

impl GrammarConstructorDslV2 for Grammar {
#[allow(clippy::too_many_lines)] // TODO: Remove me once the hack below is removed
fn from_dsl_v2(lang: &model::Language) -> Grammar {
// Collect language items into a lookup table to speed up resolution
let mut items: HashMap<_, _> = lang
let items: HashMap<_, _> = lang
.items_with_section()
.map(|(_, topic, item)| {
(
Expand All @@ -35,39 +34,6 @@ impl GrammarConstructorDslV2 for Grammar {
})
.collect();

// TODO(#638): To minimize regression in the parser migration, we keep the existing DSL v1 model
// of SourceUnit being followed by `EndOfFileTrivia`.
items.insert(
Identifier::from("SourceUnit"),
(
None,
model::Item::Struct {
item: Rc::new(model::StructItem {
name: Identifier::from("SourceUnit"),
enabled: None,
error_recovery: None,
fields: IndexMap::from_iter([
(
Identifier::from("members"),
model::Field::Optional {
reference: Identifier::from("SourceUnitMembers"),

enabled: None,
},
),
(
Identifier::from("eof_trivia"),
model::Field::Optional {
reference: Identifier::from("EndOfFileTrivia"),
enabled: None,
},
),
]),
}),
},
),
);

let mut resolved = HashMap::new();
let mut ctx = ResolveCtx {
items: &items,
Expand All @@ -84,16 +50,6 @@ impl GrammarConstructorDslV2 for Grammar {
def: resolve_trivia(lang.trailing_trivia.clone(), &mut ctx),
}) as Rc<dyn TriviaParserDefinition>;

let eof_trivia = Rc::new(NamedTriviaParser {
name: "EndOfFileTrivia",
def: resolve_trivia(lang.leading_trivia.clone(), &mut ctx),
}) as Rc<dyn TriviaParserDefinition>;

ctx.resolved.insert(
Identifier::from("EndOfFileTrivia"),
eof_trivia.clone().into(),
);

for (_lex_ctx, item) in items.values() {
resolve_grammar_element(item.name(), &mut ctx);
}
Expand Down Expand Up @@ -151,7 +107,7 @@ impl GrammarConstructorDslV2 for Grammar {
trailing_trivia_parser: trailing_trivia.clone(),
elements: resolved_items
.chain(
[leading_trivia, trailing_trivia, eof_trivia]
[leading_trivia, trailing_trivia]
.into_iter()
.map(|elem| (elem.name(), elem.into())),
)
Expand Down Expand Up @@ -321,10 +277,6 @@ struct ResolveCtx<'a> {

#[allow(clippy::too_many_lines)] // FIXME: Simplify me when we simplify the v2-to-v1 interface
fn resolve_grammar_element(ident: &Identifier, ctx: &mut ResolveCtx<'_>) -> GrammarElement {
if ident.as_str() == "EndOfFileTrivia" {
return ctx.resolved.get(ident).unwrap().clone();
}

let (lex_ctx, elem) = ctx.items.get(ident).expect("Missing item");

// FIXME: Don't leak
Expand Down
3 changes: 0 additions & 3 deletions crates/solidity/outputs/cargo/crate/src/generated/kinds.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 954a236

Please sign in to comment.