Skip to content

Commit

Permalink
Precise error location in UNRECOGNIZED by attaching trivia to the exp…
Browse files Browse the repository at this point in the history
…ected nonterminal (#1172)

Fixes #841

In examples like the following:

```
// some trivia
unexpected
```

The parsing function was returning a terminal with `UNRECOGNIZED` kind
and the entire text inside. Since a terminal cannot have trivia
attached, we have to change the returning value to be a nonterminal. But
which one? For a moment I thought of a new specific `UNRECOGNIZED` kind,
but after discussing with Omar, it was clear that the better option was
to return the expected one: if we are parsing a `Statement`, then return
a `Statement` with the trivia and the `UNRECOGNIZED` terminal at the
end. This is realized in the `NoMatch` structure with a new field for
the optional expected nonterminal kind, which is essentially attached in
the `ParserResult::with_kind` method.

The commit history contains a previous attempt in which the trivia was
cached within the `NoMatch` struct. This added unnecessary complexity,
so I decided against it.

**Note1:** In a couple of places the code will use a
`ParserResult::no_match(<default values>)`. Noting there was a
`ParseResult::default()` function, I decided to use it instead, but I'm
not sure if this is the expected use of the default function.

**Note2:** I see that the error is one off at the end, probably
accounting EOF? But was already the case before, so there's no real
change there.
  • Loading branch information
beta-ziliani authored Dec 12, 2024
1 parent 9f3c44a commit 6102886
Show file tree
Hide file tree
Showing 33 changed files with 292 additions and 140 deletions.
5 changes: 5 additions & 0 deletions .changeset/smart-cooks-suffer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@nomicfoundation/slang": minor
---

Improved error recovery, where leading trivia are always parsed and included before an erroneous terminal.
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ pub(crate) trait Lexer {
.is_some_and(|t| t.accepted_as(kind))
{
input.set_position(start);
return ParserResult::no_match(vec![kind]);
return ParserResult::no_match(None, vec![kind]);
}
let end = input.position();

Expand Down Expand Up @@ -129,7 +129,7 @@ pub(crate) trait Lexer {
.is_some_and(|t| t.accepted_as(kind))
{
input.set_position(restore);
return ParserResult::no_match(vec![kind]);
return ParserResult::no_match(None, vec![kind]);
}
let end = input.position();
children.push(Edge::anonymous(Node::terminal(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pub struct ChoiceHelper {
impl ChoiceHelper {
pub fn new(input: &mut ParserContext<'_>) -> Self {
Self {
result: ParserResult::no_match(vec![]),
result: ParserResult::default(),
start_position: input.mark(),
recovered_errors: vec![],
last_progress: input.position(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,44 @@ where
ParserResult::PrattOperatorMatch(..) => unreachable!("PrattOperatorMatch is internal"),

ParserResult::NoMatch(no_match) => {
// Parse leading trivia (see #1172 for details). One could argue that trivia should be parsed
// just once, and returned in the `NoMatch` structure. However, in rules like (This | That),
// trivia is already parsed twice, one for each branch. And there's a good reason: each branch might
// accept different trivia, so it's not clear what the combination of the two rules should return in a
// NoMatch. Therefore, we just parse it again. Note that trivia is anyway cached by the parser (#1119).
let mut trivia_nodes = if let ParserResult::Match(matched) =
Lexer::leading_trivia(parser, &mut stream)
{
matched.nodes
} else {
vec![]
};

let mut start = TextIndex::ZERO;
for edge in &trivia_nodes {
if let Node::Terminal(terminal) = &edge.node {
if terminal.kind.is_valid() {
start.advance_str(terminal.text.as_str());
}
}
}
let input = &input[start.utf8..];
let kind = if input.is_empty() {
TerminalKind::MISSING
} else {
TerminalKind::UNRECOGNIZED
};

let node = Node::terminal(kind, input.to_string());
let tree = if no_match.kind.is_none() || start.utf8 == 0 {
node
} else {
trivia_nodes.push(Edge::anonymous(node));
Node::nonterminal(no_match.kind.unwrap(), trivia_nodes)
};
ParseOutput {
parse_tree: Node::terminal(kind, input.to_string()),
parse_tree: tree,
errors: vec![ParseError::new(
TextIndex::ZERO..input.into(),
start..start + input.into(),
no_match.expected_terminals,
)],
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pub enum ParserResult {
impl Default for ParserResult {
fn default() -> Self {
Self::NoMatch(NoMatch {
kind: None,
expected_terminals: vec![],
})
}
Expand All @@ -34,13 +35,13 @@ impl ParserResult {
ParserResult::IncompleteMatch(IncompleteMatch::new(nodes, expected_terminals))
}

/// Whenever a parser didn't run because it's disabled due to versioning. Shorthand for `no_match(vec![])`.
/// Whenever a parser didn't run because it's disabled due to versioning. Shorthand for `no_match(None, vec![])`.
pub fn disabled() -> Self {
Self::no_match(vec![])
Self::no_match(None, vec![])
}

pub fn no_match(expected_terminals: Vec<TerminalKind>) -> Self {
ParserResult::NoMatch(NoMatch::new(expected_terminals))
pub fn no_match(kind: Option<NonterminalKind>, expected_terminals: Vec<TerminalKind>) -> Self {
ParserResult::NoMatch(NoMatch::new(kind, expected_terminals))
}

#[must_use]
Expand All @@ -61,7 +62,9 @@ impl ParserResult {
nodes: vec![Edge::anonymous(Node::nonterminal(new_kind, skipped.nodes))],
..skipped
}),
ParserResult::NoMatch(_) => self,
ParserResult::NoMatch(no_match) => {
ParserResult::no_match(Some(new_kind), no_match.expected_terminals)
}
ParserResult::PrattOperatorMatch(_) => {
unreachable!("PrattOperatorMatch cannot be converted to a nonterminal")
}
Expand Down Expand Up @@ -240,13 +243,18 @@ impl IncompleteMatch {

#[derive(PartialEq, Eq, Clone, Debug)]
pub struct NoMatch {
/// The nonterminal kind this match is coming from
pub kind: Option<NonterminalKind>,
/// Terminals that would have allowed for more progress. Collected for the purposes of error reporting.
pub expected_terminals: Vec<TerminalKind>,
}

impl NoMatch {
pub fn new(expected_terminals: Vec<TerminalKind>) -> Self {
Self { expected_terminals }
pub fn new(kind: Option<NonterminalKind>, expected_terminals: Vec<TerminalKind>) -> Self {
Self {
kind,
expected_terminals,
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ impl ParserResult {
ParseResultKind::Incomplete => {
ParserResult::incomplete_match(nodes, expected_terminals)
}
ParseResultKind::NoMatch => ParserResult::no_match(expected_terminals),
ParseResultKind::NoMatch => ParserResult::no_match(None, expected_terminals),
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ impl<const MIN_COUNT: usize> RepetitionHelper<MIN_COUNT> {

// Couldn't get a full match but we allow 0 items - return an empty match
// so the parse is considered valid but note the expected terminals
ParserResult::NoMatch(NoMatch { expected_terminals }) if MIN_COUNT == 0 => {
ParserResult::NoMatch(NoMatch {
kind: _,
expected_terminals,
}) if MIN_COUNT == 0 => {
return ParserResult::r#match(vec![], expected_terminals);
}
// Don't try repeating if we don't have a full match and we require at least one
Expand Down Expand Up @@ -60,7 +63,9 @@ impl<const MIN_COUNT: usize> RepetitionHelper<MIN_COUNT> {
ParserResult::IncompleteMatch(IncompleteMatch {
expected_terminals, ..
})
| ParserResult::NoMatch(NoMatch { expected_terminals }),
| ParserResult::NoMatch(NoMatch {
expected_terminals, ..
}),
) => {
input.rewind(save);
running.expected_terminals = expected_terminals;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ impl SeparatedHelper {
}
ParserResult::NoMatch(no_match) => {
return if accum.is_empty() {
ParserResult::no_match(no_match.expected_terminals)
ParserResult::no_match(None, no_match.expected_terminals)
} else {
ParserResult::incomplete_match(accum, no_match.expected_terminals)
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ impl PrecedenceParserDefinitionCodegen for PrecedenceParserDefinitionRef {
[inner @ cst::Edge { node: cst::Node::Nonterminal(node), .. }] if node.kind == NonterminalKind::#op_nonterminal_name => {
ParserResult::r#match(vec![inner.clone()], r#match.expected_terminals.clone())
}
_ => ParserResult::no_match(vec![]),
_ => ParserResult::default(),
}
_ => ParserResult::no_match(vec![]),
_ => ParserResult::default(),
}
};

Expand Down
1 change: 1 addition & 0 deletions crates/metaslang/cst/generated/public_api.txt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 9 additions & 5 deletions crates/metaslang/cst/src/text_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ impl TextIndex {
}
}
}

pub fn advance_str(&mut self, text: &str) {
let mut iter = text.chars().peekable();
while let Some(c) = iter.next() {
let n = iter.peek();
self.advance(c, n);
}
}
}

impl PartialOrd for TextIndex {
Expand All @@ -63,11 +71,7 @@ impl Display for TextIndex {
impl<T: AsRef<str>> From<T> for TextIndex {
fn from(s: T) -> Self {
let mut result = Self::ZERO;
let mut iter = s.as_ref().chars().peekable();
while let Some(c) = iter.next() {
let n = iter.peek();
result.advance(c, n);
}
result.advance_str(s.as_ref());
result
}
}
Expand Down
Loading

0 comments on commit 6102886

Please sign in to comment.