From 7e467ce199cb07acb443da9f542fbcc74f2a5321 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gustavo=20Gir=C3=A1ldez?= Date: Wed, 7 Aug 2024 04:54:53 -0300 Subject: [PATCH] Remove `...` ellipsis from query syntax and add explicit adjacency operator (#1030) This PR changes the CST query syntax by: - Removing the `...` ellipsis operator and adding it implicitly at the edges and between elements in a matching sequence. So that `[Foo [A] [B]]` is equivalent to `[Foo ... [A] ... [B] ...]`. - Adding the `.` adjacency (anchor) operator to explicitly indicate that a matched node should be the first (eg. `[Foo . [A]]`) or last child (eg. `[Foo [A] .]`), or that two matched nodes should be consecutive (eg. `[Foo [A] . [B]]`). The adjacency operator is allowed in sub-sequences of alternative options or quantified sequences, but not at the beginning or end of the pattern, where the adjacency is implicit. So: `[Foo ([Bar] | [X] . [Y])]` is allowed, but `[Foo ([Bar] | . [X])]` is not. - Trivia kinds cannot be used in node matchers. The PR also introduces some semantic changes to query execution: - Trivia nodes are skipped over when executing a query and they cannot be matched against. - Only the first implicit ellipsis operator is allowed to match multiple nodes in a sequence of siblings, unless there is an explicit node match in between. This prevents the engine from returning duplicate results when two ellipsis operators are effectively adjacent (for example when they are separated by an optional matcher). For example, given a sequence such as `ABCD`, the query `[_] ["B"]? [_]` will operationally be equivalent to `[_] ... ["B"]? ... [_]`. With the previous semantics, this would have returned 4 results, matching: 1. zero nodes for the first ellipsis, the optional matches the `B` and the second ellipsis takes the `C` 2. zero nodes for the first ellipsis, zero nodes for the optional, and the second ellipsis takes both `BC` 3. 
first ellipsis takes `B`, optional matches nothing, and second ellipsis takes `C` 4. first ellipsis takes both `BC`, and optional and second ellipsis take no nodes After this PR, only two results are possible, corresponding to the cases 1 and 4 above, since the second ellipsis is allowed to match nodes _only_ if the optional succeeds in matching at least some node. The two returned results are distinct though, because the user may want to capture the optional with the query `[_] @x ["B"]? [_]`. By unification semantics the optional can match zero or one nodes, which is consistent with the results obtained without capturing. --------- Co-authored-by: Omar Tawfik <15987992+OmarTawfik@users.noreply.github.com> --- .changeset/few-taxis-retire.md | 5 + .changeset/gentle-shirts-deliver.md | 5 + crates/metaslang/cst/src/query/engine.rs | 373 ++++++++++++++---- crates/metaslang/cst/src/query/model.rs | 10 +- crates/metaslang/cst/src/query/parser.rs | 184 ++++++--- .../inputs/language/bindings/rules.msgb | 200 ++++------ .../bindings/generated/binding_rules.rs | 200 ++++------ .../outputs/cargo/tests/src/binding_rules.rs | 22 ++ .../src/bindings_assertions/assertions.rs | 38 +- .../outputs/cargo/tests/src/bindings_rules.rs | 10 - .../src/doc_examples/tree_query_language.rs | 144 +++++-- .../tests/src/doc_examples/using_queries.rs | 15 +- .../solidity/outputs/cargo/tests/src/lib.rs | 2 +- .../tests/src/doc-examples/using-queries.ts | 15 +- .../outputs/cargo/tests/src/graph/mod.rs | 4 +- .../cargo/tests/src/query/engine_tests.rs | 267 ++++++++++++- .../cargo/tests/src/query/parser_tests.rs | 89 +++-- .../outputs/npm/tests/src/tests/query.ts | 2 +- .../public/user-guide/tree-query-language.md | 42 +- 19 files changed, 1127 insertions(+), 500 deletions(-) create mode 100644 .changeset/few-taxis-retire.md create mode 100644 .changeset/gentle-shirts-deliver.md create mode 100644 crates/solidity/outputs/cargo/tests/src/binding_rules.rs delete mode 100644 
crates/solidity/outputs/cargo/tests/src/bindings_rules.rs diff --git a/.changeset/few-taxis-retire.md b/.changeset/few-taxis-retire.md new file mode 100644 index 0000000000..7b924c5feb --- /dev/null +++ b/.changeset/few-taxis-retire.md @@ -0,0 +1,5 @@ +--- +"@nomicfoundation/slang": minor +--- + +Tree Query Language: queries now ignore trivia nodes. diff --git a/.changeset/gentle-shirts-deliver.md b/.changeset/gentle-shirts-deliver.md new file mode 100644 index 0000000000..77e5b8b740 --- /dev/null +++ b/.changeset/gentle-shirts-deliver.md @@ -0,0 +1,5 @@ +--- +"@nomicfoundation/slang": minor +--- + +Tree Query Language: remove the ellipsis query `...` operator making it implicit, add an adjacency operator `.`. diff --git a/crates/metaslang/cst/src/query/engine.rs b/crates/metaslang/cst/src/query/engine.rs index ff5f1284a1..995dfa2b89 100644 --- a/crates/metaslang/cst/src/query/engine.rs +++ b/crates/metaslang/cst/src/query/engine.rs @@ -9,7 +9,7 @@ use super::model::{ }; use crate::cst::NodeKind; use crate::query::CaptureQuantifier; -use crate::KindTypes; +use crate::{KindTypes, TerminalKind as _}; impl Cursor { pub fn query(self, queries: Vec>) -> QueryMatchIterator { @@ -46,6 +46,8 @@ impl Cursor { NodeSelector::EdgeLabelAndNodeText { .. } => false, }, + Node::::Terminal(terminal) if terminal.kind.is_trivia() => false, + Node::::Terminal(terminal) => match node_selector { NodeSelector::Anonymous => true, NodeSelector::NodeKind { node_kind } => { @@ -79,31 +81,67 @@ impl ASTNode { Self::Sequence(matcher) => matcher.children[0].can_match(cursor), Self::OneOrMore(matcher) => matcher.child.can_match(cursor), Self::Optional(_) => true, - Self::Ellipsis => true, + Self::Adjacency => true, } } - fn create_matcher(&self, cursor: Cursor) -> MatcherRef { + // The `require_explicit_match` parameter modifies the behaviour of this and + // later matchers. If this value is true, this and later matchers should not + // match sibling nodes implicitly. 
+ // Currently this only modifies the behaviour of the ellipsis matcher, which + // otherwise will attempt to consume any number of sibling nodes. + // In a sequence of matchers, this value is set to true by the ellipsis + // operator itself, to consume all available sibling nodes and prevent later + // ellipsis matchers from doing so. + // Conversely, it's set to false by the `NodeMatcher`, both when recursing + // into its children and for later matchers after itself, as it handles an + // explicit match requested by the user. + // All other matchers should propagate the received value forward. + // + // The whole point of propagating this flag is to prevent a weird + // interaction between ellipsis operators working on the same set of sibling + // nodes. While two consecutive ellipsis operators should never happen, we + // have the `OptionalMatcher` which will not consume any nodes in the nil + // case. This means that `... [_]? ...` will effectively work (in one case) + // as `... ...`. If we allow both ellipsis operators to consume any number + // of nodes, for a sequence of N nodes we get N+1 identical query results + // when the operators take turns matching each prefix and complementary + // suffix of the list of nodes. By only allowing the first ellipsis operator + // to consume an arbitrary number of nodes, we reduce the returned matches + // to a single one. 
+ // + fn create_matcher(&self, cursor: Cursor, require_explicit_match: bool) -> MatcherRef { match self { - Self::Capture(matcher) => { - Box::new(CaptureMatcher::::new(Rc::clone(matcher), cursor)) - } + Self::Capture(matcher) => Box::new(CaptureMatcher::::new( + Rc::clone(matcher), + cursor, + require_explicit_match, + )), Self::NodeMatch(matcher) => { + // By definition this matcher matches nodes explicitly Box::new(NodeMatchMatcher::::new(Rc::clone(matcher), cursor)) } - Self::Sequence(matcher) => { - Box::new(SequenceMatcher::::new(Rc::clone(matcher), cursor)) - } - Self::Alternatives(matcher) => { - Box::new(AlternativesMatcher::::new(Rc::clone(matcher), cursor)) - } - Self::Optional(matcher) => { - Box::new(OptionalMatcher::::new(Rc::clone(matcher), cursor)) - } - Self::OneOrMore(matcher) => { - Box::new(OneOrMoreMatcher::::new(Rc::clone(matcher), cursor)) - } - Self::Ellipsis => Box::new(EllipsisMatcher::::new(cursor)), + Self::Sequence(matcher) => Box::new(SequenceMatcher::::new( + Rc::clone(matcher), + cursor, + require_explicit_match, + )), + Self::Alternatives(matcher) => Box::new(AlternativesMatcher::::new( + Rc::clone(matcher), + cursor, + require_explicit_match, + )), + Self::Optional(matcher) => Box::new(OptionalMatcher::::new( + Rc::clone(matcher), + cursor, + require_explicit_match, + )), + Self::OneOrMore(matcher) => Box::new(OneOrMoreMatcher::::new( + Rc::clone(matcher), + cursor, + require_explicit_match, + )), + Self::Adjacency => Box::new(AdjacencyMatcher::::new(cursor, require_explicit_match)), } } } @@ -112,7 +150,7 @@ pub struct QueryMatch { pub queries: Rc>>, pub query_number: usize, pub root_cursor: Cursor, - // These correspond to the capture definitions in tne query + // These correspond to the capture definitions in the query pub captures: BTreeMap>>, } @@ -179,7 +217,8 @@ impl QueryMatchIterator { while self.query_number < self.queries.len() { let ast_node = &self.queries[self.query_number].ast_node; if 
ast_node.can_match(&self.cursor) { - self.matcher = Some(ast_node.create_matcher(self.cursor.clone())); + // The first matcher in the query should allow implicit matches + self.matcher = Some(ast_node.create_matcher(self.cursor.clone(), false)); return; }; self.query_number += 1; @@ -216,11 +255,28 @@ impl Iterator for QueryMatchIterator { } } +#[derive(Clone)] +struct MatcherResult { + // if cursor.is_completed() -> end of input + // if !cursor.is_completed() -> there is more input to go + cursor: Cursor, + + // Controls whether next matchers can match nodes implicitly. For matchers + // applied on a sequence of sibling nodes, this will be: + // - initially false, allowing the first found ellipsis matcher to consume + // an arbitrary number of nodes + // - true after the execution of an ellipsis, thus preventing later ellipsis + // from consuming nodes + // - propagated forward by other matchers, until + // - an actual `NodeMatcher` successfully matches a node, which then flips + // this value back to false + require_explicit_match: bool, +} + trait Matcher { // None -> failed to match, you must backtrack. 
DO NOT call again - // Some(cursor) if cursor.is_complete -> matched, end of input - // Some(cursor) if !cursor.is_complete -> matched, more input to go - fn next(&mut self) -> Option>; + // Some(result) -> matched, check result.cursor and pass require_explicit_match forward + fn next(&mut self) -> Option>; fn record_captures(&self, captures: &mut BTreeMap>>); } type MatcherRef = Box>; @@ -232,8 +288,14 @@ struct CaptureMatcher { } impl CaptureMatcher { - fn new(matcher: Rc>, cursor: Cursor) -> Self { - let child = matcher.child.create_matcher(cursor.clone()); + fn new( + matcher: Rc>, + cursor: Cursor, + require_explicit_match: bool, + ) -> Self { + let child = matcher + .child + .create_matcher(cursor.clone(), require_explicit_match); Self { matcher, cursor, @@ -243,7 +305,7 @@ impl CaptureMatcher { } impl Matcher for CaptureMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { self.child.next() } @@ -275,7 +337,7 @@ impl NodeMatchMatcher { } impl Matcher for NodeMatchMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { if self.cursor.is_completed() { return None; } @@ -293,25 +355,39 @@ impl Matcher for NodeMatchMatcher { if let Some(child) = self.matcher.child.as_ref() { let mut child_cursor = self.cursor.clone(); if !child_cursor.go_to_first_child() { + // We have child matchers, but no children. return None; } - self.child = Some(child.create_matcher(child_cursor)); + // Start traversing the children nodes allowing an ellipsis + // operator to match implicitly. + self.child = Some(child.create_matcher(child_cursor, false)); } else { + // We have no child matchers, we can return the result now. 
let mut return_cursor = self.cursor.clone(); return_cursor.irrevocably_go_to_next_sibling(); - return Some(return_cursor); + return Some(MatcherResult { + cursor: return_cursor, + require_explicit_match: false, + }); } } if let Some(child) = self.child.as_mut() { - while let Some(cursor) = child.as_mut().next() { + // Match our children with the child matcher repeatedly. + while let Some(MatcherResult { cursor, .. }) = child.as_mut().next() { if cursor.is_completed() { + // If match found and exhausted our children list, return + // the match *from our own cursor* let mut return_cursor = self.cursor.clone(); return_cursor.irrevocably_go_to_next_sibling(); - return Some(return_cursor); + return Some(MatcherResult { + cursor: return_cursor, + require_explicit_match: false, + }); } } + // No more matches from the child matcher, we will backtrack at this point. self.child = None; } @@ -325,43 +401,107 @@ impl Matcher for NodeMatchMatcher { } } +enum SequenceItem { + ChildMatcher(usize), + Ellipsis, +} + struct SequenceMatcher { matcher: Rc>, children: Vec>, cursor: Cursor, is_initialised: bool, + template: Vec, + require_explicit_match: bool, } -impl SequenceMatcher { - fn new(matcher: Rc>, cursor: Cursor) -> Self { +impl SequenceMatcher { + fn new( + matcher: Rc>, + cursor: Cursor, + require_explicit_match: bool, + ) -> Self { + // Produce a template of instructions to create the matchers for the + // sequence by inserting ellipsis matchers at the start, end, and in + // between each of the child matchers, unless we find an adjacency + // operator. If the sequence is adjacent (eg. option in alt or + // quantified group sequence) then we should not add matchers at the + // edges. 
+ let (mut template, last_adjacent) = matcher.children.iter().enumerate().fold( + (Vec::new(), matcher.adjacent), + |(mut acc, last_adjacent), (index, child)| { + if matches!(child, ASTNode::Adjacency) { + if last_adjacent { + unreachable!("Found two consecutive adjacency operators") + } + acc.push(SequenceItem::ChildMatcher(index)); + (acc, true) + } else { + if !last_adjacent { + acc.push(SequenceItem::Ellipsis); + } + acc.push(SequenceItem::ChildMatcher(index)); + (acc, false) + } + }, + ); + if !last_adjacent && !matcher.adjacent { + template.push(SequenceItem::Ellipsis); + } Self { matcher, children: vec![], cursor, is_initialised: false, + template, + require_explicit_match, + } + } + + fn create_matcher( + &self, + index: usize, + cursor: Cursor, + require_explicit_match: bool, + ) -> MatcherRef { + let item = &self.template[index]; + match item { + SequenceItem::Ellipsis => { + Box::new(EllipsisMatcher::new(cursor, require_explicit_match)) + } + SequenceItem::ChildMatcher(index) => { + self.matcher.children[*index].create_matcher(cursor, require_explicit_match) + } } } } impl Matcher for SequenceMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { if !self.is_initialised { self.is_initialised = true; let child_cursor = self.cursor.clone(); - let child = self.matcher.children[0].create_matcher(child_cursor); + let child = self.create_matcher(0, child_cursor, self.require_explicit_match); self.children.push(child); } while !self.children.is_empty() { - if let Some(child_cursor) = self.children.last_mut().unwrap().next() { - if self.children.len() == self.matcher.children.len() { - return Some(child_cursor); + if let Some(child_matcher_result) = self.children.last_mut().unwrap().next() { + if self.children.len() == self.template.len() { + // Last child, return its result as our own + return Some(child_matcher_result); } - - let child = self.matcher.children[self.children.len()].create_matcher(child_cursor); + // Create the next child 
matcher propagating the + // `require_explicit_match` flag forward. + let child = self.create_matcher( + self.children.len(), + child_matcher_result.cursor, + child_matcher_result.require_explicit_match, + ); self.children.push(child); } else { + // Backtrack self.children.pop(); } } @@ -381,26 +521,35 @@ struct AlternativesMatcher { next_child_number: usize, child: Option>, cursor: Cursor, + require_explicit_match: bool, } impl AlternativesMatcher { - fn new(matcher: Rc>, cursor: Cursor) -> Self { + fn new( + matcher: Rc>, + cursor: Cursor, + require_explicit_match: bool, + ) -> Self { Self { matcher, next_child_number: 0, child: None, cursor, + require_explicit_match, } } } impl Matcher for AlternativesMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { loop { if self.child.is_none() { + // Create the next available child matcher forwarding the + // `require_explicit_match` flag, or give up if we have no more match self.matcher.children.get(self.next_child_number) { Some(child) => { - let child = child.create_matcher(self.cursor.clone()); + let child = + child.create_matcher(self.cursor.clone(), self.require_explicit_match); self.child = Some(child); self.next_child_number += 1; } @@ -409,7 +558,7 @@ impl Matcher for AlternativesMatcher { } match self.child.as_mut().unwrap().next() { - Some(cursor) => return Some(cursor), + Some(child_matcher_result) => return Some(child_matcher_result), None => self.child = None, } } @@ -425,37 +574,52 @@ struct OptionalMatcher { child: Option>, cursor: Cursor, have_nonempty_match: bool, + require_explicit_match: bool, } impl OptionalMatcher { - fn new(matcher: Rc>, cursor: Cursor) -> Self { + fn new( + matcher: Rc>, + cursor: Cursor, + require_explicit_match: bool, + ) -> Self { Self { matcher, child: None, cursor, have_nonempty_match: false, + require_explicit_match, } } } impl Matcher for OptionalMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { if let Some(child) = 
self.child.as_mut() { - match child.next() { - r#match @ Some(_) => { - self.have_nonempty_match = true; - r#match - } - None => { - self.child = None; - None - } + // Second visit, we have a child matcher created + if let Some(child_matcher_result) = child.next() { + self.have_nonempty_match = true; + Some(child_matcher_result) + } else { + self.child = None; + None } } else { + // First visit, we don't have a child matcher yet, so create it + // forwarding our `require_explicit_match` flag let child_cursor = self.cursor.clone(); - let child = self.matcher.child.create_matcher(child_cursor); + let child = self + .matcher + .child + .create_matcher(child_cursor, self.require_explicit_match); self.child = Some(child); - Some(self.cursor.clone()) + + // Return a match result for the empty case, forwarding the + // `require_explicit_match` flag. + Some(MatcherResult { + cursor: self.cursor.clone(), + require_explicit_match: self.require_explicit_match, + }) } } @@ -471,36 +635,43 @@ impl Matcher for OptionalMatcher { struct OneOrMoreMatcher { matcher: Rc>, children: Vec>, - cursor_for_next_repetition: Option>, + result_for_next_repetition: Option>, } impl OneOrMoreMatcher { - fn new(matcher: Rc>, cursor: Cursor) -> Self { - let cursor_for_next_repetition = Some(cursor); + fn new( + matcher: Rc>, + cursor: Cursor, + require_explicit_match: bool, + ) -> Self { + let result_for_next_repetition = Some(MatcherResult { + cursor, + require_explicit_match, + }); Self { matcher, children: vec![], - cursor_for_next_repetition, + result_for_next_repetition, } } } impl Matcher for OneOrMoreMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { loop { - if let Some(cursor_for_next_repetition) = self.cursor_for_next_repetition.take() { + if let Some(last_result) = self.result_for_next_repetition.take() { let next_child = self .matcher .child - .create_matcher(cursor_for_next_repetition); + .create_matcher(last_result.cursor, 
last_result.require_explicit_match); self.children.push(next_child); } else { let tail = self.children.last_mut().unwrap(); - if let Some(cursor) = tail.next() { - if !cursor.is_completed() { - self.cursor_for_next_repetition = Some(cursor.clone()); + if let Some(child_matcher_result) = tail.next() { + if !child_matcher_result.cursor.is_completed() { + self.result_for_next_repetition = Some(child_matcher_result.clone()); } - return Some(cursor); + return Some(child_matcher_result); } self.children.pop(); if self.children.is_empty() { @@ -517,29 +688,50 @@ impl Matcher for OneOrMoreMatcher { } } +/// Matches any number of sibling nodes and is used in between other matchers +/// when matching sequences, unless an explicit adjacency operator is found. +/// If `require_explicit_match` is true, then this matcher can only return a +/// result for the empty case. This usually means that in the same sequence of +/// siblings we found a previous ellipsis matcher which will be able to consume +/// an arbitrary number of nodes. Then, the value is false if this is the first +/// `EllipsisMatcher` in a sibling list, or there was an explicit match (by a +/// `NodeMatcher`) in a previous matcher of the sequence. 
struct EllipsisMatcher { cursor: Cursor, has_returned_initial_empty_value: bool, + require_explicit_match: bool, } impl EllipsisMatcher { - fn new(cursor: Cursor) -> Self { + fn new(cursor: Cursor, require_explicit_match: bool) -> Self { Self { cursor, has_returned_initial_empty_value: false, + require_explicit_match, } } } impl Matcher for EllipsisMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { + // First visit, we always return a match for empty case if !self.has_returned_initial_empty_value { self.has_returned_initial_empty_value = true; - return Some(self.cursor.clone()); + // We need later matchers to avoid consuming nodes + return Some(MatcherResult { + cursor: self.cursor.clone(), + require_explicit_match: true, + }); } - if self.cursor.irrevocably_go_to_next_sibling() { - return Some(self.cursor.clone()); + // Subsequent visits: we only consume nodes if an explicit match is not + // required, ie. if this is the *first* ellipsis operator in a sibling + // sequence or there was an explicit match before us. 
+ if !self.require_explicit_match && self.cursor.irrevocably_go_to_next_sibling() { + return Some(MatcherResult { + cursor: self.cursor.clone(), + require_explicit_match: true, + }); } None @@ -547,3 +739,36 @@ impl Matcher for EllipsisMatcher { fn record_captures(&self, _: &mut BTreeMap>>) {} } + +/// Greedily consumes available trivia nodes only +struct AdjacencyMatcher { + cursor: Option>, + require_explicit_match: bool, +} + +impl AdjacencyMatcher { + fn new(cursor: Cursor, require_explicit_match: bool) -> Self { + Self { + cursor: Some(cursor), + require_explicit_match, + } + } +} + +impl Matcher for AdjacencyMatcher { + fn next(&mut self) -> Option> { + if let Some(mut cursor) = self.cursor.take() { + while !cursor.is_completed() && cursor.node().is_trivia() { + cursor.irrevocably_go_to_next_sibling(); + } + Some(MatcherResult { + cursor, + require_explicit_match: self.require_explicit_match, + }) + } else { + None + } + } + + fn record_captures(&self, _: &mut BTreeMap>>) {} +} diff --git a/crates/metaslang/cst/src/query/model.rs b/crates/metaslang/cst/src/query/model.rs index a2f3a8dd1c..4a61405d9a 100644 --- a/crates/metaslang/cst/src/query/model.rs +++ b/crates/metaslang/cst/src/query/model.rs @@ -81,7 +81,7 @@ impl Query { capture_quantifiers, )?; } - ASTNode::Ellipsis => {} + ASTNode::Adjacency => {} } Ok(()) } @@ -113,7 +113,7 @@ pub enum ASTNode { Alternatives(Rc>), Sequence(Rc>), OneOrMore(Rc>), - Ellipsis, + Adjacency, } impl ASTNode { @@ -167,7 +167,7 @@ impl fmt::Display for ASTNode { Self::OneOrMore(one_or_more) => { write!(f, "({})+", one_or_more.child) } - Self::Ellipsis => write!(f, "..."), + Self::Adjacency => write!(f, "."), } } } @@ -256,6 +256,10 @@ pub struct NodeMatchASTNode { #[derive(Debug)] pub struct SequenceASTNode { pub children: Vec>, + // By default sequences can match any number of nodes at the beginning and + // end of it. Setting this value to true prevents it and instead forces + // strict adjacency at the edges. 
+ pub adjacent: bool, } #[derive(Debug)] diff --git a/crates/metaslang/cst/src/query/parser.rs b/crates/metaslang/cst/src/query/parser.rs index a43ca8f205..6f490ac9a2 100644 --- a/crates/metaslang/cst/src/query/parser.rs +++ b/crates/metaslang/cst/src/query/parser.rs @@ -2,10 +2,10 @@ use std::fmt; use std::rc::Rc; use nom::branch::alt; -use nom::bytes::complete::{is_not, tag, take_while, take_while1, take_while_m_n}; -use nom::character::complete::{char, multispace0, multispace1, satisfy}; +use nom::bytes::complete::{is_not, take_while, take_while1, take_while_m_n}; +use nom::character::complete::{char, multispace0, multispace1, none_of, satisfy}; use nom::combinator::{ - all_consuming, cut, map_opt, map_res, opt, peek, recognize, success, value, verify, + all_consuming, cut, eof, map_opt, map_res, opt, peek, recognize, success, value, verify, }; use nom::error::{ErrorKind, FromExternalError, ParseError}; use nom::multi::{fold_many0, many1, separated_list1}; @@ -19,7 +19,7 @@ use super::model::{ }; use crate::cst::NodeKind; use crate::text_index::TextIndex; -use crate::{AbstractKind as _, KindTypes}; +use crate::{AbstractKind as _, KindTypes, TerminalKind as _}; // ---------------------------------------------------------------------------- // Parse errors @@ -47,10 +47,21 @@ enum QueryParserErrorKind { Syntax(QuerySyntaxError), } +#[derive(Clone)] enum QuerySyntaxError { EdgeLabel(String), NodeKind(String), EscapedUnicode, + DeprecatedEllipsis, + ForbiddenTriviaKind, +} + +impl QueryParserError { + fn from_query_syntax_error(input: I, error: QuerySyntaxError) -> Self { + QueryParserError { + errors: vec![(input, QueryParserErrorKind::Syntax(error))], + } + } } impl ParseError for QueryParserError { @@ -74,9 +85,7 @@ impl ParseError for QueryParserError { impl FromExternalError for QueryParserError { fn from_external_error(input: I, _kind: ErrorKind, e: QuerySyntaxError) -> Self { - QueryParserError { - errors: vec![(input, QueryParserErrorKind::Syntax(e))], - } + 
Self::from_query_syntax_error(input, e) } } @@ -88,6 +97,12 @@ impl fmt::Display for QuerySyntaxError { QuerySyntaxError::EscapedUnicode => { write!(f, "Invalid escaped Unicode character") } + QuerySyntaxError::DeprecatedEllipsis => { + write!(f, "The ellipsis `...` operator is deprecated, and replaced with a new adjacency `.` operator. For more information, check the Tree Query Language guide: https://nomicfoundation.github.io/slang/user-guide/tree-query-language/") + } + QuerySyntaxError::ForbiddenTriviaKind => { + write!(f, "Matching trivia nodes directly is forbidden. For more information, check the Tree Query Language guide: https://nomicfoundation.github.io/slang/user-guide/tree-query-language/") + } } } } @@ -149,7 +164,7 @@ fn compute_row_and_column(target: &str, input: &str) -> TextIndex { fn parse_matcher_alternatives( i: &str, ) -> IResult<&str, ASTNode, QueryParserError<&str>> { - separated_list1(token('|'), parse_matcher_sequence::) + separated_list1(token('|'), parse_matcher_alt_sequence::) .map(|mut children| { if children.len() == 1 { children.pop().unwrap() @@ -163,38 +178,76 @@ fn parse_matcher_alternatives( fn parse_matcher_sequence( i: &str, ) -> IResult<&str, ASTNode, QueryParserError<&str>> { - many1(parse_quantified_matcher::) - .map(|mut children| { - if children.len() == 1 { - children.pop().unwrap() - } else { - ASTNode::Sequence(Rc::new(SequenceASTNode { children })) - } - }) - .parse(i) + verify( + many1(parse_sequence_item::), + |children: &[ASTNode]| { + // It doesn't make sense for a sequence to be a single adjacency operator + children.len() > 1 || !matches!(children[0], ASTNode::Adjacency) + }, + ) + .map(|children| { + ASTNode::Sequence(Rc::new(SequenceASTNode { + children, + adjacent: false, + })) + }) + .parse(i) } -fn parse_quantified_matcher( +fn parse_matcher_alt_sequence( i: &str, ) -> IResult<&str, ASTNode, QueryParserError<&str>> { + verify( + many1(parse_sequence_item::), + |children: &[ASTNode]| { + // Alternative 
sequences cannot start or end with an adjacency + // operator, because it is implicitly adjacent to the previous and + // next matchers + !matches!(children[0], ASTNode::Adjacency) + && !matches!(children[children.len() - 1], ASTNode::Adjacency) + }, + ) + .map(|mut children| { + if children.len() == 1 { + // Alternative sequences of length 1 can be simplified to the child pattern + children.pop().unwrap() + } else { + ASTNode::Sequence(Rc::new(SequenceASTNode { + children, + adjacent: true, + })) + } + }) + .parse(i) +} + +fn parse_sequence_item(i: &str) -> IResult<&str, ASTNode, QueryParserError<&str>> { alt(( - ellipsis_token.map(|_| ASTNode::Ellipsis), // Cannot be quantified - pair( - parse_bound_matcher, - parse_trailing_quantifier, // admits epsilon - ) - .map(|(child, quantifier)| match quantifier { - CaptureQuantifier::ZeroOrOne => ASTNode::Optional(Rc::new(OptionalASTNode { child })), - CaptureQuantifier::ZeroOrMore => ASTNode::Optional(Rc::new(OptionalASTNode { - child: ASTNode::OneOrMore(Rc::new(OneOrMoreASTNode { child })), - })), - CaptureQuantifier::OneOrMore => ASTNode::OneOrMore(Rc::new(OneOrMoreASTNode { child })), - CaptureQuantifier::One => child, - }), + ellipsis_token, + adjacency_operator::, + parse_quantified_matcher::, )) .parse(i) } +fn parse_quantified_matcher( + i: &str, +) -> IResult<&str, ASTNode, QueryParserError<&str>> { + pair( + parse_bound_matcher, + parse_trailing_quantifier, // admits epsilon + ) + .map(|(child, quantifier)| match quantifier { + CaptureQuantifier::ZeroOrOne => ASTNode::Optional(Rc::new(OptionalASTNode { child })), + CaptureQuantifier::ZeroOrMore => ASTNode::Optional(Rc::new(OptionalASTNode { + child: ASTNode::OneOrMore(Rc::new(OneOrMoreASTNode { child })), + })), + CaptureQuantifier::OneOrMore => ASTNode::OneOrMore(Rc::new(OneOrMoreASTNode { child })), + CaptureQuantifier::One => child, + }) + .parse(i) +} + fn parse_bound_matcher(i: &str) -> IResult<&str, ASTNode, QueryParserError<&str>> { pair( 
opt(capture_name_token), @@ -307,7 +360,10 @@ fn anonymous_selector( terminated( terminated( char('_'), - peek(satisfy(|c| c != '_' && !c.is_alphanumeric())), + peek( + eof.map(|_| ' ') + .or(satisfy(|c| c != '_' && !c.is_alphanumeric())), + ), ), multispace0, ) @@ -319,14 +375,21 @@ fn kind_token(i: &str) -> IResult<&str, NodeKind, QueryParserEr terminated( preceded( peek(satisfy(|c| c.is_alphabetic() || c == '_')), - cut(map_res(raw_identifier, |id| { - T::TerminalKind::try_from_str(id.as_str()) - .map(NodeKind::Terminal) - .or_else(|_| { - T::NonterminalKind::try_from_str(id.as_str()).map(NodeKind::Nonterminal) - }) - .or(Err(QuerySyntaxError::NodeKind(id))) - })), + cut(map_res( + raw_identifier, + |id| match T::TerminalKind::try_from_str(id.as_str()) { + Ok(kind) => { + if kind.is_trivia() { + Err(QuerySyntaxError::ForbiddenTriviaKind) + } else { + Ok(NodeKind::Terminal(kind)) + } + } + Err(_) => T::NonterminalKind::try_from_str(id.as_str()) + .map(NodeKind::Nonterminal) + .or(Err(QuerySyntaxError::NodeKind(id))), + }, + )), ), multispace0, ) @@ -413,10 +476,43 @@ fn text_token(i: &str) -> IResult<&str, String, QueryParserError<&str>> { .parse(i) } -fn ellipsis_token(i: &str) -> IResult<&str, &str, QueryParserError<&str>> { - terminated(tag("..."), multispace0).parse(i) -} - fn token<'input>(c: char) -> impl Parser<&'input str, char, QueryParserError<&'input str>> { terminated(char(c), multispace0) } + +fn adjacency_operator(i: &str) -> IResult<&str, ASTNode, QueryParserError<&str>> { + // An adjacency operator is a single '.' character, and cannot be followed + // by another adjacency operator + pair(token('.'), cut(peek(none_of(". 
\t\r\n")))) + .map(|_| ASTNode::Adjacency) + .parse(i) +} + +fn recognize_as_failure( + error: QuerySyntaxError, + mut parser: F, +) -> impl FnMut(I) -> IResult> +where + F: nom::Parser>, +{ + use nom::Err::Failure; + move |input: I| { + let i = input.clone(); + match parser.parse(i) { + Ok((_, _)) => Err(Failure(QueryParserError::from_query_syntax_error( + input, + error.clone(), + ))), + Err(e) => Err(e), + } + } +} + +fn ellipsis_token(i: &str) -> IResult<&str, O, QueryParserError<&str>> { + use nom::bytes::complete::tag; + recognize_as_failure( + QuerySyntaxError::DeprecatedEllipsis, + terminated(tag("..."), multispace0), + ) + .parse(i) +} diff --git a/crates/solidity/inputs/language/bindings/rules.msgb b/crates/solidity/inputs/language/bindings/rules.msgb index 524d8c64c3..ad683dc7c9 100644 --- a/crates/solidity/inputs/language/bindings/rules.msgb +++ b/crates/solidity/inputs/language/bindings/rules.msgb @@ -94,8 +94,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; Top-level definitions... -@source_unit [SourceUnit ... [SourceUnitMembers - ... +@source_unit [SourceUnit [SourceUnitMembers [SourceUnitMember @unit_member ( [ContractDefinition] | [InterfaceDefinition] @@ -108,8 +107,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i | [UserDefinedValueTypeDefinition] | [EventDefinition] )] - ... -] ...] { +]] { edge @unit_member.lexical_scope -> @source_unit.lexical_scope ;; ... are available in the file's lexical scope @@ -124,7 +122,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Named definitions (contracts, functions, libraries, etc.) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@contract [ContractDefinition ... @name name: [Identifier] ...] 
{ +@contract [ContractDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -147,7 +145,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge type_member -> @contract.type_members } -@interface [InterfaceDefinition ... @name name: [Identifier] ...] { +@interface [InterfaceDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -170,7 +168,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge type_member -> @interface.type_members } -@library [LibraryDefinition ... @name name: [Identifier] ...] { +@library [LibraryDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -183,7 +181,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge member -> @library.members } -@function [FunctionDefinition ... name: [FunctionName ... @name [Identifier] ...] ...] { +@function [FunctionDefinition name: [FunctionName @name [Identifier]]] { node def attr (def) node_definition = @name @@ -221,16 +219,16 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; The identifier path constructs a path of nodes connected from right to left -[IdentifierPath ... @name [Identifier] ...] { +[IdentifierPath @name [Identifier]] { node @name.ref attr (@name.ref) node_reference = @name } -@id_path [IdentifierPath ... @name [Identifier] (trailing_trivia:[_])*] { +@id_path [IdentifierPath @name [Identifier] .] { edge @id_path.right -> @name.ref } -[IdentifierPath ... @left_name [Identifier] [Period] @right_name [Identifier] ...] { +[IdentifierPath @left_name [Identifier] . [Period] . @right_name [Identifier]] { node member attr (member) push_symbol = "." @@ -238,7 +236,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge member -> @left_name.ref } -@id_path [IdentifierPath (leading_trivia:[_])* @name [Identifier] ...] 
{ +@id_path [IdentifierPath . @name [Identifier]] { edge @name.ref -> @id_path.left } @@ -252,11 +250,11 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i node @param.def } -@param [Parameter ... @type_name [TypeName] ...] { +@param [Parameter @type_name [TypeName]] { edge @type_name.type_ref -> @param.lexical_scope } -@param [Parameter ... @type_name [TypeName] ... @name [Identifier]] { +@param [Parameter @type_name [TypeName] @name [Identifier]] { node def attr (def) node_definition = @name @@ -269,11 +267,9 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge typeof -> @type_name.output } -@function [FunctionDefinition ... parameters: [ParametersDeclaration - ... - [Parameters ... @param item: [Parameter] ...] - ... -] ...] { +@function [FunctionDefinition parameters: [ParametersDeclaration + [Parameters @param item: [Parameter]] +]] { edge @param.lexical_scope -> @function.lexical_scope ;; Input parameters are available in the function scope @@ -281,11 +277,9 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i attr (@function.lexical_scope -> @param.def) precedence = 1 } -@function [FunctionDefinition ... returns: [ReturnsDeclaration - ... - [ParametersDeclaration ... [Parameters ... @param item: [Parameter] ...] ...] - ... -] ...] { +@function [FunctionDefinition returns: [ReturnsDeclaration + [ParametersDeclaration [Parameters @param item: [Parameter]]] +]] { edge @param.lexical_scope -> @function.lexical_scope ;; Return parameters are available in the function scope @@ -299,29 +293,23 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. free-functions (ie. those defined at the file's level) are already ;; covered above -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @function variant: [FunctionDefinition]] - ... -] ...] 
{ +]] { edge @function.lexical_scope -> @contract.lexical_scope edge @contract.members -> @function.def } -@interface [InterfaceDefinition ... members: [InterfaceMembers - ... +@interface [InterfaceDefinition members: [InterfaceMembers item: [ContractMember @function variant: [FunctionDefinition]] - ... -] ...] { +]] { edge @function.lexical_scope -> @interface.lexical_scope edge @interface.members -> @function.def } -@library [LibraryDefinition ... members: [LibraryMembers - ... +@library [LibraryDefinition members: [LibraryMembers item: [ContractMember @function variant: [FunctionDefinition]] - ... -] ...] { +]] { edge @function.lexical_scope -> @library.lexical_scope edge @library.members -> @function.def } @@ -351,20 +339,20 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; The first statement in a block -@block [Block ... [Statements @stmt [Statement] ...] ...] { +@block [Block [Statements . @stmt [Statement]]] { if (version-matches ">= 0.5.0") { edge @stmt.lexical_scope -> @block.lexical_scope } } ;; Two consecutive statements -[Statements ... @left_stmt [Statement] @right_stmt [Statement] ...] { +[Statements @left_stmt [Statement] . @right_stmt [Statement]] { if (version-matches ">= 0.5.0") { edge @right_stmt.lexical_scope -> @left_stmt.lexical_scope } } -@block [Block ... [Statements ... @stmt [Statement]...] ...] { +@block [Block [Statements @stmt [Statement]]] { ;; Hoist statement definitions for Solidity < 0.5.0 if (version-matches "< 0.5.0") { ;; definitions are carried over to the block @@ -390,7 +378,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; Connect the function body's block lexical scope to the function -@function [FunctionDefinition ... [FunctionBody @block [Block]] ...] 
{ +@function [FunctionDefinition [FunctionBody @block [Block]]] { edge @block.lexical_scope -> @function.lexical_scope } @@ -400,11 +388,8 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @stmt [Statement [VariableDeclarationStatement - ... [VariableDeclarationType @var_type [TypeName]] - ... @name name: [Identifier] - ... ]] { node def attr (def) node_definition = @name @@ -419,26 +404,23 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge typeof -> @var_type.output } -@stmt [Statement [TupleDeconstructionStatement ... [TupleDeconstructionElements - ... - [TupleDeconstructionElement [TupleMember variant: [UntypedTupleMember ... @name name: [Identifier]]]] - ... -] ...]] { +@stmt [Statement [TupleDeconstructionStatement [TupleDeconstructionElements + [TupleDeconstructionElement [TupleMember variant: [UntypedTupleMember + @name name: [Identifier] + ]]] +]]] { node def attr (def) node_definition = @name edge @stmt.defs -> def } -@stmt [Statement [TupleDeconstructionStatement ... [TupleDeconstructionElements - ... +@stmt [Statement [TupleDeconstructionStatement [TupleDeconstructionElements [TupleDeconstructionElement [TupleMember variant: [TypedTupleMember - ... @member_type type_name: [TypeName] - ... - @name name: [Identifier]]]] - ... -] ...]] { + @name name: [Identifier] + ]]] +]]] { node def attr (def) node_definition = @name @@ -463,11 +445,8 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @state_var [StateVariableDefinition - ... @type_name type_name: [TypeName] - ... @name name: [Identifier] - ... ] { node def attr (def) node_definition = @name @@ -485,11 +464,9 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. Even though the grammar allows it, state variables can only be declared ;; inside contracts, and not interfaces or libraries. 
So, we will only bind ;; contract state variables. -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @state_var variant: [StateVariableDefinition]] - ... -] ...] { +]] { edge @state_var.lexical_scope -> @contract.lexical_scope edge @contract.lexical_scope -> @state_var.def } @@ -499,7 +476,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Enum definitions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@enum [EnumDefinition ... @name name: [Identifier] ...] { +@enum [EnumDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -513,9 +490,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @enum [EnumDefinition - ... - members: [EnumMembers ... @item [Identifier] ...] - ... + members: [EnumMembers @item [Identifier]] ] { node def attr (def) node_definition = @item @@ -527,27 +502,21 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. top-level enums (ie. those defined at the file's level) are already ;; covered above -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @enum variant: [EnumDefinition]] - ... -] ...] { +]] { edge @contract.type_members -> @enum.def } -@interface [InterfaceDefinition ... members: [InterfaceMembers - ... +@interface [InterfaceDefinition members: [InterfaceMembers item: [ContractMember @enum variant: [EnumDefinition]] - ... -] ...] { +]] { edge @interface.type_members -> @enum.def } -@library [LibraryDefinition ... members: [LibraryMembers - ... +@library [LibraryDefinition members: [LibraryMembers item: [ContractMember @enum variant: [EnumDefinition]] - ... -] ...] 
{ +]] { edge @library.members -> @enum.def } @@ -556,7 +525,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Structure definitions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@struct [StructDefinition ... @name name: [Identifier] ...] { +@struct [StructDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -573,16 +542,14 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge member -> @struct.members } -@struct [StructDefinition ... [StructMembers ... @member item: [StructMember] ...] ...] { +@struct [StructDefinition [StructMembers @member item: [StructMember]]] { node @member.lexical_scope edge @member.lexical_scope -> @struct.lexical_scope } -@struct [StructDefinition ... [StructMembers - ... - @member item: [StructMember ... @type_name [TypeName] ... @name name: [Identifier] ...] - ... -] ...] { +@struct [StructDefinition [StructMembers + @member item: [StructMember @type_name [TypeName] @name name: [Identifier]] +]] { node def attr (def) node_definition = @name @@ -601,29 +568,23 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. top-level enums (ie. those defined at the file's level) are already ;; covered above -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @struct variant: [StructDefinition]] - ... -] ...] { +]] { edge @struct.lexical_scope -> @contract.lexical_scope edge @contract.type_members -> @struct.def } -@interface [InterfaceDefinition ... members: [InterfaceMembers - ... +@interface [InterfaceDefinition members: [InterfaceMembers item: [ContractMember @struct variant: [StructDefinition]] - ... -] ...] { +]] { edge @struct.lexical_scope -> @interface.lexical_scope edge @interface.type_members -> @struct.def } -@library [LibraryDefinition ... members: [LibraryMembers - ... 
+@library [LibraryDefinition members: [LibraryMembers item: [ContractMember @struct variant: [StructDefinition]] - ... -] ...] { +]] { edge @struct.lexical_scope -> @library.lexical_scope edge @library.members -> @struct.def } @@ -640,38 +601,32 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; General case for nested expressions -@expr [Expression ... variant: [_ ... @child [Expression] ...] ...] { +@expr [Expression variant: [_ @child [Expression]]] { edge @child.lexical_scope -> @expr.lexical_scope } ;; Expressions as statements -@stmt [Statement ... variant: [_ ... @expr [Expression] ...] ...] { +@stmt [Statement variant: [_ @expr [Expression]]] { edge @expr.lexical_scope -> @stmt.lexical_scope } ;; Expressions used for variable declarations -@stmt [Statement ... variant: [VariableDeclarationStatement - ... - value: [VariableDeclarationValue ... @expr [Expression] ...] - ... -] ...] { +@stmt [Statement variant: [VariableDeclarationStatement + value: [VariableDeclarationValue @expr [Expression]] +]] { edge @expr.lexical_scope -> @stmt.lexical_scope } ;; Expressions used for state variable declarations @state_var [StateVariableDefinition - ... - value: [StateVariableDefinitionValue ... @expr [Expression]] - ... + value: [StateVariableDefinitionValue @expr [Expression]] ] { edge @expr.lexical_scope -> @state_var.lexical_scope } ;; Tuple expressions @tuple_expr [Expression [TupleExpression - ... - items: [TupleValues ... [TupleValue @expr [Expression]] ...] - ... + items: [TupleValues [TupleValue @expr [Expression]]] ]] { edge @expr.lexical_scope -> @tuple_expr.lexical_scope } @@ -679,7 +634,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Identifier expressions -@expr [Expression ... 
@name variant: [Identifier]] { +@expr [Expression @name variant: [Identifier]] { node ref attr (ref) node_reference = @name @@ -691,13 +646,10 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Member access expressions ;; TODO: implement variant for `.address` member -@expr [Expression ... [MemberAccessExpression - ... +@expr [Expression [MemberAccessExpression @operand operand: [Expression] - ... @name member: [Identifier] - ... -...]] { +]] { node ref attr (ref) node_reference = @name @@ -720,14 +672,12 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @args [ArgumentsDeclaration [PositionalArgumentsDeclaration - ... - [PositionalArguments ... @argument [Expression] ...] - ... + [PositionalArguments @argument [Expression]] ]] { edge @argument.lexical_scope -> @args.lexical_scope } -@named_arg [NamedArgument ... @name [Identifier] [Colon] @value [Expression]] { +@named_arg [NamedArgument @name [Identifier] [Colon] @value [Expression]] { node @named_arg.lexical_scope edge @value.lexical_scope -> @named_arg.lexical_scope @@ -739,22 +689,18 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @args [ArgumentsDeclaration [NamedArgumentsDeclaration - ... - [NamedArgumentGroup ... [NamedArguments ... @argument [NamedArgument] ...] ...] - ... + [NamedArgumentGroup [NamedArguments @argument [NamedArgument]]] ]] { edge @argument.lexical_scope -> @args.lexical_scope } -@funcall [Expression [FunctionCallExpression ... @args [ArgumentsDeclaration]]] { +@funcall [Expression [FunctionCallExpression @args [ArgumentsDeclaration]]] { edge @args.lexical_scope -> @funcall.lexical_scope } ;;; Type expressions -@type_expr [Expression [TypeExpression ... 
@type [TypeName] ...]] { +@type_expr [Expression [TypeExpression @type [TypeName]]] { edge @type.type_ref -> @type_expr.lexical_scope } - - diff --git a/crates/solidity/outputs/cargo/slang_solidity/src/generated/bindings/generated/binding_rules.rs b/crates/solidity/outputs/cargo/slang_solidity/src/generated/bindings/generated/binding_rules.rs index 0c47769020..28e97320f8 100644 --- a/crates/solidity/outputs/cargo/slang_solidity/src/generated/bindings/generated/binding_rules.rs +++ b/crates/solidity/outputs/cargo/slang_solidity/src/generated/bindings/generated/binding_rules.rs @@ -99,8 +99,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; Top-level definitions... -@source_unit [SourceUnit ... [SourceUnitMembers - ... +@source_unit [SourceUnit [SourceUnitMembers [SourceUnitMember @unit_member ( [ContractDefinition] | [InterfaceDefinition] @@ -113,8 +112,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i | [UserDefinedValueTypeDefinition] | [EventDefinition] )] - ... -] ...] { +]] { edge @unit_member.lexical_scope -> @source_unit.lexical_scope ;; ... are available in the file's lexical scope @@ -129,7 +127,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Named definitions (contracts, functions, libraries, etc.) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@contract [ContractDefinition ... @name name: [Identifier] ...] { +@contract [ContractDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -152,7 +150,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge type_member -> @contract.type_members } -@interface [InterfaceDefinition ... @name name: [Identifier] ...] 
{ +@interface [InterfaceDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -175,7 +173,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge type_member -> @interface.type_members } -@library [LibraryDefinition ... @name name: [Identifier] ...] { +@library [LibraryDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -188,7 +186,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge member -> @library.members } -@function [FunctionDefinition ... name: [FunctionName ... @name [Identifier] ...] ...] { +@function [FunctionDefinition name: [FunctionName @name [Identifier]]] { node def attr (def) node_definition = @name @@ -226,16 +224,16 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; The identifier path constructs a path of nodes connected from right to left -[IdentifierPath ... @name [Identifier] ...] { +[IdentifierPath @name [Identifier]] { node @name.ref attr (@name.ref) node_reference = @name } -@id_path [IdentifierPath ... @name [Identifier] (trailing_trivia:[_])*] { +@id_path [IdentifierPath @name [Identifier] .] { edge @id_path.right -> @name.ref } -[IdentifierPath ... @left_name [Identifier] [Period] @right_name [Identifier] ...] { +[IdentifierPath @left_name [Identifier] . [Period] . @right_name [Identifier]] { node member attr (member) push_symbol = "." @@ -243,7 +241,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge member -> @left_name.ref } -@id_path [IdentifierPath (leading_trivia:[_])* @name [Identifier] ...] { +@id_path [IdentifierPath . @name [Identifier]] { edge @name.ref -> @id_path.left } @@ -257,11 +255,11 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i node @param.def } -@param [Parameter ... @type_name [TypeName] ...] 
{ +@param [Parameter @type_name [TypeName]] { edge @type_name.type_ref -> @param.lexical_scope } -@param [Parameter ... @type_name [TypeName] ... @name [Identifier]] { +@param [Parameter @type_name [TypeName] @name [Identifier]] { node def attr (def) node_definition = @name @@ -274,11 +272,9 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge typeof -> @type_name.output } -@function [FunctionDefinition ... parameters: [ParametersDeclaration - ... - [Parameters ... @param item: [Parameter] ...] - ... -] ...] { +@function [FunctionDefinition parameters: [ParametersDeclaration + [Parameters @param item: [Parameter]] +]] { edge @param.lexical_scope -> @function.lexical_scope ;; Input parameters are available in the function scope @@ -286,11 +282,9 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i attr (@function.lexical_scope -> @param.def) precedence = 1 } -@function [FunctionDefinition ... returns: [ReturnsDeclaration - ... - [ParametersDeclaration ... [Parameters ... @param item: [Parameter] ...] ...] - ... -] ...] { +@function [FunctionDefinition returns: [ReturnsDeclaration + [ParametersDeclaration [Parameters @param item: [Parameter]]] +]] { edge @param.lexical_scope -> @function.lexical_scope ;; Return parameters are available in the function scope @@ -304,29 +298,23 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. free-functions (ie. those defined at the file's level) are already ;; covered above -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @function variant: [FunctionDefinition]] - ... -] ...] { +]] { edge @function.lexical_scope -> @contract.lexical_scope edge @contract.members -> @function.def } -@interface [InterfaceDefinition ... members: [InterfaceMembers - ... 
+@interface [InterfaceDefinition members: [InterfaceMembers item: [ContractMember @function variant: [FunctionDefinition]] - ... -] ...] { +]] { edge @function.lexical_scope -> @interface.lexical_scope edge @interface.members -> @function.def } -@library [LibraryDefinition ... members: [LibraryMembers - ... +@library [LibraryDefinition members: [LibraryMembers item: [ContractMember @function variant: [FunctionDefinition]] - ... -] ...] { +]] { edge @function.lexical_scope -> @library.lexical_scope edge @library.members -> @function.def } @@ -356,20 +344,20 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; The first statement in a block -@block [Block ... [Statements @stmt [Statement] ...] ...] { +@block [Block [Statements . @stmt [Statement]]] { if (version-matches ">= 0.5.0") { edge @stmt.lexical_scope -> @block.lexical_scope } } ;; Two consecutive statements -[Statements ... @left_stmt [Statement] @right_stmt [Statement] ...] { +[Statements @left_stmt [Statement] . @right_stmt [Statement]] { if (version-matches ">= 0.5.0") { edge @right_stmt.lexical_scope -> @left_stmt.lexical_scope } } -@block [Block ... [Statements ... @stmt [Statement]...] ...] { +@block [Block [Statements @stmt [Statement]]] { ;; Hoist statement definitions for Solidity < 0.5.0 if (version-matches "< 0.5.0") { ;; definitions are carried over to the block @@ -395,7 +383,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; Connect the function body's block lexical scope to the function -@function [FunctionDefinition ... [FunctionBody @block [Block]] ...] { +@function [FunctionDefinition [FunctionBody @block [Block]]] { edge @block.lexical_scope -> @function.lexical_scope } @@ -405,11 +393,8 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @stmt [Statement [VariableDeclarationStatement - ... 
[VariableDeclarationType @var_type [TypeName]] - ... @name name: [Identifier] - ... ]] { node def attr (def) node_definition = @name @@ -424,26 +409,23 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge typeof -> @var_type.output } -@stmt [Statement [TupleDeconstructionStatement ... [TupleDeconstructionElements - ... - [TupleDeconstructionElement [TupleMember variant: [UntypedTupleMember ... @name name: [Identifier]]]] - ... -] ...]] { +@stmt [Statement [TupleDeconstructionStatement [TupleDeconstructionElements + [TupleDeconstructionElement [TupleMember variant: [UntypedTupleMember + @name name: [Identifier] + ]]] +]]] { node def attr (def) node_definition = @name edge @stmt.defs -> def } -@stmt [Statement [TupleDeconstructionStatement ... [TupleDeconstructionElements - ... +@stmt [Statement [TupleDeconstructionStatement [TupleDeconstructionElements [TupleDeconstructionElement [TupleMember variant: [TypedTupleMember - ... @member_type type_name: [TypeName] - ... - @name name: [Identifier]]]] - ... -] ...]] { + @name name: [Identifier] + ]]] +]]] { node def attr (def) node_definition = @name @@ -468,11 +450,8 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @state_var [StateVariableDefinition - ... @type_name type_name: [TypeName] - ... @name name: [Identifier] - ... ] { node def attr (def) node_definition = @name @@ -490,11 +469,9 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. Even though the grammar allows it, state variables can only be declared ;; inside contracts, and not interfaces or libraries. So, we will only bind ;; contract state variables. -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @state_var variant: [StateVariableDefinition]] - ... -] ...] 
{ +]] { edge @state_var.lexical_scope -> @contract.lexical_scope edge @contract.lexical_scope -> @state_var.def } @@ -504,7 +481,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Enum definitions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@enum [EnumDefinition ... @name name: [Identifier] ...] { +@enum [EnumDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -518,9 +495,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @enum [EnumDefinition - ... - members: [EnumMembers ... @item [Identifier] ...] - ... + members: [EnumMembers @item [Identifier]] ] { node def attr (def) node_definition = @item @@ -532,27 +507,21 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. top-level enums (ie. those defined at the file's level) are already ;; covered above -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @enum variant: [EnumDefinition]] - ... -] ...] { +]] { edge @contract.type_members -> @enum.def } -@interface [InterfaceDefinition ... members: [InterfaceMembers - ... +@interface [InterfaceDefinition members: [InterfaceMembers item: [ContractMember @enum variant: [EnumDefinition]] - ... -] ...] { +]] { edge @interface.type_members -> @enum.def } -@library [LibraryDefinition ... members: [LibraryMembers - ... +@library [LibraryDefinition members: [LibraryMembers item: [ContractMember @enum variant: [EnumDefinition]] - ... -] ...] { +]] { edge @library.members -> @enum.def } @@ -561,7 +530,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Structure definitions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@struct [StructDefinition ... @name name: [Identifier] ...] 
{ +@struct [StructDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -578,16 +547,14 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge member -> @struct.members } -@struct [StructDefinition ... [StructMembers ... @member item: [StructMember] ...] ...] { +@struct [StructDefinition [StructMembers @member item: [StructMember]]] { node @member.lexical_scope edge @member.lexical_scope -> @struct.lexical_scope } -@struct [StructDefinition ... [StructMembers - ... - @member item: [StructMember ... @type_name [TypeName] ... @name name: [Identifier] ...] - ... -] ...] { +@struct [StructDefinition [StructMembers + @member item: [StructMember @type_name [TypeName] @name name: [Identifier]] +]] { node def attr (def) node_definition = @name @@ -606,29 +573,23 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. top-level enums (ie. those defined at the file's level) are already ;; covered above -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @struct variant: [StructDefinition]] - ... -] ...] { +]] { edge @struct.lexical_scope -> @contract.lexical_scope edge @contract.type_members -> @struct.def } -@interface [InterfaceDefinition ... members: [InterfaceMembers - ... +@interface [InterfaceDefinition members: [InterfaceMembers item: [ContractMember @struct variant: [StructDefinition]] - ... -] ...] { +]] { edge @struct.lexical_scope -> @interface.lexical_scope edge @interface.type_members -> @struct.def } -@library [LibraryDefinition ... members: [LibraryMembers - ... +@library [LibraryDefinition members: [LibraryMembers item: [ContractMember @struct variant: [StructDefinition]] - ... -] ...] 
{ +]] { edge @struct.lexical_scope -> @library.lexical_scope edge @library.members -> @struct.def } @@ -645,38 +606,32 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; General case for nested expressions -@expr [Expression ... variant: [_ ... @child [Expression] ...] ...] { +@expr [Expression variant: [_ @child [Expression]]] { edge @child.lexical_scope -> @expr.lexical_scope } ;; Expressions as statements -@stmt [Statement ... variant: [_ ... @expr [Expression] ...] ...] { +@stmt [Statement variant: [_ @expr [Expression]]] { edge @expr.lexical_scope -> @stmt.lexical_scope } ;; Expressions used for variable declarations -@stmt [Statement ... variant: [VariableDeclarationStatement - ... - value: [VariableDeclarationValue ... @expr [Expression] ...] - ... -] ...] { +@stmt [Statement variant: [VariableDeclarationStatement + value: [VariableDeclarationValue @expr [Expression]] +]] { edge @expr.lexical_scope -> @stmt.lexical_scope } ;; Expressions used for state variable declarations @state_var [StateVariableDefinition - ... - value: [StateVariableDefinitionValue ... @expr [Expression]] - ... + value: [StateVariableDefinitionValue @expr [Expression]] ] { edge @expr.lexical_scope -> @state_var.lexical_scope } ;; Tuple expressions @tuple_expr [Expression [TupleExpression - ... - items: [TupleValues ... [TupleValue @expr [Expression]] ...] - ... + items: [TupleValues [TupleValue @expr [Expression]]] ]] { edge @expr.lexical_scope -> @tuple_expr.lexical_scope } @@ -684,7 +639,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Identifier expressions -@expr [Expression ... @name variant: [Identifier]] { +@expr [Expression @name variant: [Identifier]] { node ref attr (ref) node_reference = @name @@ -696,13 +651,10 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Member access expressions ;; TODO: implement variant for `.address` member -@expr [Expression ... 
[MemberAccessExpression - ... +@expr [Expression [MemberAccessExpression @operand operand: [Expression] - ... @name member: [Identifier] - ... -...]] { +]] { node ref attr (ref) node_reference = @name @@ -725,14 +677,12 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @args [ArgumentsDeclaration [PositionalArgumentsDeclaration - ... - [PositionalArguments ... @argument [Expression] ...] - ... + [PositionalArguments @argument [Expression]] ]] { edge @argument.lexical_scope -> @args.lexical_scope } -@named_arg [NamedArgument ... @name [Identifier] [Colon] @value [Expression]] { +@named_arg [NamedArgument @name [Identifier] [Colon] @value [Expression]] { node @named_arg.lexical_scope edge @value.lexical_scope -> @named_arg.lexical_scope @@ -744,24 +694,20 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @args [ArgumentsDeclaration [NamedArgumentsDeclaration - ... - [NamedArgumentGroup ... [NamedArguments ... @argument [NamedArgument] ...] ...] - ... + [NamedArgumentGroup [NamedArguments @argument [NamedArgument]]] ]] { edge @argument.lexical_scope -> @args.lexical_scope } -@funcall [Expression [FunctionCallExpression ... @args [ArgumentsDeclaration]]] { +@funcall [Expression [FunctionCallExpression @args [ArgumentsDeclaration]]] { edge @args.lexical_scope -> @funcall.lexical_scope } ;;; Type expressions -@type_expr [Expression [TypeExpression ... 
@type [TypeName] ...]] { +@type_expr [Expression [TypeExpression @type [TypeName]]] { edge @type.type_ref -> @type_expr.lexical_scope } - - "#####; diff --git a/crates/solidity/outputs/cargo/tests/src/binding_rules.rs b/crates/solidity/outputs/cargo/tests/src/binding_rules.rs new file mode 100644 index 0000000000..e8bbc239ab --- /dev/null +++ b/crates/solidity/outputs/cargo/tests/src/binding_rules.rs @@ -0,0 +1,22 @@ +use std::path::PathBuf; + +use metaslang_graph_builder::ast::File; +use slang_solidity::bindings; +use slang_solidity::cst::KindTypes; + +#[test] +fn test_binding_rules_parse_successfully() { + let binding_rules = bindings::get_binding_rules(); + let graph_builder = File::<KindTypes>::from_str(binding_rules); + + assert!( + graph_builder.is_ok(), + "Parsing binding rules failed:\n{}", + graph_builder + .err() + .map(|err| err + .display_pretty(&PathBuf::from("rules.msgb"), binding_rules) + .to_string()) + .unwrap_or_default() + ); +} diff --git a/crates/solidity/outputs/cargo/tests/src/bindings_assertions/assertions.rs b/crates/solidity/outputs/cargo/tests/src/bindings_assertions/assertions.rs index 564073e092..ed4f390516 100644 --- a/crates/solidity/outputs/cargo/tests/src/bindings_assertions/assertions.rs +++ b/crates/solidity/outputs/cargo/tests/src/bindings_assertions/assertions.rs @@ -7,7 +7,7 @@ use regex::Regex; use semver::{Version, VersionReq}; use slang_solidity::bindings::Bindings; use slang_solidity::cursor::Cursor; -use slang_solidity::query::Query; +use slang_solidity::kinds::TerminalKind; use thiserror::Error; #[derive(Debug, Error)] @@ -149,24 +149,30 @@ impl<'a> fmt::Display for DisplayCursor<'a> { /// // ^ref:2 /// // Result { +pub fn collect_assertions( + mut cursor: Cursor, + version: &Version, +) -> Result { let mut assertions = Assertions::new(); - let query = Query::parse("@comment [SingleLineComment]").unwrap(); - for result in cursor.query(vec![query]) { - let captures = result.captures; - let Some(comment) = 
captures.get("comment").and_then(|v| v.first()) else { - continue; - }; - - match find_assertion_in_comment(comment, version)? { - Some(Assertion::Definition(assertion)) => { - assertions.insert_definition_assertion(assertion)?; - } - Some(Assertion::Reference(assertion)) => { - assertions.insert_reference_assertion(assertion); + loop { + if cursor + .node() + .is_terminal_with_kind(TerminalKind::SingleLineComment) + { + match find_assertion_in_comment(&cursor, version)? { + Some(Assertion::Definition(assertion)) => { + assertions.insert_definition_assertion(assertion)?; + } + Some(Assertion::Reference(assertion)) => { + assertions.insert_reference_assertion(assertion); + } + None => (), } - None => (), + } + + if !cursor.go_to_next() { + break; } } diff --git a/crates/solidity/outputs/cargo/tests/src/bindings_rules.rs b/crates/solidity/outputs/cargo/tests/src/bindings_rules.rs deleted file mode 100644 index 67f1019801..0000000000 --- a/crates/solidity/outputs/cargo/tests/src/bindings_rules.rs +++ /dev/null @@ -1,10 +0,0 @@ -use metaslang_graph_builder::ast::File; -use slang_solidity::bindings; -use slang_solidity::cst::KindTypes; - -#[test] -fn test_bindings_rules_parsing() { - let graph_builder = File::<KindTypes>::from_str(bindings::get_binding_rules()); - - assert!(graph_builder.is_ok()); -} diff --git a/crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs b/crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs index e36df96531..3a8142ec86 100644 --- a/crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs +++ b/crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs @@ -73,7 +73,7 @@ fn query_syntax() { let query = Query::parse( &" // --8<-- [start:query-syntax-4] - [MultiplicativeExpression left_operand:[_] [_] ...] 
+ [MultiplicativeExpression left_operand:[_] [_]] // --8<-- [end:query-syntax-4] " .remove_mkdoc_snippet_markers(), @@ -84,7 +84,7 @@ fn query_syntax() { let query = Query::parse( &" // --8<-- [start:query-syntax-5] - [MultiplicativeExpression ... [Expression [StringExpression]] ...] + [MultiplicativeExpression [Expression [StringExpression]]] // --8<-- [end:query-syntax-5] " .remove_mkdoc_snippet_markers(), @@ -107,7 +107,7 @@ fn capturing_nodes() { let query = Query::parse( &" // --8<-- [start:capturing-nodes-1] - [StructDefinition ... @struct_name name:[Identifier] ...] + [StructDefinition @struct_name name:[Identifier]] // --8<-- [end:capturing-nodes-1] " .remove_mkdoc_snippet_markers(), @@ -120,21 +120,12 @@ fn capturing_nodes() { &" // --8<-- [start:capturing-nodes-2] [ContractDefinition - ... @contract_name name:[Identifier] - ... members:[ContractMembers - ... [ContractMember - [EventDefinition - ... - @event_name name:[Identifier] - ... - ] + [EventDefinition @event_name name:[Identifier]] ] - ... ] - ... ] // --8<-- [end:capturing-nodes-2] " @@ -154,7 +145,7 @@ fn quantification() { let query = Query::parse( &" // --8<-- [start:quantification-1] - [SourceUnit ... (leading_trivia:[_])+] + [SourceUnit members:[_ ([_ @import [ImportDirective]])+]] // --8<-- [end:quantification-1] " .remove_mkdoc_snippet_markers(), @@ -164,18 +155,15 @@ fn quantification() { assert_matches( &query, NonterminalKind::SourceUnit, - "// comment 1\n// comment 2\n/* comment 3 */", + "import 'test.sol';\nimport * as Utils from 'lib/utils.sol'\n\ncontract Test {}", ); let query = Query::parse( &" // --8<-- [start:quantification-2] - [ContractDefinition - ... - (@docline [SingleLineNatSpecComment])+ - ... + [StructDefinition @name name:[_] - ... 
+ members:[_ ([_ @member [Identifier]])+] ] // --8<-- [end:quantification-2] " @@ -187,8 +175,10 @@ fn quantification() { &query, NonterminalKind::SourceUnit, " - /// A doc comment - contract A {} + struct Test { + int x; + int y; + } ", ); @@ -196,20 +186,13 @@ fn quantification() { &" // --8<-- [start:quantification-3] [FunctionCallExpression - ... arguments:[ArgumentsDeclaration variant:[PositionalArgumentsDeclaration - ... arguments:[PositionalArguments - ... (@arg [Expression variant:[StringExpression]])? - ... ] - ... ] - ... ] - ... ] // --8<-- [end:quantification-3] " @@ -227,7 +210,7 @@ fn quantification() { let matches: Vec<_> = iter.collect(); - matches[3].captures.get("arg").unwrap(); + matches[0].captures.get("arg").unwrap(); } #[test] @@ -240,7 +223,6 @@ fn alternations() { (@function variant:[Identifier] | @method variant:[MemberAccessExpression]) ] - ... ] // --8<-- [end:alternations-1] " @@ -292,3 +274,103 @@ fn alternations() { "break" ); } + +#[test] +fn adjacency() { + let query = Query::parse( + &r#" + // --8<-- [start:adjacency-1] + [FunctionDefinition + [ParametersDeclaration + [Parameters . @first_param [Parameter]] + ] + ] + // --8<-- [end:adjacency-1] + "# + .remove_mkdoc_snippet_markers(), + ) + .unwrap(); + + let iter = assert_matches( + &query, + NonterminalKind::FunctionDefinition, + "function test(int x, int y);", + ); + + let matches: Vec<_> = iter.collect(); + assert_eq!(matches.len(), 1); + assert_eq!( + matches[0].captures.get("first_param").unwrap()[0] + .node() + .unparse(), + "int x" + ); + + let query = Query::parse( + &r#" + // --8<-- [start:adjacency-2] + [FunctionDefinition + [ParametersDeclaration + [Parameters @last_param [Parameter] .] 
+ ] + ] + // --8<-- [end:adjacency-2] + "# + .remove_mkdoc_snippet_markers(), + ) + .unwrap(); + + let iter = assert_matches( + &query, + NonterminalKind::FunctionDefinition, + "function test(int x, int y);", + ); + + let matches: Vec<_> = iter.collect(); + assert_eq!(matches.len(), 1); + assert_eq!( + matches[0].captures.get("last_param").unwrap()[0] + .node() + .unparse(), + " int y" + ); + + let query = Query::parse( + &r#" + // --8<-- [start:adjacency-3] + [Statements @stmt1 [Statement] . @stmt2 [Statement]] + // --8<-- [end:adjacency-3] + "# + .remove_mkdoc_snippet_markers(), + ) + .unwrap(); + + let iter = assert_matches(&query, NonterminalKind::Statements, "int x; int y; x + y;"); + + let matches: Vec<_> = iter.collect(); + assert_eq!(matches.len(), 2); + assert_eq!( + matches[0].captures.get("stmt1").unwrap()[0] + .node() + .unparse(), + "int x;" + ); + assert_eq!( + matches[0].captures.get("stmt2").unwrap()[0] + .node() + .unparse(), + " int y;" + ); + assert_eq!( + matches[1].captures.get("stmt1").unwrap()[0] + .node() + .unparse(), + " int y;" + ); + assert_eq!( + matches[1].captures.get("stmt2").unwrap()[0] + .node() + .unparse(), + " x + y;" + ); +} diff --git a/crates/solidity/outputs/cargo/tests/src/doc_examples/using_queries.rs b/crates/solidity/outputs/cargo/tests/src/doc_examples/using_queries.rs index 2f1439dea5..6447cb8fc3 100644 --- a/crates/solidity/outputs/cargo/tests/src/doc_examples/using_queries.rs +++ b/crates/solidity/outputs/cargo/tests/src/doc_examples/using_queries.rs @@ -64,8 +64,8 @@ fn using_queries() -> Result<()> { // --8<-- [start:multiple-patterns] let mut names = vec![]; - let struct_def = Query::parse("[StructDefinition ... @name [Identifier] ...]").unwrap(); - let enum_def = Query::parse("[EnumDefinition ... 
@name [Identifier] ...]").unwrap(); + let struct_def = Query::parse("[StructDefinition @name [Identifier]]").unwrap(); + let enum_def = Query::parse("[EnumDefinition @name [Identifier]]").unwrap(); for r#match in cursor.query(vec![struct_def, enum_def]) { let index = r#match.query_number; @@ -96,7 +96,7 @@ fn using_queries() -> Result<()> { let mut names = vec![]; - let query = Query::parse("[TypedTupleMember ... @type type_name:[_] ...]").unwrap(); + let query = Query::parse("[TypedTupleMember @type type_name:[_]]").unwrap(); for r#match in cursor.query(vec![query]) { let captures = r#match.captures; @@ -144,15 +144,8 @@ fn tx_origin_query() -> Result<()> { // --8<-- [start:tx-origin] let query = Query::parse( r#"@txorigin [MemberAccessExpression - ... - [Expression - ... - @start ["tx"] - ... - ] - ... + [Expression @start ["tx"]] ["origin"] - ... ]"#, ) .unwrap(); diff --git a/crates/solidity/outputs/cargo/tests/src/lib.rs b/crates/solidity/outputs/cargo/tests/src/lib.rs index 17fdb1a6c0..d0dbbc5f2e 100644 --- a/crates/solidity/outputs/cargo/tests/src/lib.rs +++ b/crates/solidity/outputs/cargo/tests/src/lib.rs @@ -2,9 +2,9 @@ use metaslang_bindings as _; +mod binding_rules; mod bindings_assertions; mod bindings_output; -mod bindings_rules; mod cst_output; mod doc_examples; mod generated; diff --git a/crates/solidity/outputs/npm/tests/src/doc-examples/using-queries.ts b/crates/solidity/outputs/npm/tests/src/doc-examples/using-queries.ts index b3e532af6c..da826ca4c5 100644 --- a/crates/solidity/outputs/npm/tests/src/doc-examples/using-queries.ts +++ b/crates/solidity/outputs/npm/tests/src/doc-examples/using-queries.ts @@ -58,8 +58,8 @@ test("using queries", async () => { // --8<-- [start:multiple-patterns] const names = []; - const struct_def = Query.parse("[StructDefinition ... @name [Identifier] ...]"); - const enum_def = Query.parse("[EnumDefinition ... 
@name [Identifier] ...]"); + const struct_def = Query.parse("[StructDefinition @name [Identifier]]"); + const enum_def = Query.parse("[EnumDefinition @name [Identifier]]"); const matches = cursor.query([struct_def, enum_def]); let match = null; @@ -89,7 +89,7 @@ test("using queries", async () => { const names = []; - const query = Query.parse("[TypedTupleMember ... @type type_name:[_] ...]"); + const query = Query.parse("[TypedTupleMember @type type_name:[_]]"); const matches = cursor.query([query]); let match = null; @@ -137,15 +137,8 @@ test("using queries", async () => { // --8<-- [start:tx-origin] const query = Query.parse(` @txorigin [MemberAccessExpression - ... - [Expression - ... - @start ["tx"] - ... - ] - ... + [Expression @start ["tx"]] ["origin"] - ... ]`); const matches = cursor.query([query]); diff --git a/crates/testlang/outputs/cargo/tests/src/graph/mod.rs b/crates/testlang/outputs/cargo/tests/src/graph/mod.rs index a8eaa45605..8b5f7b171a 100644 --- a/crates/testlang/outputs/cargo/tests/src/graph/mod.rs +++ b/crates/testlang/outputs/cargo/tests/src/graph/mod.rs @@ -20,11 +20,11 @@ fn builds_a_graph() { node @tree_node.def } - @tree [Tree ... @root node: [TreeNode] ...] { + @tree [Tree @root node: [TreeNode]] { edge @root.def -> @tree.def } - @parent [TreeNode ... members: [_ ... [_ @child variant: [TreeNode]] ...] ...] 
{ + @parent [TreeNode members: [_ [_ @child variant: [TreeNode]]]] { edge @child.def -> @parent.def } "; diff --git a/crates/testlang/outputs/cargo/tests/src/query/engine_tests.rs b/crates/testlang/outputs/cargo/tests/src/query/engine_tests.rs index 0c6c464075..53568507c6 100644 --- a/crates/testlang/outputs/cargo/tests/src/query/engine_tests.rs +++ b/crates/testlang/outputs/cargo/tests/src/query/engine_tests.rs @@ -115,11 +115,32 @@ fn common_test_tree() -> Edge { ) } +fn common_test_tree_with_trivia() -> Edge { + cst_tree!( + TreeNode [ + Node: DelimitedIdentifier "A", + Whitespace " ", + DelimitedIdentifier "B", + Whitespace " ", + EndOfLine "\n", + DelimitedIdentifier "C", + TreeNodeChild [ + Whitespace " ", + DelimitedIdentifier "D", + EndOfLine "\n", + Whitespace " ", + Node: DelimitedIdentifier "E", + Whitespace " ", + ], + ] + ) +} + #[test] fn test_spread() { run_query_test( &common_test_tree(), - "[TreeNode ... @x1 [DelimitedIdentifier] ... @x2 [DelimitedIdentifier] ...]", + "[TreeNode @x1 [DelimitedIdentifier] @x2 [DelimitedIdentifier]]", query_matches! { {x1: ["A"], x2: ["B"]} {x1: ["A"], x2: ["C"]} @@ -132,7 +153,7 @@ fn test_spread() { fn test_adjacent() { run_query_test( &common_test_tree(), - "[TreeNode ... @y1 [DelimitedIdentifier] @y2 [DelimitedIdentifier] ...]", + "[TreeNode @y1 [DelimitedIdentifier] . @y2 [DelimitedIdentifier]]", query_matches! { {y1: ["A"], y2: ["B"]} {y1: ["B"], y2: ["C"]} @@ -140,11 +161,35 @@ fn test_adjacent() { ); } +#[test] +fn test_adjacency_skips_trivia() { + run_query_test( + &common_test_tree_with_trivia(), + "[TreeNode @y1 [DelimitedIdentifier] . @y2 [DelimitedIdentifier]]", + query_matches! { + {y1: ["A"], y2: ["B"]} + {y1: ["B"], y2: ["C"]} + }, + ); +} + +#[test] +fn test_anonymous_node_matcher_skips_trivia() { + run_query_test( + &common_test_tree_with_trivia(), + "[TreeNodeChild @x [_]]", + query_matches! 
{ + {x: ["D"]} + {x: ["E"]} + }, + ); +} + #[test] fn test_child() { run_query_test( &common_test_tree(), - "[TreeNodeChild ... @x [DelimitedIdentifier] ...]", + "[TreeNodeChild @x [DelimitedIdentifier]]", query_matches! { {x: ["D"]} {x: ["E"]} @@ -156,7 +201,7 @@ fn test_child() { fn test_parent_and_child() { run_query_test( &common_test_tree(), - "[TreeNode ... @p node:[_] ... [TreeNodeChild ... @c [DelimitedIdentifier] ...]]", + "[TreeNode @p node:[_] [TreeNodeChild @c [DelimitedIdentifier]]]", query_matches! { {c: ["D"], p: ["A"]} {c: ["E"], p: ["A"]} @@ -168,7 +213,7 @@ fn test_parent_and_child() { fn test_named() { run_query_test( &common_test_tree(), - "[TreeNode ... @x node:[DelimitedIdentifier] ...]", + "[TreeNode @x node:[DelimitedIdentifier]]", query_matches! { {x: ["A"]} }, @@ -179,7 +224,7 @@ fn test_named() { fn test_multilevel_adjacent() { run_query_test( &common_test_tree(), - "[_ ... @x [DelimitedIdentifier] @y [DelimitedIdentifier] ...]", + "[_ @x [DelimitedIdentifier] . @y [DelimitedIdentifier]]", query_matches! { {x: ["A"], y: ["B"]} {x: ["B"], y: ["C"]} @@ -192,7 +237,7 @@ fn test_multilevel_adjacent() { fn test_multilevel_named() { run_query_test( &common_test_tree(), - "[_ ... @x node:[_] ...]", + "[_ @x node:[_]]", query_matches! { {x: ["A"]} {x: ["E"]} @@ -204,7 +249,7 @@ fn test_multilevel_named() { fn test_text_value() { run_query_test( &common_test_tree(), - r#"[TreeNode ... @z1 [DelimitedIdentifier] ["B"] @z2 [DelimitedIdentifier] ...]"#, + r#"[TreeNode @z1 [DelimitedIdentifier] . ["B"] . @z2 [DelimitedIdentifier]]"#, query_matches! { {z1: ["A"], z2: ["C"]} }, @@ -215,7 +260,7 @@ fn test_text_value() { fn test_one_or_more() { run_query_test( &common_test_tree(), - "[TreeNode ... (@x [DelimitedIdentifier])+ [_] ]", + "[TreeNode (@x [DelimitedIdentifier])+ . [_] .]", query_matches! 
{ {x: ["A", "B", "C"]} {x: ["B", "C"]} @@ -228,7 +273,7 @@ fn test_one_or_more() { fn test_zero_or_more() { run_query_test( &common_test_tree(), - "[TreeNode ... (@y [DelimitedIdentifier])* [_] ]", + "[TreeNode (@y [DelimitedIdentifier])* . [_] .]", query_matches! { {y: ["A", "B", "C"]} {y: ["B", "C"]} @@ -242,7 +287,7 @@ fn test_zero_or_more() { fn test_optional() { run_query_test( &common_test_tree(), - "[TreeNode ... (@z [DelimitedIdentifier])? [_] ]", + "[TreeNode (@z [DelimitedIdentifier])? . [_] .]", query_matches! { {z: ["C"]} {} @@ -254,9 +299,207 @@ fn test_optional() { fn test_nested() { run_query_test( &common_test_tree(), - "@root [TreeNode ... @z [DelimitedIdentifier] [_] ]", + "@root [TreeNode @z [DelimitedIdentifier] . [_] .]", query_matches! { {root: ["ABCDE"], z: ["C"]} }, ); } + +#[test] +fn test_alternatives() { + run_query_test( + &common_test_tree(), + "(@x node:[_] | @y [DelimitedIdentifier] . @z [DelimitedIdentifier])", + query_matches! { + {x: ["A"]} + {y: ["A"], z: ["B"]} + {y: ["B"], z: ["C"]} + {y: ["D"], z: ["E"]} + {x: ["E"]} + }, + ); +} + +#[test] +fn test_adjacency_at_beginning_skips_trivia() { + run_query_test( + &common_test_tree_with_trivia(), + "[TreeNodeChild . @x [DelimitedIdentifier]]", + query_matches! { + {x: ["D"]} + }, + ); +} + +#[test] +fn test_adjacency_at_end_skips_trivia() { + run_query_test( + &common_test_tree_with_trivia(), + "[TreeNodeChild @x [DelimitedIdentifier] .]", + query_matches! { + {x: ["E"]} + }, + ); +} + +fn flat_tree() -> Edge { + cst_tree!( + TreeNode [ + Node: DelimitedIdentifier "A", + Whitespace " ", + DelimitedIdentifier "B", + DelimitedIdentifier "C", + DelimitedIdentifier "D", + ] + ) +} + +#[test] +fn test_ellipsis_followed_by_optional_grouping() { + run_query_test( + &flat_tree(), + "[TreeNode @x [DelimitedIdentifier] (@y [DelimitedIdentifier] . @z [DelimitedIdentifier])?]", + query_matches! 
{ + {x: ["A"], y: ["B"], z: ["C"]} + {x: ["A"], y: ["C"], z: ["D"]} + {x: ["A"]} + {x: ["B"], y: ["C"], z: ["D"]} + {x: ["B"]} + {x: ["C"]} + {x: ["D"]} + }, + ); +} + +#[test] +fn test_adjacency_followed_by_optional_grouping() { + run_query_test( + &flat_tree(), + "[TreeNode @x [DelimitedIdentifier] . (@y [DelimitedIdentifier] . @z [DelimitedIdentifier])?]", + query_matches! { + {x: ["A"]} + {x: ["A"], y: ["B"], z: ["C"]} + {x: ["B"]} + {x: ["B"], y: ["C"], z: ["D"]} + {x: ["C"]} + {x: ["D"]} + }, + ); +} + +#[test] +fn test_captures_followed_by_non_captured_matchers() { + run_query_test( + &flat_tree(), + "[TreeNode @x [DelimitedIdentifier] [DelimitedIdentifier]]", + query_matches! { + {x: ["A"]} + {x: ["A"]} + {x: ["A"]} + {x: ["B"]} + {x: ["B"]} + {x: ["C"]} + }, + ); +} + +#[test] +fn test_captures_followed_by_anonymous_matchers() { + run_query_test( + &flat_tree(), + "[TreeNode @x [DelimitedIdentifier] [_]]", + query_matches! { + {x: ["A"]} + {x: ["A"]} + {x: ["A"]} + {x: ["B"]} + {x: ["B"]} + {x: ["C"]} + }, + ); +} + +#[test] +fn test_captures_followed_by_non_captured_optional_matchers() { + run_query_test( + &flat_tree(), + "[TreeNode @x [DelimitedIdentifier] [DelimitedIdentifier]?]", + query_matches! { + {x: ["A"]} + {x: ["A"]} + {x: ["A"]} + {x: ["A"]} + {x: ["B"]} + {x: ["B"]} + {x: ["B"]} + {x: ["C"]} + {x: ["C"]} + {x: ["D"]} + }, + ); +} + +#[test] +fn test_captures_followed_by_captured_optional_matchers() { + run_query_test( + &flat_tree(), + "[TreeNode @x [DelimitedIdentifier] @y [DelimitedIdentifier]?]", + query_matches! 
{ + {x: ["A"], y: ["B"]} + {x: ["A"], y: ["C"]} + {x: ["A"], y: ["D"]} + {x: ["A"]} + {x: ["B"], y: ["C"]} + {x: ["B"], y: ["D"]} + {x: ["B"]} + {x: ["C"], y: ["D"]} + {x: ["C"]} + {x: ["D"]} + }, + ); +} + +fn sample_deep_tree() -> Edge { + cst_tree!( + Tree [ + Keyword: TreeKeyword "tree", + Name: Identifier "$t1", + Node: TreeNode [ + OpenBracket "[", + Members: TreeNodeChildren [ + TreeNodeChild [ + Variant: DelimitedIdentifier "A", + ], + TreeNodeChild [ + Variant: TreeNode [ + OpenBracket "[", + Members: TreeNodeChildren [ + TreeNodeChild [ + Variant: DelimitedIdentifier "B" + ], + TreeNodeChild [ + Variant: DelimitedIdentifier "C" + ], + ], + CloseBracket "]", + ] + ] + ], + CloseBracket "]", + ], + Semicolon: Semicolon ";" + ] + ) +} + +#[test] +fn test_deeply_nested_matchers() { + run_query_test( + &sample_deep_tree(), + "@parent [TreeNode members: [TreeNodeChildren [TreeNodeChild @child variant: [TreeNode]]]]", + query_matches! { + {parent: ["[A[BC]]"], child: ["[BC]"]} + }, + ); +} diff --git a/crates/testlang/outputs/cargo/tests/src/query/parser_tests.rs b/crates/testlang/outputs/cargo/tests/src/query/parser_tests.rs index 796485930a..3ed6dc0cc0 100644 --- a/crates/testlang/outputs/cargo/tests/src/query/parser_tests.rs +++ b/crates/testlang/outputs/cargo/tests/src/query/parser_tests.rs @@ -16,8 +16,8 @@ fn test_text_escaping() { } #[test] -fn test_ellipsis() { - run_parser_test(r#"[_ ...]"#, r#"[_ ...]"#); +fn test_adjacency() { + run_parser_test(r#"[_ . [_]]"#, r#"[_ . 
[_]]"#); } #[test] @@ -52,15 +52,12 @@ fn test_zero_or_more_canonicalisation() { // Test the error message on parse failure #[test] fn test_parsing_error() { - let result = Query::parse(r#"@root [_ ..."#); + let result = Query::parse(r#"@root [_"#); match result { Ok(_) => panic!("Expected error"), Err(e) => { - assert_eq!( - e.message, - "Parse error:\nexpected ']' at: \nAlt at: [_ ...\nAlt at: @root [_ ...\n" - ); - assert_eq!((e.row, e.column), (0, 12)); + assert_eq!(e.message, "Parse error:\nexpected ']' at: \nAlt at: [_\n"); + assert_eq!((e.row, e.column), (0, 8)); } } } @@ -68,52 +65,96 @@ fn test_parsing_error() { // See https://github.com/NomicFoundation/slang/issues/1042 #[test] fn test_parsing_error_with_invalid_edge_label() { - let result = Query::parse( - r#" -[Tree - ... - @name Name: [_] - ... -]"#, - ); + let result = Query::parse(r#"[Tree @name Name: [_]]"#); match result { Ok(_) => panic!("Expected error"), Err(e) => { assert_eq!( e.message, - "Parse error:\n'Name' is not a valid edge label at: Name: [_]\n ...\n]\n", + "Parse error:\n'Name' is not a valid edge label at: Name: [_]]\n", ); - assert_eq!((e.row, e.column), (3, 10)); + assert_eq!((e.row, e.column), (0, 12)); } } } #[test] fn test_parsing_error_with_invalid_node_kind() { - let result = Query::parse(r#"[Tree ... [tree_node] ...]"#); + let result = Query::parse(r#"[Tree [tree_node]]"#); match result { Ok(_) => panic!("Expected error"), Err(e) => { assert_eq!( e.message, - "Parse error:\n'tree_node' is not a valid node kind at: tree_node] ...]\n", + "Parse error:\n'tree_node' is not a valid node kind at: tree_node]]\n", ); - assert_eq!((e.row, e.column), (0, 11)); + assert_eq!((e.row, e.column), (0, 7)); } } } #[test] fn test_parsing_error_with_kind_beginning_with_underscore() { - let result = Query::parse(r#"[Tree ... 
[_tree_node] ...]"#); + let result = Query::parse(r#"[Tree [_tree_node]]"#); match result { Ok(_) => panic!("Expected error"), Err(e) => { assert_eq!( e.message, - "Parse error:\n'_tree_node' is not a valid node kind at: _tree_node] ...]\n", + "Parse error:\n'_tree_node' is not a valid node kind at: _tree_node]]\n", ); - assert_eq!((e.row, e.column), (0, 11)); + assert_eq!((e.row, e.column), (0, 7)); } } } + +#[test] +fn test_fails_parsing_ellipsis() { + let result = Query::parse(r#"[_ ...]"#); + match result { + Ok(_) => panic!("Expected parse failure"), + Err(e) => assert_eq!( + e.message, + "Parse error:\nThe ellipsis `...` operator is deprecated, and replaced with a new adjacency `.` operator. For more information, check the Tree Query Language guide: https://nomicfoundation.github.io/slang/user-guide/tree-query-language/ at: ...]\n", + ), + } +} + +#[test] +fn test_fails_consecutive_adjacency_operators() { + let result = Query::parse(r#"[_ [DelimitedIdentifier] . .]"#); + match result { + Ok(_) => panic!("Expected parse failure"), + Err(e) => assert_eq!(e.message, "Parse error:\nNoneOf at: .]\n"), + } +} + +#[test] +fn test_fails_sole_adjacency() { + let result = Query::parse(r#"[_ .]"#); + match result { + Ok(_) => panic!("Expected parse failure"), + Err(e) => assert_eq!( + e.message, + "Parse error:\nexpected ']' at: .]\nAlt at: [_ .]\n" + ), + } +} + +#[test] +fn test_fails_adjacency_at_edge_of_alt_option() { + let result = Query::parse(r#"([TreeNode] | . [DelimitedIdentifier])+"#); + assert!(result.is_err(), "Expected parse failure"); +} + +#[test] +fn test_fails_parsing_trivia_node_selector() { + let result = Query::parse(r#"[EndOfLine]"#); + match result { + Ok(_) => panic!("Expected parse failure"), + Err(e) => assert_eq!( + e.message, + "Parse error:\nMatching trivia nodes directly is forbidden. 
For more information, check the Tree Query Language guide: https://nomicfoundation.github.io/slang/user-guide/tree-query-language/ at: EndOfLine]\n" + ), + } +} diff --git a/crates/testlang/outputs/npm/tests/src/tests/query.ts b/crates/testlang/outputs/npm/tests/src/tests/query.ts index 2405e6bacb..4f328e3215 100644 --- a/crates/testlang/outputs/npm/tests/src/tests/query.ts +++ b/crates/testlang/outputs/npm/tests/src/tests/query.ts @@ -8,7 +8,7 @@ test("simple query", () => { const tree_source = `tree [A [B C] D];`; const parse_output = language.parse(NonterminalKind.Tree, tree_source); - const query_source = `[TreeNodeChild ... @id [DelimitedIdentifier]]`; + const query_source = `[TreeNodeChild @id [DelimitedIdentifier]]`; const query = Query.parse(query_source); const matches = parse_output.createTreeCursor().query([query]); diff --git a/documentation/public/user-guide/tree-query-language.md b/documentation/public/user-guide/tree-query-language.md index bfca807e70..3b682e4e17 100644 --- a/documentation/public/user-guide/tree-query-language.md +++ b/documentation/public/user-guide/tree-query-language.md @@ -6,8 +6,8 @@ A _query_ is a pattern that matches a certain set of nodes in a tree. The expression to match a given node consists of a pair of brackets (`[]`) containing two things: the node's kind, and optionally, a series of other patterns that match the node's children. 
For -example, this pattern would match any `MultiplicativeExpression` node whose children -are exactly two `Expression` nodes, with an `Asterisk` node in between (no whitespace): +example, this pattern would match any `MultiplicativeExpression` node that has +two children `Expression` nodes, with an `Asterisk` node in between: ```{ .scheme } --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:query-syntax-1" @@ -36,7 +36,7 @@ node with two children, one of any kind labeled `left_operand` and one of any ki --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:query-syntax-4" ``` -Children can also be elided. For example, this would produce multiple matches for a +Children can be elided. For example, this would produce multiple matches for a `MultiplicativeExpression` where at least _one_ of the children is an expression of a `StringExpression` variant, where each match is associated with each of the `StringExpression` children: @@ -44,6 +44,10 @@ is associated with each of the `StringExpression` children: --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:query-syntax-5" ``` +Trivia nodes (whitespace, comments, etc.) will be skipped over when running a +query. Furthermore, trivia nodes cannot be explicitly (or implicitly with `_`) +matched by queries. + ### Capturing Nodes When matching patterns, you may want to process specific nodes within the @@ -72,13 +76,13 @@ by a `?`, `*` or `+` operator. The `?` operator matches _zero or one_ repetition of a pattern, the `*` operator matches _zero or more_, and the `+` operator matches _one or more_. 
-For example, this pattern would match a sequence of one or more comments at the top of the file: +For example, this pattern would match a sequence of one or more import directives at the top of the file: ```{ .scheme } --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:quantification-1" ``` -This pattern would match a contract definition with at least one doc comment, capturing them: +This pattern would match a structure definition with one or more members, capturing their names: ```{ .scheme } --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:quantification-2" @@ -93,7 +97,7 @@ present: ### Alternations -An alternation is written as a sequence of patterns separated by '|' and surrounded by parentheses. +An alternation is written as a sequence of patterns separated by `|` and surrounded by parentheses. For example, this pattern would match a call to either a variable or an object property. In the case of a variable, capture it as `@function`, and in the case of a property, capture it as `@method`: @@ -107,3 +111,29 @@ This pattern would match a set of possible keyword terminals, capturing them as ```{ .scheme } --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:alternations-2" ``` + +### Adjacency + +By using the adjacency operator `.` you can constrain a pattern to only match +the first or the last child nodes. + +For example, the following pattern would match only the first parameter +declaration in a function definition: + +```{ .scheme } +--8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:adjacency-1" +``` + +And conversely the following will match only the last parameter: + +```{ .scheme } +--8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:adjacency-2" +``` + +If the adjacency operator is used in between two patterns it constrains matches +on both patterns to occur consecutively, ie. 
without any other sibling node in +between. For example, this pattern matches pairs of consecutive statements: + +```{ .scheme } +--8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:adjacency-3" +```