rust-lang · nikomatsakis · Nov 9, 2023 · Nov 4, 2023 · Nov 4, 2023 · Nov 5, 2023
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/book/src/formality_core/parse.md b/book/src/formality_core/parse.md
@@ -19,14 +19,33 @@ struct MyEnum {
 }
 ```
 
-### Ambiguity and precedence
+### Ambiguity and greedy parsing
 
-When parsing an enum there will be multiple possibilities. We will attempt to parse them all. If more than one succeeds, the parser will attempt to resolve the ambiguity. Ambiguity can be resolved in two ways:
+When parsing an enum there will be multiple possibilities. We will attempt to parse them all. If more than one succeeds, the parser will attempt to resolve the ambiguity by looking for the **longest match**. However, we don't just consider the number of characters, we look for a **reduction prefix**:
 
-* Explicit precedence: By default, every variant has precedence 0, but you can override this by annotating variants with `#[precedence(N)]` (where `N` is some integer). This will override the precedence for that variant. Variants with higher precedences are preferred.
-* Reduction prefix: When parsing, we track the list of things we had to parse. If there are two variants at the same precedence level, but one of them had to parse strictly more things than the other and in the same way, we'll prefer the longer one. So for example if one variant parsed a `Ty` and the other parsed a `Ty Ty`, we'd take the `Ty Ty`.
+* When parsing, we track the list of things we had to parse. If there are two variants at the same precedence level, but one of them had to parse strictly more things than the other and in the same way, we'll prefer the longer one. So for example if one variant parsed a `Ty` and the other parsed a `Ty Ty`, we'd take the `Ty Ty`.
+    * When considering whether a reduction is "significant", we take casts into account. See `ActiveVariant::mark_as_cast_variant` for a more detailed explanation and set of examples.
 
-Otherwise, the parser will panic and report ambiguity. The parser panics rather than returning an error because ambiguity doesn't mean that there is no way to parse the given text as the nonterminal -- rather that there are multiple ways. Errors mean that the text does not match the grammar for that nonterminal.
+### Precedence and left-recursive grammars
+
+We support left-recursive grammars like this one from the `parse-torture-tests`:
+
+```rust
+{{#include ../../../tests/parser-torture-tests/src/path.rs:path}}
+```
+
+We also support ambiguous grammars. For example, you can code up arithmetic expressions like this:
+
+
+```rust
+{{#include ../../../tests/parser-torture-tests/src/left_associative.rs:Expr}}
+```
+
+When specifying the `#[precedence]` of a variant, the default is left-associativity, which can be written more explicitly as `#[precedence(L, left)]`. If you prefer, you can specify right-associativity (`#[precedence(L, right)]`) or non-associativity `#[precedence(L, none)]`. This affects how things of the same level are parsed:
+
+* `1 + 1 + 1` when left-associative is `(1 + 1) + 1`
+* `1 + 1 + 1` when right-associative is `1 + (1 + 1)`
+* `1 + 1 + 1` when none-associative is an error.
 
 ### Symbols
 
@@ -39,11 +58,20 @@ A grammar consists of a series of *symbols*. Each symbol matches some text in th
     * If fields have names, then `$field` should name the field.
     * For position fields (e.g., the T and U in `Mul(Expr, Expr)`), use `$v0`, `$v1`, etc.
     * Exception: `$$` is treated as the terminal `'$'`.
-* Nonterminals can also accept modes:
+* Nonterminals have various modes:
     * `$field` -- just parse the field's type
     * `$*field` -- the field must be a `Vec<T>` -- parse any number of `T` instances. Something like `[ $*field ]` would parse `[f1 f2 f3]`, assuming `f1`, `f2`, and `f3` are valid values for `field`.
     * `$,field` -- similar to the above, but uses a comma separated list (with optional trailing comma). So `[ $,field ]` will parse something like `[f1, f2, f3]`.
     * `$?field` -- will parse `field` and use `Default::default()` value if not present.
+    * `$<field>` -- parse `<E1, E2, E3>`, where `field: Vec<E>` 
+    * `$<?field>` -- parse `<E1, E2, E3>`, where `field: Vec<E>`, but accept empty string as empty vector
+    * `$(field)` -- parse `(E1, E2, E3)`, where `field: Vec<E>` 
+    * `$(?field)` -- parse `(E1, E2, E3)`, where `field: Vec<E>`, but accept empty string as empty vector
+    * `$[field]` -- parse `[E1, E2, E3]`, where `field: Vec<E>` 
+    * `$[?field]` -- parse `[E1, E2, E3]`, where `field: Vec<E>`, but accept empty string as empty vector
+    * `${field}` -- parse `{E1, E2, E3}`, where `field: Vec<E>` 
+    * `${?field}` -- parse `{E1, E2, E3}`, where `field: Vec<E>`, but accept empty string as empty vector
+    * `$:guard <nonterminal>` -- parses `<nonterminal>` but only if the keyword `guard` is present. For example, `$:where $,where_clauses` would parse `where WhereClause1, WhereClause2, WhereClause3`
 
 ### Greediness
 

diff --git a/crates/formality-core/Cargo.toml b/crates/formality-core/Cargo.toml
@@ -20,6 +20,7 @@ tracing-tree = { version = "0.2" }
 formality-macros = { version = "0.1.0", path = "../formality-macros" }
 anyhow = "1.0.75"
 contracts = "0.6.3"
+final_fn = "0.1.0"
 
 [dev-dependencies]
 expect-test = "1.4.1"
diff --git a/crates/formality-core/src/binder.rs b/crates/formality-core/src/binder.rs
@@ -261,14 +261,16 @@ where
     T: std::fmt::Debug,
 {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "<")?;
-        for (kind, i) in self.kinds.iter().zip(0..) {
-            if i > 0 {
-                write!(f, ", ")?;
+        if !self.kinds.is_empty() {
+            write!(f, "{}", L::BINDING_OPEN)?;
+            for (kind, i) in self.kinds.iter().zip(0..) {
+                if i > 0 {
+                    write!(f, ", ")?;
+                }
+                write!(f, "{:?}", kind)?;
             }
-            write!(f, "{:?}", kind)?;
+            write!(f, "{} ", L::BINDING_CLOSE)?;
         }
-        write!(f, "> ")?;
         write!(f, "{:?}", &self.term)?;
         Ok(())
     }

diff --git a/crates/formality-core/src/language.rs b/crates/formality-core/src/language.rs
@@ -1,6 +1,7 @@
 use crate::cast::UpcastFrom;
 use crate::term::CoreTerm;
 use crate::variable::{CoreBoundVar, CoreExistentialVar, CoreUniversalVar, CoreVariable};
+use crate::DowncastTo;
 use std::fmt::Debug;
 use std::hash::Hash;
 
@@ -20,7 +21,8 @@ pub trait Language: 'static + Copy + Ord + Hash + Debug + Default {
         + UpcastFrom<CoreVariable<Self>>
         + UpcastFrom<CoreUniversalVar<Self>>
         + UpcastFrom<CoreExistentialVar<Self>>
-        + UpcastFrom<CoreBoundVar<Self>>;
+        + UpcastFrom<CoreBoundVar<Self>>
+        + DowncastTo<CoreVariable<Self>>;
 
     /// The token (typically `<`) used to open binders.
     const BINDING_OPEN: char;

diff --git a/crates/formality-core/src/lib.rs b/crates/formality-core/src/lib.rs
@@ -32,6 +32,7 @@ pub mod language;
 pub mod parse;
 pub mod substitution;
 pub mod term;
+pub mod util;
 pub mod variable;
 pub mod visit;
 
@@ -95,18 +96,27 @@ macro_rules! declare_language {
     ) => {
         $(#[$the_lang_m:meta])*
         $the_lang_v mod $the_lang {
-            use super::*;
+            use $crate::language::Language;
 
             #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
             pub struct FormalityLang;
 
-            impl $crate::language::Language for FormalityLang {
-                const NAME: &'static str = $name;
-                type Kind = $kind;
-                type Parameter = $param;
-                const BINDING_OPEN: char = $binding_open;
-                const BINDING_CLOSE: char = $binding_close;
-                const KEYWORDS: &'static [&'static str] = &[$($kw),*];
+            // This module may seem weird -- it permits us to import `super::*`
+            // so that all the types in `$kind` and `$param` are valid without
+            // importing `super::*` into the entire module. This not only makes
+            // things a bit nicer, since those imports are not needed and could
+            // cause weird behavior, it avoids a cycle when users
+            // do `pub use $lang::grammar::*`.
+            mod __hygiene {
+                use super::super::*;
+                impl $crate::language::Language for super::FormalityLang {
+                    const NAME: &'static str = $name;
+                    type Kind = $kind;
+                    type Parameter = $param;
+                    const BINDING_OPEN: char = $binding_open;
+                    const BINDING_CLOSE: char = $binding_close;
+                    const KEYWORDS: &'static [&'static str] = &[$($kw),*];
+                }
             }
 
             $crate::trait_alias! {
@@ -125,15 +135,19 @@ macro_rules! declare_language {
                 pub trait Term = $crate::term::CoreTerm<FormalityLang>
             }
 
-            pub type Variable = $crate::variable::CoreVariable<FormalityLang>;
-            pub type ExistentialVar = $crate::variable::CoreExistentialVar<FormalityLang>;
-            pub type UniversalVar = $crate::variable::CoreUniversalVar<FormalityLang>;
-            pub type BoundVar = $crate::variable::CoreBoundVar<FormalityLang>;
-            pub type DebruijnIndex = $crate::variable::DebruijnIndex;
-            pub type VarIndex = $crate::variable::VarIndex;
-            pub type Binder<T> = $crate::binder::CoreBinder<FormalityLang, T>;
-            pub type Substitution = $crate::substitution::CoreSubstitution<FormalityLang>;
-            pub type VarSubstitution = $crate::substitution::CoreVarSubstitution<FormalityLang>;
+            /// Grammar items to be included in this language.
+            pub mod grammar {
+                use super::FormalityLang;
+                pub type Variable = $crate::variable::CoreVariable<FormalityLang>;
+                pub type ExistentialVar = $crate::variable::CoreExistentialVar<FormalityLang>;
+                pub type UniversalVar = $crate::variable::CoreUniversalVar<FormalityLang>;
+                pub type BoundVar = $crate::variable::CoreBoundVar<FormalityLang>;
+                pub type DebruijnIndex = $crate::variable::DebruijnIndex;
+                pub type VarIndex = $crate::variable::VarIndex;
+                pub type Binder<T> = $crate::binder::CoreBinder<FormalityLang, T>;
+                pub type Substitution = $crate::substitution::CoreSubstitution<FormalityLang>;
+                pub type VarSubstitution = $crate::substitution::CoreVarSubstitution<FormalityLang>;
+            }
 
             /// Parses `text` as a term with no bindings in scope.
             #[track_caller]
@@ -161,7 +175,7 @@ macro_rules! declare_language {
             pub fn term_with<T, B>(bindings: impl IntoIterator<Item = B>, text: &str) -> $crate::Fallible<T>
             where
                 T: Parse,
-                B: $crate::Upcast<(String, $param)>,
+                B: $crate::Upcast<(String, <FormalityLang as Language>::Parameter)>,
             {
                 $crate::parse::core_term_with::<FormalityLang, T, B>(bindings, text)
             }

diff --git a/crates/formality-core/src/parse.rs b/crates/formality-core/src/parse.rs
@@ -14,7 +14,7 @@ use std::fmt::Debug;
 /// Trait for parsing a [`Term<L>`](`crate::term::Term`) as input.
 /// Typically this is auto-generated with the `#[term]` procedural macro,
 /// but you can implement it by hand if you want a very customized parse.
-pub trait CoreParse<L: Language>: Sized + Debug + Clone + Eq {
+pub trait CoreParse<L: Language>: Sized + Debug + Clone + Eq + 'static {
     /// Parse a single instance of this type, returning an error if no such
     /// instance is present.
     ///
@@ -24,7 +24,7 @@ pub trait CoreParse<L: Language>: Sized + Debug + Clone + Eq {
 }
 
 mod parser;
-pub use parser::{skip_whitespace, ActiveVariant, Parser};
+pub use parser::{skip_whitespace, ActiveVariant, Parser, Precedence};
 
 /// Parses `text` as a term with the given bindings in scope.
 ///
@@ -58,7 +58,7 @@ where
 }
 
 /// Record from a successful parse.
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq)]
 pub struct SuccessfulParse<'t, T> {
     /// The new point in the input, after we've consumed whatever text we have.
     text: &'t str,
@@ -76,21 +76,15 @@ pub struct SuccessfulParse<'t, T> {
     /// reduction.
     reductions: Vec<&'static str>,
 
+    /// The precedence of this parse, which is derived from the value given
+    /// to `parse_variant`.
+    precedence: Precedence,
+
     /// The value produced.
     value: T,
 }
 
 impl<'t, T> SuccessfulParse<'t, T> {
-    #[track_caller]
-    pub fn new(text: &'t str, reductions: Vec<&'static str>, value: T) -> Self {
-        // assert!(!reductions.is_empty());
-        Self {
-            text,
-            reductions,
-            value,
-        }
-    }
-
     /// Extract the value parsed and the remaining text,
     /// ignoring the reductions.
     pub fn finish(self) -> (T, &'t str) {
@@ -103,6 +97,7 @@ impl<'t, T> SuccessfulParse<'t, T> {
         SuccessfulParse {
             text: self.text,
             reductions: self.reductions,
+            precedence: self.precedence,
             value: op(self.value),
         }
     }
@@ -117,6 +112,7 @@ where
         SuccessfulParse {
             text: term.text,
             reductions: term.reductions,
+            precedence: term.precedence,
             value: term.value.upcast(),
         }
     }
@@ -172,7 +168,7 @@ pub type ParseResult<'t, T> = Result<SuccessfulParse<'t, T>, Set<ParseError<'t>>
 pub type TokenResult<'t, T> = Result<(T, &'t str), Set<ParseError<'t>>>;
 
 /// Tracks the variables in scope at this point in parsing.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
 pub struct Scope<L: Language> {
     bindings: Vec<(String, CoreParameter<L>)>,
 }
@@ -222,10 +218,7 @@ where
 {
     fn parse<'t>(scope: &Scope<L>, text: &'t str) -> ParseResult<'t, Self> {
         Parser::single_variant(scope, text, "Vec", |p| {
-            p.expect_char('[')?;
-            let v = p.comma_nonterminal()?;
-            p.expect_char(']')?;
-            Ok(v)
+            p.delimited_nonterminal('[', false, ']')
         })
     }
 }
@@ -277,7 +270,13 @@ where
 {
     fn parse<'t>(scope: &Scope<L>, text: &'t str) -> ParseResult<'t, Self> {
         Parser::single_variant(scope, text, "Binder", |p| {
-            p.expect_char(L::BINDING_OPEN)?;
+            match p.expect_char(L::BINDING_OPEN) {
+                Ok(()) => {}
+                Err(_) => {
+                    return Ok(CoreBinder::dummy(p.nonterminal()?));
+                }
+            }
+
             let bindings: Vec<Binding<L>> = p.comma_nonterminal()?;
             p.expect_char(L::BINDING_CLOSE)?;