XYZ
th with '5th'
code_tag.replace_with("5")
+ elif code_tag.attrs and 'class' in code_tag.attrs and 'hl' in code_tag['class'] and 'lean' in code_tag['class']:
+ code_tag.decompose()
# Delete docstring content (for now)
for element in soup.find_all(class_="namedocs"):
diff --git a/.vale/styles/Lean/TechnicalTerms.yml b/.vale/styles/Lean/TechnicalTerms.yml
index b670fd2..4f3e7a1 100644
--- a/.vale/styles/Lean/TechnicalTerms.yml
+++ b/.vale/styles/Lean/TechnicalTerms.yml
@@ -2,6 +2,7 @@ extends: substitution
message: Use '%s' instead of '%s'
level: error
ignorecase: true
+capitalize: true
swap:
'typeclass': 'type class'
'typeclasses': 'type classes'
@@ -19,3 +20,8 @@ swap:
'semireducible': 'semi-reducible'
'\b(.+)\b simps': 'non-terminal simps'
'\babelian\b': 'Abelian'
+ 'context free grammar': 'context-free grammar'
+ 'left associative': 'left-associative'
+ 'right associative': 'right-associative'
+ 'pretty-printer': 'pretty printer'
+ 'pretty printed': 'pretty-printed'
diff --git a/.vale/styles/config/ignore/terms.txt b/.vale/styles/config/ignore/terms.txt
index 3395804..e9ebf9e 100644
--- a/.vale/styles/config/ignore/terms.txt
+++ b/.vale/styles/config/ignore/terms.txt
@@ -2,6 +2,10 @@ Abelian
Packrat
accessor
accessors
+antiquotation
+antiquotation's
+antiquotations
+antiquotations'
bitvector
bitvectors
bitwise
@@ -15,6 +19,11 @@ cumulative
cumulativity
declaratively
definitionally
+delaborate
+delaborated
+delaborates
+delaborating
+delaboration
desugar
desugared
desugaring
@@ -28,6 +37,9 @@ enum
equational
extensional
extensionality
+functor
+functor's
+functors
guillemet
guillemets
hoc
@@ -38,6 +50,7 @@ initializers
injectivity
interoperate
interoperates
+interrobang
iterator
iterator's
iterators
@@ -53,6 +66,7 @@ metatheoretic
metavariable
metavariable's
metavariables
+mixfix
monoid
monomorphic
monomorphism
@@ -60,14 +74,25 @@ multiset
multisets
namespace
namespaces
+nonterminal
+nonterminals
nullary
parameters'
+parenthesization
+parenthesizer
+parenthesizers
+parser's
polymorphic
polymorphically
+postfix
predicative
predicativity
prepending
propositionally
+quasiquotation
+quasiquotations
+quasiquote
+quasiquoted
recursor
recursor's
recursors
@@ -77,9 +102,9 @@ se
semigroup
semireducible
simp
-simps
simproc
simprocs
+simps
subgoal
subgoals
subproblem
@@ -100,4 +125,7 @@ toolchain's
toolchains
unary
unescaped
+unexpander
+unexpanders
uninstantiated
+unparenthesized
diff --git a/Manual.lean b/Manual.lean
index f2d06f5..14279d8 100644
--- a/Manual.lean
+++ b/Manual.lean
@@ -44,6 +44,7 @@ tag := "monads-and-do"
:::planned 102
This chapter will describe `do`-notation in Lean:
+ * Overview of {deftech}[monads]
* Desugaring of `do` and its associated control structures
* Comprehensive description of the syntax of `do`-notation
* Definition of being in the "same `do`-block"
@@ -85,6 +86,14 @@ Describe {name}`Option`, including the default coercions and its API.
{include 0 Manual.NotationsMacros}
+# Output from Lean
+
+:::planned 122
+ * {deftech}[Unexpanders]
+ * {deftech key:="delaborate"}[Delaboration]
+ * {deftech}[Pretty printing]
+ * Parenthesizers
+:::
# Elan
%%%
diff --git a/Manual/Elaboration.lean b/Manual/Elaboration.lean
index 54eb3c9..6abd792 100644
--- a/Manual/Elaboration.lean
+++ b/Manual/Elaboration.lean
@@ -6,6 +6,7 @@ Author: David Thrane Christiansen
import VersoManual
import Manual.Meta
+import Manual.Papers
open Verso.Genre Manual
@@ -14,27 +15,6 @@ set_option pp.rawOnError true
open Lean (Syntax SourceInfo)
-def pratt73 : InProceedings where
- title := .concat (inlines!"Top down operator precedence")
- authors := #[.concat (inlines!"Vaughan Pratt")]
- year := 1973
- booktitle := .concat (inlines!"Proceedings of the 1st Annual ACM SIGACT-SIGPLAN Symposium on Principles of Programming Languages")
-
-def carneiro19 : Thesis where
- title := .concat (inlines!"The Type Theory of Lean")
- author := .concat (inlines!"Mario Carneiro")
- year := 2019
- university := .concat (inlines!"Carnegie Mellon University")
- url := some "https://github.com/digama0/lean-type-theory/releases/download/v1.0/main.pdf"
- degree := .concat (inlines!"Masters thesis")
-
-def ullrich23 : Thesis where
- title := .concat (inlines!"An Extensible Theorem Proving Frontend")
- author := .concat (inlines!"Sebastian Ullrich")
- year := 2023
- university := .concat (inlines!"Karlsruhe Institute of Technology")
- url := some "https://www.lean-lang.org/papers/thesis-sebastian.pdf"
- degree := .concat (inlines!"Dr. Ing. dissertation")
#doc (Manual) "Elaboration and Compilation" =>
%%%
@@ -92,7 +72,7 @@ tag := "parser"
Lean's parser is a recursive-descent parser that uses dynamic tables based on Pratt parsing{citep pratt73}[] to resolve operator precedence and associativity.
When grammars are unambiguous, the parser does not need to backtrack; in the case of ambiguous grammars, a memoization table similar to that used in Packrat parsing avoids exponential blowup.
Parsers are highly extensible: users may define new syntax in any command, and that syntax becomes available in the next command.
-The open namespaces in the current {tech}[scope] also influences which parsing rules are used, because parser extensions may be set to be active only when a given namespace is open.
+The open namespaces in the current {tech}[section scope] also influence which parsing rules are used, because parser extensions may be set to be active only when a given namespace is open.
When ambiguity is encountered, the longest matching parse is selected.
If there is no unique longest match, then both matching parses are saved in the syntax tree in a {deftech}[choice node] to be resolved later by the elaborator.
@@ -127,7 +107,7 @@ Elaboration of both commands and terms may be recursive, both because of command
Command and term elaboration have different capabilities.
Command elaboration may have side effects on an environment, and it has access to run arbitrary computations in {lean}`IO`.
Lean environments contain the usual mapping from names to definitions along with additional data defined in {deftech}[environment extensions], which are additional tables associated with an environment; environment extensions are used to track most other information about Lean code, including {tactic}`simp` lemmas, custom pretty printers, and internals such as the compiler's intermediate representations.
-Command elaboration also maintains a message log with the contents of the compiler's informational output, warnings, and errors, a set of {tech}[info trees] that associate metadata with the original syntax (used for interactive features such as displaying proof states, identifier completion, and showing documentation), accumulated debugging traces, the open {tech}[scopes], and some internal state related to macro expansion.
+Command elaboration also maintains a message log with the contents of the compiler's informational output, warnings, and errors, a set of {tech}[info trees] that associate metadata with the original syntax (used for interactive features such as displaying proof states, identifier completion, and showing documentation), accumulated debugging traces, the open {tech}[section scopes], and some internal state related to macro expansion.
Term elaboration may modify all of these fields except the open scopes.
Additionally, it has access to all the machinery needed to create fully-explicit terms in the core language from Lean's terse, friendly syntax, including unification, type class instance synthesis, and type checking.
diff --git a/Manual/Intro.lean b/Manual/Intro.lean
index 8bfa1c2..2d6950e 100644
--- a/Manual/Intro.lean
+++ b/Manual/Intro.lean
@@ -153,6 +153,17 @@ skip
Identifiers in code examples are hyperlinked to their documentation.
+Examples of code with syntax errors are shown with an indicator of where the parser stopped, along with the error message:
+```syntaxError intro
+def f : Option Nat → Type
+ | some 0 => Unit
+ | => Option (f t)
+ | none => Empty
+```
+```leanOutput intro
+{{line}}
}}
diff --git a/Manual/Meta/ParserAlias.lean b/Manual/Meta/ParserAlias.lean
new file mode 100644
index 0000000..99dd3c8
--- /dev/null
+++ b/Manual/Meta/ParserAlias.lean
@@ -0,0 +1,125 @@
+/-
+Copyright (c) 2024 Lean FRO LLC. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Author: David Thrane Christiansen
+-/
+
+import Lean.Elab.Term
+import Lean.Elab.Tactic
+
+import Verso.Code.Highlighted
+import Verso.Doc.ArgParse
+import Verso.Doc.Suggestion
+import SubVerso.Highlighting.Code
+import VersoManual
+
+import Manual.Meta.Basic
+import Manual.Meta.PPrint
+import Manual.Meta.Lean
+import Manual.Meta.Lean.Scopes
+
+namespace Manual
+
+open Lean Elab Term Tactic
+open Verso ArgParse Doc Elab Genre.Manual Html Code Highlighted.WebAssets
+open Manual.Meta.Lean.Scopes
+open SubVerso.Highlighting
+
+def parserAliasDomain := `Manual.parserAlias
+
+def Block.parserAlias (name : Name) (declName : Name) («show» : Option String) (stackSz? : Option Nat) (autoGroupArgs : Bool) (docs? : Option String) (argCount : Nat) : Block where
+ name := `Manual.Block.parserAlias
+ data := ToJson.toJson (name, declName, «show», stackSz?, autoGroupArgs, docs?, argCount)
+
+structure ParserAliasOptions where
+ name : Name
+ «show» : Option String
+
+def ParserAliasOptions.parse [Monad m] [MonadError m] : ArgParse m ParserAliasOptions :=
+ ParserAliasOptions.mk <$> .positional `name .name <*> .named `show .string true
+
+
+
+@[directive_expander parserAlias]
+def parserAlias : DirectiveExpander
+ | args, more => do
+ let opts ← ParserAliasOptions.parse.run args
+ let {declName, stackSz?, autoGroupArgs} ← Parser.getParserAliasInfo opts.name
+ let docs? ← findDocString? (← getEnv) declName
+
+ let some mdAst := MD4Lean.parse (docs?.getD "")
+ | throwError "Failed to parse docstring as Markdown"
+ let contents ← mdAst.blocks.mapM (Markdown.blockFromMarkdown · Markdown.strongEmphHeaders)
+ let userContents ← more.mapM elabBlock
+
+ let argCount :=
+ match (← Lean.Parser.getAlias Lean.Parser.parserAliasesRef opts.name) with
+ | some (.unary ..) => 1
+ | some (.binary ..) => 2
+ | _ => 0
+
+ pure #[← ``(Verso.Doc.Block.other (Block.parserAlias $(quote opts.name) $(quote declName) $(quote opts.show) $(quote stackSz?) $(quote autoGroupArgs) $(quote docs?) $(quote argCount)) #[$(contents ++ userContents),*])]
+
+
+open Verso.Genre.Manual.Markdown in
+open Lean Elab Term Parser Tactic Doc in
+@[block_extension Block.parserAlias]
+def parserAlias.descr : BlockDescr where
+ init st := st
+ |>.setDomainTitle parserAliasDomain "Parser Alias Documentation"
+ |>.setDomainDescription parserAliasDomain "Detailed descriptions of parser aliases"
+
+ traverse id info _ := do
+ let .ok (name, _declName, «show», _stackSz?, _autoGroupArgs, _docs?, _) := FromJson.fromJson? (α := Name × Name × Option String × Option Nat × Bool × Option String × Nat) info
+ | do logError "Failed to deserialize docstring data while traversing a parser alias"; pure none
+ let path ← (·.path) <$> read
+ let _ ← Verso.Genre.Manual.externalTag id path <| show.getD name.toString
+ Index.addEntry id {term := Doc.Inline.code <| show.getD name.toString}
+
+ modify fun st => st.saveDomainObject parserAliasDomain name.toString id
+
+ pure none
+ toHtml := some <| fun _goI goB id info contents =>
+ open Verso.Doc.Html in
+ open Verso.Output Html in do
+ let .ok (name, declName, «show», stackSz?, autoGroupArgs, docs?, argCount) := FromJson.fromJson? (α := Name × Name × Option String × Option Nat × Bool × Option String × Nat) info
+ | do Verso.Doc.Html.HtmlT.logError "Failed to deserialize parser alias data while generating HTML for a parser alias docstring"; pure .empty
+ let x : Highlighted := .token ⟨.keyword declName none docs?, show.getD name.toString⟩
+ let args : Highlighted :=
+ match argCount with
+ | 1 => .seq <|
+ #[.token ⟨.unknown, "("⟩, .token ⟨.unknown, "p"⟩, .token ⟨.unknown, ")"⟩]
+ | 2 => .seq <|
+ #[.token ⟨.unknown, "("⟩, .token ⟨.unknown, "p1"⟩, .token ⟨.unknown, ", "⟩, .token ⟨.unknown, "p2"⟩, .token ⟨.unknown, ")"⟩]
+ | _ => .empty
+
+ let xref ← HtmlT.state
+ let idAttr := xref.htmlId id
+
+ let arity :=
+ match stackSz? with
+ | some n => s!"Arity: {n}"
+ | none => "Arity is sum of arguments' arities"
+ let grp :=
+ if autoGroupArgs then
+ some {{"Automatically wraps arguments in a " "null"
" node unless there's exactly one"}}
+ else none
+ let meta :=
+ match grp with
+ | none => {{{{arity}}
}} + | some g => {{{{← (Highlighted.seq #[x, args]).toHtml}}+
+{{← bnfHtml bnf }}}} @@ -558,6 +915,7 @@ where open Verso.Output.Html in match t with | .bnf => (pure {{{{·}}}}) + | .comment => (pure {{{{·}}}}) | .error => (pure {{{{·}}}}) | .keyword => (pure {{{{·}}}}) | .nonterminal k doc? => nonTermHtmlOf k doc? @@ -568,6 +926,19 @@ where {{·}} }}) + | .localName x n cat doc? => + let doc := + match doc? with + | none => .empty + | some d => {{{{d}}
}} + -- The "token" class below triggers binding highlighting + (pure {{ + ++ {{·}} + + }}) + def Inline.syntaxKind : Inline where name := `Manual.syntaxKind diff --git a/Manual/Meta/Table.lean b/Manual/Meta/Table.lean new file mode 100644 index 0000000..64d2958 --- /dev/null +++ b/Manual/Meta/Table.lean @@ -0,0 +1,132 @@ +/- +Copyright (c) 2024 Lean FRO LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: David Thrane Christiansen +-/ + +import VersoManual +import Lean.Elab.InfoTree.Types + +import Manual.Meta.Figure + +open Verso Doc Elab +open Verso.Genre Manual +open Verso.ArgParse + +open Lean Elab + +set_option pp.rawOnError true + +namespace Manual + +def Block.table (columns : Nat) (header : Bool) (name : Option String) : Block where + name := `Manual.table + data := ToJson.toJson (columns, header, name, (none : Option Tag)) + +structure TableConfig where + /-- Name for refs -/ + name : Option String := none + header : Bool := false + + +def TableConfig.parse [Monad m] [MonadInfoTree m] [MonadLiftT CoreM m] [MonadEnv m] [MonadError m] [MonadFileMap m] : ArgParse m TableConfig := + TableConfig.mk <$> .named `tag .string true <*> ((·.getD false) <$> .named `header .bool true) + +@[directive_expander table] +def table : DirectiveExpander + | args, contents => do + let cfg ← TableConfig.parse.run args + -- The table should be a list of lists. Extract them! + let #[contentStx@`
{{x.toString}} " : " {{cat.toString}}
{{doc}}] := contents + | throwError "Expected a single unordered list" + let preRows ← items.mapM getLi + let rows ← preRows.mapM fun blks => do + let #[` ] := blks.filter (·.isOfKind ``Verso.Syntax.ul) + | throwError "Each row should have exactly one list in it" + cellItems.mapM getLi + if h : rows.size = 0 then + throwErrorAt contentStx "Expected at least one row" + else + let columns := rows[0].size + if columns = 0 then + throwErrorAt contentStx "Expected at least one column" + if rows.any (·.size != columns) then + throwErrorAt contentStx s!"Expected all rows to have same number of columns, but got {rows.map (·.size)}" + + let flattened := rows.flatten + let blocks : Array (Syntax.TSepArray `term ",") ← flattened.mapM (·.mapM elabBlock) + -- Figures are represented using the first block to hold the caption. Storing it in the JSON + -- entails repeated (de)serialization. + pure #[← ``(Block.other (Block.table $(quote columns) $(quote cfg.header) $(quote cfg.name)) #[Block.ul #[$[Verso.Doc.ListItem.mk 0 #[$blocks,*]],*]])] + +where + getLi + | ` => pure content + | other => throwErrorAt other "Expected list item" + + +@[block_extension table] +def table.descr : BlockDescr where + traverse id data contents := do + match FromJson.fromJson? data (α := Nat × Bool × Option String × Option Tag) with + | .error e => logError s!"Error deserializing table data: {e}"; pure none + | .ok (_, _, none, _) => pure none + | .ok (c, hdr, some x, none) => + let path ← (·.path) <$> read + let tag ← Verso.Genre.Manual.externalTag id path x + pure <| some <| Block.other {Block.table c hdr none with id := some id, data := toJson (c, some x, some tag)} contents + | .ok (_, _, some _, some _) => pure none + toTeX := none + toHtml := + open Verso.Doc.Html in + open Verso.Output.Html in + some <| fun goI goB id data blocks => do + match FromJson.fromJson? data (α := Nat × Bool × Option String × Option Tag) with + | .error e => + HtmlT.logError s!"Error deserializing table data: {e}" + return .empty + | .ok (columns, header, _, _) => + if let #[.ul items] := blocks then + let xref ← HtmlT.state + let attrs := xref.htmlId id + let mut items := items + let mut rows := #[] + while items.size > 0 do + rows := rows.push (items.take columns |>.map (·.contents)) + items := items.extract columns items.size + + return {{ + + {{← rows.mapIdxM fun i r => do + let cols ← Output.Html.seq <$> r.mapM fun c => do + let cell : Output.Html ← c.mapM goB + if header && i == 0 then + pure {{
+ }} + + else + HtmlT.logError "Malformed table" + return .empty + extraCss := [ +r##" +table.tabular { + margin: auto; + border-spacing: 1rem; +} +table.tabular td, table.tabular th { + text-align: left; + vertical-align: top; +} +table.tabular td > p:first-child, table.tabular th > p:first-child { + margin-top: 0; +} +table.tabular td > p:last-child, table.tabular th > p:first-child { + margin-bottom: 0; +} +"## + ] diff --git a/Manual/NotationsMacros.lean b/Manual/NotationsMacros.lean index 238372a..ad607e3 100644 --- a/Manual/NotationsMacros.lean +++ b/Manual/NotationsMacros.lean @@ -7,10 +7,12 @@ Author: David Thrane Christiansen import VersoManual import Manual.Meta -import Manual.Language.Classes -import Manual.Language.Functions -import Manual.Language.Files -import Manual.Language.InductiveTypes +import Manual.Papers + +import Manual.NotationsMacros.Operators +import Manual.NotationsMacros.Precedence +import Manual.NotationsMacros.Notations +import Manual.NotationsMacros.SyntaxDef import Lean.Parser.Command @@ -29,65 +31,1143 @@ tag := "language-extension" %%% Different mathematical fields have their own notational conventions, and many notations are re-used with differing meanings in different fields. -It is important that formal developments are able to use established notations: formalized mathematics is already difficult, and the mental overhead of translating between syntaxes can be substantial. +It is important that formal developments are able to use established notations: formalizing mathematics is already difficult, and the mental overhead of translating between syntaxes can be substantial. At the same time, it's important to be able to control the scope of notational extensions. Many fields use related notations with very different meanings, and it should be possible to combine developments from these separate fields in a way where both readers and the system know which convention is in force in any given region of a file. -Lean addresses the problem of notational extensibility with a variety of mechanisms, each of which solves different aspects of the problem. +Lean addresses the problem of notational extensibility with a variety of mechanisms, each of which solves a different aspect of the problem. They can be combined flexibly to achieve the necessary results: * The {ref "parser"}_extensible parser_ {index}[parser] allows a great variety of notational conventions to be implemented declaratively, and combined flexibly. * {ref "macro-and-elab"}[Macros] allow new syntax to be easily mapped to existing syntax, which is a simple way to provide meaning to new constructs. Due to {tech}[hygiene] and automatic propagation of source positions, this process doesn't interfere with Lean's interactive features. - * {ref "macro-and-elab"}[Elaborators] provide new notations with the same tools available to Lean's own syntax in cases where a macro is insufficiently expressive. + * {ref "macro-and-elab"}[Elaborators] provide new syntax with the same tools available to Lean's own syntax in cases where a macro is insufficiently expressive. + * {ref "notations"}[Notations] allow the simultaneous definition of a parser extension, a macro, and a pretty printer. + When defining infix, prefix, or postfix operators, {ref "operators"}[custom operators] automatically take care of precedence and associativity. + * Low-level parser extensions allow the parser to be extended in ways that modify its rules for tokens and whitespace, or that even completely replace Lean's syntax. This is an advanced topic that requires familiarity with Lean internals; nevertheless, the possibility of doing this without modifying the compiler is important. This reference manual is written using a language extension that replaces Lean's concrete syntax with a Markdown-like language for writing documents, but the source files are still Lean files. + +{include 0 Manual.NotationsMacros.Operators} + +{include 0 Manual.NotationsMacros.Precedence} + +{include 0 Manual.NotationsMacros.Notations} -# Notations +{include 0 Manual.NotationsMacros.SyntaxDef} + +# Macros %%% -tag := "notations" +tag := "macros" %%% -:::planned 69 -A presentation of the `notation` command and how to define infix operators +{deftech}_Macros_ are transformations from {name Lean.Syntax}`Syntax` to {name Lean.Syntax}`Syntax` that occur during {tech key:="elaborator"}[elaboration] and during {ref "tactic-macros"}[tactic execution]. +Replacing syntax with the result of transforming it with a macro is called {deftech}_macro expansion_. +Multiple macros may be associated with a single {tech}[syntax kind], and they are attempted in order of definition. +Macros are run in a {tech}[monad] that has access to some compile-time metadata and has the ability to either emit an error message or to delegate to subsequent macros, but the macro monad is much less powerful than the elaboration monads. + +```lean (show := false) +section +open Lean (Syntax MacroM) +``` + +Macros are associated with {tech}[syntax kinds]. +An internal table maps syntax kinds to macros of type {lean}`Syntax → MacroM Syntax`. +Macros delegate to the next entry in the table by throwing the {name Lean.Macro.Exception.unsupportedSyntax}`unsupportedSyntax` exception. +A given {name}`Syntax` value _is a macro_ when there is a macro associated with its syntax kind that does not throw {name Lean.Macro.Exception.unsupportedSyntax}`unsupportedSyntax`. +If a macro throws any other exception, an error is reported to the user. +{tech}[Syntax categories] are irrelevant to macro expansion; however, because each syntax kind is typically associated with a single syntax category, they do not interfere in practice. + +::::keepEnv +:::example "Macro Error Reporting" +The following macro reports an error when its parameter is the literal numeral five. +It expands to its argument in all other cases. +```lean +syntax &"notFive" term:arg : term +open Lean in +macro_rules + | `(term|notFive 5) => Macro.throwError "'5' is not allowed here" + | `(term|notFive $e) => pure e +``` + +When applied to terms that are not syntactically the numeral five, elaboration succeeds: +```lean (name := notFiveAdd) +#eval notFive (2 + 3) +``` +```leanOutput notFiveAdd +5 +``` + +When the error case is triggered, the user receives an error message: +```lean (name := notFiveFive) (error := true) +#eval notFive 5 +``` +```leanOutput notFiveFive +'5' is not allowed here +``` ::: +:::: -::: TODO - * How to define them - * Precedence and associativity - * Limitations - when not? +Before elaborating a piece of syntax, the elaborator checks whether its {tech}[syntax kind] has macros associated with it. +These are attempted in order. +If a macro succeeds, potentially returning syntax with a different kind, the check is repeated and macros are expanded again until the outermost layer of syntax is no longer a macro. +Elaboration or tactic execution can then proceed. +Only the outermost layer of syntax (typically a {name Lean.Syntax.node}`node`) is expanded, and the output of macro expansion may contain nested syntax that is a macro. +These nested macros are expanded in turn when the elaborator reaches them. + +In particular, macro expansion occurs in three situations in Lean: + + 1. During term elaboration, macros in the outermost layer of the syntax to be elaborated are expanded prior to invoking the {ref "elaborators"}[syntax's term elaborator]. + + 2. During command elaboration, macros in the outermost layer of the syntax to be elaborated are expanded prior to invoking the {ref "elaborators"}[syntax's command elaborator]. + + 3. During tactic execution, macros in the outermost layer of the syntax to be elaborated are expanded {ref "tactic-macros"}[prior to executing the syntax as a tactic]. + + +```lean (keep := false) (show := false) +-- Test claim in preceding paragraph that it's OK for macros to give up prior to elab +syntax "doubled " term:arg : term + +macro_rules + | `(term|doubled $n:num) => `($n * 2) + | `(term|doubled $_) => Lean.Macro.throwUnsupported + +/-- info: 10 -/ +#guard_msgs in +#eval doubled 5 + +/-- +error: elaboration function for 'termDoubled_' has not been implemented + doubled (5 + 2) +-/ +#guard_msgs in +#eval doubled (5 + 2) + +elab_rules : term + | `(term|doubled $e:term) => Lean.Elab.Term.elabTerm e none + +/-- info: 7 -/ +#guard_msgs in +#eval doubled (5 + 2) +``` + +## Hygiene +%%% +tag := "macro-hygiene" +%%% + +A macro is {deftech (key:="hygiene")}_hygienic_ if its expansion cannot result in identifier capture. +{deftech}[Identifier capture] is when an identifier ends up referring to a binding site other than that which is in scope where the identifier occurs in the source code. +There are two types of identifier capture: + * If a macro's expansion introduces binders, then identifiers that are parameters to the macro may end up referring to the introduced binders if their names happen to match. + * If a macro's expansion is intended to refer to a name, but the macro is used in a context that either locally binds this name or in which a new global name has been introduced, it may end up referring to the wrong name. + +The first kind of variable capture can be avoided by ensuring that every binding introduced by a macro uses a freshly generated, globally-unique name, while the second can be avoided by always using fully-qualified names to refer to constants. +The fresh names must be generated again at each invocation of the macro to avoid variable capture in recursive macros. +These techniques are error-prone. +Variable capture issues are difficult to test for because they rely on coincidences of name choices, and consistently applying these techniques results in noisy code. + +Lean features automatic hygiene: in almost all cases, macros are automatically hygienic. +Capture by introduced bindings is avoided by annotating identifiers introduced by a macro with {deftech}_macro scopes_, which uniquely identify each invocation of macro expansion. +If the binding and the use of the identifier have the same macro scopes, then they were introduced by the same step of macro expansion and should refer to one another. +Similarly, uses of global names in code generated by a macro are not captured by local bindings in the context in which they are expanded because these use sites have macro scopes that are not present in the binding occurrence. +Capture by newly-introduced global names is prevented by annotating potential global name references with the set of global names that match at quotation time in code produced in the macro's body. +Identifiers annotated with potential referents are called {deftech}_pre-resolved identifiers_, and the {lean}`Syntax.Preresolved` field on the {name}`Syntax.ident` constructor is used to store the potential referents. +During elaboration, if an identifier has pre-resolved global names associated with it, then other global names are not considered as valid reference targets. + +The introduction of macro scopes and pre-resolved identifiers to generated syntax occurs during {tech}[quotation]. +Macros that construct syntax by other means than quotation should also ensure hygiene by some other means. +For more details on Lean's hygiene algorithm, please consult {citet beyondNotations ullrich23}[]. + +## The Macro Monad +%%% +tag := "macro-monad" +%%% + +The macro monad {name Lean.MacroM}`MacroM` is sufficiently powerful to implement hygiene and report errors. +Macro expansion does not have the ability to modify the environment directly, to carry out unification, to examine the current local context, or to do anything else that only makes sense in one particular context. +This allows the same macro mechanism to be used throughout Lean, and it makes macros much easier to write than {tech}[elaborators]. + +{docstring Lean.MacroM} + +{docstring Lean.Macro.expandMacro?} + +{docstring Lean.Macro.trace} + +### Exceptions and Errors +%%% +tag := "macro-exceptions" +%%% + +The {name Lean.Macro.Exception.unsupportedSyntax}`unsupportedSyntax` exception is used for control flow during macro expansion. +It indicates that the current macro is incapable of expanding the received syntax, but that an error has not occurred. +The exceptions thrown by {name Lean.Macro.throwError}`throwError` and {name Lean.Macro.throwErrorAt}`throwErrorAt` terminate macro expansion, reporting the error to the user. + +{docstring Lean.Macro.throwUnsupported} + +{docstring Lean.Macro.Exception.unsupportedSyntax} + +{docstring Lean.Macro.throwError} + +{docstring Lean.Macro.throwErrorAt} + +### Hygiene-Related Operations +%%% +tag := "macro-monad-hygiene" +%%% + +{tech}[Hygiene] is implemented by adding {tech}[macro scopes] to the identifiers that occur in syntax. +Ordinarily, the process of {tech}[quotation] adds all necessary scopes, but macros that construct syntax directly must add macro scopes to the identifiers that they introduce. + +{docstring Lean.Macro.withFreshMacroScope} + +{docstring Lean.Macro.addMacroScope} + +### Querying the Environment +%%% +tag := "macro-environment" +%%% + +Macros have only limited support for querying the environment. +They can check whether a constant exists and resolve names, but further introspection is unavailable. + +{docstring Lean.Macro.hasDecl} + +{docstring Lean.Macro.getCurrNamespace} + +{docstring Lean.Macro.resolveNamespace} + +{docstring Lean.Macro.resolveGlobalName} + +## Quotation +%%% +tag := "quotation" +%%% + +{deftech}_Quotation_ marks code for representation as data of type {name}`Syntax`. +Quoted code is parsed, but not elaborated—while it must be syntactically correct, it need not make sense. +Quotation makes it much easier to programmatically generate code: rather than reverse-engineering the specific nesting of {name Lean.Syntax.node}`node` values that Lean's parser would produce, the parser can be directly invoked to create them. +This is also more robust in the face of refactoring of the grammar that may change the internals of the parse tree without affecting the user-visible concrete syntax. +Quotation in Lean is surrounded by ```(`` and `)`. + +The syntactic category or parser being quoted may be indicated by placing its name after the opening backtick and parenthesis, followed by a vertical bar (`|`). +As a special case, the name `tactic` may be used to parse either tactics or sequences of tactics. +If no syntactic category or parser is provided, Lean attempts to parse the quotation both as a term and as a non-empty sequence of commands. +Term quotations have higher priority than command quotations, so in cases of ambiguity, the interpretation as a term is chosen; this can be overridden by explicitly indicating that the quotation is of a command sequence. + +::::keepEnv +:::example "Term vs Command Quotation Syntax" +```lean (show := false) +open Lean +``` + +In the following example, the contents of the quotation could either be a function application or a sequence of commands. +Both match the same region of the file, so the {tech}[local longest-match rule] is not relevant. +Term quotation has a higher priority than command quotation, so the quotation is interpreted as a term. +Terms expect their {tech}[antiquotations] to have type {lean}``TSyntax `term`` rather than {lean}``TSyntax `command``. +```lean (error := true) (name := cmdQuot) +example (cmd1 cmd2 : TSyntax `command) : MacroM (TSyntax `command) := `($cmd1 $cmd2) +``` +The result is two type errors like the following: +```leanOutput cmdQuot +application type mismatch + cmd1.raw +argument + cmd1 +has type + TSyntax `command : Type +but is expected to have type + TSyntax `term : Type +``` + +The type of the quotation ({lean}``MacroM (TSyntax `command)``) is not used to select a result because syntax priorities are applied prior to elaboration. +In this case, specifying that the antiquotations are commands resolves the ambiguity because function application would require terms in these positions: +```lean +example (cmd1 cmd2 : TSyntax `command) : MacroM (TSyntax `command) := `($cmd1:command $cmd2:command) +``` +Similarly, inserting a command into the quotation eliminates the possibility that it could be a term: +```lean +example (cmd1 cmd2 : TSyntax `command) : MacroM (TSyntax `command) := `($cmd1 $cmd2 #eval "hello!") +``` ::: +:::: + +```lean (show := false) +-- There is no way to extract parser priorities (they're only saved in the Pratt tables next to +-- compiled Parser code), so this test of priorities checks the observable relative priorities of the +-- quote parsers. + +/-- +info: do + let _ ← Lean.MonadRef.mkInfoFromRefPos + let _ ← Lean.getCurrMacroScope + let _ ← Lean.getMainModule + pure { raw := { raw := Syntax.missing }.raw } : MacroM (Lean.TSyntax `term) +-/ +#guard_msgs in +#check (`($(⟨.missing⟩)) : MacroM _) +/-- +info: do + let info ← Lean.MonadRef.mkInfoFromRefPos + let _ ← Lean.getCurrMacroScope + let _ ← Lean.getMainModule + pure + { + raw := + Syntax.node2 info `Lean.Parser.Term.app { raw := Syntax.missing }.raw + (Syntax.node1 info `null { raw := Syntax.missing }.raw) } : MacroM (Lean.TSyntax `term) +-/ +#guard_msgs in +#check (`($(⟨.missing⟩) $(⟨.missing⟩)) : MacroM _) +/-- +info: do + let info ← Lean.MonadRef.mkInfoFromRefPos + let _ ← Lean.getCurrMacroScope + let _ ← Lean.getMainModule + pure + { + raw := + Syntax.node2 info `null { raw := Syntax.missing }.raw + { raw := Syntax.missing }.raw } : MacroM (Lean.TSyntax `command) +-/ +#guard_msgs in +#check (`($(⟨.missing⟩):command $(⟨.missing⟩)) : MacroM _) + +/-- +info: do + let _ ← Lean.MonadRef.mkInfoFromRefPos + let _ ← Lean.getCurrMacroScope + let _ ← Lean.getMainModule + pure { raw := { raw := Syntax.missing }.raw } : MacroM (Lean.TSyntax `tactic) +-/ +#guard_msgs in +#check (`(tactic| $(⟨.missing⟩):tactic) : MacroM _) + +/-- +info: do + let info ← Lean.MonadRef.mkInfoFromRefPos + let _ ← Lean.getCurrMacroScope + let _ ← Lean.getMainModule + pure + { + raw := + Syntax.node1 info `Lean.Parser.Tactic.seq1 + (Syntax.node3 info `null { raw := Syntax.missing }.raw (Syntax.atom info ";") + { raw := Syntax.missing }.raw) } : MacroM (Lean.TSyntax `tactic.seq) +-/ +#guard_msgs in +#check (`(tactic| + $(⟨.missing⟩):tactic; $(⟨.missing⟩)) : MacroM _) +``` + +:::freeSyntax term (open := false) -# Defining New Syntax +Lean's syntax includes quotations for terms, commands, tactics, and sequences of tactics, as well as a general quotation syntax that allows any input that Lean can parse to be quoted. +Term quotations have the highest priority, followed by tactic quotations, general quotations, and finally command quotations. -## Syntax Categories and Extensions +```grammar +`(term|`($_:term)) +******* +"`("$_:command+")" +******* +`(term|`(tactic| $_:tactic)) +******* +`(term|`(tactic| $_:tactic;*)) +******* +"`("p:ident"|"/-- Parse a {p} here -/")" +``` +::: + +```lean (show := false) +section M +variable {m : Type → Type} +open Lean (MonadRef MonadQuotation) +open Lean.Elab.Term (TermElabM) +open Lean.Elab.Command (CommandElabM) +open Lean.Elab.Tactic (TacticM) +``` + +Rather than having type {name}`Syntax`, quotations are monadic actions with type {lean}`m Syntax`. +Quotation is monadic because it implements {tech}[hygiene] by adding {tech}[macro scopes] and pre-resolving identifiers, as described in {ref "macro-hygiene"}[the section on hygiene]. +The specific monad to be used is an implicit parameter to the quotation, and any monad for which there is an instance of the {name}`MonadQuotation` type class is suitable. +{name}`MonadQuotation` extends {name}`MonadRef`, which gives the quotation access to the source location of the syntax that the macro expander or elaborator is currently processing. {name}`MonadQuotation` additionally includes the ability to add {tech}[macro scopes] to identifiers and use a fresh macro scope for a sub-task. +Monads that support quotation include {name}`MacroM`, {name}`TermElabM`, {name}`CommandElabM`, and {name}`TacticM`. + +```lean (show := false) +end M +``` + + +```lean (show := false) +-- Verify claim about monads above +open Lean in +example [Monad m] [MonadQuotation m] : m Syntax := `(term|2 + 2) +``` + +### Quasiquotation %%% -tag := "syntax-categories" +tag := "quasiquotation" %%% -# Macros +{deftech}_Quasiquotation_ is a form of quotation that may contain {deftech}_antiquotations_, which are regions of the quotation that are not quoted, but instead are expressions that are evaluated to yield syntax. +A quasiquotation is essentially a template; the outer quoted region provides a fixed framework that always yields the same outer syntax, while the antiquotations yield the parts of the final syntax that vary. +All quotations in Lean are quasiquotations, so no special syntax is needed to distinguish quasiquotations from other quotations. +The quotation process does not add macro scopes to identifiers that are inserted via antiquotations, because these identifiers either come from another quotation (in which case they already have macro scopes) or from the macro's input (in which case they should not have macro scopes, because they are not introduced by the macro). + +Basic antiquotations consist of a dollar sign (`$`) immediately followed by an identifier. +This means that the value of the corresponding variable, which should be a syntax tree, is to be substituted into this position of the quoted syntax. +Entire expressions may be used as antiquotations by wrapping them in parentheses. + +```lean (show := false) +section +open Lean +example (e : Term) : MacroM Syntax := `(term| $e) + +example (e : Term) : MacroM Syntax := `(term| $(e)) + +--example (e : Term) : MacroM Syntax := `(term| $ (e)) + +end +``` + + + +```lean (show := false) +section +open Lean (TSyntax SyntaxNodeKinds) +variable {c : SyntaxNodeKinds} +``` + +Lean's parser assigns every antiquotation a syntax category based on what the parser expects at the given position. +If the parser expects syntax category {lean}`c`, then the antiquotation's type is {lean}`TSyntax c`. + + +Some syntax categories can be matched by elements of other categories. +For example, numeric and string literals are valid terms in addition to being their own syntax categories. +Antiquotations may be annotated with the expected category by suffixing them with a colon and the category name, which causes the parser to validate that the annotated category is acceptable in the given position and construct any intermediate layers that are required in the parse tree. + +:::freeSyntax antiquot title:="Antiquotations" open := false +```grammar +"$"ident(":"ident)? +******* +"$("term")"(":"ident)? +``` +Whitespace is not permitted between the dollar sign ('$') that initiates an antiquotation and the identifier or parenthesized term that follows. +Similarly, no whitespace is permitted around the colon that annotates the syntax category of the antiquotation. +::: + +:::example "Quasiquotation" + +Both forms of antiquotation are used in this example. +Because natural numbers are not syntax, {name Lean.quote}`quote` is used to transform a number into syntax that represents it. + +```lean +open Lean in +example [Monad m] [MonadQuotation m] (x : Term) (n : Nat) : m Syntax := + `($x + $(quote (n + 2))) +``` +::: + +:::::keepEnv +::::example "Antiquotation Annotations" +```lean (show := false) +open Lean +``` + +This example requires that {lean}`m` is a monad that can perform quotation. +```lean +variable {m : Type → Type} [Monad m] [MonadQuotation m] +``` + +By default, the antiquotation `$e` is expected to be a term, because that's the syntactic category that's immediately expected as the second argument to addition. +```lean (name := ex1) +def ex1 (e) := show m _ from `(2 + $e) +#check ex1 +``` +```leanOutput ex1 +ex1 {m : Type → Type} [Monad m] [MonadQuotation m] (e : TSyntax `term) : m (TSyntax `term) +``` + +Annotating `$e` as a numeric literal succeeds, because numeric literals are also valid terms. +The expected type of the parameter `e` changes to ``TSyntax `num``. +```lean (name := ex2) +def ex2 (e) := show m _ from `(2 + $e:num) +#check ex2 +``` +```leanOutput ex2 +ex2 {m : Type → Type} [Monad m] [MonadQuotation m] (e : TSyntax `num) : m (TSyntax `term) +``` + +Spaces are not allowed between the dollar sign and the identifier. +```syntaxError ex2err1 +def ex2 (e) := show m _ from `(2 + $ e:num) +``` +```leanOutput ex2err1 +{{cell}} }} + else + pure {{{{cell}} }} + pure {{{{cols}} }} + }} +:1:35: expected '`(tactic|' or no space before spliced term +``` + +Spaces are also not allowed before the colon: +```syntaxError ex2err2 +def ex2 (e) := show m _ from `(2 + $e :num) +``` +```leanOutput ex2err2 + :1:38: expected ')' +``` +:::: +::::: + +```lean (show := false) +end +``` + +::::keepEnv +:::example "Expanding Quasiquotation" +Printing the definition of {name}`f` demonstrates the expansion of a quasiquotation. +```lean (name := expansion) +open Lean in +def f [Monad m] [MonadQuotation m] (x : Term) (n : Nat) : m Syntax := + `(fun k => $x + $(quote (n + 2)) + k) +#print f +``` +```leanOutput expansion +def f : {m : Type → Type} → [inst : Monad m] → [inst : Lean.MonadQuotation m] → Lean.Term → Nat → m Syntax := +fun {m} [Monad m] [Lean.MonadQuotation m] x n => do + let info ← Lean.MonadRef.mkInfoFromRefPos + let scp ← Lean.getCurrMacroScope + let mainModule ← Lean.getMainModule + pure + { + raw := + Syntax.node2 info `Lean.Parser.Term.fun (Syntax.atom info "fun") + (Syntax.node4 info `Lean.Parser.Term.basicFun + (Syntax.node1 info `null (Syntax.ident info "k".toSubstring' (Lean.addMacroScope mainModule `k scp) [])) + (Syntax.node info `null #[]) (Syntax.atom info "=>") + (Syntax.node3 info `«term_+_» + (Syntax.node3 info `«term_+_» x.raw (Syntax.atom info "+") (Lean.quote `term (n + 2)).raw) + (Syntax.atom info "+") + (Syntax.ident info "k".toSubstring' (Lean.addMacroScope mainModule `k scp) []))) }.raw +``` + +```lean (show := false) +section +variable {x : Term} {n : Nat} +``` + +In this output, the quotation is a {keywordOf Lean.Parser.Term.do}`do` block. +It begins by constructing the source information for the resulting syntax, obtained by querying the compiler about the current user syntax being processed. +It then obtains the current macro scope and the name of the module being processed, because macro scopes are added with respect to a module to enable independent compilation and avoid the need for a global counter. +It then constructs a node using helpers such as {name}`Syntax.node1` and {name}`Syntax.node2`, which create a {name}`Syntax.node` with the indicated number of children. +The macro scope is added to each identifier, and {name Lean.TSyntax.raw}`TSyntax.raw` is used to extract the contents of typed syntax wrappers. +The antiquotations of {lean}`x` and {lean}`quote (n + 2)` occur directly in the expansion, as parameters to {name}`Syntax.node3`. + +```lean (show := false) +end +``` +::: +:::: + + +### Splices %%% -tag := "macros" +tag := "splices" %%% -:::planned 71 - * Definition of {deftech}_macro_ - * `macro_rules` - * Syntax patterns - * Backtracking on expansion failure - * {deftech}[Hygiene] and quotation - * The `macro` command +In addition to including other syntax via antiquotations, quasiquotations can include {deftech}_splices_. +Splices indicate that the elements of an array are to be inserted in order. +The repeated elements may include separators, such as the commas between list or array elements. +Splices may consist of an ordinary antiquotation with a {deftech}_splice suffix_, or they may be {deftech}_extended splices_ that provide additional repeated structure. + +Splice suffixes consist of either an asterisk or a valid atom followed by an asterisk (`*`). +Suffixes may follow any identifier or term antiquotation. +An antiquotation with the splice suffix `*` corresponds to a use of `many` or `many1`; both the `*` and `+` suffixes in syntax rules correspond to the `*` splice suffix. +An antiquotation with a splice suffix that includes an atom prior to the asterisk corresponds to a use of `sepBy` or `sepBy1`. +The splice suffix `?` corresponds to a use of `optional` or the `?` suffix in a syntax rule. +Because `?` is a valid identifier character, identifiers must be parenthesized to use it as a suffix. + +While there is overlap between repetition specifiers for syntax and antiquotation suffixes, they have distinct syntaxes. +When defining syntax, the suffixes `*`, `+`, `,*`, `,+`, `,*,?`, and `,+,?` are built in to Lean. +There is no shorter way to specify separators other than `,`. +Antiquotation suffixes are either just `*` or whatever atom was provided to `sepBy` or `sepBy1` followed by `*`. +The syntax repetitions `+` and `*` correspond to the splice suffix `*`; the repetitions `,*`, `,+`, `,*,?`, and `,+,?` correspond to `,*`. +The optional suffix `?` in syntax and splices correspond with each other. + + +:::table (header := true) + * ignore + - Syntax Repetition + - Splice Suffix + * ignore + - `+` `*` + - `*` + * ignore + - `,*` `,+` `,*,?` `,+,?` + - `,*` + * ignore + - `sepBy(_, "S")` `sepBy1(_, "S")` + - `S*` + * ignore + - `?` + - `?` ::: -## Matching Syntax +::::keepEnv +:::example "Suffixed Splices" +```lean (show := false) +open Lean +open Lean.Elab.Command (CommandElabM) +``` + +This example requires that {lean}`m` is a monad that can perform quotation. +```lean +variable {m : Type → Type} [Monad m] [MonadQuotation m] +``` + +By default, the antiquotation `$e` is expected to be an array of terms separated by commas, as is expected in the body of a list: +```lean (name := ex1) +def ex1 (xs) := show m _ from `(#[$xs,*]) +#check ex1 +``` +```leanOutput ex1 +ex1 {m : Type → Type} [Monad m] [MonadQuotation m] (xs : Syntax.TSepArray `term ",") : m (TSyntax `term) +``` + +However, Lean includes a collection of coercions between various representations of arrays that will automatically insert or remove separators, so an ordinary array of terms is also acceptable: +```lean (name := ex2) +def ex2 (xs : Array (TSyntax `term)) := show m _ from `(#[$xs,*]) +#check ex2 +``` +```leanOutput ex2 +ex2 {m : Type → Type} [Monad m] [MonadQuotation m] (xs : Array (TSyntax `term)) : m (TSyntax `term) +``` + +Repetition annotations may also be used with term antiquotations and syntax category annotations. +This example is in {name Lean.Elab.Command.CommandElabM}`CommandElabM` so the result can be conveniently logged. +```lean (name := ex3) +def ex3 (size : Nat) := show CommandElabM _ from do + let mut nums : Array Nat := #[] + for i in [0:size] do + nums := nums.push i + let stx ← `(#[$(nums.map (Syntax.mkNumLit ∘ toString)):num,*]) + -- Using logInfo here causes the syntax to be rendered via the pretty printer. + logInfo stx + +#eval ex3 4 +``` +```leanOutput ex3 +#[0, 1, 2, 3] +``` +::: +:::: + +::::keepEnv +:::example "Non-Comma Separators" +The following unconventional syntax for lists separates numeric elements by either em dashes or double asterisks, rather than by commas. +```lean +syntax "⟦" sepBy1(num, " — ") "⟧": term +syntax "⟦" sepBy1(num, " ** ") "⟧": term +``` +This means that `—*` and `***` are valid splice suffixes between the `⟦` and `⟧` atoms. +In the case of `***`, the first two asterisks are the atom in the syntax rule, while the third is the repetition suffix. +```lean +macro_rules + | `(⟦$n:num—*⟧) => `(⟦$n***⟧) + | `(⟦$n:num***⟧) => `([$n,*]) +``` +```lean (name := nonComma) +#eval ⟦1 — 2 — 3⟧ +``` +```leanOutput nonComma +[1, 2, 3] +``` +::: +:::: + +::::keepEnv +:::example "Optional Splices" +The following syntax declaration optionally matches a term between two tokens. +The parentheses around the nested `term` are needed because `term?` is a valid identifier. +```lean (show := false) +open Lean +``` +```lean +syntax "⟨| " (term)? " |⟩": term +``` + +The `?` splice suffix for a term expects an {lean}`Option Term`: +```lean +def mkStx [Monad m] [MonadQuotation m] (e) : m Term := + `(⟨| $(e)? |⟩) +``` +```lean (name := checkMkStx) +#check mkStx +``` +```leanOutput checkMkStx +mkStx {m : Type → Type} [Monad m] [MonadQuotation m] (e : Option (TSyntax `term)) : m Term +``` + +Supplying {name}`some` results in the optional term being present. +```lean (name := someMkStx) +#eval do logInfo (← mkStx (some (quote 5))) +``` +```leanOutput someMkStx +⟨| 5 |⟩ +``` + +Supplying {name}`none` results in the optional term being absent. +```lean (name := noneMkStx) +#eval do logInfo (← mkStx none)) +``` +```leanOutput noneMkStx +⟨| |⟩ +``` + +::: +:::: + +```lean (show := false) +section +open Lean Syntax +variable {k k' : SyntaxNodeKinds} {sep : String} [Coe (TSyntax k) (TSyntax k')] +-- Demonstrate the coercions between different kinds of repeated syntax + +/-- info: instCoeHTCTOfCoeHTC -/ +#guard_msgs in +#synth CoeHTCT (TSyntaxArray k) (TSepArray k sep) + +/-- info: instCoeHTCTOfCoeHTC -/ +#guard_msgs in +#synth CoeHTCT (TSyntaxArray k) (TSepArray k' sep) + +/-- info: instCoeHTCTOfCoeHTC -/ +#guard_msgs in +#synth CoeHTCT (Array (TSyntax k)) (TSepArray k sep) + +/-- info: instCoeHTCTOfCoeHTC -/ +#guard_msgs in +#synth CoeHTCT (TSepArray k sep) (TSyntaxArray k) + +end +``` + +### Token Antiquotations +%%% +tag := "token-antiquotations" +%%% + +In addition to antiquotations of complete syntax, Lean features {deftech}_token antiquotations_ which allow the source information of an atom to be replaced with the source information from some other syntax. +This is primarily useful to control the placement of error messages or other information that Lean reports to users. +A token antiquotation does not allow an arbitrary atom to be inserted via evaluation. +A token antiquotation consists of an atom (that is, a keyword) + +:::freeSyntax antiquot (open := true) (title := "Token Antiquotations") +Token antiquotations replace the source information (of type {name Lean.SourceInfo}`SourceInfo`) on a token with the source information from some other syntax. + +```grammar +atom"%$"ident +``` +::: + ::: TODO - * Quasiquotations - * Troubleshooting: longest match + +More complex splices with brackets + +::: + +## Matching Syntax +%%% +tag := "quote-patterns" +%%% + +Quasiquotations can be used in pattern matching to recognize syntax that matches a template. +Just as antiquotations in a quotation that's used as a term are regions that are treated as ordinary non-quoted expressions, antiquotations in patterns are regions that are treated as ordinary Lean patterns. +Quote patterns are compiled differently from other patterns, so they can't be intermixed with non-quote patterns in a single {keywordOf Lean.Parser.Term.match}`match` expression. +Like ordinary quotations, quote patterns are first processed by Lean's parser. +The parser's output is then compiled into code that determines whether there is a match. +Syntax matching assumes that the syntax being matched was produced by Lean's parser, either via quotation or directly in user code, and uses this to omit some checks. +For example, if nothing but a particular keyword can be present in a given position, the check may be omitted. + +Syntax matches a quote pattern in the following cases: + + + : Atoms + + Keyword atoms (such as {keywordOf termIfThenElse}`if` or {keywordOf Lean.Parser.Term.match}`match`) result in singleton nodes whose kind is `token.` followed by the atom. + In many cases, it is not necessary to check for specific atom values because the grammar allows only a single keyword, and no checking will be performed. + If the syntax of the term being matched requires the check, then the node kind is compared. + + Literals, such as string or numeric literals, are compared via their underlying string representation. + The pattern ```(0x15)`` and the quotation ```(21)`` do not match. + + : Nodes + + If both the pattern and the value being matched represent {name}`Syntax.node`, there is a match when both have the same syntax kind, the same number of children, and each child pattern matches the corresponding child value. + + : Identifiers + + If both the pattern and the value being matched are identifiers, then their literal {name Lean.Name}`Name` values are compared for equality modulo macro scopes. + Identifiers that “look” the same match, and it does not matter if they refer to the same binding. + This design choice allows quote pattern matching to be used in contexts that don't have access to a compile-time environment in which names can be compared by reference. + + +Because quotation pattern matching is based on the node kinds emitted by the parser, quotations that look identical may not match if they come from different syntax categories. +If in doubt, including the syntax category in the quotation can help. + +## Defining Macros +%%% +tag := "defining-macros" +%%% + + +There are two primary ways to define macros: the {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` command and the {keywordOf Lean.Parser.Command.macro}`macro` command. +The {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` command associates a macro with existing syntax, while the {keywordOf Lean.Parser.Command.macro}`macro` command simultaneously defines new syntax and a macro that translates it to existing syntax. +The {keywordOf Lean.Parser.Command.macro}`macro` command can be seen as a generalization of {keywordOf Lean.Parser.Command.notation}`notation` that allows the expansion to be generated programmatically, rather than simply by substitution. + +### The `macro_rules` Command +%%% +tag := "macro_rules" +%%% + +:::syntax command + +The {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` command takes a sequence of rewrite rules, specified as syntax pattern matches, and adds each as a macro. +The rules are attempted in order, before previously-defined macros, and later macro definitions may add further macro rules. + +```grammar +$[$d:docComment]? +$[@[$attrs,*]]? +$_:attrKind macro_rules $[(kind := $k)]? + $[| `(free{(p:ident"|")?/-- Suitable syntax for {p} -/}) => $e]* +``` +::: + +The patterns in the macros must be quotation patterns. +They may match syntax from any syntax category, but a given pattern can only ever match a single syntax kind. +If no category or parser is specified for the quotation, then it may match terms or (sequences of) commands, but never both. +In case of ambiguity, the term parser is chosen. + +Internally, macros are tracked in a table that maps each {tech}[syntax kind] to its macros. +The {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` command may be explicitly annotated with a syntax kind. + +If a syntax kind is explicitly provided, the macro definition checks that each quotation pattern has that kind. +If the parse result for the quotation was a {tech}[choice node] (that is, if the parse was ambiguous), then the pattern is duplicated once for each alternative with the specified kind. +It is an error if none of the alternatives have the specified kind. + +If no kind is provided explicitly, then the kind determined by the parser is used for each pattern. +The patterns are not required to all have the same syntax kind; macros are defined for each syntax kind used by at least one of the patterns. +It is an error if the parse result for a quotation pattern was a {tech}[choice node] (that is, if the parse was ambiguous). + +The documentation comment associated with {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` is displayed to users if the syntax itself has no documentation comment. +Otherwise, the documentation comment for the syntax itself is shown. + +As with {ref "notations"}[notations] and {ref "operators"}[operators], macro rules may be declared `scoped` or `local`. +Scoped macros are only active when the current namespace is open, and local macro rules are only active in the current {tech}[section scope]. + +::::keepEnv +:::example "Idiom Brackets" +Idiom brackets are an alternative syntax for working with applicative functors. +If the idiom brackets contain a function application, then the function is wrapped in {name}`pure` and applied to each argument using `<*>`. {TODO}[Operator hyperlinking to docs] +Lean does not support idiom brackets by default, but they can be defined using a macro. +```lean +syntax (name := idiom) "⟦" (term:arg)+ "⟧" : term + +macro_rules + | `(⟦$f $args*⟧) => do + let mut out ← `(pure $f) + for arg in args do + out ← `($out <*> $arg) + return out +``` + +This new syntax can be used immediately. +```lean +def addFirstThird [Add α] (xs : List α) : Option α := + ⟦Add.add xs[0]? xs[2]?⟧ +``` +```lean (name := idiom1) +#eval addFirstThird (α := Nat) [] +``` +```leanOutput idiom1 +none +``` +```lean (name := idiom2) +#eval addFirstThird [1] +``` +```leanOutput idiom2 +none +``` +```lean (name := idiom3) +#eval addFirstThird [1,2,3,4] +``` +```leanOutput idiom3 +some 4 +``` +::: +:::: + +::::keepEnv +:::example "Scoped Macros" +Scoped macro rules are active only in their namespace. +When the namespace `ConfusingNumbers` is open, numeric literals will be assigned an incorrect meaning. +````lean +namespace ConfusingNumbers +```` + +The following macro recognizes terms that are odd numeric literals, and replaces them with double their value. +If it unconditionally replaced them with double their value, then macro expansion would become an infinite loop because the same rule would always match the output. + +```lean +scoped macro_rules + | `($n:num) => do + if n.getNat % 2 = 0 then Lean.Macro.throwUnsupported + let n' := (n.getNat * 2) + `($(Syntax.mkNumLit (info := n.raw.getHeadInfo) (toString n'))) +``` + +Once the namespace ends, the macro is no longer used. +````lean +end ConfusingNumbers +```` + +Without opening the namespace, numeric literals function in the usual way. +```lean (name := nums1) +#eval (3, 4) +``` +```leanOutput nums1 +(3, 4) +``` + +When the namespace is open, the macro replaces {lean}`3` with {lean}`6`. +```lean (name := nums2) +open ConfusingNumbers + +#eval (3, 4) +``` +```leanOutput nums2 +(6, 4) +``` + +It is not typically useful to change the interpretation of numeric or other literals in macros. +However, scoped macros can be very useful when adding new rules to extensible tactics such as {tactic}`trivial` that work well with the contents of the namespaces but should not always be used. +::: +:::: + +Behind the scenes, a {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` command generates one macro function for each syntax kind that is matched in its quote patterns. +This function has a default case that throws the {name Lean.Macro.Exception.unsupportedSyntax}`unsupportedSyntax` exception, so further macros may be attempted. + + +A single {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` command with two rules is not always equivalent to two separate single-match commands. +First, the rules in a {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` are tried from top to bottom, but recently-declared macros are attempted first, so the order would need to be reversed. +Additionally, if an earlier rule in the macro throws the {name Lean.Macro.Exception.unsupportedSyntax}`unsupportedSyntax` exception, then the later rules are not tried; if they were instead in separate {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` commands, then they would be attempted. + +::::example "One vs. Two Sets of Macro Rules" + +```lean (show := false) +open Lean.Macro +``` + +The `arbitrary!` macro is intended to expand to some arbitrarily-determined value of a given type. + +```lean +syntax (name := arbitrary!) "arbitrary!" term:arg : term +``` + +:::keepEnv +```lean +macro_rules + | `(arbitrary! ()) => `(()) + | `(arbitrary! Nat) => `(42) + | `(arbitrary! ($t1 × $t2)) => `((arbitrary! $t1, arbitrary! $t2)) + | `(arbitrary! Nat) => `(0) +``` + +Users may extend it by defining further sets of macro rules, such as this rule for {lean}`Empty` that fails: +```lean +macro_rules + | `(arbitrary! Empty) => throwUnsupported +``` + +```lean (name := arb1) +#eval arbitrary! (Nat × Nat) +``` +```leanOutput arb1 +(42, 42) +``` +::: + +:::keepEnv +If all of the macro rules had been defined as individual cases, then the result would have instead used the later case for {lean}`Nat`. +This is because the rules in a single {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` command are checked from top to bottom, but more recently-defined {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` commands take precedence over earlier ones. + +```lean +macro_rules + | `(arbitrary! ()) => `(()) +macro_rules + | `(arbitrary! Nat) => `(42) +macro_rules + | `(arbitrary! ($t1 × $t2)) => `((arbitrary! $t1, arbitrary! $t2)) +macro_rules + | `(arbitrary! Nat) => `(0) +macro_rules + | `(arbitrary! Empty) => throwUnsupported +``` + +```lean (name := arb2) +#eval arbitrary! (Nat × Nat) +``` +```leanOutput arb2 +(0, 0) +``` +::: + +Additionally, if any rule throws the {name Lean.Macro.Exception.unsupportedSyntax}`unsupportedSyntax` exception, no further rules in that command are checked. +```lean +macro_rules + | `(arbitrary! Nat) => throwUnsupported + | `(arbitrary! Nat) => `(42) + +macro_rules + | `(arbitrary! Int) => `(42) +macro_rules + | `(arbitrary! Int) => throwUnsupported +``` + +The case for {lean}`Nat` fails to elaborate, because macro expansion did not translate the {keywordOf arbitrary!}`arbitrary!` syntax into something supported by the elaborator. +```lean (name := arb3) (error := true) +#eval arbitrary! Nat +``` +```leanOutput arb3 +elaboration function for 'arbitrary!' has not been implemented + arbitrary! Nat +``` + +The case for {lean}`Int` succeeds, because the first set of macro rules are attempted after the second throws the exception. +```lean (name := arb4) +#eval arbitrary! Int +``` +```leanOutput arb4 +42 +``` +:::: + + +### The `macro` Command +%%% +tag := "macro-command" +%%% + +```lean (show := false) +section +open Lean +``` + +The {keywordOf Lean.Parser.Command.macro}`macro` command simultaneously defines a new {tech}[syntax rule] and associates it with a {tech}[macro]. +Unlike {keywordOf Lean.Parser.Command.notation}`notation`, which can define only new term syntax and in which the expansion is a term into which the parameters are to be substituted, the {keywordOf Lean.Parser.Command.macro}`macro` command may define syntax in any {tech}[syntax category] and it may use arbitrary code in the {name}`MacroM` monad to generate the expansion. +Because macros are so much more flexible than notations, Lean cannot automatically generate an unexpander; this means that new syntax implemented via the {keywordOf Lean.Parser.Command.macro}`macro` command is available for use in _input_ to Lean, but Lean's output does not use it without further work. + +:::syntax command +```grammar +$[$_:docComment]? +$[@[$attrs,*]]? +$_:attrKind macro$[:$p]? $[(name := $_)]? $[(priority := $_)]? $xs:macroArg* : $k:ident => + $tm +``` +::: + +:::syntax Lean.Parser.Command.macroArg (open := false) +A macro's arguments are either syntax items (as used in the {keywordOf Lean.Parser.Command.syntax}`syntax` command) or syntax items with attached names. +```grammar +$s:stx +``` +```grammar +$x:ident:$stx +``` ::: -## Macro Attribute +In the expansion, the names that are attached to syntax items are bound; they have type {name Lean.TSyntax}`TSyntax` for the appropriate syntax kinds. +If the syntax matched by the parser does not have a defined kind (e.g. because the name is applied to a complex specification), then the type is {lean}`TSyntax Name.anonymous`. + +```lean (show := false) (keep := false) +-- Check the typing rules +open Lean Elab Term Macro Meta + +elab "dbg_type " e:term ";" body:term : term => do + let e' ← elabTerm e none + let t ← inferType e' + logInfoAt e t + elabTerm body none + +/-- +info: TSyntax `str +--- +info: TSyntax Name.anonymous +--- +info: Syntax.TSepArray `num "," +-/ +#guard_msgs in +macro "gah!" thing:str other:(str <|> num) arg:num,* : term => do + dbg_type thing; pure () + dbg_type other; pure () + dbg_type arg; pure () + return quote s!"{thing.raw} ||| {other.raw} ||| {arg.getElems}" + +/-- info: "(str \"\\\"one\\\"\") ||| (num \"44\") ||| #[(num \"2\"), (num \"3\")]" : String -/ +#guard_msgs in +#check gah! "one" 44 2,3 + +``` + +The documentation comment is associated with the new syntax, and the attribute kind (none, `local`, or `scoped`) governs the visibility of the macro just as it does for notations: `scoped` macros are available in the namespace in which they are defined or in any {tech}[section scope] that opens that namespace, while `local` macros are available only in the local section scope. + +Behind the scenes, the {keywordOf Lean.Parser.Command.macro}`macro` command is itself implemented by a macro that expands it to a {keywordOf Lean.Parser.Command.syntax}`syntax` command and a {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` command. +Any attributes applied to the macro command are applied to the syntax definition, but not to the {keywordOf Lean.Parser.Command.macro_rules}`macro_rules` command. + +```lean (show := false) +end +``` + +### The Macro Attribute +%%% +tag := "macro-attribute" +%%% + +{tech}[Macros] can be manually added to a syntax kind using the {keywordOf Lean.Parser.Attr.macro}`macro` attribute. +This low-level means of specifying macros is typically not useful, except as a result of code generation by macros that themselves generate macro definitions. + +:::syntax attr label:="attribute" +The {keywordOf Lean.Parser.Attr.macro}`macro` attribute specifies that a function is to be considered a {tech}[macro] for the specified syntax kind. +```grammar +macro $_:ident +``` +::: + +::::keepEnv +:::example "The Macro Attribute" +```lean (show := false) +open Lean Macro +``` +```lean +/-- Generate a list based on N syntactic copies of a term -/ +syntax (name := rep) "[" num " !!! " term "]" : term + +@[macro rep] +def expandRep : Macro + | `([ $n:num !!! $e:term]) => + let e' := Array.mkArray n.getNat e + `([$e',*]) + | _ => + throwUnsupported +``` + +Evaluating this new expression demonstrates that the macro is present. +```lean (name := attrEx1) +#eval [3 !!! "hello"] +``` +```leanOutput attrEx1 +["hello", "hello", "hello"] +``` +::: +:::: -## The `macro_rules` and `macro` commands # Elaborators %%% @@ -95,5 +1175,5 @@ tag := "elaborators" %%% :::planned 72 -For now, a quick overview of elaborators - detailed description to be written in a later revision +For now, a quick overview of term and command elaborators, with a detailed description to be written in a later revision. ::: diff --git a/Manual/NotationsMacros/Notations.lean b/Manual/NotationsMacros/Notations.lean new file mode 100644 index 0000000..4c9a616 --- /dev/null +++ b/Manual/NotationsMacros/Notations.lean @@ -0,0 +1,199 @@ +/- +Copyright (c) 2024 Lean FRO LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: David Thrane Christiansen +-/ + +import VersoManual + +import Manual.Meta + +import Lean.Parser.Command + +open Manual + +open Verso.Genre +open Verso.Genre.Manual + +set_option pp.rawOnError true + +set_option linter.unusedVariables false + +#doc (Manual) "Notations" => +%%% +tag := "notations" +%%% + +The term {deftech}_notation_ is used in two ways in Lean: it can refer to the general concept of concise ways of writing down ideas, and it is the name of a language feature that allows notations to be conveniently implemented with little code. +Like custom operators, Lean notations allow the grammar of terms to be extended with new forms. +However, notations are more general: the new syntax may freely intermix required keywords or operators with subterms, and they provide more precise control over precedence levels. +Notations may also rearrange their parameters in the resulting subterms, while infix operators provide them to the function term in a fixed order. +Because notations may define operators that use a mix of prefix, infix, and postfix components, they can be called {deftech}_mixfix_ operators. + +:::syntax command +Notations are defined using the {keywordOf Lean.Parser.Command.notation}`notation` command. + +```grammar +$[$_:docComment]? +$[$_:attributes]? +$_:attrKind notation$[:$_:prec]? $[(name := $_:ident)]? $[(priority := $_:prio)]? $[$_:notationItem]* => $_:term +``` +::: + +:::syntax Lean.Parser.Command.notationItem (open := false) +The body of a notation definition consists of a sequence of {deftech}_notation items_, which may be either string literals or identifiers with optional precedences. +```grammar +$s:str +``` +```grammar +$x:ident$[:$_:prec]? +``` +::: + +As with operator declarations, the contents of the documentation comments are shown to users while they interact with the new syntax. +Adding the {attr}`inherit_doc` attribute causes the documentation comment of the function at the head of the term into which the notation expands to be copied to the new syntax. +Other attributes may be added to invoke other compile-time metaprograms on the resulting definition. + +Notations interact with {tech}[section scopes] in the same manner as attributes and operators. +By default, notations are available in any module that transitively imports the one in which they are established, but they may be declared `scoped` or `local` to restrict their availability either to contexts in which the current namespace has been opened or to the current {tech}[section scope], respectively. + +Like operators, the {tech}[local longest-match rule] is used while parsing notations. +If more than one notation ties for the longest match, the declared priorities are used to determine which parse result applies. +If this still does not resolve the ambiguity, then all are saved, and the elaborator is expected to attempt all of them, succeeding when exactly one can be elaborated. + +Rather than a single operator with its fixity and token, the body of a notation declaration consists of a sequence of {deftech}_notation items_, which may be either new {tech}[atoms] (including both keywords such as `if`, `#eval`, or `where` and symbols such as `=>`, `+`, `↗`, `⟦`, or `⋉`) or positions for terms. +Just as they do in operators, string literals identify the placement of atoms. +Leading and trailing spaces in the strings do not affect parsing, but they cause Lean to insert spaces in the corresponding position when displaying the syntax in {tech}[proof states] and error messages. +Identifiers indicate positions where terms are expected, and name the corresponding term so it can be inserted in the notation's expansion. + +While custom operators have a single notion of precedence, there are many involved in a notation. +The notation itself has a precedence, as does each term to be parsed. +The notation's precedence determines which contexts it may be parsed in: the parser only attempts to parse productions whose precedence is at least as high as the current context. +For example, because multiplication has higher precedence than addition, the parser will attempt to parse an infix multiplication term while parsing the arguments to addition, but not vice versa. +The precedence of each term to be parsed determines which other productions may occur in them. + +If no precedence is supplied for the notation itself, the default value depends on the form of the notation. +If the notation both begins and ends with an atom (represented by string literals), then the default precedence is `max`.{TODO}[keywordOf] +This applies both to notations that consist only of a single atom and to notations with multiple items, in which the first and last items are both atoms. +Otherwise, the default precedence of the whole notation is `lead`. +If no precedence is provided for notation items that are terms, then they default to precedence `min`. + +```lean (keep := false) (show := false) + +-- Test for default precedences for notations + +/-- Parser max -/ +notation "takesMax " e:max => e +/-- Parser lead -/ +notation "takesLead " e:lead => e +/-- Parser min -/ +notation "takesMin " e:min => e + +/-- Take the first one -/ +notation e1 " <# " e2 => e1 + +/-- Take the first one in brackets! -/ +notation "<<<<<" e1 " <<# " e2 ">>>>>" => e1 + +elab "#parse_test " "[" e:term "]" : command => do + Lean.logInfoAt e (toString e) + pure () + +-- Here, takesMax vs takesLead distinguishes the notations + +/-- info: («term_<#_» (termTakesMax_ "takesMax" (num "1")) "<#" (num "2")) -/ +#guard_msgs in +#parse_test [ takesMax 1 <# 2 ] + +/-- info: (termTakesLead_ "takesLead" («term_<#_» (num "1") "<#" (num "2"))) -/ +#guard_msgs in +#parse_test [ takesLead 1 <# 2 ] + + +-- Here, takesMax vs takesLead does not distinguish the notations because both have precedence `max` + +/-- +info: (termTakesMax_ "takesMax" («term<<<<<_<<#_>>>>>» "<<<<<" (num "1") "<<#" (num "2") ">>>>>")) +-/ +#guard_msgs in +#parse_test [ takesMax <<<<< 1 <<# 2 >>>>> ] + +/-- +info: (termTakesLead_ "takesLead" («term<<<<<_<<#_>>>>>» "<<<<<" (num "1") "<<#" (num "2") ">>>>>")) +-/ +#guard_msgs in +#parse_test [ takesLead <<<<< 1 <<# 2 >>>>> ] +``` + +After the required double arrow ({keywordOf Lean.Parser.Command.notation}`=>`), the notation is provided with an expansion. +While operators are always expanded by applying their function to the operator's arguments in order, notations may place their term items in any position in the expansion. +The terms are referred to by name. +Term items may occur any number of times in the expansion. +Because notation expansion is a purely syntactic process that occurs prior to elaboration or code generation, duplicating terms in the expansion may lead to duplicated computation when the resulting term is evaluated, or even duplicated side effects when working in a monad. + +::::keepEnv +:::example "Ignored Terms in Notation Expansion" +This notation ignores its first parameter: +```lean +notation (name := ignore) "ignore " _ign:arg e:arg => e +``` + +The term in the ignored position is discarded, and Lean never attempts to elaborate it, so terms that would otherwise result in errors can be used here: +```lean (name := ignore) +#eval ignore (2 + "whatever") 5 +``` +```leanOutput ignore +5 +``` + +However, the ignored term must still be syntactically valid: +```syntaxError ignore' (category := command) +#eval ignore (2 +) 5 +``` +```leanOutput ignore' + :1:17: expected term +``` +::: +:::: + +::::keepEnv +:::example "Duplicated Terms in Notation Expansion" + +The {keywordOf dup}`dup!` notation duplicates its sub-term. + +```lean +notation (name := dup) "dup!" t:arg => (t, t) +``` + +Because the term is duplicated, it can be elaborated separately with different types: +```lean +def e : Nat × Int := dup! (2 + 2) +``` + +Printing the resulting definition demonstrates that the work of addition will be performed twice: +```lean (name := dup) +#print e +``` +```leanOutput dup +def e : Nat × Int := +(2 + 2, 2 + 2) +``` +::: +:::: + + +When the expansion consists of the application of a function defined in the global environment and each term in the notation occurs exactly once, an {tech}[unexpander] is generated. +The new notation will be displayed in {tech}[proof states], error messages, and other output from Lean when matching function application terms otherwise would have been displayed. +As with custom operators, Lean does not track whether the notation was used in the original term; it is used at every opportunity in Lean's output. + +# Operators and Notations +%%% +tag := "operators-and-notations" +%%% + +Internally, operator declarations are translated into notation declarations. +Term notation items are inserted where the operator would expect arguments, and in the corresponding positions in the expansion. +For prefix and postfix operators, the notation's precedence as well as the precedences of its term items is the operator's declared precedence. +For non-associative infix operators, the notation's precedence is the declared precedence, but both arguments are parsed at a precedence level that is one higher, which prevents successive uses of the notation without parentheses. +Associative infix operators use the operator's precedence for the notation and for one argument, while a precedence that is one level higher is used for the other argument; this prevents successive applications in one direction only. +Left-associative operators use the higher precedence for their right argument, while right-associative operators use the higher precedence for their left argument. diff --git a/Manual/NotationsMacros/Operators.lean b/Manual/NotationsMacros/Operators.lean new file mode 100644 index 0000000..0e2d5bc --- /dev/null +++ b/Manual/NotationsMacros/Operators.lean @@ -0,0 +1,339 @@ +/- +Copyright (c) 2024 Lean FRO LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: David Thrane Christiansen +-/ + +import VersoManual + +import Manual.Meta + +import Lean.Parser.Command + +open Manual + +open Verso.Genre +open Verso.Genre.Manual + +set_option pp.rawOnError true + +set_option linter.unusedVariables false + +#doc (Manual) "Custom Operators" => +%%% +tag := "operators" +%%% + +Lean supports custom infix, prefix, and postfix operators. +New operators can be added by any Lean library, and the new operators have equal status to those that are part of the language. +Each new operator is assigned an interpretation as a function, after which uses of the operator are translated into uses of the function. +The operator's translation into a function call is referred to as its {deftech}_expansion_. +If this function is a {tech}[type class] {tech}[method], then the resulting operator can be overloaded by defining instances of the class. + +All operators have a {deftech}_precedence_. +Operator precedence determines the order of operations for unparenthesized expressions: because multiplication has a higher precedence than addition, {lean}`2 + 3 * 4` is equivalent to {lean}`2 + (3 * 4)`, and {lean}`2 * 3 + 4` is equivalent to {lean}`(2 * 3) + 4`. +Infix operators additionally have an {deftech}_associativity_ that determines the meaning of a chain of operators that have the same precedence: + +: {deftech}[Left-associative] + + These operators nest to the left. + Addition is left- associative, so {lean}`2 + 3 + 4 + 5` is equivalent to {lean}`((2 + 3) + 4) + 5`. + +: {deftech}[Right-associative] + + These operators nest to the right. + The product type is right-associative, so {lean}`Nat × String × Unit × Option Int` is equivalent to {lean}`Nat × (String × (Unit × Option Int))`. + +: {deftech}[Non-associative] + + Chaining these operators is a syntax error. + Explicit parenthesization is required. + Equality is non-associative, so the following is an error: + + ```syntaxError eqs (category := term) + 1 + 2 = 3 = 2 + 1 + ``` + The parser error is: + ```leanOutput eqs + :1:10: expected end of input + ``` +::::keepEnv +:::example "Precedence for Prefix and Infix Operators" +```lean (show := false) +axiom A : Prop +axiom B : Prop +example : (¬A ∧ B = (¬A) ∧ B) = (¬A ∧ ((B = ¬A) ∧ B)) := rfl +example : (¬A ∧ B) = ((¬A) ∧ B) := rfl +``` + +The proposition {lean}`¬A ∧ B` is equivalent to {lean}`(¬A) ∧ B`, because `¬` has a higher precedence than `∧`. +Because `∧` has higher precedence than `=` and is right-associative, {lean}`¬A ∧ B = (¬A) ∧ B` is equivalent to {lean}`¬A ∧ ((B = ¬A) ∧ B)`. +::: +:::: + +Lean provides commands for defining new operators: +:::syntax command +Non-associative infix operators are defined using {keywordOf Lean.Parser.Command.mixfix}`infix`: +```grammar +$[$_:docComment]? +$[$_:attributes]? +$_:attrKind infix:$_ $[(name := $x)]? $[(priority := $_:prio)]? $s:str => $t:term +``` + +Left-associative infix operators are defined using {keywordOf Lean.Parser.Command.mixfix}`infixl`: +```grammar +$[$_:docComment]? +$[$_:attributes]? +$_:attrKind infixl:$_ $[(name := $x)]? $[(priority := $_:prio)]? $s:str => $t:term +``` + +Right-associative infix operators are defined using {keywordOf Lean.Parser.Command.mixfix}`infixr`: +```grammar +$[$_:docComment]? +$[$_:attributes]? +$_:attrKind infixr:$_ $[(name := $x)]? $[(priority := $_:prio)]? $s:str => $t:term +``` + +Prefix operators are defined using {keywordOf Lean.Parser.Command.mixfix}`prefix`: +```grammar +$[$_:docComment]? +$[$_:attributes]? +$_:attrKind prefix:$_ $[(name := $x)]? $[(priority := $_:prio)]? $s:str => $t:term +``` + +Postfix operators are defined using {keywordOf Lean.Parser.Command.mixfix}`postfix`: +```grammar +$[$_:docComment]? +$[$_:attributes]? +$_:attrKind postfix:$_ $[(name := $x)]? $[(priority := $_:prio)]? $s:str => $t:term +``` +::: + +Each of these commands may be preceded by {tech}[documentation comments] and {tech}[attributes]. +The documentation comment is shown when the user hovers their mouse over the operator, and attributes may invoke arbitrary metaprograms, just as for any other declaration. +The attribute {attr}`inherit_doc` causes the documentation of the function that implements the operator to be re-used for the operator itself. + +Operators interact with {tech}[section scopes] in the same manner as attributes. +By default, operators are available in any module that transitively imports the one in which they are established, but they may be declared `scoped` or `local` to restrict their availability either to contexts in which the current namespace has been opened or to the current {tech}[section scope], respectively. + +Custom operators require a {ref "precedence"}[precedence] specifier, following a colon. +There is no default precedence to fall back to for custom operators. + +Operators may be explicitly named. +This name denotes the extension to Lean's syntax, and is primarily used for metaprogramming. +If no name is explicitly provided, then Lean generates one based on the operator. +The specifics of the assignment of this name should not be relied upon, both because the internal name assignment algorithm may change and because the introduction of similar operators in upstream dependencies may lead to a clash, in which case Lean will modify the assigned name until it is unique. + +::::keepEnv +:::example "Assigned Operator Names" +Given this infix operator: +```lean +infix:90 " ⤴ " => Option.getD +``` +the internal name {name}`«term_⤴_»` is assigned to the resulting parser extension. +::: +:::: + +::::keepEnv +:::example "Provided Operator Names" +Given this infix operator: +```lean +infix:90 (name := getDOp) " ⤴ " => Option.getD +``` +the resulting parser extension is named {name}`getDOp`. +::: +:::: + +::::keepEnv +:::example "Inheriting Documentation" +Given this infix operator: +```lean +@[inherit_doc] +infix:90 " ⤴ " => Option.getD +``` +the resulting parser extension has the same documentation as {name}`Option.getD`. +::: +:::: + + + +When multiple operators are defined that share the same syntax, Lean's parser attempts all of them. +If more than one succeed, the one that used the most input is selected—this is called the {deftech}_local longest-match rule_. +In some cases, parsing multiple operators may succeed, all of them covering the same range of the input. +In these cases, the operator's {tech}[priority] is used to select the appropriate result. +Finally, if multiple operators with the same priority tie for the longest match, the parser saves all of the results, and the elaborator attempts each in turn, failing if elaboration does not succeed on exactly one of them. + +:::::keepEnv + +::::example "Ambiguous Operators and Priorities" + +:::keepEnv +Defining an alternative implementation of `+` as {lean}`Or` requires only an infix operator declaration. +```lean +infix:65 " + " => Or +``` + +With this declaration, Lean attempts to elaborate addition both using the built-in syntax for {name}`HAdd.hAdd` and the new syntax for {lean}`Or`: +```lean (name := trueOrFalse1) +#check True + False +``` +```leanOutput trueOrFalse1 +True + False : Prop +``` +```lean (name := twoPlusTwo1) +#check 2 + 2 +``` +```leanOutput twoPlusTwo1 +2 + 2 : Nat +``` + +However, because the new operator is not associative, the {tech}[local longest-match rule] means that only {name}`HAdd.hAdd` applies to an unparenthesized three-argument version: +```lean (error := true) (name := trueOrFalseOrTrue1) +#check True + False + True +``` +```leanOutput trueOrFalseOrTrue1 +failed to synthesize + HAdd Prop Prop ?m.38 +Additional diagnostic information may be available using the `set_option diagnostics true` command. +``` + +::: + +:::keepEnv +If the infix operator is declared with high priority, then Lean does not try the built-in {name}`HAdd.hAdd` operator in ambiguous cases: +```lean +infix:65 (priority := high) " + " => Or +``` + +```lean (name := trueOrFalse2) +#check True + False +``` +```leanOutput trueOrFalse2 +True + False : Prop +``` +```lean (name := twoPlusTwo2) (error := true) +#check 2 + 2 +``` +```leanOutput twoPlusTwo2 +failed to synthesize + OfNat Prop 2 +numerals are polymorphic in Lean, but the numeral `2` cannot be used in a context where the expected type is + Prop +due to the absence of the instance above +Additional diagnostic information may be available using the `set_option diagnostics true` command. +``` + +The new operator is not associative, so the {tech}[local longest-match rule] means that only {name}`HAdd.hAdd` applies to the three-argument version: +```lean (error := true) (name := trueOrFalseOrTrue2) +#check True + False + True +``` +```leanOutput trueOrFalseOrTrue2 +failed to synthesize + HAdd Prop Prop ?m.20 +Additional diagnostic information may be available using the `set_option diagnostics true` command. +``` +::: + +:::: +::::: + + +The actual operator is provided as a string literal. +The new operator must satisfy the following requirements: + * It must contain at least one character. + * The first character may not be a single or double quote (`'` or `"`), unless the operator is `''`. + * It may not begin with a backtick (`````) followed by a character that would be a valid prefix of a quoted name. + * It may not begin with a digit. + * It may not include internal whitespace. + +The operator string literal may begin or end with a space. +These are not part of the operator's syntax, and their presence does not require spaces around uses of the operator. +However, the presence of spaces cause Lean to insert spaces when showing the operator to the user. +Omitting them causes the operator's arguments to be displayed immediately next to the operator itself. + + +:::keepEnv +```lean (show := false) +-- Test claim about internal whitespace in preceding paragraph +infix:99 " <<<< >>>> " => Nat.add + +/-- info: 67 -/ +#guard_msgs in +#eval 12 <<<< >>>> 55 + +-- Test claim about internal whitespace +infix:99 " <<<< >>>> " => Nat.mul + +/-- info: 660 -/ +#guard_msgs in +#eval 12 <<<< >>>> 55 + +--- Test negative claims about allowed atoms +/-- +error: invalid atom +--- +error: invalid syntax node kind 'bogus' +-/ +#guard_msgs in +infix:9 (name := bogus) "" => Nat.mul + +#guard_msgs in +infix:9 (name := nonbogus) " ` " => Nat.mul + +/-- +error: invalid atom +--- +error: invalid syntax node kind 'bogus' +-/ +#guard_msgs in +infix:9 (name := bogus) "`a" => Nat.mul + +``` +::: + +Finally, the operator's meaning is provided, separated from the operator by {keywordOf Lean.Parser.Command.mixfix}`=>`. +This may be any Lean term. +Uses of the operator are desugared into function applications, with the provided term in the function position. +Prefix and postfix operators apply the term to their single argument as an explicit argument. +Infix operators apply the term to the left and right arguments, in that order. +Other than its ability to accept arguments at each call site, there are no specific requirements imposed on the term. +Operators may construct functions, so the term may expect more parameters than the operator. +Implicit and {tech}[instance-implicit] parameters are resolved at each application site, which allows the operator to be defined by a {tech}[type class] {tech}[method]. + +```lean (show := false) (keep := false) +-- Double-check claims about operators above +prefix:max "blah" => Nat.add +#check (blah 5) +``` + +If the term consists either of a name from the global environment or of an application of such a name to one or more arguments, then Lean automatically generates an {tech}[unexpander] for the operator. +This means that the operator will be displayed in {tech}[proof states], error messages, and other output from Lean when the function term otherwise would have been displayed. +Lean does not track whether the operator was used in the original term; it is inserted at every opportunity. + +:::::keepEnv +::::example "Custom Operators in Lean's Output" +The function {lean}`perhapsFactorial` computes a factorial for a number if it's not too large. +```lean +def fact : Nat → Nat + | 0 => 1 + | n+1 => (n + 1) * fact n + +def perhapsFactorial (n : Nat) : Option Nat := + if n < 8 then some (fact n) else none +``` + +The postfix interrobang operator can be used to represent it. +```lean +postfix:90 "‽" => perhapsFactorial +``` + +When attempting to prove that {lean}`∀ n, n ≥ 8 → (perhapsFactorial n).isNone`, the initial proof state uses the new operator, even though the theorem as written does not: +```proofState +∀ n, n ≥ 8 → (perhapsFactorial n).isNone := by skip +/-- +⊢ ∀ (n : Nat), n ≥ 8 → n‽.isNone = true +-/ + +``` +:::: +::::: diff --git a/Manual/NotationsMacros/Precedence.lean b/Manual/NotationsMacros/Precedence.lean new file mode 100644 index 0000000..9a41f0f --- /dev/null +++ b/Manual/NotationsMacros/Precedence.lean @@ -0,0 +1,70 @@ +/- +Copyright (c) 2024 Lean FRO LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: David Thrane Christiansen +-/ + +import VersoManual + +import Manual.Meta + +import Lean.Parser.Command + +open Manual + +open Verso.Genre +open Verso.Genre.Manual + +set_option pp.rawOnError true + +set_option linter.unusedVariables false + +#doc (Manual) "Precedence" => +%%% +tag := "precedence" +%%% + +Infix operators, notations, and other syntactic extensions to Lean make use of explicit {tech}[precedence] annotations. +While precedences in Lean can technically be any natural number, by convention they range from {evalPrec}`min` to {evalPrec}`max`, respectively denoted `min` and `max`.{TODO}[Fix the keywordOf operator and use it here] +Function application has the highest precedence. + +:::syntax prec (open := false) +Most operator precedences consist of explicit numbers. +The named precedence levels denote the outer edges of the range, close to the minimum or maximum, and are typically used by more involved syntax extensions. +```grammar +$n:num +``` + +Precedences may also be denoted as sums or differences of precedences; these are typically used to assign precedences that are relative to one of the named precedences. +```grammar +$p + $p +``` +```grammar +$p - $p +``` +```grammar +($p) +``` + +The maximum precedence is used to parse terms that occur in a function position. +Operators should typically not use use this level, because it can interfere with users' expectation that function application binds more tightly than any other operator, but it is useful in more involved syntax extensions to indicate how other constructs interact with function application. +```grammar +max +``` + +Argument precedence is one less than the maximum precedence. +This level is useful for defining syntax that should be treated as an argument to a function, such as {keywordOf Lean.Parser.Term.fun}`fun` or {keywordOf Lean.Parser.Term.do}`do`. +```grammar +arg +``` + +Lead precedence is less that argument precedence, and should be used for custom syntax that should not occur as a function argument, such as {keywordOf Lean.Parser.Term.let}`let`. +```grammar +lead +``` + +The minimum precedence can be used to ensure that an operator binds less tightly than all other operators. +```grammar +min +``` +::: diff --git a/Manual/NotationsMacros/SyntaxDef.lean b/Manual/NotationsMacros/SyntaxDef.lean new file mode 100644 index 0000000..dda147c --- /dev/null +++ b/Manual/NotationsMacros/SyntaxDef.lean @@ -0,0 +1,797 @@ +/- +Copyright (c) 2024 Lean FRO LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: David Thrane Christiansen +-/ + +import VersoManual + +import Manual.Meta + +import Lean.Parser.Command + +open Manual + +open Verso.Genre +open Verso.Genre.Manual + +set_option pp.rawOnError true + +set_option linter.unusedVariables false + +#doc (Manual) "Defining New Syntax" => + +Lean's uniform representation of syntax is very general and flexible. +This means that extensions to Lean's parser do not require extensions to the representation of parsed syntax. + +# Syntax Model + +Lean's parser produces a concrete syntax tree, of type {name}`Lean.Syntax`. +{name}`Lean.Syntax` is an inductive type that represents all of Lean's syntax, including commands, terms, tactics, and any custom extensions. +All of these are represented by a few basic building blocks: + +: {deftech}[Atoms] + + Atoms are the fundamental terminals of the grammar, including literals (such as those for characters and numbers), parentheses, operators, and keywords. + +: {deftech}[Identifiers] + + :::keepEnv + ```lean (show := false) + variable {α : Type u} + variable {x : α} + ``` + Identifiers represent names, such as {lean}`x`, {lean}`Nat`, or {lean}`Nat.add`. + Identifier syntax includes a list of pre-resolved names that the identifier might refer to. + ::: + +: {deftech}[Nodes] + + Nodes represent the parsing of nonterminals. + Nodes contain a {deftech}_syntax kind_, which identifies the syntax rule that the node results from, along with an array of child {name Lean.Syntax}`Syntax` values. + +: Missing Syntax + + When the parser encounters an error, it returns a partial result, so Lean can provide some feedback about partially-written programs or programs that contain mistakes. + Partial results contain one or more instances of missing syntax. + +Atoms and identifiers are collectively referred to as {deftech}_tokens_. + +{docstring Lean.Syntax} + +{docstring Lean.Syntax.Preresolved} + +# Syntax Node Kinds + +Syntax node kinds typically identify the parser that produced the node. +This is one place where the names given to operators or notations (or their automatically-generated internal names) occur. +While only nodes contain a field that identifies their kind, identifiers have the kind {name Lean.identKind}`identKind` by convention, while atoms have their internal string as their kind by convention. +Lean's parser wraps each keyword atom `KW` in a singleton node whose kind is ```token.KW``. +The kind of a syntax value can be extracted using {name Lean.Syntax.getKind}`Syntax.getKind`. + +{docstring Lean.SyntaxNodeKind} + +{docstring Lean.Syntax.isOfKind} + +{docstring Lean.Syntax.getKind} + +{docstring Lean.Syntax.setKind} + +# Token and Literal Kinds + +A number of named kinds are associated with the basic tokens produced by the parser. +Typically, single-token syntax productions consist of a {name Lean.Syntax.node}`node` that contains a single {name Lean.Syntax.atom}`atom`; the kind saved in the node allows the value to be recognized. +Atoms for literals are not interpreted by the parser: string atoms include their leading and trailing double-quote characters along with any escape sequences contained within, and hexadecimal numerals are saved as a string that begins with {lean}`"0x"`. +{ref "typed-syntax-helpers"}[Helpers] such as {name}`Lean.TSyntax.getString` are provided to perform this decoding on demand. + +```lean (show := false) (keep := false) +-- Verify claims about atoms and nodes +open Lean in +partial def noInfo : Syntax → Syntax + | .node _ k children => .node .none k (children.map noInfo) + | .ident _ s x pre => .ident .none s x pre + | .atom _ s => .atom .none s + | .missing => .missing +/-- +info: Lean.Syntax.node (Lean.SourceInfo.none) `num #[Lean.Syntax.atom (Lean.SourceInfo.none) "0xabc123"] +-/ +#guard_msgs in +#eval noInfo <$> `(term|0xabc123) + +/-- +info: Lean.Syntax.node (Lean.SourceInfo.none) `str #[Lean.Syntax.atom (Lean.SourceInfo.none) "\"ab\\tc\""] +-/ +#guard_msgs in +#eval noInfo <$> `(term|"ab\tc") +``` + +{docstring Lean.identKind} + +{docstring Lean.strLitKind} + +{docstring Lean.interpolatedStrKind} + +{docstring Lean.interpolatedStrLitKind} + +{docstring Lean.charLitKind} + +{docstring Lean.numLitKind} + +{docstring Lean.scientificLitKind} + +{docstring Lean.nameLitKind} + +{docstring Lean.fieldIdxKind} + +# Internal Kinds + +{docstring Lean.groupKind} + +{docstring Lean.nullKind} + +{docstring Lean.choiceKind} + +{docstring Lean.hygieneInfoKind} + +# Source Positions +%%% +tag := "source-info" +%%% + +Atoms, identifiers, and nodes optionally contain {deftech}[source information] that tracks their correspondence with the original file. +The parser saves source information for all tokens, but not for nodes; position information for parsed nodes is reconstructed from their first and last tokens. +Not all {name Lean.Syntax}`Syntax` data results from the parser: it may be the result of {tech}[macro expansion], in which case it typically contains a mix of generated and parsed syntax, or it may be the result of {tech key:="delaborate"}[delaborating] an internal term to display it to a user. +In these use cases, nodes may themselves contain source information. + +Source information comes in two varieties: + +: {deftech}[Original] + + Original source information comes from the parser. + In addition to the original source location, it also contains leading and trailing whitespace that was skipped by the parser, which allows the original string to be reconstructed. + This whitespace is saved as offsets into the string representation of the original source code (that is, as {name}`Substring`) to avoid having to allocate copies of substrings. + +: {deftech}[Synthetic] + + Synthetic source information comes from metaprograms (including macros) or from Lean's internals. + Because there is no original string to be reconstructed, it does not save leading and trailing whitespace. + Synthetic source positions are used to provide accurate feedback even when terms have been automatically transformed, as well as to track the correspondence between elaborated expressions and their presentation in Lean's output. + A synthetic position may be marked {deftech}_canonical_, in which case some operations that would ordinarily ignore synthetic positions will treat it as if it were not. + +{docstring Lean.SourceInfo} + +# Inspecting Syntax + +```lean (show := false) +section Inspecting +open Lean +``` + +There are three primary ways to inspect {lean}`Syntax` values: + + : The {lean}`Repr` Instance + + The {lean}`Repr Syntax` instance produces a very detailed representation of syntax in terms of the constructors of the {lean}`Syntax` type. + + : The {lean}`ToString` Instance + + The {lean}`ToString Syntax` instance produces a compact view, representing certain syntax kinds with particular conventions that can make it easier to read at a glance. + This instance suppresses source position information. + + : The Pretty Printer + + Lean's pretty printer attempts to render the syntax as it would look in a source file, but fails if the nesting structure of the syntax doesn't match the expected shape. + +::::keepEnv +:::example "Representing Syntax as Constructors" +The {name}`Repr` instance's representation of syntax can be inspected by quoting it in the context of {keywordOf Lean.Parser.Command.eval}`#eval`, which can run actions in the command elaboration monad {name Lean.Elab.Command.CommandElabM}`CommandElabM`. +To reduce the size of the example output, the helper {lean}`removeSourceInfo` is used to remove source information prior to display. +```lean +partial def removeSourceInfo : Syntax → Syntax + | .atom _ str => .atom .none str + | .ident _ str x pre => .ident .none str x pre + | .node _ k children => .node .none k (children.map removeSourceInfo) + | .missing => .missing +``` + +```lean (name := reprStx1) +#eval do + let stx ← `(2 + $(⟨.missing⟩)) + logInfo (repr (removeSourceInfo stx.raw)) +``` +```leanOutput reprStx1 +Lean.Syntax.node + (Lean.SourceInfo.none) + `«term_+_» + #[Lean.Syntax.node (Lean.SourceInfo.none) `num #[Lean.Syntax.atom (Lean.SourceInfo.none) "2"], + Lean.Syntax.atom (Lean.SourceInfo.none) "+", Lean.Syntax.missing] +``` + +In the second example, {tech}[macro scopes] inserted by quotation are visible on the call to {name}`List.length`. +```lean (name := reprStx2) +#eval do + let stx ← `(List.length ["Rose", "Daffodil", "Lily"]) + logInfo (repr (removeSourceInfo stx.raw)) +``` +The contents of the {tech}[pre-resolved identifier] {name}`List.length` are visible here: +```leanOutput reprStx2 +Lean.Syntax.node + (Lean.SourceInfo.none) + `Lean.Parser.Term.app + #[Lean.Syntax.ident + (Lean.SourceInfo.none) + "List.length".toSubstring + (Lean.Name.mkNum `List.length._@.Manual.NotationsMacros.SyntaxDef._hyg 2) + [Lean.Syntax.Preresolved.decl `List.length [], Lean.Syntax.Preresolved.namespace `List.length], + Lean.Syntax.node + (Lean.SourceInfo.none) + `null + #[Lean.Syntax.node + (Lean.SourceInfo.none) + `«term[_]» + #[Lean.Syntax.atom (Lean.SourceInfo.none) "[", + Lean.Syntax.node + (Lean.SourceInfo.none) + `null + #[Lean.Syntax.node (Lean.SourceInfo.none) `str #[Lean.Syntax.atom (Lean.SourceInfo.none) "\"Rose\""], + Lean.Syntax.atom (Lean.SourceInfo.none) ",", + Lean.Syntax.node (Lean.SourceInfo.none) `str #[Lean.Syntax.atom (Lean.SourceInfo.none) "\"Daffodil\""], + Lean.Syntax.atom (Lean.SourceInfo.none) ",", + Lean.Syntax.node (Lean.SourceInfo.none) `str #[Lean.Syntax.atom (Lean.SourceInfo.none) "\"Lily\""]], + Lean.Syntax.atom (Lean.SourceInfo.none) "]"]]] +``` +::: +:::: + +The {name}`ToString` instance represents the constructors of {name}`Syntax` as follows: + + * The {name Syntax.ident}`ident` constructor is represented as the underlying name. Source information and pre-resolved names are not shown. + * The {name Syntax.atom}`atom` constructor is represented as a string. + * The {name Syntax.missing}`missing` constructor is represented by ` `. + * The representation of the {name Syntax.node}`node` constructor depends on the kind. + If the kind is ```null``, then the node is represented by its child nodes order in square brackets. + Otherwise, the node is represented by its kind followed by its child nodes, both surrounded by parentheses. + +:::example "Syntax as Strings" +The string representation of syntax can be inspected by quoting it in the context of {keywordOf Lean.Parser.Command.eval}`#eval`, which can run actions in the command elaboration monad {name Lean.Elab.Command.CommandElabM}`CommandElabM`. + +```lean (name := toStringStx1) +#eval do + let stx ← `(2 + $(⟨.missing⟩)) + logInfo (toString stx) +``` +```leanOutput toStringStx1 +(«term_+_» (num "2") "+" ) +``` + +In the second example, {tech}[macro scopes] inserted by quotation are visible on the call to {name}`List.length`. +```lean (name := toStringStx2) +#eval do + let stx ← `(List.length ["Rose", "Daffodil", "Lily"]) + logInfo (toString stx) +``` +```leanOutput toStringStx2 +(Term.app + `List.length._@.Manual.NotationsMacros.SyntaxDef._hyg.2 + [(«term[_]» "[" [(str "\"Rose\"") "," (str "\"Daffodil\"") "," (str "\"Lily\"")] "]")]) +``` +::: + +Pretty printing syntax is typically most useful when including it in a message to a user. +Normally, Lean automatically invokes the pretty printer when necessary. +However, {name}`ppTerm` can be explicitly invoked if needed. + +::::keepEnv +:::example "Pretty-Printed Syntax" +```lean (show := false) +open Lean Elab Command +``` + +The string representation of syntax can be inspected by quoting it in the context of {keywordOf Lean.Parser.Command.eval}`#eval`, which can run actions in the command elaboration monad {name Lean.Elab.Command.CommandElabM}`CommandElabM`. +Because new syntax declarations also equip the pretty printer with instructions for displaying them, the pretty printer requires a configuration object. +This context can be constructed with a helper: +```lean +def getPPContext : CommandElabM PPContext := do + return { + env := (← getEnv), + opts := (← getOptions), + currNamespace := (← getCurrNamespace), + openDecls := (← getOpenDecls) + } +``` + +```lean (name := ppStx1) +#eval show CommandElabM Unit from do + let stx ← `(2 + 5) + let fmt ← ppTerm (← getPPContext) stx + logInfo fmt +``` +```leanOutput ppStx1 +2 + 5 +``` + +In the second example, the {tech}[macro scopes] inserted on {name}`List.length` by quotation cause it to be displayed with a dagger (`✝`). +```lean (name := ppStx2) +#eval do + let stx ← `(List.length ["Rose", "Daffodil", "Lily"]) + let fmt ← ppTerm (← getPPContext) stx + logInfo fmt +``` +```leanOutput ppStx2 +List.length✝ ["Rose", "Daffodil", "Lily"] +``` + +Pretty printing wraps lines and inserts indentation automatically. +A {tech}[coercion] typically converts the pretty printer's output to the type expected by {name}`logInfo`, using a default layout width. +The width can be controlled by explicitly calling {name Std.Format.pretty}`pretty` with a named argument. +```lean (name := ppStx3) +#eval do + let flowers := #["Rose", "Daffodil", "Lily"] + let manyFlowers := flowers ++ flowers ++ flowers + let stx ← `(List.length [$(manyFlowers.map (quote (k := `term))),*]) + let fmt ← ppTerm (← getPPContext) stx + logInfo (fmt.pretty (width := 40)) +``` +```leanOutput ppStx3 +List.length✝ + ["Rose", "Daffodil", "Lily", "Rose", + "Daffodil", "Lily", "Rose", + "Daffodil", "Lily"] +``` +::: + + +:::: + +```lean (show := false) +end Inspecting +``` + +# Typed Syntax + +Syntax may additionally be annotated with a type that specifies which {tech}[syntax category] it belongs to. +{TODO}[Describe the problem here - complicated invisible internal invariants leading to weird error msgs] +The {name Lean.TSyntax}`TSyntax` structure contains a type-level list of syntax categories along with a syntax tree. +The list of syntax categories typically contains precisely one element, in which case the list structure itself is not shown. + +{docstring Lean.TSyntax} + +{tech}[Quasiquotations] prevent the substitution of typed syntax that does not come from the correct syntactic category. +For many of Lean's built-in syntactic categories, there is a set of {tech}[coercions] that appropriately wrap one kind of syntax for another category, such as a coercion from the syntax of string literals to the syntax of terms. +Additionally, many helper functions that are only valid on some syntactic categories are defined for the appropriate typed syntax only. + +```lean (show := false) +/-- info: instCoeHTCTOfCoeHTC -/ +#guard_msgs in +open Lean in +#synth CoeHTCT (TSyntax `str) (TSyntax `term) +``` + +The constructor of {name Lean.TSyntax}`TSyntax` is public, and nothing prevents users from constructing values that break internal invariants. +The use of {name Lean.TSyntax}`TSyntax` should be seen as a way to reduce common mistakes, rather than rule them out entirely. + + +In addition to {name Lean.TSyntax}`TSyntax`, there are types that represent arrays of syntax, with or without separators. +These correspond to {TODO}[xref] repeated elements in syntax declarations or antiquotations. + +{docstring Lean.TSyntaxArray} + +{docstring Lean.Syntax.TSepArray} + +# Aliases + +A number of aliases are provided for commonly-used typed syntax varieties. +These aliases allow code to be written at a higher level of abstraction. + +{docstring Lean.Term} + +{docstring Lean.Command} + +{docstring Lean.Syntax.Level} + +{docstring Lean.Syntax.Tactic} + +{docstring Lean.Prec} + +{docstring Lean.Prio} + +{docstring Lean.Ident} + +{docstring Lean.StrLit} + +{docstring Lean.CharLit} + +{docstring Lean.NameLit} + +{docstring Lean.NumLit} + +{docstring Lean.ScientificLit} + +{docstring Lean.HygieneInfo} + +# Helpers for Typed Syntax +%%% +tag := "typed-syntax-helpers" +%%% + +For literals, Lean's parser produces a singleton node that contains an {name Lean.Syntax.atom}`atom`. +The inner atom contains a string with source information, while the node's kind specifies how the atom is to be interpreted. +This may involve decoding string escape sequences or interpreting base-16 numeric literals. +The helpers in this section perform the correct interpretation. + +{docstring Lean.TSyntax.getId} + +{docstring Lean.TSyntax.getName} + +{docstring Lean.TSyntax.getNat} + +{docstring Lean.TSyntax.getScientific} + +{docstring Lean.TSyntax.getString} + +{docstring Lean.TSyntax.getChar} + +{docstring Lean.TSyntax.getHygieneInfo} + +# Syntax Categories +%%% +tag := "syntax-categories" +%%% + +Lean's parser contains a table of {deftech}_syntax categories_, which correspond to nonterminals in a context-free grammar. +Some of the most important categories are terms, commands, universe levels, priorities, precedences, and the categories that represent tokens such as literals. +Typically, each {tech}[syntax kind] corresponds to a category. +New categories can be declared using {keywordOf Lean.Parser.Command.syntaxCat}`declare_syntax_cat`. + +:::syntax command +Declares a new syntactic category. + +```grammar +$[$_:docComment]? +declare_syntax_cat $_ $[(behavior := $_)]? +``` +::: + +The leading identifier behavior is an advanced feature that usually does not need to be modified. +It controls the behavior of the parser when it encounters an identifier, and can sometimes cause the identifier to be treated as a non-reserved keyword instead. +This is used to avoid turning the name of every {ref "tactics"}[tactic] into a reserved keyword. + +{docstring Lean.Parser.LeadingIdentBehavior} + +# Syntax Rules + +Each {tech}[syntax category] is associated with a set of {deftech}_syntax rules_, which correspond to productions in a context-free grammar. +Syntax rules can be defined using the {keywordOf Lean.Parser.Command.syntax}`syntax` command. + +:::syntax command +```grammar +$[$_:docComment]? +$[$_:attributes]? +$_:attrKind +syntax$[:$p]? $[(name := $x)]? $[(priority := $p)]? $_* : $c +``` +::: + +As with operator and notation declarations, the contents of the documentation comments are shown to users while they interact with the new syntax. +Attributes may be added to invoke compile-time metaprograms on the resulting definition. + +Syntax rules interact with {tech}[section scopes] in the same manner as attributes, operators, and notations. +By default, syntax rules are available to the parser in any module that transitively imports the one in which they are established, but they may be declared `scoped` or `local` to restrict their availability either to contexts in which the current namespace has been opened or to the current {tech}[section scope], respectively. + +When multiple syntax rules for a category can match the current input, the {tech}[local longest-match rule] is used to select one of them. +Like notations and operators, if there is a tie for the longest match then the declared priorities are used to determine which parse result applies. +If this still does not resolve the ambiguity, then all the results that tied are saved. +The elaborator is expected to attempt all of them, succeeding when exactly one can be elaborated. + +The syntax rule's precedence, written immediately after the {keywordOf Lean.Parser.Command.syntax}`syntax` keyword, restricts the parser to use this new syntax only when the precedence context is at least the provided value. +{TODO}[Default precedence] +Just as with operators and notations, syntax rules may be manually provided with a name; if they are not, an otherwise-unused name is generated. +Whether provided or generated, this name is used as the syntax kind in the resulting {name Lean.Syntax.node}`node`. + +The body of a syntax declaration is even more flexible than that of a notation. +String literals specify atoms to match. +Subterms may be drawn from any syntax category, rather than just terms, and they may be optional or repeated, with or without interleaved comma separators. +Identifiers in syntax rules indicate syntax categories, rather than naming subterms as they do in notations. + + +Finally, the syntax rule specifies which syntax category it extends. +It is an error to declare a syntax rule in a nonexistent category. + +```lean (show := false) +-- verify preceding para +/-- error: unknown category 'nuhUh' -/ +#guard_msgs in +syntax "blah" : nuhUh +``` + + +:::syntax stx (open := false) +The syntactic category `stx` is the grammar of specifiers that may occur in the body of a {keywordOf Lean.Parser.Command.syntax}`syntax` command. + +String literals are parsed as {tech}[atoms] (including both keywords such as `if`, `#eval`, or `where`): +```grammar +$s:str +``` +Leading and trailing spaces in the strings do not affect parsing, but they cause Lean to insert spaces in the corresponding position when displaying the syntax in {tech}[proof states] and error messages. +Ordinarily, valid identifiers occurring as atoms in syntax rules become reserved keywords. +Preceding a string literal with an ampersand (`&`) suppresses this behavior: +```grammar +&$s:str +``` + +Identifiers specify the syntactic category expected in a given position, and may optionally provide a precedence:{TODO}[Default prec here?] +```grammar +$x:ident$[:$p]? +``` + +The `*` modifier is the Kleene star, matching zero or more repetitions of the preceding syntax. +It can also be written using `many`. +```grammar +$s:stx * +``` +The `+` modifier matches one or more repetitions of the preceding syntax. +It can also be written using `many1`. +```grammar +$s:stx + +``` +The `?` modifier makes a subterm optional, and matches zero or one, but not more, repetitions of the preceding syntax. +It can also be written as `optional`. +```grammar +$s:stx ? +``` +```grammar +optional($s:stx) +``` + +The `,*` modifier matches zero or more repetitions of the preceding syntax with interleaved commas. +It can also be written using `sepBy`. +```grammar +$_:stx ,* +``` + +The `,+` modifier matches one or more repetitions of the preceding syntax with interleaved commas. +It can also be written using `sepBy1`. +```grammar +$_:stx ,+ +``` + +The `,*,?` modifier matches zero or more repetitions of the preceding syntax with interleaved commas, allowing an optional trailing comma after the final repetition. +It can also be written using `sepBy` with the `allowTrailingSep` modifier. +```grammar +$_:stx ,*,? +``` + +The `,+,?` modifier matches one or more repetitions of the preceding syntax with interleaved commas, allowing an optional trailing comma after the final repetition. +It can also be written using `sepBy1` with the `allowTrailingSep` modifier. +```grammar +$_:stx ,+,? +``` + +The `<|>` operator, which can be written `orelse`, matches either syntax. +However, if the first branch consumes any tokens, then it is committed to, and failures will not be backtracked: +```grammar +$_:stx <|> $_:stx +``` +```grammar +orelse($_:stx, $_:stx) +``` + +The `!` operator matches the complement of its argument. +If its argument fails, then it succeeds, resetting the parsing state. +```grammar +! $_:stx +``` + +Syntax specifiers may be grouped using parentheses. +```grammar +($_:stx) +``` + +Repetitions may be defined using `many` and `many1`. +The latter requires at least one instance of the repeated syntax. +```grammar +many($_:stx) +``` +```grammar +many1($_:stx) +``` + +Repetitions with separators may be defined using `sepBy` and `sepBy1`, which respectively match zero or more occurrences and one or more occurrences, separated by some other syntax. +They come in three varieties: + * The two-parameter version uses the atom provided in the string literal to parse the separators, and does not allow trailing separators. + * The three-parameter version uses the third parameter to parse the separators, using the atom for pretty-printing. + * The four-parameter version optionally allows the separator to occur an extra time at the end of the sequence. + The fourth argument must always literally be the keyword `allowTrailingSep`. + +```grammar +sepBy($_:stx, $_:str) +``` +```grammar +sepBy($_:stx, $_:str, $_:stx) +``` +```grammar +sepBy($_:stx, $_:str, $_:stx, allowTrailingSep) +``` +```grammar +sepBy1($_:stx, $_:str) +``` +```grammar +sepBy1($_:stx, $_:str, $_:stx) +``` +```grammar +sepBy1($_:stx, $_:str, $_:stx, allowTrailingSep) +``` +::: + +::::keepEnv +:::example "Parsing Matched Parentheses and Brackets" + +A language that consists of matched parentheses and brackets can be defined using syntax rules. +The first step is to declare a new {tech}[syntax category]: +```lean +declare_syntax_cat balanced +``` +Next, rules can be added for parentheses and square brackets. +To rule out empty strings, the base cases consist of empty pairs. +```lean +syntax "(" ")" : balanced +syntax "[" "]" : balanced +syntax "(" balanced ")" : balanced +syntax "[" balanced "]" : balanced +syntax balanced balanced : balanced +``` + +In order to invoke Lean's parser on these rules, there must also be an embedding from the new syntax category into one that may already be parsed: +```lean +syntax (name := termBalanced) "balanced " balanced : term +``` + +These terms cannot be elaborated, but reaching an elaboration error indicates that parsing succeeded: +```lean +/-- +error: elaboration function for 'termBalanced' has not been implemented + balanced () +-/ +#guard_msgs in +example := balanced () + +/-- +error: elaboration function for 'termBalanced' has not been implemented + balanced [] +-/ +#guard_msgs in +example := balanced [] + +/-- +error: elaboration function for 'termBalanced' has not been implemented + balanced [[]()([])] +-/ +#guard_msgs in +example := balanced [[] () ([])] +``` + +Similarly, parsing fails when they are mismatched: +```syntaxError mismatch +example := balanced [() (]] +``` +```leanOutput mismatch + :1:25: expected ')' or balanced +``` +::: +:::: + +::::keepEnv +:::example "Parsing Comma-Separated Repetitions" +A variant of list literals that requires double square brackets and allows a trailing comma can be added with the following syntax: +```lean +syntax "[[" term,*,? "]]" : term +``` + +Adding a {deftech}[macro] that describes how to translate it into an ordinary list literal allows it to be used in tests. +```lean +macro_rules + | `(term|[[$e:term,*]]) => `([$e,*]) +``` + +```lean (name := evFunnyList) +#eval [["Dandelion", "Thistle",]] +``` +```leanOutput evFunnyList +["Dandelion", "Thistle"] +``` + +::: +:::: + +# Indentation +%%% +tag := "syntax-indentation" +%%% + +Internally, the parser maintains a saved source position. +Syntax rules may include instructions that interact with these saved positions, causing parsing to fail when a condition is not met. +Indentation-sensitive constructs, such as {keywordOf Lean.Parser.Term.do}`do`, save a source position, parse their constituent parts while taking this saved position into account, and then restore the original position. + +In particular, Indentation-sensitvity is specified by combining {name Lean.Parser.withPosition}`withPosition` or {name Lean.Parser.withPositionAfterLinebreak}`withPositionAfterLinebreak`, which save the source position at the start of parsing some other syntax, with {name Lean.Parser.checkColGt}`colGt`, {name Lean.Parser.checkColGe}`colGe`, and {name Lean.Parser.checkColEq}`colEq`, which compare the current column with the column from the most recently-saved position. +{name Lean.Parser.checkLineEq}`lineEq` can also be used to ensure that two positions are on the same line in the source file. + +:::parserAlias withPosition +::: + +:::parserAlias withoutPosition +::: + +:::parserAlias withPositionAfterLinebreak +::: + +:::parserAlias colGt +::: + +:::parserAlias colGe +::: + +:::parserAlias colEq +::: + +:::parserAlias lineEq +::: + + +::::keepEnv +:::example "Aligned Columns" +This syntax for saving notes takes a bulleted list of items, each of which must be aligned at the same column. +```lean +syntax "note " ppLine withPosition((colEq "• " str ppLine)+) : term +``` + +There is no elaborator or macro associated with this syntax, but the following example is accepted by the parser: +```lean (error := true) (name := noteEx1) +#check + note + • "One" + • "Two" +``` +```leanOutput noteEx1 +elaboration function for '«termNote__•__»' has not been implemented + note + • "One" + • "Two" +``` + +The syntax does not require that the list is indented with respect to the opening token, which would require an extra `withPosition` and a `colGt`. +```lean (error := true) (name := noteEx15) +#check + note +• "One" +• "Two" +``` +```leanOutput noteEx15 +elaboration function for '«termNote__•__»' has not been implemented + note + • "One" + • "Two" +``` + + +The following examples are not syntactically valid because the columns of the bullet points do not match. +```syntaxError noteEx2 +#check + note + • "One" + • "Two" +``` +```leanOutput noteEx2 + :4:3: expected end of input +``` + +```syntaxError noteEx2 +#check + note + • "One" + • "Two" +``` +```leanOutput noteEx2 + :4:5: expected end of input +``` +::: +:::: diff --git a/Manual/Papers.lean b/Manual/Papers.lean new file mode 100644 index 0000000..e864178 --- /dev/null +++ b/Manual/Papers.lean @@ -0,0 +1,50 @@ +/- +Copyright (c) 2024 Lean FRO LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: David Thrane Christiansen +-/ + +import Manual.Meta.Bibliography + +open Manual + +def beyondNotations : InProceedings where + title := .concat (inlines!"Beyond notations: Hygienic macro expansion for theorem proving languages") + authors := #[.concat (inlines!"Sebastian Ullrich"), .concat (inlines!"Leonardo de Moura")] + year := 2020 + booktitle := .concat (inlines!"Proceedings of the International Joint Conference on Automated Reasoning") + + +def carneiro19 : Thesis where + title := .concat (inlines!"The Type Theory of Lean") + author := .concat (inlines!"Mario Carneiro") + year := 2019 + university := .concat (inlines!"Carnegie Mellon University") + url := some "https://github.com/digama0/lean-type-theory/releases/download/v1.0/main.pdf" + degree := .concat (inlines!"Masters thesis") + +def castPaper : ArXiv where + title := .concat (inlines!"Simplifying Casts and Coercions") + authors := #[.concat (inlines!"Robert Y. Lewis"), .concat (inlines!"Paul-Nicolas Madelaine")] + year := 2020 + id := "2001.10594" + +def pratt73 : InProceedings where + title := .concat (inlines!"Top down operator precedence") + authors := #[.concat (inlines!"Vaughan Pratt")] + year := 1973 + booktitle := .concat (inlines!"Proceedings of the 1st Annual ACM SIGACT-SIGPLAN Symposium on Principles of Programming Languages") + +def tabledRes : ArXiv where + title := .concat (inlines!"Tabled typeclass resolution") + authors := #[.concat (inlines!"Daniel Selsam"), .concat (inlines!"Sebastian Ullrich"), .concat (inlines!"Leonardo de Moura")] + year := 2020 + id := "2001.04301" + +def ullrich23 : Thesis where + title := .concat (inlines!"An Extensible Theorem Proving Frontend") + author := .concat (inlines!"Sebastian Ullrich") + year := 2023 + university := .concat (inlines!"Karlsruhe Institute of Technology") + url := some "https://www.lean-lang.org/papers/thesis-sebastian.pdf" + degree := .concat (inlines!"Dr. Ing. dissertation") diff --git a/Manual/Tactics.lean b/Manual/Tactics.lean index 89ec288..5c4f705 100644 --- a/Manual/Tactics.lean +++ b/Manual/Tactics.lean @@ -339,7 +339,7 @@ gt : ↑i > 5 Additionally, non-proof terms may be hidden when they are too large. In particular, Lean will hide terms that are below a configurable depth threshold, and it will hide the remainder of a term once a certain amount in total has been printed. Showing deep terms can enabled or disabled with the option {option}`pp.deepTerms`, and the depth threshold can be configured with the option {option}`pp.deepTerms.threshold`. -The maximum number of pretty-printer steps can be configured with the option {option}`pp.maxSteps`. +The maximum number of pretty printer steps can be configured with the option {option}`pp.maxSteps`. Printing very large terms can lead to slowdowns or even stack overflows in tooling; please be conservative when adjusting these options' values. {optionDocs pp.deepTerms} diff --git a/Manual/Tactics/Reference.lean b/Manual/Tactics/Reference.lean index 50704ed..1abf672 100644 --- a/Manual/Tactics/Reference.lean +++ b/Manual/Tactics/Reference.lean @@ -9,6 +9,7 @@ import VersoManual import Lean.Parser.Term import Manual.Meta +import Manual.Papers import Manual.Tactics.Reference.Simp @@ -18,11 +19,6 @@ set_option pp.rawOnError true set_option linter.unusedVariables false -def castPaper : ArXiv where - title := .concat (inlines!"Simplifying Casts and Coercions") - authors := #[.concat (inlines!"Robert Y. Lewis"), .concat (inlines!"Paul-Nicolas Madelaine")] - year := 2020 - id := "2001.10594" #doc (Manual) "Tactic Reference" => diff --git a/lake-manifest.json b/lake-manifest.json index 50f9585..5df1c90 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -25,7 +25,7 @@ "type": "git", "subDir": null, "scope": "", - "rev": "e865c8a4951c69359a410e6c0781fa9cc769a482", + "rev": "b1d1865f979882dbc52c386877bc6708d9b56aec", "name": "verso", "manifestFile": "lake-manifest.json", "inputRev": "main",