diff --git a/src/Lean/Parser.lean b/src/Lean/Parser.lean index 7745db80ed20..03baf4d754e5 100644 --- a/src/Lean/Parser.lean +++ b/src/Lean/Parser.lean @@ -46,6 +46,7 @@ builtin_initialize register_parser_alias (kind := interpolatedStrKind) interpolatedStr register_parser_alias orelse register_parser_alias andthen { stackSz? := none } + register_parser_alias recover registerAlias "notFollowedBy" ``notFollowedBy (notFollowedBy · "element") Parenthesizer.registerAlias "notFollowedBy" notFollowedBy.parenthesizer diff --git a/src/Lean/Parser/Basic.lean b/src/Lean/Parser/Basic.lean index a767cce45811..bfa4873133a2 100644 --- a/src/Lean/Parser/Basic.lean +++ b/src/Lean/Parser/Basic.lean @@ -202,8 +202,9 @@ def trailingNode (n : SyntaxNodeKind) (prec lhsPrec : Nat) (p : Parser) : Traili def mergeOrElseErrors (s : ParserState) (error1 : Error) (iniPos : String.Pos) (mergeErrors : Bool) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, some error2⟩ => - if pos == iniPos then ⟨stack, lhsPrec, pos, cache, some (if mergeErrors then error1.merge error2 else error2)⟩ + | ⟨stack, lhsPrec, pos, cache, some error2, errs⟩ => + if pos == iniPos then + ⟨stack, lhsPrec, pos, cache, some (if mergeErrors then error1.merge error2 else error2), errs⟩ else s | other => other @@ -284,7 +285,7 @@ instance : OrElse Parser where def atomicFn (p : ParserFn) : ParserFn := fun c s => let iniPos := s.pos match p c s with - | ⟨stack, lhsPrec, _, cache, some msg⟩ => ⟨stack, lhsPrec, iniPos, cache, some msg⟩ + | ⟨stack, lhsPrec, _, cache, some msg, errs⟩ => ⟨stack, lhsPrec, iniPos, cache, some msg, errs⟩ | other => other /-- The `atomic(p)` parser parses `p`, returns the same result as `p` and fails iff `p` fails, @@ -295,6 +296,58 @@ This is important for the `p <|> q` combinator, because it is not backtracking, This parser has the same arity as `p` - it produces the same result as `p`. -/ def atomic : Parser → Parser := withFn atomicFn +/-- Information about the state of the parse prior to the failing parser's execution -/ +structure RecoveryContext where + /-- The position prior to the failing parser -/ + initialPos : String.Pos + /-- The syntax stack height prior to the failing parser's execution -/ + initialSize : Nat +deriving BEq, DecidableEq, Repr + +/-- +Recover from errors in `p` using `recover` to consume input until a known-good state has appeared. +If `recover` fails itself, then no recovery is performed. + +`recover` is provided with information about the failing parser's effects , and it is run in the +state immediately after the failure. -/ +def recoverFn (p : ParserFn) (recover : RecoveryContext → ParserFn) : ParserFn := fun c s => + let iniPos := s.pos + let iniSz := s.stxStack.size + let s' := p c s + if let some msg := s'.errorMsg then + let s' := recover ⟨iniPos, iniSz⟩ c {s' with errorMsg := none} + if s'.hasError then s' + else {s' with + pos := s'.pos, + lhsPrec := s'.lhsPrec, + cache := s'.cache, + errorMsg := none, + recoveredErrors := s'.recoveredErrors.push (s'.pos, s'.stxStack, msg) } + else s' + +/-- +Recover from errors in `parser` using `handler` to consume input until a known-good state has appeared. +If `handler` fails itself, then no recovery is performed. + +`handler` is provided with information about the failing parser's effects , and it is run in the +state immediately after the failure. + +The interactions between <|> and `recover'` are subtle, especially for syntactic +categories that admit user extension. Consider avoiding it in these cases. -/ +def recover' (parser : Parser) (handler : RecoveryContext → Parser) : Parser where + info := parser.info + fn := recoverFn parser.fn fun s => handler s |>.fn + +/-- +Recover from errors in `parser` using `handler` to consume input until a known-good state has appeared. +If `handler` fails itself, then no recovery is performed. + +`handler` is run in the state immediately after the failure. + +The interactions between <|> and `recover` are subtle, especially for syntactic +categories that admit user extension. Consider avoiding it in these cases. -/ +def recover (parser handler : Parser) : Parser := recover' parser fun _ => handler + def optionalFn (p : ParserFn) : ParserFn := fun c s => let iniSz := s.stackSize let iniPos := s.pos @@ -955,11 +1008,11 @@ private def tokenFnAux : ParserFn := fun c s => private def updateTokenCache (startPos : String.Pos) (s : ParserState) : ParserState := -- do not cache token parsing errors, which are rare and usually fatal and thus not worth an extra field in `TokenCache` match s with - | ⟨stack, lhsPrec, pos, ⟨_, catCache⟩, none⟩ => + | ⟨stack, lhsPrec, pos, ⟨_, catCache⟩, none, errs⟩ => if stack.size == 0 then s else let tk := stack.back - ⟨stack, lhsPrec, pos, ⟨{ startPos := startPos, stopPos := pos, token := tk }, catCache⟩, none⟩ + ⟨stack, lhsPrec, pos, ⟨{ startPos := startPos, stopPos := pos, token := tk }, catCache⟩, none, errs⟩ | other => other def tokenFn (expected : List String := []) : ParserFn := fun c s => @@ -1258,21 +1311,24 @@ def keepTop (s : SyntaxStack) (startStackSize : Nat) : SyntaxStack := def keepNewError (s : ParserState) (oldStackSize : Nat) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, err⟩ => ⟨keepTop stack oldStackSize, lhsPrec, pos, cache, err⟩ + | ⟨stack, lhsPrec, pos, cache, err, errs⟩ => ⟨keepTop stack oldStackSize, lhsPrec, pos, cache, err, errs⟩ def keepPrevError (s : ParserState) (oldStackSize : Nat) (oldStopPos : String.Pos) (oldError : Option Error) (oldLhsPrec : Nat) : ParserState := match s with - | ⟨stack, _, _, cache, _⟩ => ⟨stack.shrink oldStackSize, oldLhsPrec, oldStopPos, cache, oldError⟩ + | ⟨stack, _, _, cache, _, errs⟩ => + ⟨stack.shrink oldStackSize, oldLhsPrec, oldStopPos, cache, oldError, errs⟩ def mergeErrors (s : ParserState) (oldStackSize : Nat) (oldError : Error) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, some err⟩ => - ⟨stack.shrink oldStackSize, lhsPrec, pos, cache, if oldError == err then some err else some (oldError.merge err)⟩ + | ⟨stack, lhsPrec, pos, cache, some err, errs⟩ => + let newError := if oldError == err then err else oldError.merge err + ⟨stack.shrink oldStackSize, lhsPrec, pos, cache, some newError, errs⟩ | other => other def keepLatest (s : ParserState) (startStackSize : Nat) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, _⟩ => ⟨keepTop stack startStackSize, lhsPrec, pos, cache, none⟩ + | ⟨stack, lhsPrec, pos, cache, _, errs⟩ => + ⟨keepTop stack startStackSize, lhsPrec, pos, cache, none, errs⟩ def replaceLongest (s : ParserState) (startStackSize : Nat) : ParserState := s.keepLatest startStackSize diff --git a/src/Lean/Parser/Command.lean b/src/Lean/Parser/Command.lean index 28f6bea11ba8..4ef5314b2afb 100644 --- a/src/Lean/Parser/Command.lean +++ b/src/Lean/Parser/Command.lean @@ -17,6 +17,21 @@ namespace Parser namespace Command +/-- Skip input until the next character that satisfies the predicate, then skip whitespace -/ +private def skipUntil (pred : Char → Bool) : Parser where + fn := + andthenFn + (takeUntilFn pred) + (takeWhileFn Char.isWhitespace) + +/-- Skip input until the next whitespace character (used for error recovery for certain tokens) -/ +private def skipUntilWs : Parser := skipUntil Char.isWhitespace + +/-- Skip input until the next whitespace character or delimiter (used for error recovery for certain + tokens, especially names that occur in signatures) -/ +private def skipUntilWsOrDelim : Parser := skipUntil fun c => + c.isWhitespace || c == '(' || c == ')' || c == ':' || c == '{' || c == '}' || c == '|' + /-- Syntax quotation for (sequences of) commands. The identical syntax for term quotations takes priority, @@ -67,7 +82,7 @@ def declModifiers (inline : Bool) := leading_parser optional («partial» <|> «nonrec») /-- `declId` matches `foo` or `foo.{u,v}`: an identifier possibly followed by a list of universe names -/ def declId := leading_parser - ident >> optional (".{" >> sepBy1 ident ", " >> "}") + ident >> optional (".{" >> sepBy1 (recover ident (skipUntil (fun c => c.isWhitespace || c ∈ [',', '}']))) ", " >> "}") /-- `declSig` matches the signature of a declaration with required type: a list of binders and then `: type` -/ def declSig := leading_parser many (ppSpace >> (Term.binderIdent <|> Term.bracketedBinder)) >> Term.typeSpec @@ -99,18 +114,18 @@ def «abbrev» := leading_parser def optDefDeriving := optional (ppDedent ppLine >> atomic ("deriving " >> notSymbol "instance") >> sepBy1 ident ", ") def «def» := leading_parser - "def " >> declId >> ppIndent optDeclSig >> declVal >> optDefDeriving + "def " >> recover declId skipUntilWsOrDelim >> ppIndent optDeclSig >> declVal >> optDefDeriving def «theorem» := leading_parser - "theorem " >> declId >> ppIndent declSig >> declVal + "theorem " >> recover declId skipUntilWsOrDelim >> ppIndent declSig >> declVal def «opaque» := leading_parser - "opaque " >> declId >> ppIndent declSig >> optional declValSimple + "opaque " >> recover declId skipUntilWsOrDelim >> ppIndent declSig >> optional declValSimple /- As `declSig` starts with a space, "instance" does not need a trailing space if we put `ppSpace` in the optional fragments. -/ def «instance» := leading_parser Term.attrKind >> "instance" >> optNamedPrio >> optional (ppSpace >> declId) >> ppIndent declSig >> declVal def «axiom» := leading_parser - "axiom " >> declId >> ppIndent declSig + "axiom " >> recover declId skipUntilWsOrDelim >> ppIndent declSig /- As `declSig` starts with a space, "example" does not need a trailing space. -/ def «example» := leading_parser "example" >> ppIndent optDeclSig >> declVal @@ -143,11 +158,11 @@ or an element `head : α` followed by a list `tail : List α`. For more information about [inductive types](https://lean-lang.org/theorem_proving_in_lean4/inductive_types.html). -/ def «inductive» := leading_parser - "inductive " >> declId >> ppIndent optDeclSig >> optional (symbol " :=" <|> " where") >> + "inductive " >> recover declId skipUntilWsOrDelim >> ppIndent optDeclSig >> optional (symbol " :=" <|> " where") >> many ctor >> optional (ppDedent ppLine >> computedFields) >> optDeriving def classInductive := leading_parser atomic (group (symbol "class " >> "inductive ")) >> - declId >> ppIndent optDeclSig >> + recover declId skipUntilWsOrDelim >> ppIndent optDeclSig >> optional (symbol " :=" <|> " where") >> many ctor >> optDeriving def structExplicitBinder := leading_parser atomic (declModifiers true >> "(") >> @@ -174,7 +189,9 @@ def classTk := leading_parser def «extends» := leading_parser " extends " >> sepBy1 termParser ", " def «structure» := leading_parser - (structureTk <|> classTk) >> declId >> + (structureTk <|> classTk) >> + -- Note: no error recovery here due to clashing with the `class abbrev` syntax + declId >> ppIndent (many (ppSpace >> Term.bracketedBinder) >> optional «extends» >> Term.optType) >> optional ((symbol " := " <|> " where ") >> optional structCtor >> structFields) >> optDeriving @@ -183,7 +200,7 @@ def «structure» := leading_parser («abbrev» <|> «def» <|> «theorem» <|> «opaque» <|> «instance» <|> «axiom» <|> «example» <|> «inductive» <|> classInductive <|> «structure») @[builtin_command_parser] def «deriving» := leading_parser - "deriving " >> "instance " >> derivingClasses >> " for " >> sepBy1 ident ", " + "deriving " >> "instance " >> derivingClasses >> " for " >> sepBy1 (recover ident skip) ", " @[builtin_command_parser] def noncomputableSection := leading_parser "noncomputable " >> "section" >> optional (ppSpace >> ident) @[builtin_command_parser] def «section» := leading_parser diff --git a/src/Lean/Parser/Extension.lean b/src/Lean/Parser/Extension.lean index 70fecd0ee62a..6d6ba50eb4de 100644 --- a/src/Lean/Parser/Extension.lean +++ b/src/Lean/Parser/Extension.lean @@ -451,7 +451,7 @@ def runParserCategory (env : Environment) (catName : Name) (input : String) (fil let p := andthenFn whitespace (categoryParserFnImpl catName) let ictx := mkInputContext input fileName let s := p.run ictx { env, options := {} } (getTokenTable env) (mkParserState input) - if s.hasError then + if !s.allErrors.isEmpty then Except.error (s.toErrorMsg ictx) else if input.atEnd s.pos then Except.ok s.stxStack.back diff --git a/src/Lean/Parser/Module.lean b/src/Lean/Parser/Module.lean index 40f43ef5dc57..9c5bac972096 100644 --- a/src/Lean/Parser/Module.lean +++ b/src/Lean/Parser/Module.lean @@ -36,8 +36,8 @@ structure ModuleParserState where recovering : Bool := false deriving Inhabited -private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Error) : Message := Id.run do - let mut pos := s.pos +private partial def mkErrorMessage (c : InputContext) (pos : String.Pos) (stk : SyntaxStack) (e : Parser.Error) : Message := Id.run do + let mut pos := pos let mut endPos? := none let mut e := e unless e.unexpectedTk.isMissing do @@ -52,7 +52,7 @@ private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Erro e := { e with unexpected } -- if there is an unexpected token, include preceding whitespace as well as the expected token could -- be inserted at any of these places to fix the error; see tests/lean/1971.lean - if let .original (trailing := trailing) .. := s.stxStack.back.getTailInfo then + if let some trailing := lastTrailing stk then if trailing.stopPos == pos then pos := trailing.startPos { fileName := c.fileName @@ -60,6 +60,12 @@ private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Erro endPos := c.fileMap.toPosition <$> endPos? keepFullRange := true data := toString e } +where + -- Error recovery might lead to there being some "junk" on the stack + lastTrailing (s : SyntaxStack) : Option Substring := + s.toSubarray.findSomeRevM? (m := Id) fun stx => + if let .original (trailing := trailing) .. := stx.getTailInfo then pure (some trailing) + else none def parseHeader (inputCtx : InputContext) : IO (Syntax × ModuleParserState × MessageLog) := do let dummyEnv ← mkEmptyEnvironment @@ -67,12 +73,10 @@ def parseHeader (inputCtx : InputContext) : IO (Syntax × ModuleParserState × M let tokens := Module.updateTokens (getTokenTable dummyEnv) let s := p.run inputCtx { env := dummyEnv, options := {} } tokens (mkParserState inputCtx.input) let stx := if s.stxStack.isEmpty then .missing else s.stxStack.back - match s.errorMsg with - | some errorMsg => - let msg := mkErrorMessage inputCtx s errorMsg - pure (stx, { pos := s.pos, recovering := true }, { : MessageLog }.add msg) - | none => - pure (stx, { pos := s.pos }, {}) + let mut messages : MessageLog := {} + for (pos, stk, err) in s.allErrors do + messages := messages.add <| mkErrorMessage inputCtx pos stk err + pure (stx, {pos := s.pos, recovering := s.hasError}, messages) private def mkEOI (pos : String.Pos) : Syntax := let atom := mkAtom (SourceInfo.original "".toSubstring pos "".toSubstring pos) "" @@ -103,6 +107,9 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext) let pos' := pos let p := andthenFn whitespace topLevelCommandParserFn let s := p.run inputCtx pmctx (getTokenTable pmctx.env) { cache := initCacheForInput inputCtx.input, pos } + -- save errors from sub-recoveries + for (rpos, rstk, recovered) in s.recoveredErrors do + messages := messages.add <| mkErrorMessage inputCtx rpos rstk recovered pos := s.pos if recovering && !s.stxStack.isEmpty && s.stxStack.back.isAntiquot then -- top-level antiquotation during recovery is most likely remnant from unfinished quotation, ignore @@ -121,7 +128,7 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext) We claim a syntactically incorrect command containing no token or identifier is irrelevant for intellisense and should be ignored. -/ let ignore := s.stxStack.isEmpty || s.stxStack.back.getPos?.isNone unless recovering && ignore do - messages := messages.add <| mkErrorMessage inputCtx s errorMsg + messages := messages.add <| mkErrorMessage inputCtx s.pos s.stxStack errorMsg recovering := true if ignore then continue diff --git a/src/Lean/Parser/Types.lean b/src/Lean/Parser/Types.lean index d2663a016ad0..b01ae4d55cf6 100644 --- a/src/Lean/Parser/Types.lean +++ b/src/Lean/Parser/Types.lean @@ -198,6 +198,7 @@ structure ParserState where pos : String.Pos := 0 cache : ParserCache errorMsg : Option Error := none + recoveredErrors : Array (String.Pos × SyntaxStack × Error) := #[] namespace ParserState @@ -232,16 +233,9 @@ def next (s : ParserState) (input : String) (pos : String.Pos) : ParserState := def next' (s : ParserState) (input : String) (pos : String.Pos) (h : ¬ input.atEnd pos) : ParserState := { s with pos := input.next' pos h } -def toErrorMsg (ctx : InputContext) (s : ParserState) : String := - match s.errorMsg with - | none => "" - | some msg => - let pos := ctx.fileMap.toPosition s.pos - mkErrorStringWithPos ctx.fileName pos (toString msg) - def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, err⟩ => + | ⟨stack, lhsPrec, pos, cache, err, recovered⟩ => if err != none && stack.size == iniStackSz then -- If there is an error but there are no new nodes on the stack, use `missing` instead. -- Thus we ensure the property that an syntax tree contains (at least) one `missing` node @@ -251,25 +245,28 @@ def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserSta -- `node k1 p1 <|> ... <|> node kn pn` when all parsers fail. With the code below we -- instead return a less misleading single `missing` node without randomly selecting any `ki`. let stack := stack.push Syntax.missing - ⟨stack, lhsPrec, pos, cache, err⟩ + ⟨stack, lhsPrec, pos, cache, err, recovered⟩ else let newNode := Syntax.node SourceInfo.none k (stack.extract iniStackSz stack.size) let stack := stack.shrink iniStackSz let stack := stack.push newNode - ⟨stack, lhsPrec, pos, cache, err⟩ + ⟨stack, lhsPrec, pos, cache, err, recovered⟩ def mkTrailingNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, err⟩ => + | ⟨stack, lhsPrec, pos, cache, err, errs⟩ => let newNode := Syntax.node SourceInfo.none k (stack.extract (iniStackSz - 1) stack.size) let stack := stack.shrink (iniStackSz - 1) let stack := stack.push newNode - ⟨stack, lhsPrec, pos, cache, err⟩ + ⟨stack, lhsPrec, pos, cache, err, errs⟩ + +def allErrors (s : ParserState) : Array (String.Pos × SyntaxStack × Error) := + s.recoveredErrors ++ (s.errorMsg.map (fun e => #[(s.pos, s.stxStack, e)])).getD #[] @[inline] def setError (s : ParserState) (e : Error) : ParserState := match s with - | ⟨stack, lhsPrec, pos, cache, _⟩ => ⟨stack, lhsPrec, pos, cache, some e⟩ + | ⟨stack, lhsPrec, pos, cache, _, errs⟩ => ⟨stack, lhsPrec, pos, cache, some e, errs⟩ def mkError (s : ParserState) (msg : String) : ParserState := s.setError { expected := [msg] } |>.pushSyntax .missing @@ -313,6 +310,14 @@ def mkUnexpectedTokenError (s : ParserState) (msg : String) (iniPos : String.Pos def mkUnexpectedErrorAt (s : ParserState) (msg : String) (pos : String.Pos) : ParserState := s.setPos pos |>.mkUnexpectedError msg +def toErrorMsg (ctx : InputContext) (s : ParserState) : String := Id.run do + let mut errStr := "" + for (pos, _stk, err) in s.allErrors do + if errStr != "" then errStr := errStr ++ "\n" + let pos := ctx.fileMap.toPosition pos + errStr := errStr ++ mkErrorStringWithPos ctx.fileName pos (toString err) + errStr + end ParserState def ParserFn := ParserContext → ParserState → ParserState @@ -415,7 +420,7 @@ def withCacheFn (parserName : Name) (p : ParserFn) : ParserFn := fun c s => Id.r if let some r := s.cache.parserCache.find? key then -- TODO: turn this into a proper trace once we have these in the parser --dbg_trace "parser cache hit: {parserName}:{s.pos} -> {r.stx}" - return ⟨s.stxStack.push r.stx, r.lhsPrec, r.newPos, s.cache, r.errorMsg⟩ + return ⟨s.stxStack.push r.stx, r.lhsPrec, r.newPos, s.cache, r.errorMsg, s.recoveredErrors⟩ let initStackSz := s.stxStack.raw.size let s := withStackDrop initStackSz p c { s with lhsPrec := 0, errorMsg := none } if s.stxStack.raw.size != initStackSz + 1 then diff --git a/src/Lean/PrettyPrinter/Formatter.lean b/src/Lean/PrettyPrinter/Formatter.lean index da8effa217f0..2d1824bd4248 100644 --- a/src/Lean/PrettyPrinter/Formatter.lean +++ b/src/Lean/PrettyPrinter/Formatter.lean @@ -193,6 +193,12 @@ def withMaybeTag (pos? : Option String.Pos) (x : FormatterM Unit) : Formatter := -- them in turn. Uses the syntax traverser non-linearly! p1 <|> p2 +@[combinator_formatter recover] +def recover.formatter (fmt : PrettyPrinter.Formatter) := fmt + +@[combinator_formatter recover'] +def recover'.formatter (fmt : PrettyPrinter.Formatter) := fmt + -- `mkAntiquot` is quite complex, so we'd rather have its formatter synthesized below the actual parser definition. -- Note that there is a mutual recursion -- `categoryParser -> mkAntiquot -> termParser -> categoryParser`, so we need to introduce an indirection somewhere diff --git a/src/Lean/PrettyPrinter/Parenthesizer.lean b/src/Lean/PrettyPrinter/Parenthesizer.lean index 6c94c6388588..7ef8fff6c8b4 100644 --- a/src/Lean/PrettyPrinter/Parenthesizer.lean +++ b/src/Lean/PrettyPrinter/Parenthesizer.lean @@ -266,6 +266,12 @@ def visitToken : Parenthesizer := do -- them in turn. Uses the syntax traverser non-linearly! p1 <|> p2 +@[combinator_parenthesizer recover] +def recover.parenthesizer (p : PrettyPrinter.Parenthesizer) : PrettyPrinter.Parenthesizer := p + +@[combinator_parenthesizer recover'] +def recover'.parenthesizer (p : PrettyPrinter.Parenthesizer) : PrettyPrinter.Parenthesizer := p + -- `mkAntiquot` is quite complex, so we'd rather have its parenthesizer synthesized below the actual parser definition. -- Note that there is a mutual recursion -- `categoryParser -> mkAntiquot -> termParser -> categoryParser`, so we need to introduce an indirection somewhere diff --git a/stage0/src/stdlib_flags.h b/stage0/src/stdlib_flags.h index 0699845ba452..cb8a954a4d16 100644 --- a/stage0/src/stdlib_flags.h +++ b/stage0/src/stdlib_flags.h @@ -6,7 +6,7 @@ options get_default_options() { // see https://lean-lang.org/lean4/doc/dev/bootstrap.html#further-bootstrapping-complications #if LEAN_IS_STAGE0 == 1 // switch to `true` for ABI-breaking changes affecting meta code - opts = opts.update({"interpreter", "prefer_native"}, false); + opts = opts.update({"interpreter", "prefer_native"}, true); // switch to `true` for changing built-in parsers used in quotations opts = opts.update({"internal", "parseQuotWithCurrentStage"}, false); // toggling `parseQuotWithCurrentStage` may also require toggling the following option if macros/syntax diff --git a/tests/lean/1971.lean.expected.out b/tests/lean/1971.lean.expected.out index c383f7f5e6b3..8b0c40583952 100644 --- a/tests/lean/1971.lean.expected.out +++ b/tests/lean/1971.lean.expected.out @@ -2,5 +2,6 @@ 1971.lean:22:6-24:3: error: unexpected token 'def'; expected ':=', 'where' or '|' 1971.lean:30:9-30:13: error: unexpected token 'def'; expected term 1971.lean:34:9-36:7: error: unexpected token 'example'; expected identifier +1971.lean:36:11-36:12: error: unexpected token; expected command 1971.lean:39:17-41:7: error: unexpected token 'example'; expected 'false', 'true', numeral or string literal 1971.lean:45:11: error: unterminated string literal diff --git a/tests/lean/parserRecovery.lean b/tests/lean/parserRecovery.lean new file mode 100644 index 000000000000..08be3555ee6b --- /dev/null +++ b/tests/lean/parserRecovery.lean @@ -0,0 +1,114 @@ +import Lean + +/-! +# Parser error recovery + +This test is to make sure that the error recovery feature is working as intended. In particular, it +checks that certain errors in the head of a declaration (e.g. an invalid name) don't prevent further +parse errors from being reported. +-/ + +open Lean Parser + +/-! +## Error recovery for Lean commands +-/ +def "foo" (x : Nat) : := 5 + +def 5 : Nat := 3 + +theorem yep : True := by trivial + +theorem () : Nat := 99 +theorem 2.4 : Nat := ) +theorem "nat" : Nat := ) +theorem 3 : Nat := ) + +opaque ((( : Nat := ) + +axiom "nope" : 4 = ⟩ + +class inductive () where + | "" : ) + +inductive 44: Type where | "" : 44 + +inductive 44 : Type where | "" : 44 + +inductive 44 (n : Nat) : Type where | "" : 44 + +inductive 44(n : Nat) : Type where | "" : 44 + +inductive 44| "" : 44 + +inductive !!! where + | "" (x : Nat)) : Bogus + +#eval Foo.a + +inductive Item.{4, 5, 6, 7, a, "foo", b, c} : Nat where + | foo : ) + | bar : ) + +example := "" + +/-! +## Error recovery for custom syntax + +Adding recovery to Lean's term grammar doesn't work well with the primitive tools at hand +(extensibility + token tables + backtracking are a real challenge), but for DSLs it can be useful. +This tests that error recovery works for a term syntax extension. +-/ + +def altParen : Parser := + "{|" >> termParser (prec := 51) >> recover "|}" skip + +macro:50 e:altParen : term => pure ⟨e.raw[1]⟩ + +-- These terms show multiple errors due to recovery from missing |} tokens +#eval ([ {|2 + 3] * {| 3 + |} + +-- Only one error here +#eval ([ (2 + 3] * ( 3 + ) ) + +example := "" + +/-! +## Error recovery for custom commands + +This command can recover from many parse errors. Make sure that there's no arbitrary small limit by +testing a few recoveries. +-/ + +def nonws : Parser where + fn := + andthenFn + (andthenFn (satisfyFn (fun c => !c.isWhitespace && c != '#')) (takeWhileFn (!·.isWhitespace))) + whitespace + +def thing : Parser := + recover ident nonws + + +open Lean Elab Command in +elab "#go " xs:thing* (Lean.Parser.eoi <|> "#stop") : command => do + let xs := toString xs + elabCommand <| ← `(#eval s!"hey {$(quote xs)}") + +-- Many errors due to recovery! +#go +hey +5 +a +g +def +5 +hey +( 99 + ( +#stop + +-- Even though the command had errors, this is still running as expected: + +def x := 5 +#eval x diff --git a/tests/lean/parserRecovery.lean.expected.out b/tests/lean/parserRecovery.lean.expected.out new file mode 100644 index 000000000000..fa6cbd293d67 --- /dev/null +++ b/tests/lean/parserRecovery.lean.expected.out @@ -0,0 +1,46 @@ +parserRecovery.lean:16:3-16:9: error: unexpected token; expected identifier +parserRecovery.lean:16:21-16:25: error: unexpected token ':='; expected term +parserRecovery.lean:18:3-18:5: error: unexpected token; expected identifier +parserRecovery.lean:22:7-22:9: error: unexpected token '('; expected identifier +parserRecovery.lean:22:9-22:10: error: unexpected token ')'; expected '_' or identifier +parserRecovery.lean:23:7-23:11: error: unexpected token; expected identifier +parserRecovery.lean:23:20-23:22: error: unexpected token ')'; expected term +parserRecovery.lean:24:7-24:13: error: unexpected token; expected identifier +parserRecovery.lean:24:22-24:24: error: unexpected token ')'; expected term +parserRecovery.lean:25:7-25:9: error: unexpected token; expected identifier +parserRecovery.lean:25:18-25:20: error: unexpected token ')'; expected term +parserRecovery.lean:27:6-27:8: error: unexpected token '('; expected identifier +parserRecovery.lean:27:8-27:9: error: unexpected token '('; expected '_' or identifier +parserRecovery.lean:29:5-29:12: error: unexpected token; expected identifier +parserRecovery.lean:29:18-29:20: error: unexpected token '⟩'; expected term +parserRecovery.lean:31:15-31:17: error: unexpected token '('; expected identifier +parserRecovery.lean:31:17-31:18: error: unexpected token ')'; expected '_' or identifier +parserRecovery.lean:34:9-34:12: error: unexpected token; expected identifier +parserRecovery.lean:34:27: error: expected token +parserRecovery.lean:36:9-36:12: error: unexpected token; expected identifier +parserRecovery.lean:36:28: error: expected token +parserRecovery.lean:38:9-38:12: error: unexpected token; expected identifier +parserRecovery.lean:38:38: error: expected token +parserRecovery.lean:40:9-40:12: error: unexpected token; expected identifier +parserRecovery.lean:40:37: error: expected token +parserRecovery.lean:42:9-42:12: error: unexpected token; expected identifier +parserRecovery.lean:42:14: error: expected token +parserRecovery.lean:44:9-44:11: error: unexpected token '!'; expected identifier +parserRecovery.lean:45:4: error: expected token +parserRecovery.lean:47:6-47:11: error: unknown identifier 'Foo.a' +parserRecovery.lean:49:16-49:17: error: unexpected token; expected identifier +parserRecovery.lean:49:18-49:20: error: unexpected token; expected identifier +parserRecovery.lean:49:21-49:23: error: unexpected token; expected identifier +parserRecovery.lean:49:24-49:26: error: unexpected token; expected identifier +parserRecovery.lean:49:30-49:36: error: unexpected token; expected identifier +parserRecovery.lean:50:9-50:11: error: unexpected token ')'; expected term +parserRecovery.lean:69:16-69:17: error: unexpected token ']'; expected '|}' +parserRecovery.lean:69:20: error: unexpected token at this precedence level; consider parenthesizing the term +parserRecovery.lean:72:15-72:16: error: unexpected token ']'; expected ')', ',' or ':' +parserRecovery.lean:100:3-101:1: error: unexpected token; expected identifier +parserRecovery.lean:103:1-104:3: error: unexpected token 'def'; expected identifier +parserRecovery.lean:105:0-105:1: error: unexpected token; expected identifier +parserRecovery.lean:106:3-107:1: error: unexpected token '('; expected identifier +parserRecovery.lean:107:6-107:8: error: unexpected token; expected identifier +parserRecovery.lean:108:2-108:3: error: unexpected token '('; expected identifier +5