Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: custom error recovery in parser #3413

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Lean/Parser.lean
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ builtin_initialize
register_parser_alias (kind := interpolatedStrKind) interpolatedStr
register_parser_alias orelse
register_parser_alias andthen { stackSz? := none }
register_parser_alias recover

registerAlias "notFollowedBy" ``notFollowedBy (notFollowedBy · "element")
Parenthesizer.registerAlias "notFollowedBy" notFollowedBy.parenthesizer
Expand Down
76 changes: 66 additions & 10 deletions src/Lean/Parser/Basic.lean
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,9 @@ def trailingNode (n : SyntaxNodeKind) (prec lhsPrec : Nat) (p : Parser) : Traili

def mergeOrElseErrors (s : ParserState) (error1 : Error) (iniPos : String.Pos) (mergeErrors : Bool) : ParserState :=
match s with
| ⟨stack, lhsPrec, pos, cache, some error2⟩ =>
if pos == iniPos then ⟨stack, lhsPrec, pos, cache, some (if mergeErrors then error1.merge error2 else error2)⟩
| ⟨stack, lhsPrec, pos, cache, some error2, errs⟩ =>
if pos == iniPos then
⟨stack, lhsPrec, pos, cache, some (if mergeErrors then error1.merge error2 else error2), errs⟩
else s
| other => other

Expand Down Expand Up @@ -284,7 +285,7 @@ instance : OrElse Parser where
def atomicFn (p : ParserFn) : ParserFn := fun c s =>
let iniPos := s.pos
match p c s with
| ⟨stack, lhsPrec, _, cache, some msg⟩ => ⟨stack, lhsPrec, iniPos, cache, some msg⟩
| ⟨stack, lhsPrec, _, cache, some msg, errs⟩ => ⟨stack, lhsPrec, iniPos, cache, some msg, errs
| other => other

/-- The `atomic(p)` parser parses `p`, returns the same result as `p` and fails iff `p` fails,
Expand All @@ -295,6 +296,58 @@ This is important for the `p <|> q` combinator, because it is not backtracking,
This parser has the same arity as `p` - it produces the same result as `p`. -/
def atomic : Parser → Parser := withFn atomicFn

/-- Information about the state of the parse prior to the failing parser's execution -/
structure RecoveryContext where
/-- The position prior to the failing parser -/
initialPos : String.Pos
/-- The syntax stack height prior to the failing parser's execution -/
initialSize : Nat
deriving BEq, DecidableEq, Repr

/--
Recover from errors in `p` using `recover` to consume input until a known-good state has appeared.
If `recover` fails itself, then no recovery is performed.

`recover` is provided with information about the failing parser's effects , and it is run in the
state immediately after the failure. -/
def recoverFn (p : ParserFn) (recover : RecoveryContext → ParserFn) : ParserFn := fun c s =>
let iniPos := s.pos
let iniSz := s.stxStack.size
let s' := p c s
if let some msg := s'.errorMsg then
let s' := recover ⟨iniPos, iniSz⟩ c {s' with errorMsg := none}
if s'.hasError then s'
else {s' with
pos := s'.pos,
lhsPrec := s'.lhsPrec,
cache := s'.cache,
errorMsg := none,
recoveredErrors := s'.recoveredErrors.push (s'.pos, s'.stxStack, msg) }
else s'

/--
Recover from errors in `parser` using `handler` to consume input until a known-good state has appeared.
If `handler` fails itself, then no recovery is performed.

`handler` is provided with information about the failing parser's effects , and it is run in the
state immediately after the failure.

The interactions between <|> and `recover'` are subtle, especially for syntactic
categories that admit user extension. Consider avoiding it in these cases. -/
def recover' (parser : Parser) (handler : RecoveryContext → Parser) : Parser where
info := parser.info
fn := recoverFn parser.fn fun s => handler s |>.fn

/--
Recover from errors in `parser` using `handler` to consume input until a known-good state has appeared.
If `handler` fails itself, then no recovery is performed.

`handler` is run in the state immediately after the failure.

The interactions between <|> and `recover` are subtle, especially for syntactic
categories that admit user extension. Consider avoiding it in these cases. -/
def recover (parser handler : Parser) : Parser := recover' parser fun _ => handler

def optionalFn (p : ParserFn) : ParserFn := fun c s =>
let iniSz := s.stackSize
let iniPos := s.pos
Expand Down Expand Up @@ -955,11 +1008,11 @@ private def tokenFnAux : ParserFn := fun c s =>
private def updateTokenCache (startPos : String.Pos) (s : ParserState) : ParserState :=
-- do not cache token parsing errors, which are rare and usually fatal and thus not worth an extra field in `TokenCache`
match s with
| ⟨stack, lhsPrec, pos, ⟨_, catCache⟩, none⟩ =>
| ⟨stack, lhsPrec, pos, ⟨_, catCache⟩, none, errs⟩ =>
if stack.size == 0 then s
else
let tk := stack.back
⟨stack, lhsPrec, pos, ⟨{ startPos := startPos, stopPos := pos, token := tk }, catCache⟩, none⟩
⟨stack, lhsPrec, pos, ⟨{ startPos := startPos, stopPos := pos, token := tk }, catCache⟩, none, errs
| other => other

def tokenFn (expected : List String := []) : ParserFn := fun c s =>
Expand Down Expand Up @@ -1258,21 +1311,24 @@ def keepTop (s : SyntaxStack) (startStackSize : Nat) : SyntaxStack :=

def keepNewError (s : ParserState) (oldStackSize : Nat) : ParserState :=
match s with
| ⟨stack, lhsPrec, pos, cache, err⟩ => ⟨keepTop stack oldStackSize, lhsPrec, pos, cache, err⟩
| ⟨stack, lhsPrec, pos, cache, err, errs⟩ => ⟨keepTop stack oldStackSize, lhsPrec, pos, cache, err, errs

def keepPrevError (s : ParserState) (oldStackSize : Nat) (oldStopPos : String.Pos) (oldError : Option Error) (oldLhsPrec : Nat) : ParserState :=
match s with
| ⟨stack, _, _, cache, _⟩ => ⟨stack.shrink oldStackSize, oldLhsPrec, oldStopPos, cache, oldError⟩
| ⟨stack, _, _, cache, _, errs⟩ =>
⟨stack.shrink oldStackSize, oldLhsPrec, oldStopPos, cache, oldError, errs⟩

def mergeErrors (s : ParserState) (oldStackSize : Nat) (oldError : Error) : ParserState :=
match s with
| ⟨stack, lhsPrec, pos, cache, some err⟩ =>
⟨stack.shrink oldStackSize, lhsPrec, pos, cache, if oldError == err then some err else some (oldError.merge err)⟩
| ⟨stack, lhsPrec, pos, cache, some err, errs⟩ =>
let newError := if oldError == err then err else oldError.merge err
⟨stack.shrink oldStackSize, lhsPrec, pos, cache, some newError, errs⟩
| other => other

def keepLatest (s : ParserState) (startStackSize : Nat) : ParserState :=
match s with
| ⟨stack, lhsPrec, pos, cache, _⟩ => ⟨keepTop stack startStackSize, lhsPrec, pos, cache, none⟩
| ⟨stack, lhsPrec, pos, cache, _, errs⟩ =>
⟨keepTop stack startStackSize, lhsPrec, pos, cache, none, errs⟩

def replaceLongest (s : ParserState) (startStackSize : Nat) : ParserState :=
s.keepLatest startStackSize
Expand Down
34 changes: 25 additions & 9 deletions src/Lean/Parser/Command.lean
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@ namespace Parser

namespace Command

/-- Skip input until the next character that satisfies the predicate, then skip whitespace -/
private def recoverUntil (pred : Char → Bool) : Parser where
fn :=
andthenFn
(takeUntilFn pred)
(takeWhileFn Char.isWhitespace)

/-- Skip input until the next whitespace character (used for error recovery for certain tokens) -/
private def recoverUntilWs : Parser := recoverUntil Char.isWhitespace

/-- Skip input until the next whitespace character (used for error recovery for certain tokens) -/
david-christiansen marked this conversation as resolved.
Show resolved Hide resolved
private def recoverUntilWsOrDelim : Parser := recoverUntil fun c =>
c.isWhitespace || c == '(' || c == ')' || c == ':' || c == '{' || c == '}' || c == '|'

/--
Syntax quotation for (sequences of) commands.
The identical syntax for term quotations takes priority,
Expand Down Expand Up @@ -67,7 +81,7 @@ def declModifiers (inline : Bool) := leading_parser
optional («partial» <|> «nonrec»)
/-- `declId` matches `foo` or `foo.{u,v}`: an identifier possibly followed by a list of universe names -/
def declId := leading_parser
ident >> optional (".{" >> sepBy1 ident ", " >> "}")
ident >> optional (".{" >> sepBy1 (recover ident (recoverUntil (fun c => c.isWhitespace || c ∈ [',', '}']))) ", " >> "}")
david-christiansen marked this conversation as resolved.
Show resolved Hide resolved
/-- `declSig` matches the signature of a declaration with required type: a list of binders and then `: type` -/
def declSig := leading_parser
many (ppSpace >> (Term.binderIdent <|> Term.bracketedBinder)) >> Term.typeSpec
Expand Down Expand Up @@ -99,18 +113,18 @@ def «abbrev» := leading_parser
def optDefDeriving :=
optional (ppDedent ppLine >> atomic ("deriving " >> notSymbol "instance") >> sepBy1 ident ", ")
def «def» := leading_parser
"def " >> declId >> ppIndent optDeclSig >> declVal >> optDefDeriving
"def " >> recover declId recoverUntilWsOrDelim >> ppIndent optDeclSig >> declVal >> optDefDeriving
def «theorem» := leading_parser
"theorem " >> declId >> ppIndent declSig >> declVal
"theorem " >> recover declId recoverUntilWsOrDelim >> ppIndent declSig >> declVal
def «opaque» := leading_parser
"opaque " >> declId >> ppIndent declSig >> optional declValSimple
"opaque " >> recover declId recoverUntilWsOrDelim >> ppIndent declSig >> optional declValSimple
/- As `declSig` starts with a space, "instance" does not need a trailing space
if we put `ppSpace` in the optional fragments. -/
def «instance» := leading_parser
Term.attrKind >> "instance" >> optNamedPrio >>
optional (ppSpace >> declId) >> ppIndent declSig >> declVal
def «axiom» := leading_parser
"axiom " >> declId >> ppIndent declSig
"axiom " >> recover declId recoverUntilWsOrDelim >> ppIndent declSig
/- As `declSig` starts with a space, "example" does not need a trailing space. -/
def «example» := leading_parser
"example" >> ppIndent optDeclSig >> declVal
Expand Down Expand Up @@ -143,11 +157,11 @@ or an element `head : α` followed by a list `tail : List α`.
For more information about [inductive types](https://lean-lang.org/theorem_proving_in_lean4/inductive_types.html).
-/
def «inductive» := leading_parser
"inductive " >> declId >> ppIndent optDeclSig >> optional (symbol " :=" <|> " where") >>
"inductive " >> recover declId recoverUntilWsOrDelim >> ppIndent optDeclSig >> optional (symbol " :=" <|> " where") >>
many ctor >> optional (ppDedent ppLine >> computedFields) >> optDeriving
def classInductive := leading_parser
atomic (group (symbol "class " >> "inductive ")) >>
declId >> ppIndent optDeclSig >>
recover declId recoverUntilWsOrDelim >> ppIndent optDeclSig >>
optional (symbol " :=" <|> " where") >> many ctor >> optDeriving
def structExplicitBinder := leading_parser
atomic (declModifiers true >> "(") >>
Expand All @@ -174,7 +188,9 @@ def classTk := leading_parser
def «extends» := leading_parser
" extends " >> sepBy1 termParser ", "
def «structure» := leading_parser
(structureTk <|> classTk) >> declId >>
(structureTk <|> classTk) >>
-- Note: no error recovery here due to clashing with the `class abbrev` syntax
declId >>
ppIndent (many (ppSpace >> Term.bracketedBinder) >> optional «extends» >> Term.optType) >>
optional ((symbol " := " <|> " where ") >> optional structCtor >> structFields) >>
optDeriving
Expand All @@ -183,7 +199,7 @@ def «structure» := leading_parser
(«abbrev» <|> «def» <|> «theorem» <|> «opaque» <|> «instance» <|> «axiom» <|> «example» <|>
«inductive» <|> classInductive <|> «structure»)
@[builtin_command_parser] def «deriving» := leading_parser
"deriving " >> "instance " >> derivingClasses >> " for " >> sepBy1 ident ", "
"deriving " >> "instance " >> derivingClasses >> " for " >> sepBy1 (recover ident skip) ", "
@[builtin_command_parser] def noncomputableSection := leading_parser
"noncomputable " >> "section" >> optional (ppSpace >> ident)
@[builtin_command_parser] def «section» := leading_parser
Expand Down
2 changes: 1 addition & 1 deletion src/Lean/Parser/Extension.lean
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ def runParserCategory (env : Environment) (catName : Name) (input : String) (fil
let p := andthenFn whitespace (categoryParserFnImpl catName)
let ictx := mkInputContext input fileName
let s := p.run ictx { env, options := {} } (getTokenTable env) (mkParserState input)
if s.hasError then
if !s.allErrors.isEmpty then
Except.error (s.toErrorMsg ictx)
else if input.atEnd s.pos then
Except.ok s.stxStack.back
Expand Down
19 changes: 14 additions & 5 deletions src/Lean/Parser/Module.lean
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ structure ModuleParserState where
recovering : Bool := false
deriving Inhabited

private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Error) : Message := Id.run do
let mut pos := s.pos
private partial def mkErrorMessage (c : InputContext) (pos : String.Pos) (stk : SyntaxStack) (e : Parser.Error) : Message := Id.run do
let mut pos := pos
let mut endPos? := none
let mut e := e
unless e.unexpectedTk.isMissing do
Expand All @@ -52,14 +52,20 @@ private def mkErrorMessage (c : InputContext) (s : ParserState) (e : Parser.Erro
e := { e with unexpected }
-- if there is an unexpected token, include preceding whitespace as well as the expected token could
-- be inserted at any of these places to fix the error; see tests/lean/1971.lean
if let .original (trailing := trailing) .. := s.stxStack.back.getTailInfo then
if let some trailing := lastTrailing stk then
if trailing.stopPos == pos then
pos := trailing.startPos
{ fileName := c.fileName
pos := c.fileMap.toPosition pos
endPos := c.fileMap.toPosition <$> endPos?
keepFullRange := true
data := toString e }
where
-- Error recovery might lead to there being some "junk" on the stack
lastTrailing (s : SyntaxStack) : Option Substring :=
if s.size < 1 then none
else if let .original (trailing := trailing) .. := s.back.getTailInfo then some trailing
else lastTrailing s.pop
david-christiansen marked this conversation as resolved.
Show resolved Hide resolved

def parseHeader (inputCtx : InputContext) : IO (Syntax × ModuleParserState × MessageLog) := do
let dummyEnv ← mkEmptyEnvironment
Expand All @@ -69,7 +75,7 @@ def parseHeader (inputCtx : InputContext) : IO (Syntax × ModuleParserState × M
let stx := if s.stxStack.isEmpty then .missing else s.stxStack.back
match s.errorMsg with
| some errorMsg =>
let msg := mkErrorMessage inputCtx s errorMsg
let msg := mkErrorMessage inputCtx s.pos s.stxStack errorMsg
pure (stx, { pos := s.pos, recovering := true }, { : MessageLog }.add msg)
david-christiansen marked this conversation as resolved.
Show resolved Hide resolved
| none =>
pure (stx, { pos := s.pos }, {})
Expand Down Expand Up @@ -103,6 +109,9 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext)
let pos' := pos
let p := andthenFn whitespace topLevelCommandParserFn
let s := p.run inputCtx pmctx (getTokenTable pmctx.env) { cache := initCacheForInput inputCtx.input, pos }
-- save errors from sub-recoveries
for (rpos, rstk, recovered) in s.recoveredErrors do
messages := messages.add <| mkErrorMessage inputCtx rpos rstk recovered
david-christiansen marked this conversation as resolved.
Show resolved Hide resolved
pos := s.pos
if recovering && !s.stxStack.isEmpty && s.stxStack.back.isAntiquot then
-- top-level antiquotation during recovery is most likely remnant from unfinished quotation, ignore
Expand All @@ -121,7 +130,7 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext)
We claim a syntactically incorrect command containing no token or identifier is irrelevant for intellisense and should be ignored. -/
let ignore := s.stxStack.isEmpty || s.stxStack.back.getPos?.isNone
unless recovering && ignore do
messages := messages.add <| mkErrorMessage inputCtx s errorMsg
messages := messages.add <| mkErrorMessage inputCtx s.pos s.stxStack errorMsg
recovering := true
if ignore then
continue
Expand Down
33 changes: 19 additions & 14 deletions src/Lean/Parser/Types.lean
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ structure ParserState where
pos : String.Pos := 0
cache : ParserCache
errorMsg : Option Error := none
recoveredErrors : Array (String.Pos × SyntaxStack × Error) := #[]

namespace ParserState

Expand Down Expand Up @@ -232,16 +233,9 @@ def next (s : ParserState) (input : String) (pos : String.Pos) : ParserState :=
def next' (s : ParserState) (input : String) (pos : String.Pos) (h : ¬ input.atEnd pos) : ParserState :=
{ s with pos := input.next' pos h }

def toErrorMsg (ctx : InputContext) (s : ParserState) : String :=
match s.errorMsg with
| none => ""
| some msg =>
let pos := ctx.fileMap.toPosition s.pos
mkErrorStringWithPos ctx.fileName pos (toString msg)

def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState :=
match s with
| ⟨stack, lhsPrec, pos, cache, err⟩ =>
| ⟨stack, lhsPrec, pos, cache, err, recovered⟩ =>
if err != none && stack.size == iniStackSz then
-- If there is an error but there are no new nodes on the stack, use `missing` instead.
-- Thus we ensure the property that an syntax tree contains (at least) one `missing` node
Expand All @@ -251,25 +245,28 @@ def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserSta
-- `node k1 p1 <|> ... <|> node kn pn` when all parsers fail. With the code below we
-- instead return a less misleading single `missing` node without randomly selecting any `ki`.
let stack := stack.push Syntax.missing
⟨stack, lhsPrec, pos, cache, err⟩
⟨stack, lhsPrec, pos, cache, err, recovered
else
let newNode := Syntax.node SourceInfo.none k (stack.extract iniStackSz stack.size)
let stack := stack.shrink iniStackSz
let stack := stack.push newNode
⟨stack, lhsPrec, pos, cache, err⟩
⟨stack, lhsPrec, pos, cache, err, recovered

def mkTrailingNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState :=
match s with
| ⟨stack, lhsPrec, pos, cache, err⟩ =>
| ⟨stack, lhsPrec, pos, cache, err, errs⟩ =>
let newNode := Syntax.node SourceInfo.none k (stack.extract (iniStackSz - 1) stack.size)
let stack := stack.shrink (iniStackSz - 1)
let stack := stack.push newNode
⟨stack, lhsPrec, pos, cache, err⟩
⟨stack, lhsPrec, pos, cache, err, errs⟩

def allErrors (s : ParserState) : Array (String.Pos × SyntaxStack × Error) :=
s.recoveredErrors ++ (s.errorMsg.map (fun e => #[(s.pos, s.stxStack, e)])).getD #[]

@[inline]
def setError (s : ParserState) (e : Error) : ParserState :=
match s with
| ⟨stack, lhsPrec, pos, cache, _⟩ => ⟨stack, lhsPrec, pos, cache, some e⟩
| ⟨stack, lhsPrec, pos, cache, _, errs⟩ => ⟨stack, lhsPrec, pos, cache, some e, errs

def mkError (s : ParserState) (msg : String) : ParserState :=
s.setError { expected := [msg] } |>.pushSyntax .missing
Expand Down Expand Up @@ -313,6 +310,14 @@ def mkUnexpectedTokenError (s : ParserState) (msg : String) (iniPos : String.Pos
def mkUnexpectedErrorAt (s : ParserState) (msg : String) (pos : String.Pos) : ParserState :=
s.setPos pos |>.mkUnexpectedError msg

def toErrorMsg (ctx : InputContext) (s : ParserState) : String := Id.run do
let mut errStr := ""
for (pos, _stk, err) in s.allErrors do
if errStr != "" then errStr := errStr ++ "\n"
let pos := ctx.fileMap.toPosition pos
errStr := errStr ++ mkErrorStringWithPos ctx.fileName pos (toString err)
errStr

end ParserState

def ParserFn := ParserContext → ParserState → ParserState
Expand Down Expand Up @@ -415,7 +420,7 @@ def withCacheFn (parserName : Name) (p : ParserFn) : ParserFn := fun c s => Id.r
if let some r := s.cache.parserCache.find? key then
-- TODO: turn this into a proper trace once we have these in the parser
--dbg_trace "parser cache hit: {parserName}:{s.pos} -> {r.stx}"
return ⟨s.stxStack.push r.stx, r.lhsPrec, r.newPos, s.cache, r.errorMsg⟩
return ⟨s.stxStack.push r.stx, r.lhsPrec, r.newPos, s.cache, r.errorMsg, s.recoveredErrors
let initStackSz := s.stxStack.raw.size
let s := withStackDrop initStackSz p c { s with lhsPrec := 0, errorMsg := none }
if s.stxStack.raw.size != initStackSz + 1 then
Expand Down
Loading
Loading