Skip to content

Commit

Permalink
switch to checking for exactly one \n to account for \r\n, add dedent…
Browse files Browse the repository at this point in the history
… example
  • Loading branch information
kmill committed Nov 7, 2023
1 parent a2de83e commit d62eb06
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 18 deletions.
8 changes: 4 additions & 4 deletions doc/lexical_structure.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ special characters:
[Unicode table](https://unicode-table.com/en/) so "\xA9 Copyright 2021" is "© Copyright 2021".
- `\uHHHH` puts the character represented by the 4 digit hexadecimal into the string, so the following
string "\u65e5\u672c" will become "日本" which means "Japan".
- `\` newline whitespace* is a "gap" that is equivalent to the empty string, useful for letting a
string literal span across multiple lines. To reduce confusion regarding the interpretation,
it is an error for the whitespace after the newline to include any newlines itself.
- `\whitespace+`` is a "gap" that is equivalent to the empty string, useful for letting a
string literal span across multiple lines. To reduce confusion regarding its interpretation,
the parser raises an error if the whitespace does not contain exactly one newline.

So the complete syntax is:

Expand All @@ -91,7 +91,7 @@ So the complete syntax is:
string_char : [^"\\]
char_escape : "\" ("\" | '"' | "'" | "n" | "t" | "x" hex_char{2} | "u" hex_char{4})
hex_char : [0-9a-fA-F]
string_gap : "\" newline whitespace*
string_gap : "\" whitespace+
```

Char Literals
Expand Down
7 changes: 4 additions & 3 deletions src/Init/Meta.lean
Original file line number Diff line number Diff line change
Expand Up @@ -773,10 +773,11 @@ def decodeQuotedChar (s : String) (i : String.Pos) : Option (Char × String.Pos)
else
none

/-- Decode a valid string gap. Note that this function is more permissive than the parser;
this does not require that the gap not include any additional newlines. -/
/-- Decode a valid string gap after the `\`.
Note that this function is more permissive than the parser;
this function does not put any conditions on newlines within the whitespace. -/
def decodeStringGap (s : String) (i : String.Pos) : Option String.Pos := do
guard <| s.get i == '\n'
guard <| (s.get i).isWhitespace
s.nextWhile Char.isWhitespace (s.next i)

partial def decodeStrLitAux (s : String) (i : String.Pos) (acc : String) : Option String := do
Expand Down
29 changes: 18 additions & 11 deletions src/Lean/Parser/Basic.lean
Original file line number Diff line number Diff line change
Expand Up @@ -564,27 +564,34 @@ def hexDigitFn : ParserFn := fun c s =>
if curr.isDigit || ('a' <= curr && curr <= 'f') || ('A' <= curr && curr <= 'F') then s.setPos i
else s.mkUnexpectedError "invalid hexadecimal numeral"

/-- Parses the whitespace after the newline for a string gap,
raising an error if an additional newline appears in the whitespace. -/
partial def stringGap : ParserFn := fun c s =>
/-- Parses the whitespace after the `\` when there is a string gap.
Raises an error if the whitespace does not contain exactly one newline character.
(Note: this logic makes string gaps insensitive to `\r\n` vs `\n` line endings.) -/
partial def stringGap (afterNewline : Bool) : ParserFn := fun c s =>
let i := s.pos
if h : c.input.atEnd i then s
if h : c.input.atEnd i then s -- let strLitFnAux handle the EOI error if !afterNewline
else
let curr := c.input.get' i h
if curr == '\n' then
s.mkUnexpectedError "unexpected additional newline in string gap"
if afterNewline then
-- Having more than one newline in a string gap is visually confusing
s.mkUnexpectedError "unexpected additional newline in string gap"
else
stringGap true c (s.next' c.input i h)
else if curr == '\t' then
-- Mimick the whitespace parser
s.mkUnexpectedError "tabs are not allowed; please configure your editor to expand them"
else if curr.isWhitespace then
stringGap c (s.next' c.input i h)
else
stringGap afterNewline c (s.next' c.input i h)
else if afterNewline then
s
else
s.mkUnexpectedError "expecting newline in string gap"

/-- Parse a string quotation after a `\`.
- `isQuotable` determines which characters are valid escapes
- `inString` enables features that are only valid within strings,
in particular `"\" newline whitespace*` gaps. -/
in particular `"\" whitespace+` gaps. -/
def quotedCharCoreFn (isQuotable : Char → Bool) (inString : Bool) : ParserFn := fun c s =>
let input := c.input
let i := s.pos
Expand All @@ -597,8 +604,8 @@ def quotedCharCoreFn (isQuotable : Char → Bool) (inString : Bool) : ParserFn :
andthenFn hexDigitFn hexDigitFn c (s.next' input i h)
else if curr == 'u' then
andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) c (s.next' input i h)
else if inString && curr == '\n' then
stringGap c (s.next' input i h)
else if inString && curr.isWhitespace then
stringGap false c s
else
s.mkUnexpectedError "invalid escape sequence"

Expand All @@ -608,7 +615,7 @@ def isQuotableCharDefault (c : Char) : Bool :=
def quotedCharFn : ParserFn :=
quotedCharCoreFn isQuotableCharDefault false

/-- Like `quotedCharFn` but enables escapes that are only valid inside strings. In particular, gaps ("\" newline whitespace*). -/
/-- Like `quotedCharFn` but enables escapes that are only valid inside strings. In particular, gaps ("\" whitespace+). -/
def quotedStringFn : ParserFn :=
quotedCharCoreFn isQuotableCharDefault true

Expand Down
24 changes: 24 additions & 0 deletions tests/lean/string_gaps.lean
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import Lean

#eval "a\
b"
#eval "a\
Expand Down Expand Up @@ -28,3 +30,25 @@ b"
#eval s!"a\
{"b"}\
"

-- Scala-style stripMargin
def String.dedent (s : String) : Option String :=
let parts := s.split (· == '\n') |>.map String.trimLeft
match parts with
| [] => ""
| [p] => p
| p₀ :: parts =>
if !parts.all (·.startsWith "|") then
none
else
p₀ ++ "\n" ++ String.intercalate "\n" (parts.map fun p => p.drop 1)

elab "d!" s:str : term => do
let some s := s.raw.isStrLit? | Lean.Elab.throwIllFormedSyntax
let some s := String.dedent s | Lean.Elab.throwIllFormedSyntax
pure $ Lean.mkStrLit s

#eval d!"this is \
line 1
| line 2, indented
|line 3"
1 change: 1 addition & 0 deletions tests/lean/string_gaps.lean.expected.out
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
"ab"
"ab"
"ab"
"this is line 1\n line 2, indented\nline 3"
1 change: 1 addition & 0 deletions tests/lean/string_gaps_err_no_newline.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#eval "a\ b"
1 change: 1 addition & 0 deletions tests/lean/string_gaps_err_no_newline.lean.expected.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
string_gaps_err_no_newline.lean:1:10: error: expecting newline in string gap

0 comments on commit d62eb06

Please sign in to comment.