diff --git a/doc/lexical_structure.md b/doc/lexical_structure.md index bccf63f7ceac..c32241314887 100644 --- a/doc/lexical_structure.md +++ b/doc/lexical_structure.md @@ -79,9 +79,9 @@ special characters: [Unicode table](https://unicode-table.com/en/) so "\xA9 Copyright 2021" is "© Copyright 2021". - `\uHHHH` puts the character represented by the 4 digit hexadecimal into the string, so the following string "\u65e5\u672c" will become "日本" which means "Japan". -- `\` newline whitespace* is a "gap" that is equivalent to the empty string, useful for letting a -string literal span across multiple lines. To reduce confusion regarding the interpretation, -it is an error for the whitespace after the newline to include any newlines itself. +- `\whitespace+`` is a "gap" that is equivalent to the empty string, useful for letting a +string literal span across multiple lines. To reduce confusion regarding its interpretation, +the parser raises an error if the whitespace does not contain exactly one newline. So the complete syntax is: @@ -91,7 +91,7 @@ So the complete syntax is: string_char : [^"\\] char_escape : "\" ("\" | '"' | "'" | "n" | "t" | "x" hex_char{2} | "u" hex_char{4}) hex_char : [0-9a-fA-F] - string_gap : "\" newline whitespace* + string_gap : "\" whitespace+ ``` Char Literals diff --git a/src/Init/Meta.lean b/src/Init/Meta.lean index 27edf085643f..158a5c9d87c4 100644 --- a/src/Init/Meta.lean +++ b/src/Init/Meta.lean @@ -773,10 +773,11 @@ def decodeQuotedChar (s : String) (i : String.Pos) : Option (Char × String.Pos) else none -/-- Decode a valid string gap. Note that this function is more permissive than the parser; -this does not require that the gap not include any additional newlines. -/ +/-- Decode a valid string gap after the `\`. +Note that this function is more permissive than the parser; +this function does not put any conditions on newlines within the whitespace. -/ def decodeStringGap (s : String) (i : String.Pos) : Option String.Pos := do - guard <| s.get i == '\n' + guard <| (s.get i).isWhitespace s.nextWhile Char.isWhitespace (s.next i) partial def decodeStrLitAux (s : String) (i : String.Pos) (acc : String) : Option String := do diff --git a/src/Lean/Parser/Basic.lean b/src/Lean/Parser/Basic.lean index 377f0369a0dd..61d775ab89a8 100644 --- a/src/Lean/Parser/Basic.lean +++ b/src/Lean/Parser/Basic.lean @@ -564,27 +564,34 @@ def hexDigitFn : ParserFn := fun c s => if curr.isDigit || ('a' <= curr && curr <= 'f') || ('A' <= curr && curr <= 'F') then s.setPos i else s.mkUnexpectedError "invalid hexadecimal numeral" -/-- Parses the whitespace after the newline for a string gap, -raising an error if an additional newline appears in the whitespace. -/ -partial def stringGap : ParserFn := fun c s => +/-- Parses the whitespace after the `\` when there is a string gap. +Raises an error if the whitespace does not contain exactly one newline character. +(Note: this logic makes string gaps insensitive to `\r\n` vs `\n` line endings.) -/ +partial def stringGap (afterNewline : Bool) : ParserFn := fun c s => let i := s.pos - if h : c.input.atEnd i then s + if h : c.input.atEnd i then s -- let strLitFnAux handle the EOI error if !afterNewline else let curr := c.input.get' i h if curr == '\n' then - s.mkUnexpectedError "unexpected additional newline in string gap" + if afterNewline then + -- Having more than one newline in a string gap is visually confusing + s.mkUnexpectedError "unexpected additional newline in string gap" + else + stringGap true c (s.next' c.input i h) else if curr == '\t' then -- Mimick the whitespace parser s.mkUnexpectedError "tabs are not allowed; please configure your editor to expand them" else if curr.isWhitespace then - stringGap c (s.next' c.input i h) - else + stringGap afterNewline c (s.next' c.input i h) + else if afterNewline then s + else + s.mkUnexpectedError "expecting newline in string gap" /-- Parse a string quotation after a `\`. - `isQuotable` determines which characters are valid escapes - `inString` enables features that are only valid within strings, - in particular `"\" newline whitespace*` gaps. -/ + in particular `"\" whitespace+` gaps. -/ def quotedCharCoreFn (isQuotable : Char → Bool) (inString : Bool) : ParserFn := fun c s => let input := c.input let i := s.pos @@ -597,8 +604,8 @@ def quotedCharCoreFn (isQuotable : Char → Bool) (inString : Bool) : ParserFn : andthenFn hexDigitFn hexDigitFn c (s.next' input i h) else if curr == 'u' then andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) c (s.next' input i h) - else if inString && curr == '\n' then - stringGap c (s.next' input i h) + else if inString && curr.isWhitespace then + stringGap false c s else s.mkUnexpectedError "invalid escape sequence" @@ -608,7 +615,7 @@ def isQuotableCharDefault (c : Char) : Bool := def quotedCharFn : ParserFn := quotedCharCoreFn isQuotableCharDefault false -/-- Like `quotedCharFn` but enables escapes that are only valid inside strings. In particular, gaps ("\" newline whitespace*). -/ +/-- Like `quotedCharFn` but enables escapes that are only valid inside strings. In particular, gaps ("\" whitespace+). -/ def quotedStringFn : ParserFn := quotedCharCoreFn isQuotableCharDefault true diff --git a/tests/lean/string_gaps.lean b/tests/lean/string_gaps.lean index 5f77df559b3a..03e70c72b874 100644 --- a/tests/lean/string_gaps.lean +++ b/tests/lean/string_gaps.lean @@ -1,3 +1,5 @@ +import Lean + #eval "a\ b" #eval "a\ @@ -28,3 +30,25 @@ b" #eval s!"a\ {"b"}\ " + +-- Scala-style stripMargin +def String.dedent (s : String) : Option String := + let parts := s.split (· == '\n') |>.map String.trimLeft + match parts with + | [] => "" + | [p] => p + | p₀ :: parts => + if !parts.all (·.startsWith "|") then + none + else + p₀ ++ "\n" ++ String.intercalate "\n" (parts.map fun p => p.drop 1) + +elab "d!" s:str : term => do + let some s := s.raw.isStrLit? | Lean.Elab.throwIllFormedSyntax + let some s := String.dedent s | Lean.Elab.throwIllFormedSyntax + pure $ Lean.mkStrLit s + +#eval d!"this is \ + line 1 + | line 2, indented + |line 3" diff --git a/tests/lean/string_gaps.lean.expected.out b/tests/lean/string_gaps.lean.expected.out index efc7f4185258..632600b50750 100644 --- a/tests/lean/string_gaps.lean.expected.out +++ b/tests/lean/string_gaps.lean.expected.out @@ -11,3 +11,4 @@ "ab" "ab" "ab" +"this is line 1\n line 2, indented\nline 3" diff --git a/tests/lean/string_gaps_err_no_newline.lean b/tests/lean/string_gaps_err_no_newline.lean new file mode 100644 index 000000000000..6d574fd9cd6f --- /dev/null +++ b/tests/lean/string_gaps_err_no_newline.lean @@ -0,0 +1 @@ +#eval "a\ b" diff --git a/tests/lean/string_gaps_err_no_newline.lean.expected.out b/tests/lean/string_gaps_err_no_newline.lean.expected.out new file mode 100644 index 000000000000..2287c5c2997b --- /dev/null +++ b/tests/lean/string_gaps_err_no_newline.lean.expected.out @@ -0,0 +1 @@ +string_gaps_err_no_newline.lean:1:10: error: expecting newline in string gap