Skip to content

Commit

Permalink
Major refactoring of string handling
Browse files Browse the repository at this point in the history
- Support interpolated strings
- Multiline strings are lifted to the top level of the program.
  By default, F# includes the indentation of the current scope as
  part of the string, which made it very difficult to format
  multiline strings because there was no guarantee what the
  actual result would end up being.

  We also normalize the indentation of multiline strings the same way
  C# raw string literals do. This may not be 100% accurate yet,
  but it is an improvement and it can be refined in the future.
  • Loading branch information
vipentti committed Dec 8, 2023
1 parent a3b6831 commit 7ad83c9
Show file tree
Hide file tree
Showing 33 changed files with 1,617 additions and 636 deletions.
3 changes: 1 addition & 2 deletions editors/vscode/visp-fs/syntaxes/visp-fs.tmLanguage.json
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,6 @@
{ "include": "#hashset" },
{ "include": "#sexp" },
{ "include": "#read-metadata" },
{ "include": "#string" },
{
"match": "(?<=\\()(.+?)(?=\\s|\\))",
"captures": {
Expand All @@ -328,7 +327,7 @@
"name": "comment.line.shebang.visp"
},
"tripleQuotedString": {
"begin": "(?<!\\\\)(\"\"\")",
"begin": "(\"\"\")",
"beginCaptures": {
"1": { "name": "punctuation.definition.tripleQuotedString.begin.visp" }
},
Expand Down
12 changes: 12 additions & 0 deletions src/Visp.Common/PooledStringBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,18 @@ public static class PooledStringBuilder

public static StringBuilder Get() => s_pool.Get();

/// <summary>
/// Hands the given <see cref="StringBuilder"/> back to the shared pool.
/// </summary>
/// <param name="s">The builder to return.</param>
public static void ReturnToPool(this StringBuilder s) => s_pool.Return(s);

/// <summary>
/// Materializes the builder's current contents as a string and then empties
/// the builder. The builder is only cleared here; it is not returned to the pool.
/// </summary>
/// <param name="s">The builder to read and clear.</param>
/// <returns>The text the builder held before it was cleared.</returns>
public static string ToStringAndClear(this StringBuilder s)
{
    // Capture the text first; Clear() would otherwise discard it.
    string text = s.ToString();
    s.Clear();
    return text;
}

public static string ToStringAndReturn(this StringBuilder s)
{
var ret = s.ToString();
Expand Down
4 changes: 3 additions & 1 deletion src/Visp.Compiler/CoreParser.fs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ module CoreParser =
Transforms.Helpers.runTransforms tfs expr

let transformFile file =
Transforms.Helpers.transformParsedFile expandExpr file
file
|> Transforms.StringLifter.liftLiteralStrings
|> Transforms.Helpers.transformParsedFile expandExpr

let writeParsedFile file outputStream (template: string) =
let fileWriter = Writer.CustomFileWriter(outputStream, 2, "//")
Expand Down
108 changes: 90 additions & 18 deletions src/Visp.Compiler/Lexer.fsl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ let unexpected_char mode (lexbuf: LexBuffer<_>) =
let file = lexbuf.EndPos.FileName
failwith (sprintf "%s SyntaxError: Unexpected char: '%s' %s:line %d Column: %d" mode (lexeme lexbuf) file (lexbuf.StartPos.Line+1) lexbuf.StartPos.Column)

let startString (lexbuf: LexBuffer<_>) =
let startString nesting (lexbuf: LexBuffer<_>) =
let buf = StringBuffer()
let m = lexbuf.LexemeRange
let startp = lexbuf.StartPos
Expand All @@ -35,13 +35,35 @@ let startString (lexbuf: LexBuffer<_>) =
// Adjust the start-of-token mark back to the true start of the token
lexbuf.StartPos <- startp

let isPart = context.HasFlag(LexerStringFinisherContext.InterpolatedPart)
let isTripleQuote = context.HasFlag(LexerStringFinisherContext.TripleQuote)

let synStringKind =
if isTripleQuote then
if kind.IsInterpolated then
Syntax.SynStringKind.InterpolatedTripleQuote nesting
else
Syntax.SynStringKind.TripleQuote
else
if kind.IsInterpolated then
Syntax.SynStringKind.Interpolated nesting
else
Syntax.SynStringKind.Regular

// if kind.IsInterpolated then
// let s = buf.ToString()
// // if kind.IsInterpolatedFirst then
// // if isPart then
// // INTERP_STRING_BEGIN_PART (s, synStringKind, cont)
// // else
// // INTERP_STRING_BEGIN_END (s, synStringKind, cont)
// // else
// // if isPart then
// // INTERP_STRING_PART (s, cont)
// // else
// // INTERP_STRING_END (s, cont)
// STRING(buf.ToString(), Syntax.SynStringKind.Interpolated, cont)
// else
STRING(buf.ToString(), synStringKind, cont)
)

Expand All @@ -58,6 +80,9 @@ let letter = ['A'-'Z'] | ['a'-'z']
let SymbolicStartCharacters = ['%' '+' '-' '!' '?' '_' '-' '*' '=' '<' '>' '&' '^' ]
let SymbolicExtra = [ '.' '\'' '/' ]

// https://github.com/dotnet/fsharp/blob/ade794633b4bb495da85c92ee780068b9c6f7344/src/Compiler/lex.fsl#L300
// A two-character escape sequence: a backslash followed by exactly one of
// \ " ' a f v n t b r. The singleQuoteString rule uses this to append the whole
// pair to the string buffer as a single lexeme.
// NOTE(review): presumably this is what keeps an escaped quote (\") from matching
// the closing-quote rule - confirm against fslex longest-match behavior.
let escape_char = ('\\' ( '\\' | "\"" | '\'' | 'a' | 'f' | 'v' | 'n' | 't' | 'b' | 'r'))

let ident_start_char = letter | SymbolicStartCharacters
let ident_char = ( ident_start_char | digit | SymbolicExtra )
let ident = ident_start_char ident_char*
Expand Down Expand Up @@ -150,20 +175,41 @@ rule token (args: LexArgs) (skip: bool) = parse
| "false" { FALSE }
| "nil" { NIL }
| '"'
{ let buf, fin, m = startString lexbuf
{ let buf, fin, m = startString 0 lexbuf
// TODO: validate
singleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf
}
| "#r" '"' '"' '"'
{ let buf, fin, m = startString lexbuf
{ let buf, fin, m = startString 0 lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf
}
| '$' '"' '"' '"'
{ let buf, fin, m = startString 1 lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
}

| ('$'+) '"' '"' '"'
{
let nest = lexeme lexbuf |> Seq.takeWhile (fun c -> c = '$') |> Seq.length
let buf, fin, m = startString nest lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
}

| '"' '"' '"'
{ let buf, fin, m = startString lexbuf
{ let buf, fin, m = startString 0 lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf
}

| '$' '"'
{ let buf, fin, m = startString 1 lexbuf
// TODO: validate
singleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
}

| '*' { OP_MULT }
| '/' { OP_DIV }
| ':' ident_char+ { KEYWORD (lexeme lexbuf) }
Expand Down Expand Up @@ -223,19 +269,41 @@ and tokenStream (args: LexArgs) (skip: bool) = parse
else
INT32 (int (text.TrimEnd('l'))) }
| decimal { DECIMAL (decimal (lexeme lexbuf)) }

| '$' '"'
{ let buf, fin, m = startString 1 lexbuf
// TODO: validate
singleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
}

| '"'
{ let buf, fin, m = startString lexbuf
{ let buf, fin, m = startString 0 lexbuf
// TODO: validate
singleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf
}

| '$' '"' '"' '"'
{ let buf, fin, m = startString 1 lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
}

| ('$'+) '"' '"' '"'
{
let nest = lexeme lexbuf |> Seq.takeWhile (fun c -> c = '$') |> Seq.length
let buf, fin, m = startString nest lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
}

| '"' '"' '"'
{ let buf, fin, m = startString lexbuf
{ let buf, fin, m = startString 0 lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf
}

| "#r" '"' '"' '"'
{ let buf, fin, m = startString lexbuf
{ let buf, fin, m = startString 0 lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf
}
Expand Down Expand Up @@ -267,8 +335,7 @@ and tokenStream (args: LexArgs) (skip: bool) = parse
and singleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
| '"'
{ let (buf, fin, _m, kind, args) = sargs
//let cont = LexCont.Token(args.ifdefStack, args.stringNest)
let cont = LexCont.Token(())
let cont = LexCont.Token(args.stringNest)
fin.Finish buf kind (LexerStringFinisherContext()) cont
}
| newline
Expand All @@ -277,19 +344,21 @@ and singleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
addUnicodeString buf (lexeme lexbuf)
singleQuoteString sargs skip lexbuf
}
| _
| escape_char
{
let (buf, _fin, m, kind, args) = sargs
addUnicodeString buf (lexeme lexbuf)
singleQuoteString sargs skip lexbuf
}
| _
{ let (buf, _fin, m, kind, args) = sargs
addUnicodeString buf (lexeme lexbuf)
// if not skip then
// STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, kind, args.interpolationDelimiterLength, m))
// else
singleQuoteString sargs skip lexbuf }

and tripleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
| '"' '"' '"'
{ let (buf, fin, _m, kind, args) = sargs
//let cont = LexCont.Token(args.ifdefStack, args.stringNest)
let cont = LexCont.Token(())
let cont = LexCont.Token(args.stringNest)
fin.Finish buf kind (LexerStringFinisherContext.TripleQuote) cont
}
| newline
Expand All @@ -298,10 +367,13 @@ and tripleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
addUnicodeString buf (lexeme lexbuf)
tripleQuoteString sargs skip lexbuf
}
// | escape_char
// {
// let (buf, _fin, m, kind, args) = sargs
// addUnicodeString buf (lexeme lexbuf)
// singleQuoteString sargs skip lexbuf
// }
| _
{ let (buf, _fin, m, kind, args) = sargs
addUnicodeString buf (lexeme lexbuf)
// if not skip then
// STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, kind, args.interpolationDelimiterLength, m))
// else
tripleQuoteString sargs skip lexbuf }
61 changes: 61 additions & 0 deletions src/Visp.Compiler/ParseUtils.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2023 Ville Penttinen
// Distributed under the MIT License.
// https://github.com/vipentti/visp-fs/blob/main/LICENSE.md

module Visp.Compiler.ParseUtils

open FSharp.Text.Lexing
open Visp.Compiler
open Visp.Compiler.SyntaxParser
open Visp.Compiler.LexHelpers
open Visp.Compiler.Syntax.Macros

/// Builds a tokenizer over the supplied lexer arguments.
///
/// The returned function reads one token from the given lex buffer, using the
/// lexer rule that matches the current `args.mode`, and then updates the
/// mode / nesting state on `args` according to the token that was read before
/// handing the token back to the caller.
let mkTokenizerWithArgs args =
    fun buf ->
        // Pick the lexer entry point for whichever mode is active right now.
        let tok =
            match args.mode with
            | LexMode.TokenStream _ -> Lexer.tokenStream args false buf
            | LexMode.Default -> Lexer.token args false buf

        // Debugging aid:
        // eprintfn "%A %A %i" tok args.mode args.depth

        // Update lexer state so that the *next* token is read in the right mode.
        match tok with
        | QUOTE_SYM -> args.mode <- LexMode.TokenStream TokenStreamMode.QuoteSym
        | QUOTE_KW -> args.Nested(LexMode.TokenStream TokenStreamMode.Quote)
        | QUASIQUOTE_KW -> args.Nested(LexMode.TokenStream TokenStreamMode.Quasiquote)
        | SYNTAX_MACRO -> args.Nested(LexMode.TokenStream TokenStreamMode.SyntaxMacroStart)
        | SYMBOL name when args.mode.IsSyntaxMacroStart ->
            // The symbol right after a syntax-macro start is registered as a macro
            // name, and the mode is switched to Macro.
            args.mode <- LexMode.TokenStream TokenStreamMode.Macro
            macroTable.AddMacroName name
            ()
        | MACRO_NAME _ -> args.Nested(LexMode.TokenStream TokenStreamMode.Macro)
        // Opening delimiters.
        | HASH_PAREN
        | HASH_BRACKET
        | HASH_BRACE
        | LPAREN
        | LBRACE
        | LBRACKET -> args.NestIfNotDefault()
        // Closing delimiters.
        | RPAREN
        | RBRACE
        | RBRACKET -> args.UnnestIfNotDefault()
        | _ -> ()

        tok

/// Builds a tokenizer that starts from the default lexer arguments.
let mkTokenizer () =
    mkDefaultLextArgs () |> mkTokenizerWithArgs


/// Parses `str` with the `raw_expr` parser entry point.
///
/// `fileName` is used to initialise the lex buffer's position. If parsing
/// raises `ParseHelpers.SyntaxError`, the error is passed to
/// `outputSyntaxError` and then re-raised.
let parseStringToExpr fileName str =
    let buffer = LexBuffer<_>.FromString str
    buffer.EndPos <- Position.FirstLine fileName

    let nextToken = mkTokenizer ()

    try
        raw_expr nextToken buffer
    with
    | :? ParseHelpers.SyntaxError as err ->
        outputSyntaxError err
        reraise ()
Loading

0 comments on commit 7ad83c9

Please sign in to comment.