
Major refactoring of string handling #11

Merged
merged 1 commit on Dec 8, 2023
3 changes: 1 addition & 2 deletions editors/vscode/visp-fs/syntaxes/visp-fs.tmLanguage.json
@@ -306,7 +306,6 @@
{ "include": "#hashset" },
{ "include": "#sexp" },
{ "include": "#read-metadata" },
{ "include": "#string" },
{
"match": "(?<=\\()(.+?)(?=\\s|\\))",
"captures": {
@@ -328,7 +327,7 @@
"name": "comment.line.shebang.visp"
},
"tripleQuotedString": {
"begin": "(?<!\\\\)(\"\"\")",
"begin": "(\"\"\")",
"beginCaptures": {
"1": { "name": "punctuation.definition.tripleQuotedString.begin.visp" }
},
12 changes: 12 additions & 0 deletions src/Visp.Common/PooledStringBuilder.cs
@@ -15,6 +15,18 @@ public static class PooledStringBuilder

public static StringBuilder Get() => s_pool.Get();

public static void ReturnToPool(this StringBuilder s)
{
s_pool.Return(s);
}

public static string ToStringAndClear(this StringBuilder s)
{
var ret = s.ToString();
s.Clear();
return ret;
}

public static string ToStringAndReturn(this StringBuilder s)
{
var ret = s.ToString();
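For orientation, a minimal F# usage sketch of the new pooled-builder helpers; the call site and the `Visp.Common` open are assumptions for illustration, not part of this diff. As the names suggest, `ToStringAndClear` materializes the text and resets the builder for reuse, while `ToStringAndReturn` materializes it and hands the builder back to the pool.

```fsharp
open Visp.Common // assumed namespace of the PooledStringBuilder static class

let formatTwice (a: string) (b: string) =
    // Rent a pooled StringBuilder instead of allocating a fresh one.
    let sb = PooledStringBuilder.Get()
    sb.Append(a).Append(' ').Append(b) |> ignore
    let first = sb.ToStringAndClear() // keep the builder, just reset its contents
    sb.Append(b).Append(' ').Append(a) |> ignore
    let second = sb.ToStringAndReturn() // builder goes back to the pool here
    first, second
```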
4 changes: 3 additions & 1 deletion src/Visp.Compiler/CoreParser.fs
@@ -28,7 +28,9 @@ module CoreParser =
Transforms.Helpers.runTransforms tfs expr

let transformFile file =
Transforms.Helpers.transformParsedFile expandExpr file
file
|> Transforms.StringLifter.liftLiteralStrings
|> Transforms.Helpers.transformParsedFile expandExpr

let writeParsedFile file outputStream (template: string) =
let fileWriter = Writer.CustomFileWriter(outputStream, 2, "//")
108 changes: 90 additions & 18 deletions src/Visp.Compiler/Lexer.fsl
@@ -26,7 +26,7 @@ let unexpected_char mode (lexbuf: LexBuffer<_>) =
let file = lexbuf.EndPos.FileName
failwith (sprintf "%s SyntaxError: Unexpected char: '%s' %s:line %d Column: %d" mode (lexeme lexbuf) file (lexbuf.StartPos.Line+1) lexbuf.StartPos.Column)

let startString (lexbuf: LexBuffer<_>) =
let startString nesting (lexbuf: LexBuffer<_>) =
let buf = StringBuffer()
let m = lexbuf.LexemeRange
let startp = lexbuf.StartPos
@@ -35,13 +35,35 @@ let startString (lexbuf: LexBuffer<_>) =
// Adjust the start-of-token mark back to the true start of the token
lexbuf.StartPos <- startp

let isPart = context.HasFlag(LexerStringFinisherContext.InterpolatedPart)
let isTripleQuote = context.HasFlag(LexerStringFinisherContext.TripleQuote)

let synStringKind =
if isTripleQuote then
if kind.IsInterpolated then
Syntax.SynStringKind.InterpolatedTripleQuote nesting
else
Syntax.SynStringKind.TripleQuote
else
if kind.IsInterpolated then
Syntax.SynStringKind.Interpolated nesting
else
Syntax.SynStringKind.Regular

// if kind.IsInterpolated then
// let s = buf.ToString()
// // if kind.IsInterpolatedFirst then
// // if isPart then
// // INTERP_STRING_BEGIN_PART (s, synStringKind, cont)
// // else
// // INTERP_STRING_BEGIN_END (s, synStringKind, cont)
// // else
// // if isPart then
// // INTERP_STRING_PART (s, cont)
// // else
// // INTERP_STRING_END (s, cont)
// STRING(buf.ToString(), Syntax.SynStringKind.Interpolated, cont)
// else
STRING(buf.ToString(), synStringKind, cont)
)

Expand All @@ -58,6 +80,9 @@ let letter = ['A'-'Z'] | ['a'-'z']
let SymbolicStartCharacters = ['%' '+' '-' '!' '?' '_' '-' '*' '=' '<' '>' '&' '^' ]
let SymbolicExtra = [ '.' '\'' '/' ]

// https://github.com/dotnet/fsharp/blob/ade794633b4bb495da85c92ee780068b9c6f7344/src/Compiler/lex.fsl#L300
let escape_char = ('\\' ( '\\' | "\"" | '\'' | 'a' | 'f' | 'v' | 'n' | 't' | 'b' | 'r'))

let ident_start_char = letter | SymbolicStartCharacters
let ident_char = ( ident_start_char | digit | SymbolicExtra )
let ident = ident_start_char ident_char*
@@ -150,20 +175,41 @@ rule token (args: LexArgs) (skip: bool) = parse
| "false" { FALSE }
| "nil" { NIL }
| '"'
{ let buf, fin, m = startString lexbuf
{ let buf, fin, m = startString 0 lexbuf
// TODO: validate
singleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf
}
| "#r" '"' '"' '"'
{ let buf, fin, m = startString lexbuf
{ let buf, fin, m = startString 0 lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf
}
| '$' '"' '"' '"'
{ let buf, fin, m = startString 1 lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
}

| ('$'+) '"' '"' '"'
{
let nest = lexeme lexbuf |> Seq.takeWhile (fun c -> c = '$') |> Seq.length
let buf, fin, m = startString nest lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
}

| '"' '"' '"'
{ let buf, fin, m = startString lexbuf
{ let buf, fin, m = startString 0 lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf
}

| '$' '"'
{ let buf, fin, m = startString 1 lexbuf
// TODO: validate
singleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
}

| '*' { OP_MULT }
| '/' { OP_DIV }
| ':' ident_char+ { KEYWORD (lexeme lexbuf) }
@@ -223,19 +269,41 @@ and tokenStream (args: LexArgs) (skip: bool) = parse
else
INT32 (int (text.TrimEnd('l'))) }
| decimal { DECIMAL (decimal (lexeme lexbuf)) }

| '$' '"'
{ let buf, fin, m = startString 1 lexbuf
// TODO: validate
singleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
}

| '"'
{ let buf, fin, m = startString lexbuf
{ let buf, fin, m = startString 0 lexbuf
// TODO: validate
singleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf
}

| '$' '"' '"' '"'
{ let buf, fin, m = startString 1 lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
}

| ('$'+) '"' '"' '"'
{
let nest = lexeme lexbuf |> Seq.takeWhile (fun c -> c = '$') |> Seq.length
let buf, fin, m = startString nest lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
}

| '"' '"' '"'
{ let buf, fin, m = startString lexbuf
{ let buf, fin, m = startString 0 lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf
}

| "#r" '"' '"' '"'
{ let buf, fin, m = startString lexbuf
{ let buf, fin, m = startString 0 lexbuf
// TODO: validate
tripleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf
}
@@ -267,8 +335,7 @@ and tokenStream (args: LexArgs) (skip: bool) = parse
and singleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
| '"'
{ let (buf, fin, _m, kind, args) = sargs
//let cont = LexCont.Token(args.ifdefStack, args.stringNest)
let cont = LexCont.Token(())
let cont = LexCont.Token(args.stringNest)
fin.Finish buf kind (LexerStringFinisherContext()) cont
}
| newline
@@ -277,19 +344,21 @@ and singleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
addUnicodeString buf (lexeme lexbuf)
singleQuoteString sargs skip lexbuf
}
| _
| escape_char
{
let (buf, _fin, m, kind, args) = sargs
addUnicodeString buf (lexeme lexbuf)
singleQuoteString sargs skip lexbuf
}
| _
{ let (buf, _fin, m, kind, args) = sargs
addUnicodeString buf (lexeme lexbuf)
// if not skip then
// STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, kind, args.interpolationDelimiterLength, m))
// else
singleQuoteString sargs skip lexbuf }

and tripleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
| '"' '"' '"'
{ let (buf, fin, _m, kind, args) = sargs
//let cont = LexCont.Token(args.ifdefStack, args.stringNest)
let cont = LexCont.Token(())
let cont = LexCont.Token(args.stringNest)
fin.Finish buf kind (LexerStringFinisherContext.TripleQuote) cont
}
| newline
@@ -298,10 +367,13 @@ and tripleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
addUnicodeString buf (lexeme lexbuf)
tripleQuoteString sargs skip lexbuf
}
// | escape_char
// {
// let (buf, _fin, m, kind, args) = sargs
// addUnicodeString buf (lexeme lexbuf)
// singleQuoteString sargs skip lexbuf
// }
| _
{ let (buf, _fin, m, kind, args) = sargs
addUnicodeString buf (lexeme lexbuf)
// if not skip then
// STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, kind, args.interpolationDelimiterLength, m))
// else
tripleQuoteString sargs skip lexbuf }
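Purely as a reading aid, here is the dollar-counting logic the new interpolated-string rules rely on, lifted out of the fslex actions above into a standalone F# function (the function name is illustrative and not part of the PR): the nesting handed to `startString` is simply the number of leading `$` characters on the matched lexeme.

```fsharp
// Same expression as in the ('$'+) '"' '"' '"' actions above, shown standalone.
let interpolationNesting (lexeme: string) =
    lexeme |> Seq.takeWhile (fun c -> c = '$') |> Seq.length

// interpolationNesting "\"\"\""   // 0 -> plain triple-quoted string
// interpolationNesting "$\"\"\""  // 1 -> interpolated triple-quoted string
// interpolationNesting "$$\"\"\"" // 2 -> nesting of 2, matching the ('$'+) rule
```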
61 changes: 61 additions & 0 deletions src/Visp.Compiler/ParseUtils.fs
@@ -0,0 +1,61 @@
// Copyright 2023 Ville Penttinen
// Distributed under the MIT License.
// https://github.com/vipentti/visp-fs/blob/main/LICENSE.md

module Visp.Compiler.ParseUtils

open FSharp.Text.Lexing
open Visp.Compiler
open Visp.Compiler.SyntaxParser
open Visp.Compiler.LexHelpers
open Visp.Compiler.Syntax.Macros

let mkTokenizerWithArgs args =
    let tokens args buf =
        let next =
            match args.mode with
            | LexMode.Default -> Lexer.token args false buf
            | LexMode.TokenStream _ -> Lexer.tokenStream args false buf

        // eprintfn "%A %A %i" next args.mode args.depth

        match next with
        | QUOTE_SYM -> args.mode <- LexMode.TokenStream TokenStreamMode.QuoteSym
        | QUOTE_KW -> // args.mode <- LexMode.TokenStream TokenStreamMode.Quote
            args.Nested <| LexMode.TokenStream TokenStreamMode.Quote
        | QUASIQUOTE_KW -> args.Nested <| LexMode.TokenStream TokenStreamMode.Quasiquote
        | SYNTAX_MACRO -> args.Nested <| LexMode.TokenStream TokenStreamMode.SyntaxMacroStart
        | SYMBOL s when args.mode.IsSyntaxMacroStart ->
            args.mode <- LexMode.TokenStream TokenStreamMode.Macro
            macroTable.AddMacroName s
            ()
        | MACRO_NAME _ -> args.Nested <| LexMode.TokenStream TokenStreamMode.Macro
        | HASH_PAREN
        | HASH_BRACKET
        | LPAREN
        | LBRACE
        | LBRACKET
        | HASH_BRACE -> args.NestIfNotDefault()
        | RPAREN
        | RBRACE
        | RBRACKET -> args.UnnestIfNotDefault()
        | _ -> ()

        next

    tokens args

let mkTokenizer () =
    mkTokenizerWithArgs <| mkDefaultLextArgs ()


let parseStringToExpr fileName str =
    let lexbuf = LexBuffer<_>.FromString str
    lexbuf.EndPos <- Position.FirstLine fileName
    let tokenizer = mkTokenizer ()

    try
        raw_expr tokenizer lexbuf
    with :? ParseHelpers.SyntaxError as syn ->
        outputSyntaxError syn
        reraise ()
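A hypothetical call of the new helper to show the intended shape of the API; the file name and the visp snippet are illustrative only, not taken from this PR.

```fsharp
open Visp.Compiler

// Parse a single expression from a string. Syntax errors are printed via
// outputSyntaxError and then re-raised, as in parseStringToExpr above.
let expr = ParseUtils.parseStringToExpr "example.visp" "(+ 1 2)"
printfn "%A" expr
```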