Reviewer note. This is free text ahead of the first "diff --git" header; patch tools skip it.
This patch extends beautifier/README.md with Scala, JavaScript and Python grammar samples,
wraps "object Universe" in beautifier/mincaml2js.scala in a block comment, and adds
Makefile, lexer.mll, main.ml, parser.mly and syntax.ml under beautifier/src.
The copy under review had its line breaks and indentation collapsed. One patch line per
physical line has been restored; indentation and the position of whitespace-only context
lines are reconstructed and should be confirmed against the original commit (TODO).
In parser.mly the payload type on BOOL, INT, FLOAT, STRING, IDENT and CIDENT and the result
type of the start symbol were missing. They are restored as <string> and <Syntax.e> because
lexer.mll builds those tokens with string arguments and the semantic actions wrap $n in S.

diff --git a/beautifier/README.md b/beautifier/README.md
index 5d47d29..10985b0 100644
--- a/beautifier/README.md
+++ b/beautifier/README.md
@@ -4,3 +4,84 @@
 
 Scala,JavaScript,Pythonによる実装があります。
 
+パーサコンビネータを作成して、n()でネストしたい文法を括れば、あとはうまい事やってくれます。
+
+文法定義は以下のように非常にシンプルです。
+
+処理速度は、ScalaはJVMの起動が遅い分遅く、JavaScriptが起動が速くて、処理速度も悪くない。Pythonは起動が速いけど処理速度が遅いという感じです。
+ただ、文法的にJavaScriptは演算子のオーバーロードが出来ないので分かり辛く、Pythonは演算子の数が少ない。Scalaはより分かりやすいでしょう。
+
+## Scala
+
+    object parse extends PrityPrintParser {
+
+      override protected val whiteSpace = """(?s)(\s|\(\*.*\*\))+""".r
+
+      def keywords = ( """(let|in|if|else|then|rec|begin|end|match|with|try)\b""".r )
+      def id = ( not(keywords) ~> """[_a-zA-Z0-9]+""".r ).
+        | ( """[+\-*/.<>=:@]+""".r ).
+        | ( """[,!]""".r ).
+        | ( """("(\\.|[^"])*")""".r )
+      def exp:Parser[Any]
+        = ( exps ~ rep(";" ~ exps) )
+      def exps = ( rep1(exp1) )
+      val exp1 = ( "begin" ~ n(exp) ~ "end" ).
+        | ( ("match" | "try") ~ n(exp) ~ "with" ~ opt("|") ~ n{exp} ~ rep("|" ~ n{exp}) ).
+        | ( "(" ~ n{opt(exp)} ~ ")" ).
+        | ( "{" ~ n{opt(exp)} ~ "}" ).
+        | ( "[" ~ n{opt(exp)} ~ "]" ).
+        | ( "let" ~ n(opt("rec") ~ exp) ~ "in" ~ exp ).
+        | ( "type" ~ n(id ~ "=" ~ exp) ~ ";;" ~ exp ).
+        | ( "type" ~ n{id ~ "=" ~ n(opt("|") ~ exp) ~ rep("|" ~ n(exp))} ~ ";;" ).
+        | ( "if" ~ n{exp} ~ "then" ~ n{exp} ~ "else" ~ exp ).
+        | ( id )
+
+      def apply(str: String):String = apply(exp,str)
+    }
+
+## JavaScript
+
+    var keywords = reg(/^(let|in|if|else|then|rec|begin|end|match|with)\b/);
+
+    var id = (notp(keywords).next(reg(/^[_a-zA-Z0-9]+/)))
+      .or(reg(/^[+\-*\/.<>=:@]+/))
+      .or(reg(/^[,!]/))
+      .or(reg(/^("(\\.|[^"])*")/));
+    var exp1 = p("begin", n(exp), "end")
+      .or(p(p("match").or(p("try")), n(exp), "with", opt("|"), n(exp), rep(p("|", n(exp)))))
+      .or(p("(", n(opt(exp)), ")"))
+      .or(p("{", n(opt(exp)), "}"))
+      .or(p("[", n(opt(exp)), "]"))
+      .or(p("let", n(p(opt("rec"), exp)), "in", exp))
+      .or(p("type", n(p(id, "=", exp)), ";;", exp))
+      .or(p("type", n(id, "=", opt("|"), n(exp), rep(p("|", n(exp)))), ";;"))
+      .or(p("if", n(exp), "then", n(exp), "else", exp))
+      .or(id);
+    var exps = rep1(exp1);
+    function exp(i) {
+      return p(exps, rep(p(";", exps)))(i);
+    }
+
+## Python
+
+    keywords = reg(r"^(let|in|if|else|then|rec|begin|end|match|with|type)\b")
+
+    id = notp(keywords) >> reg(r"^[_a-zA-Z0-9]+") \
+       | reg(r'^[+\-*\/.<>:@=][+\-*\/.<>:@=]*') \
+       | reg(r'^[,!]') \
+       | reg(r'^("(\\.|[^"])*")')
+
+    exp = p(lambda i: (exps + -p(notp(";;") >> p(";"), exps))(i))
+
+    exp1 = p("begin", n(exp), "end") \
+         | p("(", n(~exp), ")") \
+         | p("{", n(~exp), "}") \
+         | p("[", n(~exp), "]") \
+         | p("if", n(exp), "then", n(exp), "else", exp) \
+         | p("let", n(~p("rec"), exp), "in", exp) \
+         | p(p("match") | p("try"), n(exp), "with", ~p("|"), n(exp), -p("|", n(exp))) \
+         | p("function", ~p("|"), n(exp), -p("|", n(exp))) \
+         | p("type", n(id, "=", ~p("|"), n(exp), -p("|", n(exp))), ~p(";;")) \
+         | p("type", n(id, "=", exp), ~p(";;")) \
+         | p("open", n(id, -p(".", id)), ~p(";;")) \
+         | id
diff --git a/beautifier/mincaml2js.scala b/beautifier/mincaml2js.scala
index a8fdfc8..72ec713 100644
--- a/beautifier/mincaml2js.scala
+++ b/beautifier/mincaml2js.scala
@@ -251,6 +251,7 @@ object test extends App {
   */
 }
 
+/*
 object Universe {
 
   implicit class RichList[T](self: List[T]) {
@@ -298,3 +299,4 @@ object Universe {
     map(methodToString)
 
 }
+*/
\ No newline at end of file
diff --git a/beautifier/src/Makefile b/beautifier/src/Makefile
new file mode 100644
index 0000000..1c987e4
--- /dev/null
+++ b/beautifier/src/Makefile
@@ -0,0 +1,8 @@
+all:
+	ocamlyacc parser.mly
+	rm parser.mli
+	ocamllex lexer.mll
+	ocamlc syntax.ml parser.ml lexer.ml main.ml
+
+clean:
+	rm -f *.cm* parser.ml lexer.ml a.out
diff --git a/beautifier/src/lexer.mll b/beautifier/src/lexer.mll
new file mode 100644
index 0000000..d818189
--- /dev/null
+++ b/beautifier/src/lexer.mll
@@ -0,0 +1,83 @@
+{
+open Parser
+}
+
+let space = [' ' '\t' '\n' '\r']
+let digit = ['0'-'9']
+let lower = ['a'-'z']
+let upper = ['A'-'Z']
+
+rule token = parse
+| space+ { token lexbuf }
+| "(*" { comment lexbuf; token lexbuf }
+| '(' { LPAREN }
+| ')' { RPAREN }
+| '[' { LBRACK }
+| ']' { RBRACK }
+| "::" { CONS }
+| '@' { AT }
+| "as" { AS }
+| "begin" { BEGIN }
+| "end" { END }
+| "match" { MATCH }
+| "with" { WITH }
+| "when" { WHEN }
+| "->" { ARROW }
+| "|" { BAR }
+| "type" { TYPE }
+| "of" { OF }
+| ";;" { SEMISEMI }
+| "true" { BOOL("true") }
+| "false" { BOOL("false") }
+| "not" { NOT }
+| digit+ { INT(Lexing.lexeme lexbuf) }
+| digit+ ('.' digit*)? (['e' 'E'] ['+' '-']? digit+)?
+    { FLOAT(Lexing.lexeme lexbuf) }
+| '-' { MINUS }
+| '+' { PLUS }
+| '*' { AST }
+| "-." { MINUS_DOT }
+| "+." { PLUS_DOT }
+| "*." { AST_DOT }
+| "/." { SLASH_DOT }
+| '=' { EQUAL }
+| "<>" { LESS_GREATER }
+| "<=" { LESS_EQUAL }
+| ">=" { GREATER_EQUAL }
+| '<' { LESS }
+| '>' { GREATER }
+| "if" { IF }
+| "then" { THEN }
+| "else" { ELSE }
+| "let" { LET }
+| "in" { IN }
+| "rec" { REC }
+| "mutable" { MUTABLE }
+| "open" { OPEN }
+| '{' { LBRACE }
+| '}' { RBRACE }
+| ':' { COLON }
+| ',' { COMMA }
+| '_' { IDENT("_") }
+| '.' { DOT }
+| "<-" { LESS_MINUS }
+| ":=" { COLON_EQUAL }
+| '!' { EXCLAM }
+| ';' { SEMICOLON }
+| eof { EOF }
+| '"' ([^ '"' '\\'] | '\\' _)* '"' { let s = Lexing.lexeme lexbuf in STRING(String.sub s 1 ((String.length s)-2))}
+
+| upper (digit|lower|upper|'_')* '.' lower (digit|lower|upper|'_')* { IDENT(Lexing.lexeme lexbuf) }
+| upper (digit|lower|upper|'_')* { CIDENT(Lexing.lexeme lexbuf) }
+| lower (digit|lower|upper|'_')* { IDENT(Lexing.lexeme lexbuf) }
+| _ {
+    failwith
+      (Printf.sprintf "unknown token %s near characters %d-%d"
+         (Lexing.lexeme lexbuf)
+         (Lexing.lexeme_start lexbuf)
+         (Lexing.lexeme_end lexbuf)) }
+and comment = parse
+| "*)" { () }
+| "(*" { comment lexbuf; comment lexbuf }
+| eof { Format.eprintf "warning: unterminated comment@." }
+| _ { comment lexbuf }
diff --git a/beautifier/src/main.ml b/beautifier/src/main.ml
new file mode 100644
index 0000000..fb4965b
--- /dev/null
+++ b/beautifier/src/main.ml
@@ -0,0 +1,24 @@
+let lexbuf outchan l =
+  (Parser.exp Lexer.token l)
+
+let string s = lexbuf stdout (Lexing.from_string s)
+
+let file f =
+  let inchan = open_in (f ^ ".ml") in
+  let outchan = open_out (f ^ ".js") in
+  try
+    lexbuf outchan (Lexing.from_channel inchan);
+    close_in inchan;
+    close_out outchan;
+  with e -> (close_in inchan; close_out outchan; raise e)
+
+let () =
+  let files = ref [] in
+  Arg.parse
+    []
+    (fun s -> files := !files @ [s])
+    ("Min-Caml beautifier\n" ^
+     Printf.sprintf "usage: %s ...filenames without \".ml\"..." Sys.argv.(0));
+  List.iter
+    (fun f -> ignore (file f))
+    !files
diff --git a/beautifier/src/parser.mly b/beautifier/src/parser.mly
new file mode 100644
index 0000000..38bc6bd
--- /dev/null
+++ b/beautifier/src/parser.mly
@@ -0,0 +1,192 @@
+%{
+open Syntax
+%}
+
+%token <string> BOOL
+%token <string> INT
+%token <string> FLOAT
+%token <string> STRING
+%token NOT
+%token MINUS
+%token PLUS
+%token MINUS_DOT
+%token PLUS_DOT
+%token AST_DOT
+%token SLASH_DOT
+%token EQUAL
+%token LESS_GREATER
+%token LESS_EQUAL
+%token GREATER_EQUAL
+%token LESS
+%token GREATER
+%token IF
+%token THEN
+%token ELSE
+%token <string> IDENT
+%token <string> CIDENT
+%token LET
+%token IN
+%token REC
+%token COMMA
+%token ARRAY_CREATE
+%token DOT
+%token LESS_MINUS
+%token SEMICOLON
+%token LPAREN
+%token RPAREN
+%token BEGIN END
+%token MATCH WITH WHEN ARROW BAR
+%token TYPE OF SEMISEMI AST
+%token LBRACK RBRACK CONS AT AS
+%token MUTABLE LBRACE RBRACE COLON
+%token EXCLAM COLON_EQUAL
+%token OPEN
+%token EOF
+
+%right prec_let
+%right SEMICOLON CONS AT AS
+%right prec_if
+%right LESS_MINUS COLON_EQUAL
+%left COMMA
+%left EQUAL LESS_GREATER LESS GREATER LESS_EQUAL GREATER_EQUAL
+%left PLUS MINUS PLUS_DOT MINUS_DOT
+%left AST_DOT SLASH_DOT
+%right prec_unary_minus
+%left prec_capp
+%left prec_app
+%left DOT
+
+%type <Syntax.e> exp
+%start exp
+
+%%
+
+simple_exp:
+| LBRACK exps RBRACK { $2 }
+| LPAREN exp RPAREN { $2 }
+| LBRACE fields RBRACE { Ls $2 }
+| LBRACE fields SEMICOLON RBRACE { Ls $2 }
+| BEGIN exp END { $2 }
+| LPAREN RPAREN { Ls[] }
+| BOOL { S $1 }
+| INT { S $1 }
+| FLOAT { S $1 }
+| STRING { S $1 }
+| IDENT { S $1 }
+| simple_exp DOT LPAREN exp RPAREN { Ls[$1; $4] }
+| simple_exp DOT IDENT { Ls[$1; S $3] }
+| EXCLAM exp %prec DOT { Ls[S "!"; $2] }
+
+field:
+| IDENT EQUAL exp { Ls[S $1; $3] }
+fields:
+| field { [$1] }
+| field SEMICOLON fields { $1::$3 }
+
+exps:
+| { Ls[] }
+| exp { $1 }
+| exp SEMICOLON exps { Ls[$1;S ";"; $3] }
+exp:
+| simple_exp { $1 }
+| NOT exp %prec prec_app { $2 }
+| MINUS exp %prec prec_unary_minus { $2 }
+| exp CONS exp { Ls[$1; $3] }
+| exp AT exp { Ls[$1; $3] }
+| exp AS IDENT { Ls[$1; S $3] }
+
+| exp PLUS exp { Ls[$1; S "+"; $3] }
+| exp MINUS exp { Ls[$1; S "-"; $3] }
+| exp EQUAL exp { Ls[$1; S "=="; $3] }
+| exp LESS_GREATER exp { Ls[$1; S "!="; $3] }
+| exp LESS exp { Ls[$1; S "<"; $3] }
+| exp GREATER exp { Ls[$1; S ">"; $3] }
+| exp LESS_EQUAL exp { Ls[$1; S "<="; $3] }
+| exp GREATER_EQUAL exp { Ls[$1; S ">="; $3] }
+| IF exp THEN exp ELSE exp %prec prec_if { Ls[$2; $4; $6] }
+| MATCH exp WITH BAR cases %prec prec_if { Ls($2 :: $5) }
+| MATCH exp WITH cases %prec prec_if { Ls($2 :: $4) }
+| TYPE IDENT EQUAL type1 SEMISEMI exp %prec prec_if { $6 }
+
+| MINUS_DOT exp %prec prec_unary_minus { Ls[S "-"; $2] }
+| exp PLUS_DOT exp { Ls[$1; S "+"; $3] }
+| exp MINUS_DOT exp { Ls[$1; S "-"; $3] }
+| exp AST_DOT exp { Ls[$1; S "*"; $3] }
+| exp SLASH_DOT exp { Ls[$1; S "/"; $3] }
+| LET IDENT EQUAL exp IN exp %prec prec_let { Ls[S $2; $4; $6] }
+| LET REC fundef IN exp %prec prec_let { Ls[$3;$5]}
+
+| OPEN CIDENT exp
+    %prec prec_capp
+    { Ls[S $2; $3] }
+| CIDENT
+    %prec prec_capp
+    { S $1 }
+| CIDENT exp
+    %prec prec_capp
+    { Ls[S $1; $2] }
+
+| exp actual_args %prec prec_app { Ls[$1; Ls $2] }
+| elems { Ls $1 }
+| LET LPAREN exp RPAREN EQUAL exp IN exp { Ls[$6; $3; $8] }
+| LET LBRACE fields RBRACE EQUAL exp IN exp { Ls[$6; Ls $3; $8] }
+| simple_exp DOT LPAREN exp RPAREN LESS_MINUS exp { Ls[$1; $4; $7] }
+| simple_exp DOT IDENT LESS_MINUS exp { Ls[$1; S $3; $5] }
+| simple_exp COLON_EQUAL exp { Ls[$1; S "ref"; $3] }
+| exp SEMICOLON exp { Ls[S ""; $1; $3] }
+| ARRAY_CREATE simple_exp simple_exp %prec prec_app { Ls[$2; $3] }
+| error
+    { failwith
+        (Printf.sprintf "parse error near characters %d-%d"
+           (Parsing.symbol_start ())
+           (Parsing.symbol_end ())) }
+
+fundef:
+| IDENT formal_args EQUAL exp
+    { Ls[S $1; Ls $2; $4] }
+
+formal_args:
+| IDENT formal_args { S $1 :: $2 }
+| IDENT { [S $1] }
+
+actual_args:
+| actual_args simple_exp %prec prec_app { $1 @ [$2] }
+| simple_exp %prec prec_app { [$1] }
+
+elems:
+| elems COMMA exp { $1 @ [$3] }
+| exp COMMA exp { [$1; $3] }
+
+
+when1:
+| { S "" }
+| WHEN exp { $2 }
+cases:
+| exp when1 ARROW exp
+    { [$1; $2; $4] }
+| exp when1 ARROW exp BAR cases { $1:: $2:: $4::$6 }
+
+
+tyrec:
+| MUTABLE IDENT COLON type1 { "" }
+| IDENT COLON type1 { "" }
+
+tyrecs:
+| tyrec {[$1]}
+| tyrec SEMICOLON tyrecs { $1::$3 }
+type1:
+| IDENT { $1 }
+| LBRACE tyrecs RBRACE { "{record}" }
+| consts { "" }
+| BAR consts { "" }
+types:
+| type1 { [$1] }
+| type1 AST types { $1::$3 }
+
+const:
+| CIDENT OF types { [$1,$3] }
+| CIDENT { [$1,[]] }
+
+consts:
+| const { [$1] }
+| const BAR consts { $1::$3 }
diff --git a/beautifier/src/syntax.ml b/beautifier/src/syntax.ml
new file mode 100644
index 0000000..c34ea94
--- /dev/null
+++ b/beautifier/src/syntax.ml
@@ -0,0 +1,19 @@
+type e =
+  | S of string
+  | Ls of e list
+
+let read_all file =
+  let inchan = open_in file in
+  let lines = ref [] in
+  let rc =
+    begin try
+      while true; do
+        lines := input_line inchan :: !lines
+      done;
+      ""
+    with
+      | e -> String.concat "\n" (List.rev !lines)
+    end
+  in
+  close_in inchan;
+  rc