Skip to content

Commit

Permalink
Add a more "proper" entrypoint with fuller parser (#170)
Browse files Browse the repository at this point in the history
* Implement better entrypoint

* Add some examples

* Add vector example

* Update entrypoint a bit

* Minor update

* Add PrettyPrinter

* Add parser error message

* Fix compatability with Coq-8.20.0

* Fix dune version

* Change the example extension

* Update PrettyPrinter

* Add some more examples

* Fix type checker time

* Update nary argument order
  • Loading branch information
Ailrun authored Sep 8, 2024
1 parent 7bd92e7 commit d43ab2c
Show file tree
Hide file tree
Showing 35 changed files with 1,192 additions and 216 deletions.
106 changes: 106 additions & 0 deletions driver/Lexer.mll
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
{
open Lexing
open MclttExtracted.Parser

let get_range lexbuf = (lexbuf.lex_start_p, lexbuf.lex_curr_p)

let format_position (f: Format.formatter) (p: position): unit =
Format.fprintf
f
"line %d, column %d"
p.pos_lnum
(p.pos_cnum - p.pos_bol + 1)

let format_range (f: Format.formatter) (p: position * position): unit =
Format.fprintf
f
"@[<h>%a - %a@]"
format_position (fst p)
format_position (snd p)

let token_to_string : token -> string =
function
| ARROW _ -> "->"
| AT _ -> "@"
| BAR _ -> "|"
| COLON _ -> ":"
| COMMA _ -> ","
| DARROW _ -> "=>"
| LPAREN _ -> "("
| RPAREN _ -> ")"
| ZERO _ -> "zero"
| SUCC _ -> "succ"
| REC _ -> "rec"
| RETURN _ -> "return"
| END _ -> "end"
| LAMBDA _ -> "fun"
| PI _ -> "forall"
| NAT _ -> "Nat"
| INT (_, i) -> string_of_int i
| TYPE _ -> "Type"
| VAR (_, s) -> s
| EOF _ -> "<EOF>"

let get_range_of_token : token -> (position * position) =
function
| ARROW r
| AT r
| BAR r
| COLON r
| COMMA r
| DARROW r
| LPAREN r
| RPAREN r
| ZERO r
| SUCC r
| REC r
| RETURN r
| END r
| LAMBDA r
| PI r
| NAT r
| TYPE r
| EOF r
| INT (r, _)
| VAR (r, _) -> r

let format_token (f: Format.formatter) (t: token): unit =
Format.fprintf
f
"@[<h>\"%s\" (at %a)@]"
(token_to_string t)
format_range (get_range_of_token t)
}

let string = ['a'-'z''A'-'Z']+

rule read =
parse
| "->" { ARROW (get_range lexbuf) }
| '@' { AT (get_range lexbuf) }
| '|' { BAR (get_range lexbuf) }
| ':' { COLON (get_range lexbuf) }
| ',' { COMMA (get_range lexbuf) }
| "=>" { DARROW (get_range lexbuf) }
| "(*" { comment lexbuf }
| '(' { LPAREN (get_range lexbuf) }
| ')' { RPAREN (get_range lexbuf) }
| "zero" { ZERO (get_range lexbuf) }
| "succ" { SUCC (get_range lexbuf) }
| "rec" { REC (get_range lexbuf) }
| "return" { RETURN (get_range lexbuf) }
| "end" { END (get_range lexbuf) }
| "fun" { LAMBDA (get_range lexbuf) }
| "forall" { PI (get_range lexbuf) }
| [' ' '\t'] { read lexbuf }
| ['\n'] { new_line lexbuf; read lexbuf }
| "Nat" { NAT (get_range lexbuf) }
| ['0'-'9']+ as lxm { INT (get_range lexbuf, int_of_string lxm) }
| "Type" { TYPE (get_range lexbuf) }
| string { VAR (get_range lexbuf, Lexing.lexeme lexbuf) }
| eof { EOF (get_range lexbuf) }
| _ as c { failwith (Format.asprintf "@[<v 2>Lexer error:@ @[<v 2>Unexpected character %C@ at %a@]@]@." c format_position lexbuf.lex_start_p) }
and comment =
parse
| "*)" { read lexbuf }
| _ { comment lexbuf }
39 changes: 39 additions & 0 deletions driver/Main.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
module Parser = MclttExtracted.Parser
module Entrypoint = MclttExtracted.Entrypoint
open Parser
open MenhirLibParser.Inter
open Entrypoint

let parse text =
(* Before parsing, we must generate a token stream from a lexer buffer,
which we then feed into the parser. *)
let rec loop lexbuf = lazy (Buf_cons (Lexer.read lexbuf, loop lexbuf)) in
let token_stream = loop (Lexing.from_string text) in
match Parser.prog 50 token_stream with
| Parsed_pr ((exp, _typ), _) -> Some exp
| Fail_pr_full (_, _) -> None
| _ -> None

let main ?(default_fp = "") () =
let fp =
if default_fp <> ""
then default_fp
else
begin
print_string "File path to load: ";
read_line ()
end
in
let chan = open_in fp in
(* Before parsing, we must generate a token stream from a lexer buffer,
which we then feed into the parser. *)
let rec loop lexbuf =
lazy (
try
Buf_cons (Lexer.read lexbuf, loop lexbuf)
with
| Failure s -> prerr_string s; raise Exit) in
let token_stream = loop (Lexing.from_channel chan) in
let res = Entrypoint.main 50 token_stream in
Format.printf "%a@."
PrettyPrinter.format_main_result res
207 changes: 207 additions & 0 deletions driver/PrettyPrinter.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
open MclttExtracted.Entrypoint
open MclttExtracted.Syntax

module Parser = MclttExtracted.Parser
module ParserMessages = MclttExtracted.ParserMessages

(************************************************************)
(* Formatting helpers *)
(************************************************************)

let pp_print_paren_if (cond: bool) (body: Format.formatter -> unit -> unit) (f: Format.formatter) (): unit =
(if cond then Format.pp_print_char f '(');
body f ();
(if cond then Format.pp_print_char f ')')

(************************************************************)
(* Formatting Cst.obj *)
(************************************************************)
let rec get_nat_of_obj: Cst.obj -> int option =
function
| Cst.Coq_zero -> Some 0
| Cst.Coq_succ e -> Option.map ((+) 1) (get_nat_of_obj e)
| _ -> None

let rec get_fn_params_of_obj: Cst.obj -> (string * Cst.obj) list * Cst.obj =
function
| Cst.Coq_fn (px, ep, ebody) ->
let params, ebody' = get_fn_params_of_obj ebody in
((px, ep) :: params, ebody')
| ebody -> ([], ebody)

let rec get_pi_params_of_obj: Cst.obj -> (string * Cst.obj) list * Cst.obj =
function
| Cst.Coq_pi (px, ep, eret) ->
let params, eret' = get_pi_params_of_obj eret in
((px, ep) :: params, eret')
| eret -> ([], eret)

let rec format_obj_prec (p: int) (f: Format.formatter): Cst.obj -> unit =
let open Format in
function
| Cst.Coq_typ i -> fprintf f "Type@%d" i
| Cst.Coq_nat -> fprintf f "Nat"
| Cst.Coq_zero -> fprintf f "0"
| Cst.Coq_succ e ->
begin
match get_nat_of_obj e with
| Some n -> fprintf f "%d" (1 + n)
| None ->
let impl f () = fprintf f "succ@ %a" (format_obj_prec 2) e in
pp_open_hovbox f 2;
pp_print_paren_if (p >= 2) impl f ();
pp_close_box f ()
end
| Cst.Coq_natrec (escr, mx, em, ez, sx, sr, es) ->
let impl f () =
fprintf f "@[<hv 0>@[<hov 2>rec %a@ return %s -> %a@]@ @[<hov 2>| zero =>@ %a@]@ @[<hov 2>| succ %s, %s =>@ %a@]@ end@]"
format_obj escr
mx
format_obj em
format_obj ez
sx
sr
format_obj es
in
pp_print_paren_if (p >= 1) impl f ()
| Cst.Coq_app (ef, ea) ->
let impl f () =
fprintf f "%a@ %a"
(format_obj_prec 1) ef
(format_obj_prec 2) ea;
in
pp_open_hvbox f 2;
pp_print_paren_if (p >= 2) impl f ();
pp_close_box f ()
| Cst.Coq_fn (px, ep, ebody) ->
let params, ebody' = get_fn_params_of_obj ebody in
let impl f () =
pp_print_string f "fun ";
pp_open_tbox f ();
pp_set_tab f ();
pp_print_list ~pp_sep:pp_print_tab format_obj_param f ((px, ep) :: params);
pp_close_tbox f ();
begin
if List.compare_length_with params 0 = 0
then pp_print_space f ()
else pp_force_newline f ()
end;
fprintf f "-> @[<hov 2>%a@]"
format_obj ebody'
in
pp_open_hvbox f 2;
pp_print_paren_if (p >= 1) impl f ();
pp_close_box f ()
| Cst.Coq_pi (px, ep, eret) ->
let params, eret' = get_pi_params_of_obj eret in
let impl f () =
pp_print_string f "forall ";
pp_open_tbox f ();
pp_set_tab f ();
pp_print_list ~pp_sep:pp_print_tab format_obj_param f ((px, ep) :: params);
pp_close_tbox f ();
begin
if List.compare_length_with params 0 = 0
then pp_print_space f ()
else pp_force_newline f ()
end;
fprintf f "-> @[<hov 2>%a@]"
format_obj eret'
in
pp_open_hvbox f 2;
pp_print_paren_if (p >= 1) impl f ();
pp_close_box f ()
| Cst.Coq_var x -> pp_print_string f x
and format_obj_param f (px, ep) =
Format.fprintf f "(%s : %a)"
px
format_obj ep
and format_obj f = format_obj_prec 0 f

(************************************************************)
(* Formatting exp *)
(************************************************************)
let exp_to_obj =
let new_var =
let suffix = ref 0 in
fun () ->
incr suffix;
"x" ^ string_of_int !suffix
in
let new_tyvar =
let suffix = ref 0 in
fun () ->
incr suffix;
"A" ^ string_of_int !suffix
in
let rec impl (ctx: string list): exp -> Cst.obj =
function
| Coq_a_zero -> Cst.Coq_zero
| Coq_a_succ e -> Cst.Coq_succ (impl ctx e)
| Coq_a_natrec (em, ez, es, escr) ->
let mx, sx, sr = new_var (), new_var (), new_var () in
Cst.Coq_natrec (impl ctx escr, mx, impl (mx :: ctx) em, impl ctx ez, sx, sr, impl (sr :: sx :: ctx) es)
| Coq_a_nat -> Cst.Coq_nat
| Coq_a_typ i -> Cst.Coq_typ i
| Coq_a_var x -> Cst.Coq_var (List.nth ctx x)
| Coq_a_fn (ep, ebody) ->
let px =
match ep with
| Coq_a_typ _ -> new_tyvar ()
| _ -> new_var ()
in
Cst.Coq_fn (px, impl ctx ep, impl (px :: ctx) ebody)
| Coq_a_app (ef, ea) ->
Cst.Coq_app (impl ctx ef, impl ctx ea)
| Coq_a_pi (ep, eret) ->
let px =
match ep with
| Coq_a_typ _ -> new_tyvar ()
| _ -> new_var ()
in
Cst.Coq_pi (px, impl ctx ep, impl (px :: ctx) eret)
| Coq_a_sub _ -> failwith "Invalid internal language construct"
in
impl []

let format_exp f exp = format_obj f (exp_to_obj exp)

(************************************************************)
(* Formatting nf *)
(************************************************************)

let format_nf f nf = format_exp f (nf_to_exp nf)

(************************************************************)
(* Formatting main_result *)
(************************************************************)

let format_main_result (f: Format.formatter): main_result -> unit =
let open Format in
function
| AllGood (cst_typ, cst_exp, typ, exp, nf) ->
fprintf f "@[<v 2>Parsed:@ @[<hv 0>%a@ : %a@]@]"
format_obj cst_exp
format_obj cst_typ;
pp_force_newline f ();
fprintf f "@[<v 2>Elaborated:@ @[<hv 0>%a@ : %a@]@]"
format_exp exp
format_exp typ;
pp_force_newline f ();
fprintf f "@[<v 2>Normalized Result:@ @[<hv 0>%a@ : %a@]@]"
format_nf nf
format_exp typ
| TypeCheckingFailure (typ, exp) ->
printf "@[<v 2>Type Checking Failure:@ %a@;<1 -2>is not of@ %a@]"
format_exp exp
format_exp typ
| ElaborationFailure cst_exp ->
printf "@[<v 2>Elaboration Failure:@ %a@;<1 -2>cannot be elaborated@]"
format_obj cst_exp
| ParserFailure (s, t) ->
printf "@[<v 2>Parser Failure:@ on %a:@ @ @[<hov 0>%a@]@]"
Lexer.format_token t
pp_print_text (ParserMessages.message (Parser.Aut.coq_N_of_state s))
| ParserTimeout fuel ->
printf "@[<v 2>Parser Timeout with Fuel %d@]"
fuel
2 changes: 1 addition & 1 deletion driver/test.ml → driver/Test.ml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
(* Unit test cases for parsing *)

open Main
open Parser.Cst
open Entrypoint.Cst

let x, y, z = ("x", "y", "z")

Expand Down
18 changes: 11 additions & 7 deletions driver/dune
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
(include_subdirs no)

(library
(name mcltt)
(modules main parser lexer test)
(inline_tests)
(preprocess
(pps ppx_inline_test)))
(name Mcltt)
(modules Main Lexer PrettyPrinter)
(libraries MclttExtracted)
; (inline_tests)
; (preprocess
; (pps ppx_inline_test))
)

(env
(dev
(flags
(:standard -w -67 -w -32 -w -39))))
(:standard -w -67 -w -32 -w -33 -w -39))))

(ocamllex lexer)
(ocamllex Lexer)
3 changes: 3 additions & 0 deletions driver/extracted/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Generated Files
*.ml
*.mli
2 changes: 2 additions & 0 deletions driver/extracted/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
(library
(name MclttExtracted))
Loading

0 comments on commit d43ab2c

Please sign in to comment.