-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.mll
51 lines (43 loc) · 1.26 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
{
open Parser

(* Line number of the token currently being scanned. Starts at 1 and is
   bumped by the lexer rule each time it consumes a newline character. *)
let line_num = ref 1

(* Raised when the input cannot be tokenized. The payload is a message
   that already carries the offending line number. *)
exception Syntax_error of string

(* [syntax_error msg] raises [Syntax_error] with [msg] followed by the
   current value of [line_num]. It never returns normally. *)
let syntax_error msg =
  let where = string_of_int !line_num in
  raise (Syntax_error (String.concat " on line " [msg; where]))

(* Reserved words, keyed by their lowercase spelling, paired with the
   Parser token each one produces. *)
let keywords =
  [ ("begin", BEGIN);
    ("end", END);
    ("get", GET);
    ("print", PRINT);
    ("extract", EXTRACT) ]
}
(* Horizontal whitespace. The newline character is deliberately left out:
   the rule handles it on its own so that lines can be counted. *)
let blank = [' ' '\r' '\t']

(* A single decimal digit. *)
let digit = ['0'-'9']

(* One or more decimal digits. At least one digit is required so that an
   integer literal can never be the empty string. *)
let digits = digit+

(* A single ASCII letter, either case. *)
let alpha = ['a'-'z' 'A'-'Z']

(* An identifier: a letter followed by any number of letters, digits or
   underscores. *)
let id = alpha (alpha | digit | '_')*

(* A string literal: an opening double quote, then any number of letters,
   digits, the characters _ : / . < > = and space, or a backslash-escaped
   double quote, then a closing double quote. Any other character between
   the quotes is not accepted by this pattern. *)
let str = "\"" (alpha | digit | '_' | ':' | '/' | '.' | '<' | '>' | '=' | ' ' | "\\\"")* "\""
(* Main tokenizer entry point: returns the next Parser token read from
   lexbuf. Newlines and blanks produce no token; the rule re-enters
   itself to skip them. Any character that starts no token raises
   Syntax_error through syntax_error. *)
rule ezreq = parse
| "|>" { PIPE }
| "=" { ASSIGN }
| digits as d {
    (* int_of_string raises Failure when the digits do not fit in an int;
       report that as a syntax error carrying the line number instead of
       leaking a bare Failure to the caller. *)
    try LITERAL (int_of_string d)
    with Failure _ -> syntax_error "invalid integer literal"
  }
| id as i {
    (* Keywords are matched case-insensitively; anything that is not a
       keyword is an identifier and keeps its original spelling. *)
    match List.assoc (String.lowercase_ascii i) keywords with
    | tok -> tok
    | exception Not_found -> ID i
  }
| str as s {
    (* The lexeme still includes its surrounding double quotes, so it can
       never be equal to a keyword: no keyword lookup is needed here. *)
    STR s
  }
| '\n' { incr line_num; ezreq lexbuf } (* count newline characters *)
| blank { ezreq lexbuf } (* skip blank characters *)
| _ { syntax_error "couldn't identify the token" }
| eof { EOF }