-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathsimple_markup.ml
313 lines (271 loc) · 10.2 KB
/
simple_markup.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
(* Copyright (C) 2009 Mauricio Fernandez <[email protected]> *)
open Printf
open ExtString
open ExtList
TYPE_CONV_PATH "Simple_markup"
(* A raw reference as scanned from "[desc](src)": [desc] is the text found
   between the brackets and [src] the text found between the parentheses.
   NOTE: this declaration shadows the standard [ref] type name for the rest
   of the file. *)
type ref = { src : string; desc : string }
(* Block-level elements produced by the parser. *)
type paragraph =
(* Ordinary paragraph made of inline text. *)
Normal of par_text
(* Preformatted block: its contents and the optional "kind" string that
   followed the opening "{{". *)
| Pre of string * string option
(* Heading: level (derived from the '!' characters of the line) and title. *)
| Heading of int * par_text
(* Block quote built from lines starting with '>'. *)
| Quote of paragraph list
(* Unordered list ('*' items): first item, then the remaining items; each
   item is itself a list of paragraphs. *)
| Ulist of paragraph list * paragraph list list
(* Ordered list ('#' items), same layout as [Ulist]. *)
| Olist of paragraph list * paragraph list list
and par_text = text list
(* Inline elements. *)
and text =
(* Plain text. *)
Text of string
(* Text delimited by "__". *)
| Emph of string
(* Text delimited by "*". *)
| Bold of string
(* Text delimited by "=="; its contents are scanned recursively. *)
| Struck of par_text
(* Text delimited by backquotes. *)
| Code of string
(* "[desc](target)" link. *)
| Link of href
(* Produced for a link with an empty description whose target starts with
   '#'; carries the target without the leading '#'. *)
| Anchor of string
(* "![alt](src)" image. *)
| Image of img_ref
and href = { href_target : string; href_desc : string; }
and img_ref = { img_src : string; img_alt : string; }
(* [with sexp] derives the S-expression converters (e.g. [sexp_of_paragraph]
   and [sexp_of_par_list], used by the printers in this file). *)
and par_list = paragraph list with sexp
(* NOTE(review): presumably expanded by the Camlp4 fold-generator filter into
   a fold class over the types declared above -- confirm against the build
   setup; the class is not referenced elsewhere in the visible source. *)
class fold = Camlp4Filters.GenerateFold.generated
(* State threaded through the inline-text scanner:
   - [max]: offset (exclusive) at which scanning stops;
   - [current]: buffer accumulating the plain text seen since the last
     completed fragment;
   - [fragments]: completed fragments, most recent first (the scanner reverses
     the list when it finishes). *)
type parse_state = { max : int; current : Buffer.t; fragments : text list; }
(* Human-readable S-expression rendering of a single paragraph. *)
let string_of_paragraph p =
  let sexp = sexp_of_paragraph p in
  Sexplib.Sexp.to_string_hum sexp
(* Human-readable S-expression rendering of a list of paragraphs. *)
let string_of_paragraphs ps =
  let sexp = sexp_of_par_list ps in
  Sexplib.Sexp.to_string_hum sexp
(* [indentation ?ts s] returns the width of the leading whitespace of [s].
   Each space counts for 1 column and each tab for [ts] columns (default 8).
   Counting stops at the first character that is neither a space nor a tab.
   The original accepted [ts] but always used a hard-coded 8 for tabs; the
   default keeps that behaviour for existing callers. *)
let indentation ?(ts=8) s =
  let len = String.length s in
  let rec loop n indent =
    if n >= len then indent
    else match s.[n] with
      | ' ' -> loop (n + 1) (indent + 1)
      | '\t' -> loop (n + 1) (indent + ts)
      | _ -> indent
  in loop 0 0
(* [unescape s] removes backslash escapes: a backslash followed by another
   character yields that character alone.  A backslash that is the very last
   character of [s] is copied unchanged. *)
let unescape s =
  let len = String.length s in
  let out = Buffer.create len in
  let rec copy i =
    if i < len then begin
      (* an escape needs a character after the backslash *)
      let escaped = s.[i] = '\\' && i + 1 < len in
      let src = if escaped then i + 1 else i in
      Buffer.add_char out s.[src];
      copy (src + 1)
    end
  in
  copy 0;
  Buffer.contents out
(* [unescape_slice s ~first ~last] takes the slice of [s] delimited by
   [first] and [last], strips it, and removes backslash escapes from it. *)
let unescape_slice s ~first ~last =
  let piece = String.slice ~first ~last s in
  let trimmed = String.strip piece in
  unescape trimmed
(* [snd_is s c] is true iff [s] has at least two characters and its second
   character is [c]. *)
let snd_is s c =
  match String.length s with
  | 0 | 1 -> false
  | _ -> s.[1] = c
(* [snd_is_space s] is true iff the second character of [s] exists and is a
   space or a tab. *)
let snd_is_space s =
  String.length s > 1
  && (match s.[1] with
      | ' ' | '\t' -> true
      | _ -> false)
(* [collect f x] calls [f x] repeatedly until it returns [None] and returns
   the values carried by the [Some] results, in the order they were produced.
   It is only useful when [f] has side effects on [x]. *)
let collect f x =
  let rec drain gathered =
    match f x with
    | Some item -> drain (item :: gathered)
    | None -> List.rev gathered
  in
  drain []
(* [push_remainder ?first indent s e] drops the first [first] characters of
   [s] (2 by default) and pushes what is left back onto [e] as a line entry
   (indentation, stripped text, is-blank flag).  The entry's indentation is
   [indent + first] plus the leading whitespace of the remainder. *)
let push_remainder ?(first=2) indent s e =
  let rest = String.slice ~first s in
  let text = String.strip rest in
  let column = indent + first + indentation rest in
  Enum.push e (column, text, text = "")
(* Short names for the buffer operations used by the inline scanner. *)
let adds buf str = Buffer.add_string buf str
let addc buf ch = Buffer.add_char buf ch
(* Fresh, empty buffer for the next run of plain text. *)
let new_fragment () =
  let initial_size = 8 in
  Buffer.create initial_size
(* [push_current st] returns the fragment list of [st], with the pending
   plain text of [st.current] prepended as a [Text] fragment when that buffer
   is not empty.  [st] itself is not modified. *)
let push_current st =
  match Buffer.length st.current with
  | 0 -> st.fragments
  | _ -> Text (Buffer.contents st.current) :: st.fragments
(* [read_paragraph ?skip_blank indent e] reads the next paragraph from the
   line enumeration [e], whose elements are (indentation, text, is-blank).
   - A blank line is consumed; reading then continues if [skip_blank] is true
     (the default), otherwise [None] is returned.
   - A non-blank line indented less than [indent] is left in [e] and [None]
     is returned.
   - Any other line is consumed and dispatched on its first character.
   Returns [None] when [e] is exhausted. *)
let rec read_paragraph ?(skip_blank=true) indent e =
  match Enum.peek e with
  | None -> None
  | Some (_, _, true) ->
      Enum.junk e;
      if skip_blank then read_paragraph indent e else None
  | Some (line_indent, _, false) when line_indent < indent -> None
  | Some (line_indent, line, false) ->
      Enum.junk e;
      read_nonempty line_indent e line
(* [skip_blank_line e] discards blank lines from the front of [e], stopping
   at the first non-blank line or at the end of the enumeration. *)
and skip_blank_line e =
  match Enum.peek e with
  | Some (_, _, true) -> Enum.junk e; skip_blank_line e
  | Some (_, _, false) | None -> ()
(* [read_nonempty indent e s] dispatches on the first characters of the
   non-blank line [s] (already removed from [e]):
   '!' heading, "* " unordered list, "# " ordered list, "{{" preformatted
   block, "> " (or a lone ">") quote, anything else a normal paragraph. *)
and read_nonempty indent e s =
  (* list items: re-queue the text after the marker, then read the list *)
  let as_list reader = push_remainder indent s e; reader indent e in
  (* quotes and normal paragraphs re-read the whole line themselves *)
  let requeue () = Enum.push e (indent, s, false) in
  match s.[0] with
  | '!' -> read_heading s
  | '*' when snd_is_space s -> as_list read_ul
  | '#' when snd_is_space s -> as_list read_ol
  | '{' when snd_is s '{' -> read_pre (String.slice s ~first:2) e
  | '>' when snd_is_space s || s = ">" ->
      (* the second test is needed because "> " has been stripped to ">" *)
      requeue (); read_quote indent e
  | _ -> requeue (); read_normal e
(* [read_heading s] parses a heading line.  The level is the number of
   leading '!' characters and the rest of the line is parsed as inline text.
   Only the leading run of '!' is removed: stripping '!' from both ends (as
   was done before) dropped a trailing '!' from the title and counted it in
   the level. *)
and read_heading s =
  let len = String.length s in
  let rec leading_bangs i =
    if i < len && s.[i] = '!' then leading_bangs (i + 1) else i in
  let level = leading_bangs 0 in
  Some (Heading (level, parse_text (String.sub s level (len - level))))
(* [read_ul indent e] reads an unordered list whose items start with '*'
   followed by a space or tab. *)
and read_ul indent e =
  let make first rest = Ulist (first, rest) in
  let is_bullet line = snd_is_space line && line.[0] = '*' in
  read_list make is_bullet indent e
(* [read_ol indent e] reads an ordered list whose items start with '#'
   followed by a space or tab. *)
and read_ol indent e =
  let make first rest = Olist (first, rest) in
  let is_numbered line = snd_is_space line && line.[0] = '#' in
  read_list make is_numbered indent e
(* [read_list f is_item indent e] reads a list from [e].  The text of the
   first item is expected to have been pushed back onto [e] already.  Each
   item is the list of paragraphs indented by more than the item's own
   indentation.  Further items are lines indented at least [indent] for which
   [is_item] holds; blank lines between items are skipped.  The result is
   [Some (f first_item other_items)]. *)
and read_list f is_item indent e =
  let read_item level = collect (read_paragraph (level + 1)) e in
  let rec more_items first rest =
    skip_blank_line e;
    match Enum.peek e with
    | Some (level, line, _) when level >= indent && is_item line ->
        Enum.junk e;
        (* re-queue the item text without its marker *)
        push_remainder level line e;
        let item = read_item level in
        more_items first (item :: rest)
    | Some _ | None -> f first (List.rev rest)
  in
  let first = read_item indent in
  Some (more_items first [])
(* [read_pre kind e] reads a preformatted block whose opening "{{" line has
   already been consumed; [kind] is the text that followed "{{" on that line
   ("" means no kind).  Lines are taken from [e] until a line equal to "}}"
   or the end of input.  A line made of one or more backslashes followed by
   "}}" is an escaped terminator: one backslash is removed and the line is
   kept as content.  Indentation is reproduced relative to the first content
   line.  Returns [Some (Pre (text, kind))], with [text] ending in a newline
   when the block is not empty.

   The first content line is now unescaped like all the others; previously
   it was copied verbatim, so an escaped "}}" kept its backslash only when
   it happened to be the first line. *)
and read_pre kind e =
  let kind = match kind with "" -> None | s -> Some s in
  let escaped_end = Str.regexp "^\\\\+}}$" in
  let unescape = function
      s when Str.string_match escaped_end s 0 -> String.slice ~first:1 s
    | s -> s in
  (* don't forget the last \n *)
  let ret ls = Some (Pre (String.concat "\n" (List.rev ("" :: ls)), kind)) in
  let rec read_until_end fstindent ls = match Enum.get e with
      None | Some (_, "}}", _) -> ret ls
    | Some (line_indent, s, _) ->
        (* lines indented less than the first one get no padding *)
        let spaces = String.make (max 0 (line_indent - fstindent)) ' ' in
        read_until_end fstindent ((spaces ^ unescape s) :: ls)
  in match Enum.get e with
      None | Some (_, "}}", _) -> ret []
    | Some (line_indent, s, _) -> read_until_end line_indent [unescape s]
(* [read_quote indent e] reads a block quote.  The lines of [e] are viewed
   through a mapping that removes the leading '>' from each line; the
   paragraphs read from that view become the contents of the quote.  Returns
   [None] if no paragraph could be read, [Some (Quote ps)] otherwise. *)
and read_quote indent e =
(* Put [elm] back on [e] and raise [Enum.No_more_elements].
   NOTE(review): presumably this is how the mapped enumeration is made to
   look exhausted to its consumer -- confirm against ExtLib's Enum. *)
let push_and_finish e elm = Enum.push e elm; raise Enum.No_more_elements in
let next_without_lt e = function
(* a blank line ends the quote *)
| (_, _, true) as line -> push_and_finish e line
| (n, s, false) as line ->
(* so does a line indented less than [indent] or not starting with '>' *)
if n < indent || s.[0] <> '>' then
push_and_finish e line
else
(* drop the '>' and use the number of characters removed by stripping
   as the indentation of the quoted line *)
let s = String.slice ~first:1 s in
let s' = String.strip s in
(String.length s - String.length s', s', s' = "")
in match collect (read_paragraph 0) (Enum.map (next_without_lt e) e) with
[] -> None
| ps -> Some (Quote ps)
(* [read_normal e] reads a normal paragraph: consecutive non-blank lines are
   consumed from [e] and joined with single spaces, stopping (without
   consuming) at a blank line, at the end of input, or at a line that starts
   with '!', '*', '#' or '>' followed by a space or tab, or with "{{".  The
   joined text is parsed as inline text. *)
and read_normal e =
  let starts_block l = match l.[0] with
    | '!' | '*' | '#' | '>' -> snd_is_space l
    | '{' -> snd_is l '{'
    | _ -> false in
  let rec gather acc = match Enum.peek e with
    | Some (_, l, false) when not (starts_block l) ->
        Enum.junk e;
        gather (l :: acc)
    | Some _ | None -> String.concat " " (List.rev acc) in
  let txt = gather [] in
  Some (Normal (parse_text txt))
(* [parse_text s] parses the whole of [s] as inline text, starting from an
   empty scanner state. *)
and parse_text s =
  let initial =
    { max = String.length s; fragments = []; current = new_fragment () } in
  scan s initial 0
(* [scan s st n] scans [s] from offset [n] up to [st.max] (exclusive) and
   returns the inline fragments in document order.  Recognised constructs:
   `code`, *bold*, __emphasis__, ==struck== (scanned recursively),
   ![alt](src) images and [desc](target) links; a backslash makes the next
   character literal.  Anything else is accumulated as plain text. *)
and scan s st n =
  let max = st.max in
  if n >= max then List.rev (push_current st)
  else match s.[n] with
    | '`' | '*' | '_' as marker ->
        (* these three only differ by delimiter and constructor *)
        let delim, wrap = match marker with
          | '`' -> "`", (fun txt -> Code txt)
          | '*' -> "*", (fun txt -> Bold txt)
          | _ -> "__", (fun txt -> Emph txt) in
        delimited
          (fun ~first ~last -> wrap (unescape_slice s ~first ~last))
          delim s st n
    | '=' ->
        let struck ~first ~last =
          let inner =
            { max = last; fragments = []; current = new_fragment () } in
          Struck (scan s inner first) in
        delimited struck "==" s st n
    | '!' when matches_at s ~max n "![" ->
        let to_image r = Image { img_src = r.src; img_alt = r.desc } in
        maybe_link "![" to_image s st (n + 2)
    | '[' ->
        let to_link r = match r.src, r.desc with
          | "", "" -> Text ""
          (* no target: the description doubles as the target *)
          | "", desc -> Link { href_target = desc; href_desc = desc }
          | src, "" when src.[0] = '#' -> Anchor (String.slice ~first:1 src)
          | src, desc -> Link { href_target = src; href_desc = desc } in
        maybe_link "[" to_link s st (n + 1)
    | '\\' when n + 1 < max -> addc st.current s.[n + 1]; scan s st (n + 2)
    | c -> addc st.current c; scan s st (n + 1)
(* [delimited f delim s st first] handles a construct enclosed in [delim]
   that may start at offset [first].  If [delim] is present at [first] and an
   unescaped closing [delim] is found before [st.max], the fragment
   [f ~first ~last] (the bounds of the enclosed text) is added and scanning
   resumes after the closing delimiter.  Otherwise the character at [first]
   is kept as plain text and scanning resumes at [first + 1].  Returns the
   result of the continued scan. *)
and delimited f delim s st first =
  let max = st.max in
  let delim_len = String.length delim in
  let keep_as_text () =
    addc st.current s.[first];
    scan s st (first + 1) in
  let closing =
    if matches_at s ~max first delim
    then scan_past ~delim s ~max (first + delim_len)
    else None in
  match closing with
  | None -> keep_as_text ()
  | Some after ->
      let chunk = f ~first:(first + delim_len) ~last:(after - delim_len) in
      scan s
        { st with fragments = chunk :: push_current st;
                  current = new_fragment () }
        after
(* [maybe_link delim f s st n] tries to read a "desc](target)" reference at
   offset [n], i.e. just after the opening [delim].  On success the fragment
   built by [f] is added and scanning resumes after the reference; otherwise
   [delim] is kept as plain text and scanning resumes at [n]. *)
and maybe_link delim f s st n =
  match scan_link s ~max:st.max n with
  | Some (link, after) ->
      let fragments = f link :: push_current st in
      scan s
        { st with fragments = fragments; current = new_fragment () }
        after
  | None ->
      adds st.current delim;
      scan s st n
(* [scan_past ~delim s ~max n] looks for the first occurrence of [delim] in
   [s] that starts at or after [n], starts before [max], and is not
   immediately preceded by a backslash.  Returns [Some offset], the offset of
   the character *after* that occurrence, or [None] if there is none.

   An occurrence at offset 0 has no preceding character and is therefore
   never escaped; the previous code read [s.[-1]] in that case and raised
   [Invalid_argument]. *)
and scan_past ~delim s ~max n =
  let re = Str.regexp (Str.quote delim) in
  let delim_len = String.length delim in
  let rec loop from =
    if from >= max then None
    else
      match (try Some (Str.search_forward re s from) with Not_found -> None) with
      | Some pos when pos >= max -> None
      (* escaped delimiter: keep looking after it *)
      | Some pos when pos > 0 && s.[pos - 1] = '\\' -> loop (pos + 1)
      | Some pos -> Some (pos + delim_len)
      | None -> None
  in loop n
(* [scan_link s ~max n] reads "desc](target)" starting at offset [n].
   Returns [Some (reference, offset)], where [offset] is the position just
   after the closing ')', or [None] when the closing ']' is missing, is not
   immediately followed by '(', or the closing ')' is missing.  Both parts of
   the reference are stripped and unescaped. *)
and scan_link s ~max n =
  match scan_past ~delim:"]" s ~max n with
  | Some after_desc when after_desc < max && s.[after_desc] = '(' ->
      begin match scan_past ~delim:")" s ~max (after_desc + 1) with
        | Some after_uri ->
            let desc = unescape_slice s ~first:n ~last:(after_desc - 1) in
            let src =
              unescape_slice s
                ~first:(after_desc + 1)
                ~last:(after_uri - 1) in
            Some ({ desc = desc; src = src }, after_uri)
        | None -> None
      end
  | Some _ | None -> None
(* [matches_at s ~max n delim] is true iff [delim] occurs in [s] at offset
   [n] and ends no later than [max]. *)
and matches_at s ~max n delim =
  let len = String.length delim in
  (* compare character by character, stopping at the first mismatch *)
  let rec same_from k =
    k = len || (s.[n + k] = delim.[k] && same_from (k + 1)) in
  n + len <= max && same_from 0
(* [parse_enum e] parses an enumeration of raw lines into paragraphs.  Each
   line is turned into (indentation, stripped text, is-blank) before being
   handed to the paragraph reader. *)
let parse_enum e =
  let annotate raw =
    let text = String.strip raw in
    (indentation raw, text, text = "") in
  collect (read_paragraph 0) (Enum.map annotate e)
(* [parse_lines ls] parses a list of raw lines into paragraphs. *)
let parse_lines ls =
  let lines = List.enum ls in
  parse_enum lines
(* [parse_text s] parses a whole document: [s] is split on newlines and the
   resulting lines are parsed into paragraphs.  This definition shadows the
   inline-text parser of the same name defined earlier in the file. *)
let parse_text s =
  let newline = Str.regexp "\n" in
  parse_lines (Str.split newline s)