Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
favonia committed Oct 20, 2024
1 parent aaddbf3 commit 58d6f68
Show file tree
Hide file tree
Showing 9 changed files with 178 additions and 174 deletions.
15 changes: 11 additions & 4 deletions src/Explication.ml
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
include ExplicationData

let dump_seg dump_tag = Utils.dump_pair (Utils.dump_option dump_tag) Utils.dump_string
let dump_marker dump_tag fmt = function
| RangeBegin tag -> Format.fprintf fmt {|@[<2>RangeBegin@ @[%a@]@]|} dump_tag tag
| RangeEnd tag -> Format.fprintf fmt {|@[<2>RangeEnd@ @[%a@]@]|} dump_tag tag
| Point tag -> Format.fprintf fmt {|@[<2>Point@ @[%a@]@]|} dump_tag tag
let dump_line dump_tag fmt {tags; segments} =
Format.fprintf fmt {|@[<1>{@[<2>tags=@,@[%a@]@];@ @[<2>segments=@ @[%a@]@]}@]|}
let dump_token dump_tag fmt = function
| String str -> Format.fprintf fmt {|@[<2>String@ "%s"@]|} (String.escaped str)
| Marker m -> Format.fprintf fmt {|@[<2>Marker@ @[<1>(%a)@]@]|} (dump_marker dump_tag) m
let dump_line dump_tag fmt {tags; tokens} =
Format.fprintf fmt {|@[<1>{@[<2>tags=@,@[%a@]@];@ @[<2>tokens=@ @[%a@]@]}@]|}
(Utils.dump_list dump_tag) tags
(Utils.dump_list (dump_seg dump_tag)) segments
(Utils.dump_list (dump_token dump_tag)) tokens
let dump_block dump_tag fmt {begin_line_num; end_line_num; lines} =
Format.fprintf fmt {|@[<1>{begin_line_num=%d;@ end_line_num=%d;@ @[<2>lines=@ @[%a@]@]}@]|}
Expand Down
5 changes: 4 additions & 1 deletion src/Explication.mli
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,8 @@ include module type of ExplicationData

(** {1 Debugging} *)

(** Ugly printer for debugging *)
(** Ugly printer for {!type:marker} *)
val dump_marker : (Format.formatter -> 'tag -> unit) -> Format.formatter -> 'tag marker -> unit

(** Ugly printer for {!type:t} *)
val dump : (Format.formatter -> 'tag -> unit) -> Format.formatter -> 'tag t -> unit
19 changes: 12 additions & 7 deletions src/ExplicationData.ml
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
(** A segment is an optionally tagged string from the user content. (Note the use of [option].) *)
type 'tag segment =
| String of 'tag option * string
| Start of 'tag
| End of 'tag
(** A marker is a delimiter of a range or a specific point. *)
type 'tag marker =
| RangeBegin of 'tag
| RangeEnd of 'tag
| Point of 'tag

(** A token is either a string or a marker. *)
type 'tag token =
| String of string
| Marker of 'tag marker

(** A line is a list of {!type:segment}s along with tags. *)
type 'tag line =
{ tags : 'tag list
; segments : 'tag segment list
{ tags : 'tag list (** All tags in this line *)
; tokens : 'tag token list
}

(** A block is a collection of consecutive lines. *)
Expand Down
91 changes: 41 additions & 50 deletions src/Explicator.ml
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,11 @@ let () = Printexc.register_printer @@
| _ -> None

let to_start_of_line (pos : Range.position) = {pos with offset = pos.start_of_line}
let default_blend ~(priority : _ -> int) t1 t2 = if priority t2 <= priority t1 then t2 else t1

module Make (Tag : Tag) = struct
type position = Range.position

(** Skip the newline sequence, assuming that [shift] is not zero. (Otherwise, it means we already reached eof.) *)
(** Skip the newline sequence, assuming that [shift] is not zero. (Otherwise, it means we already reached EOF.) *)
let eol_to_next_line shift (pos : position) : position =
assert (shift <> 0);
{ source = pos.source;
Expand All @@ -65,9 +64,8 @@ module Make (Tag : Tag) = struct

type explicator_state =
{ lines : Tag.t line bwd
; segments : Tag.t segment bwd
; remaining_tagged_lines : (Tag.t * int) list
; current_tag : Tag.t option
; tokens : Tag.t token bwd
; remaining_line_markers : (int * Tag.t) list
; cursor : Range.position
; eol : int
; eol_shift : int option
Expand All @@ -77,81 +75,75 @@ module Make (Tag : Tag) = struct
module F = Flattener.Make(Tag)

let explicate_block ~line_breaks (b : Tag.t Flattener.block) : Tag.t block =
match b.tagged_positions with
| [] -> invalid_arg "explicate_block: empty block"
| ((_, ploc) :: _) as ps ->
let source = SourceReader.load ploc.source in
match b.markers with
| [] -> invalid_arg "explicate_block: empty block; should be impossible"
| ((first_loc, _) :: _) as markers ->
let source = SourceReader.load first_loc.source in
let eof = SourceReader.length source in
let find_eol i = UserContent.find_eol ~line_breaks (SourceReader.unsafe_get source) (i, eof) in
let rec go state : (Tag.t option * Range.position) list -> _ =
let rec go state : (Range.position * Tag.t marker) list -> _ =
function
| (ptag, ploc) :: ps when state.cursor.line_num = ploc.line_num ->
if ploc.offset > eof then invalid_arg "Asai.Explicator.explicate: beyond eof; use the debug mode";
if ploc.offset > state.eol then invalid_arg "Asai.Explicator.explicate: unexpected newline; use the debug mode";
if ploc.offset = state.cursor.offset then
go {state with cursor = ploc; current_tag = ptag} ps
else
(* Still on the same line *)
let segments =
state.segments <:
(state.current_tag, read_between ~source state.cursor.offset ploc.offset)
in
go { state with segments; cursor = ploc; current_tag = ptag } ps
| (loc, marker) :: markers when state.cursor.line_num = loc.line_num (* on the same line *) ->
if loc.offset > eof then invalid_arg "Asai.Explicator.explicate: position beyond EOF; use the debug mode";
if loc.offset > state.eol then invalid_arg "Asai.Explicator.explicate: unexpected newline; use the debug mode";
let tokens =
if loc.offset = state.cursor.offset then
state.tokens <: Marker marker
else
state.tokens <: String (read_between ~source state.cursor.offset loc.offset) <: Marker marker
in
go { state with tokens; cursor = loc } markers
| ps ->
(* Shifting to the next line *)
let lines, remaining_tagged_lines =
let segments =
let lines, remaining_line_markers =
let tokens =
if state.cursor.offset < state.eol then
state.segments
<: (state.current_tag, read_between ~source state.cursor.offset state.eol)
else if Option.is_none state.eol_shift && Option.is_some state.current_tag then
state.segments
<: (state.current_tag, "‹EOF›")
state.tokens <: String (read_between ~source state.cursor.offset state.eol)
else
state.segments
state.tokens
in
let line_markers, remaining_line_markers =
Utils.span (fun (line_num, _) -> line_num = state.line_num) state.remaining_line_markers
in
let tagged_lines, remaining_tagged_lines = Utils.span (fun (_, i) -> i = state.line_num) state.remaining_tagged_lines in
(state.lines <: {segments = Bwd.to_list segments; tags = List.map fst tagged_lines}), remaining_tagged_lines
(state.lines <: {tokens = Bwd.to_list tokens; tags = List.map snd line_markers}), remaining_line_markers
in
(* Continue the process if [ps] is not empty. *)
match ps, state.eol_shift with
| [], _ ->
assert (state.line_num = b.end_line_num);
lines
| _ :: _, None -> invalid_arg "Asai.Explicator.explicate: beyond eof; use the debug mode"
| (_, ploc) :: _, Some eol_shift ->
if ploc.offset > eof then invalid_arg "Asai.Explicator.explicate: beyond eof; use the debug mode";
if ploc.offset <= state.eol then invalid_arg "Asai.Explicator.explicate: expected newline missing; use the debug mode";
if ploc.offset < state.eol + eol_shift then invalid_arg "Asai.Explicator.explicate: offset within newline; use the debug mode";
| _ :: _, None -> invalid_arg "Asai.Explicator.explicate: position beyond EOF; use the debug mode"
| (loc, _) :: _, Some eol_shift ->
if loc.offset > eof then invalid_arg "Asai.Explicator.explicate: position beyond EOF; use the debug mode";
if loc.offset <= state.eol then invalid_arg "Asai.Explicator.explicate: expected newline missing; use the debug mode";
if loc.offset < state.eol + eol_shift then invalid_arg "Asai.Explicator.explicate: offset within newline; use the debug mode";
(* Okay, p is really on the next line *)
let cursor = eol_to_next_line eol_shift {state.cursor with offset = state.eol} in
let eol, eol_shift = find_eol (state.eol + eol_shift) in
go
{ lines
; segments = Emp
; remaining_tagged_lines
; current_tag = state.current_tag
; tokens = Emp
; remaining_line_markers
; cursor
; eol
; eol_shift
; line_num = state.line_num + 1
}
ps
in
let begin_pos = to_start_of_line ploc in
let eol, eol_shift = find_eol ploc.offset in
let begin_pos = to_start_of_line first_loc in
let eol, eol_shift = find_eol first_loc.offset in
let lines =
go
{ lines = Emp
; segments = Emp
; remaining_tagged_lines = b.tagged_lines
; current_tag = None
; tokens = Emp
; remaining_line_markers = b.line_markers
; cursor = begin_pos
; eol
; eol_shift
; line_num = b.begin_line_num
}
ps
markers
in
{ begin_line_num = b.begin_line_num
; end_line_num = b.end_line_num
Expand All @@ -165,16 +157,15 @@ module Make (Tag : Tag) = struct

let check_ranges ~line_breaks ranges =
List.iter
(fun (_, range) ->
(fun (range, _) ->
let source = SourceReader.load @@ Range.source range in
let read = SourceReader.unsafe_get source in
let eof = SourceReader.length source in
try UserContent.check_range ~line_breaks ~eof read range
with UserContent.Invalid_range reason -> raise @@ Invalid_range (range, reason))
ranges

let explicate ?(line_breaks=`Traditional) ?(block_splitting_threshold=5)
?(blend=default_blend ~priority:Tag.priority) ?(debug=false) ranges =
let explicate ?(line_breaks=`Traditional) ?(block_splitting_threshold=5) ?(debug=false) ranges =
if debug then check_ranges ~line_breaks ranges;
List.map (explicate_part ~line_breaks) @@ F.flatten ~block_splitting_threshold ~blend ranges
List.map (explicate_part ~line_breaks) @@ F.flatten ~block_splitting_threshold ranges
end
2 changes: 1 addition & 1 deletion src/ExplicatorSigs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ end
module type S = sig
module Tag : Tag

val explicate : ?line_breaks:[`Unicode | `Traditional] -> ?block_splitting_threshold:int -> ?debug:bool -> (Tag.t * Range.t) list -> Tag.t t
val explicate : ?line_breaks:[`Unicode | `Traditional] -> ?block_splitting_threshold:int -> ?debug:bool -> (Range.t * Tag.t) list -> Tag.t t
(** Explicate a list of ranges using content from a data reader. This function must be run under [SourceReader.run].
@param line_breaks The set of character sequences that are recognized as (hard) line breaks. The [`Unicode] set contains all Unicode character sequences in {{:https://www.unicode.org/versions/Unicode15.0.0/ch05.pdf#G41643}Unicode 15.0.0 Table 5-1.} The [`Traditional] set only contains [U+000A (LF)], [U+000D (CR)], and [U+000D U+000A (CRLF)] as line breaks. The default is the [`Traditional] set.
Expand Down
Loading

0 comments on commit 58d6f68

Please sign in to comment.