From 5e445806da44b3f98d328671daf50dc90d7dddf7 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Sat, 28 Sep 2013 23:06:18 +0900 Subject: [PATCH 01/22] Depend Batteries --- INSTALL.txt | 3 ++- _oasis | 1 + _tags | 5 ++++- setup.ml | 14 ++++++++++---- src/META | 3 ++- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/INSTALL.txt b/INSTALL.txt index 56e4ecc..df3fea6 100644 --- a/INSTALL.txt +++ b/INSTALL.txt @@ -1,5 +1,5 @@ (* OASIS_START *) -(* DO NOT EDIT (digest: d8647aab35472aa99dc849c8470fd60c) *) +(* DO NOT EDIT (digest: 5ab0562daedc5ecac055660efd427885) *) This is the INSTALL file for the ucorelib distribution. This package uses OASIS to generate its build system. See section OASIS for @@ -11,6 +11,7 @@ Dependencies In order to compile this package, you will need: * ocaml * findlib +* batteries (>= 2.1) for library ucorelib * oUnit for executable test Installing diff --git a/_oasis b/_oasis index c13ee4a..42a02ab 100644 --- a/_oasis +++ b/_oasis @@ -12,6 +12,7 @@ Library ucorelib Modules: UCoreLib Build: true Install: true + BuildDepends: batteries (>= 2.1) Executable test BuildTools: ocamlbuild diff --git a/_tags b/_tags index 6f1005c..3519504 100644 --- a/_tags +++ b/_tags @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: d1565b51aca25016b55e19f051f934e0) +# DO NOT EDIT (digest: d494b9845ed839fef1b7c3a7f0a7a07b) # Ignore VCS directories, you can use the same kind of rule outside # OASIS_START/STOP if you want to exclude directories that contains # useless stuff for the build process @@ -15,9 +15,12 @@ "_darcs": not_hygienic # Library ucorelib "src/ucorelib.cmxs": use_ucorelib +<src/*.ml{,i}>: pkg_batteries # Executable test "test/test.byte": use_ucorelib "test/test.byte": pkg_oUnit +"test/test.byte": pkg_batteries <test/*.ml{,i}>: use_ucorelib <test/*.ml{,i}>: pkg_oUnit +<test/*.ml{,i}>: pkg_batteries # OASIS_STOP diff --git a/setup.ml b/setup.ml index 409bce8..7927757 100644 --- a/setup.ml +++ b/setup.ml @@ -1,7 +1,7 @@ (* setup.ml generated for the first time by OASIS v0.3.0 *) (* OASIS_START *) -(* DO NOT EDIT (digest: 04a1fc378a4fe61d8c9370b5b29330fa) *) +(* DO NOT EDIT (digest: 911c449ba9cec6648246c74d2a72be6d) *) (* Regenerated by OASIS v0.3.0 Visit http://oasis.forge.ocamlcore.org for more information and @@ -5829,7 +5829,12 @@ let setup_t = bs_install = [(OASISExpr.EBool true, true)]; bs_path = "src"; bs_compiled_object = Best; - bs_build_depends = []; + bs_build_depends = + [ + FindlibPackage + ("batteries", + Some (OASISVersion.VGreaterEqual "2.1")) + ]; bs_build_tools = [ExternalTool "ocamlbuild"]; bs_c_sources = []; bs_data_files = []; @@ -5936,7 +5941,8 @@ let setup_t = }; oasis_fn = Some "_oasis"; oasis_version = "0.3.0"; - oasis_digest = Some "\150\245\003HB\161c\213\160i\153\236V\015l\145"; + oasis_digest = + Some "\019\243q\247\158 \231\252\201\141\022\203\145\137\004\140"; oasis_exec = None; oasis_setup_args = []; setup_update = false; @@ -5944,6 +5950,6 @@ let setup_t = let setup () = BaseSetup.setup setup_t;; -# 5948 "setup.ml" +# 5954 "setup.ml" (* OASIS_STOP *) let () = setup ();; diff --git a/src/META b/src/META index fcfbaa4..14a8c6e 100644 --- a/src/META +++ b/src/META @@ -1,7 +1,8 @@ # OASIS_START -# DO NOT EDIT (digest: 233061cbaf77fc5d77d38b183e9c0a38) +# DO NOT EDIT (digest: 368ebddb8be14c2681f69deed84d954a) version = "0.0.1" description = "A light weight Unicode library for OCaml" +requires = "batteries" archive(byte) = "ucorelib.cma" archive(byte, plugin) = "ucorelib.cma" archive(native) = "ucorelib.cmxa" From 3416dce439d2153631478920fee15e99d72c7b1e Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Sat, 28 Sep 2013 23:07:49 +0900 Subject: [PATCH 02/22] New Text. append --- src/uCoreLib.ml | 717 +++++++++++++++++++++++++++++++++-------------- src/uCoreLib.mli | 97 ++++++- 2 files changed, 606 insertions(+), 208 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index 4327936..4636c5a 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -1,6 +1,6 @@ -(** ulib : a feather weight Unicode library for OCaml *) +(** ucorelib : a feather weight Unicode library for OCaml *) -(* Copyright (C) 2011 Yamagata Yoriyuki. *) +(* Copyright (C) 2011, 2013 Yamagata Yoriyuki. *) (* This library is free software; you can redistribute it and/or *) (* modify it under the terms of the GNU Lesser General Public License *) @@ -73,7 +73,7 @@ (* You can contact the authour by sending email to *) (* yoriyuki.y@gmail.com *) - + exception Out_of_range exception Malformed_code @@ -406,6 +406,315 @@ module UTF8 = struct end +(* Interface to String used for Rope implementation *) +module type BaseStringType = sig + type t + + val empty : t + (* [create n] creates [n]-bytes base string *) + val create : int -> t + val init : int -> (int -> uchar) -> t + val of_string : string -> t option + val of_string_unsafe : string -> t + val of_ascii : string -> t option + + val length : t -> int + val compare : t -> t -> int + + type index + val first : t -> index + val end_pos : t -> index + val out_of_range : t -> index -> bool + val next : t -> index -> index + val move : t -> index -> int -> index + val equal_index : t -> index -> index -> bool + + (* Low level functions *) + (* bytes of substring *) + val size : t -> index -> index -> int + (* [blit s1 i1 i2 s2 j] copies the contents of [s1] from [i1] to + [i2] into the location [j] of [s2]. *) + val blit : t -> index -> index -> t -> index -> unit + (* [move_by_bytes s i x] moves index [i] by [x] bytes.*) + val move_by_bytes : t -> index -> int -> index + + val append : t -> t -> t + val sub : t -> index -> index -> t + val copy : t -> t + + val read : t -> index -> uchar +end + +module BaseString : BaseStringType = struct + include UTF8 + + let empty = "" + + let is_valid s = try validate s; true with Malformed_code -> false + + let copy = String.copy + let read = look + let append = (^) + let create = String.create + let end_pos s = String.length s + let of_string s = if is_valid s then None else Some s + let of_string_unsafe s = s + let of_ascii s = try Some (UTF8.of_ascii s) with Malformed_code -> None + let equal_index s i j = i = j + let size s i j = j - i + let blit s1 i1 i2 s2 j = String.blit s1 i1 s2 j (i2 - i1) + let move_by_bytes s i x = i + x + +end + +module Text' = struct +(* + * Rope: Rope: an implementation of the data structure described in + * + * Boehm, H., Atkinson, R., and Plass, M. 1995. Ropes: an alternative to + * strings. Softw. Pract. Exper. 25, 12 (Dec. 1995), 1315-1330. + * + * Motivated by Luca de Alfaro's extensible array implementation Vec. + * + * Copyright (C) 2013 Yoriyuki Yamagata <yoriyuki.y@gmail.com> + * Copyright (C) 2007 Mauricio Fernandez <mfp@acm.org> + * Copyright (C) 2008 Edgar Friendly <thelema314@gmail.com> + * Copyright (C) 2008 David Teller, LIFO, Universite d'Orleans + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + + open BatInt.Safe_int + + module B = BaseString + + let int_max (x:int) (y:int) = if x < y then y else x + let int_min (x:int) (y:int) = if x < y then x else y + + type base_string = {s : B.t; unused : B.index} + + type t = + Empty + | Concat of node + | Leaf of leaf + and node = {left : t; left_length : int; right : t; right_length : int; height : int} + and leaf = {b : base_string; + i : B.index; (*..............:::::::::::::::::::::.... *) + j : B.index; (* ^ ^ *) + len : int} (* i j *) + + let empty = Empty + + let length = function + Empty -> 0 + | Concat node -> node.left_length + node.right_length + | Leaf leaf -> leaf.len + + let height = function + Empty -> 0 + | Concat node -> node.height + | Leaf _ -> 1 + + (* 48 limits max rope size to 220GB on 64 bit, + * ~ 700MB on 32bit (length fields overflow after that) *) + let max_height = 48 + + let leaf_size = 256 (* bytes *) + + let make_concat l r = + let hl = height l and hr = height r in + let cl = length l and cr = length r in + Concat {left = l; left_length = cl; + right = r; right_length = cr; + height = max hl hr} + + let min_len = + let fib_tbl = Array.make max_height 0 in + let rec fib n = match fib_tbl.(n) with + 0 -> + let last = fib (n - 1) and prev = fib (n - 2) in + let r = last + prev in + let r = if r > last then r else last in (* check overflow *) + fib_tbl.(n) <- r; r + | n -> n + in + fib_tbl.(0) <- leaf_size + 1; fib_tbl.(1) <- 3 * leaf_size / 2 + 1; + Array.init max_height (fun i -> if i = 0 then 1 else fib (i - 1)) + + let max_length = min_len.(Array.length min_len - 1) + + let make_concat l r = + let hl = height l and hr = height r in + let cl = length l and cr = length r in + Concat {left = l; left_length = cl; right = r; right_length = cr; + height = 1 + int_max hl hr} + + let concat_fast l r = match l with + Empty -> r + | Leaf _ | Concat _ -> + match r with + Empty -> l + | Leaf _ | Concat _ -> make_concat l r + + (* based on Hans-J. Boehm's *) + type forest_element = { mutable c : t; mutable rlen : int } + + let not_empty = function Empty -> false | _ -> true + + let add_forest forest rope len = + let i = ref 0 in + let sum = ref empty in + while len > min_len.(!i+1) do + if not_empty forest.(!i).c then begin + sum := concat_fast forest.(!i).c !sum; + forest.(!i).c <- Empty + end; + incr i + done; + sum := concat_fast !sum rope; + let sum_len = ref (length !sum) in + while !sum_len >= min_len.(!i) do + if not_empty forest.(!i).c then begin + sum := concat_fast forest.(!i).c !sum; + sum_len := !sum_len + forest.(!i).rlen; + forest.(!i).c <- Empty; + end; + incr i + done; + decr i; + forest.(!i).c <- !sum; + forest.(!i).rlen <- !sum_len + + let concat_forest forest = + Array.fold_left (fun s x -> concat_fast x.c s) Empty forest + + let rec balance_insert rope len forest = match rope with + Empty -> () + | Leaf _ -> add_forest forest rope len + | Concat node when node.height >= max_height || len < min_len.(node.height) -> + balance_insert node.left node.left_length forest; + balance_insert node.right node.right_length forest + | x -> add_forest forest x len (* function or balanced *) + + let balance r = + match r with + Empty | Leaf _ -> r + | _ -> + let forest = Array.init max_height (fun _ -> {c = Empty; rlen = 0}) in + balance_insert r (length r) forest; + concat_forest forest + + let bal_if_needed l r = + let r = make_concat l r in + if height r < max_height then r else balance r + + let is_full_tail leaf = B.equal_index leaf.b.s leaf.j leaf.b.unused + + let leaf_append leaf_l leaf_r = + let size_l = B.size leaf_l.b.s leaf_l.i leaf_l.j in + let size_r = B.size leaf_r.b.s leaf_r.i leaf_r.j in + if size_l + size_r <= leaf_size then + let s = + if size_l + size_r <= B.length leaf_l.b.s && + is_full_tail leaf_l + then begin + B.blit leaf_r.b.s leaf_r.i leaf_r.j leaf_l.b.s leaf_l.b.unused; + leaf_l.b.s; + end else + let s = B.create leaf_size in + B.blit leaf_l.b.s leaf_l.i leaf_l.j s (B.first s); + B.blit leaf_r.b.s leaf_r.i leaf_r.j s (B.move_by_bytes s (B.first s) size_l); + s in + let b = {s = leaf_l.b.s; unused = B.move_by_bytes leaf_l.b.s leaf_l.b.unused size_r} in + let leaf = {b = b; i = leaf_l.i; + j = B.move_by_bytes leaf_l.b.s leaf_l.i size_r; + len = leaf_l.len + leaf_r.len} in + Leaf leaf + else + make_concat (Leaf leaf_l) (Leaf leaf_r) (* height = 1 *) + + + let concat_leaf l leaf_r = + match l with + | Empty -> Leaf leaf_r + | Leaf leaf_l -> leaf_append leaf_l leaf_r + | Concat node -> + match node.right with + Leaf leaf_l -> + Concat {node with right = leaf_append leaf_l leaf_r} + | _ -> bal_if_needed l (Leaf leaf_r) + + let append l = function + Empty -> l + | Leaf leaf_r as r -> concat_leaf l leaf_r + | Concat node as r -> + match node.left with + Leaf leaf_r -> + (match l with + Empty -> r + | Concat _ -> bal_if_needed l r + | Leaf leaf_l -> leaf_append leaf_l leaf_r) + | _ -> + match l with + Empty -> r + | _ -> bal_if_needed l r + + let leaf_of_uchar u = + let s = B.of_string_unsafe (UTF8.make 1 u) in + let b = {s = s; unused = B.end_pos s} in + {b = b; i = B.first b.s; j = B.end_pos b.s; len = 1} + + let append_uchar l u = concat_leaf l (leaf_of_uchar u) + + let of_uchar u = Leaf (leaf_of_uchar u) + + let init ~len ~f = + if len < 0 then failwith "Text.init: The length is minus" else + let s = B.init len f in + let b = {s = s; unused = B.end_pos s} in + Leaf {b = b; i = B.first s; j = B.end_pos s; len = len} + + let of_string s = + match B.of_string s with + None -> None + | Some s -> + let b = {s = s; unused = B.end_pos s} in + Some (Leaf {b = b; i = B.first b.s; j = B.end_pos b.s; len = B.length s}) + + let of_string_exn s = + match of_string s with + None -> raise Malformed_code + | Some text -> text + + let of_ascii s = + match B.of_ascii s with + None -> None + | Some s -> + let b = {s = s; unused = B.end_pos s} in + Some (Leaf {b = b; i = B.first b.s; j = B.end_pos b.s; len = B.length s}) + + let of_ascii_exn s = + match of_string s with + None -> raise Malformed_code + | Some text -> text + + let of_latin1 s = init (String.length s) (fun i -> UChar.unsafe_chr (Char.code s.[i])) + +end + (* * Rope: Rope: an implementation of the data structure described in * @@ -452,148 +761,148 @@ module Text = struct s exception Invalid_rope - + type t = Empty (**An empty rope*) | Concat of t * int * t * int * int (**[Concat l ls r rs h] is the concatenation of ropes [l] and [r], where [ls] is the total - length of [l], [rs] is the length of [r] - and [h] is the height of the node in the - tree, used for rebalancing. *) + length of [l], [rs] is the length of [r] + and [h] is the height of the node in the + tree, used for rebalancing. *) | Leaf of int * UTF8.t (**[Leaf l t] is string [t] with length [l], - measured in number of Unicode characters.*) - + measured in number of Unicode characters.*) + type forest_element = { mutable c : t; mutable len : int } - + let str_append = (^) let empty_str = "" let string_of_string_list l = String.concat empty_str l - - (* 48 limits max rope size to 220GB on 64 bit, - * ~ 700MB on 32bit (length fields overflow after that) *) + + (* 48 limits max rope size to 220GB on 64 bit, + * ~ 700MB on 32bit (length fields overflow after that) *) let max_height = 48 - - (* actual size will be that plus 1 word header; - * the code assumes it's an even num. - * 256 gives up to a 50% overhead in the worst case (all leaf nodes near - * half-filled *) + + (* actual size will be that plus 1 word header; + * the code assumes it's an even num. + * 256 gives up to a 50% overhead in the worst case (all leaf nodes near + * half-filled *) let leaf_size = 256 (* utf-8 characters, not bytes *) - (* =end *) - - (* =begin code *) - + (* =end *) + + (* =begin code *) + exception Out_of_bounds - + let empty = Empty - + - (* by construction, there cannot be Empty or Leaf "" leaves *) + (* by construction, there cannot be Empty or Leaf "" leaves *) let is_empty = function Empty -> true | _ -> false - + let height = function Empty | Leaf _ -> 0 | Concat(_,_,_,_,h) -> h - + let length = function Empty -> 0 | Leaf (l,_) -> l | Concat(_,cl,_,cr,_) -> cl + cr - + let make_concat l r = let hl = height l and hr = height r in let cl = length l and cr = length r in - Concat(l, cl, r, cr, if hl >= hr then hl + 1 else hr + 1) - + Concat(l, cl, r, cr, if hl >= hr then hl + 1 else hr + 1) + let min_len = let fib_tbl = Array.make max_height 0 in let rec fib n = match fib_tbl.(n) with - 0 -> - let last = fib (n - 1) and prev = fib (n - 2) in - let r = last + prev in - let r = if r > last then r else last in (* check overflow *) - fib_tbl.(n) <- r; r - | n -> n + 0 -> + let last = fib (n - 1) and prev = fib (n - 2) in + let r = last + prev in + let r = if r > last then r else last in (* check overflow *) + fib_tbl.(n) <- r; r + | n -> n in - fib_tbl.(0) <- leaf_size + 1; fib_tbl.(1) <- 3 * leaf_size / 2 + 1; - Array.init max_height (fun i -> if i = 0 then 1 else fib (i - 1)) - + fib_tbl.(0) <- leaf_size + 1; fib_tbl.(1) <- 3 * leaf_size / 2 + 1; + Array.init max_height (fun i -> if i = 0 then 1 else fib (i - 1)) + let max_length = min_len.(Array.length min_len - 1) - + let concat_fast l r = match l with - Empty -> r - | Leaf _ | Concat(_,_,_,_,_) -> - match r with - Empty -> l - | Leaf _ | Concat(_,_,_,_,_) -> make_concat l r - - (* based on Hans-J. Boehm's *) + Empty -> r + | Leaf _ | Concat(_,_,_,_,_) -> + match r with + Empty -> l + | Leaf _ | Concat(_,_,_,_,_) -> make_concat l r + + (* based on Hans-J. Boehm's *) let add_forest forest rope len = let i = ref 0 in let sum = ref empty in - while len > min_len.(!i+1) do - if forest.(!i).c <> Empty then begin - sum := concat_fast forest.(!i).c !sum; - forest.(!i).c <- Empty - end; - incr i - done; - sum := concat_fast !sum rope; - let sum_len = ref (length !sum) in - while !sum_len >= min_len.(!i) do - if forest.(!i).c <> Empty then begin - sum := concat_fast forest.(!i).c !sum; - sum_len := !sum_len + forest.(!i).len; - forest.(!i).c <- Empty; - end; - incr i - done; - decr i; - forest.(!i).c <- !sum; - forest.(!i).len <- !sum_len - + while len > min_len.(!i+1) do + if forest.(!i).c <> Empty then begin + sum := concat_fast forest.(!i).c !sum; + forest.(!i).c <- Empty + end; + incr i + done; + sum := concat_fast !sum rope; + let sum_len = ref (length !sum) in + while !sum_len >= min_len.(!i) do + if forest.(!i).c <> Empty then begin + sum := concat_fast forest.(!i).c !sum; + sum_len := !sum_len + forest.(!i).len; + forest.(!i).c <- Empty; + end; + incr i + done; + decr i; + forest.(!i).c <- !sum; + forest.(!i).len <- !sum_len + let concat_forest forest = Array.fold_left (fun s x -> concat_fast x.c s) Empty forest - + let rec balance_insert rope len forest = match rope with - Empty -> () - | Leaf _ -> add_forest forest rope len - | Concat(l,cl,r,cr,h) when h >= max_height || len < min_len.(h) -> - balance_insert l cl forest; - balance_insert r cr forest - | x -> add_forest forest x len (* function or balanced *) - + Empty -> () + | Leaf _ -> add_forest forest rope len + | Concat(l,cl,r,cr,h) when h >= max_height || len < min_len.(h) -> + balance_insert l cl forest; + balance_insert r cr forest + | x -> add_forest forest x len (* function or balanced *) + let balance r = match r with - Empty | Leaf _ -> r - | _ -> - let forest = Array.init max_height (fun _ -> {c = Empty; len = 0}) in - balance_insert r (length r) forest; - concat_forest forest - + Empty | Leaf _ -> r + | _ -> + let forest = Array.init max_height (fun _ -> {c = Empty; len = 0}) in + balance_insert r (length r) forest; + concat_forest forest + let bal_if_needed l r = let r = make_concat l r in - if height r < max_height then r else balance r - + if height r < max_height then r else balance r + let concat_str l = function Empty | Concat(_,_,_,_,_) -> invalid_arg "concat_str" | Leaf (lenr, rs) as r -> match l with - | Empty -> r - | Leaf (lenl, ls) -> - let slen = lenr + lenl in - if slen <= leaf_size then Leaf ((lenl+lenr),(str_append ls rs)) - else make_concat l r (* height = 1 *) - | Concat(ll, cll, Leaf (lenlr ,lrs), clr, h) -> - let slen = clr + lenr in - if clr + lenr <= leaf_size then - Concat(ll, cll, Leaf ((lenlr + lenr),(str_append lrs rs)), slen, h) - else - bal_if_needed l r - | _ -> bal_if_needed l r - + | Empty -> r + | Leaf (lenl, ls) -> + let slen = lenr + lenl in + if slen <= leaf_size then Leaf ((lenl+lenr),(str_append ls rs)) + else make_concat l r (* height = 1 *) + | Concat(ll, cll, Leaf (lenlr ,lrs), clr, h) -> + let slen = clr + lenr in + if clr + lenr <= leaf_size then + Concat(ll, cll, Leaf ((lenlr + lenr),(str_append lrs rs)), slen, h) + else + bal_if_needed l r + | _ -> bal_if_needed l r + let append_char c r = concat_str r (Leaf (1, (UTF8.make 1 c))) let append l = function @@ -601,31 +910,31 @@ module Text = struct | Leaf _ as r -> concat_str l r | Concat(Leaf (lenrl,rls),rlc,rr,rc,h) as r -> (match l with - Empty -> r - | Concat(_,_,_,_,_) -> bal_if_needed l r - | Leaf (lenl, ls) -> - let slen = rlc + lenl in - if slen <= leaf_size then - Concat(Leaf((lenrl+lenl),(str_append ls rls)), slen, rr, rc, h) - else - bal_if_needed l r) + Empty -> r + | Concat(_,_,_,_,_) -> bal_if_needed l r + | Leaf (lenl, ls) -> + let slen = rlc + lenl in + if slen <= leaf_size then + Concat(Leaf((lenrl+lenl),(str_append ls rls)), slen, rr, rc, h) + else + bal_if_needed l r) | r -> (match l with Empty -> r | _ -> bal_if_needed l r) - + let ( ^^^ ) = append let prepend_char c r = append (Leaf (1,(UTF8.make 1 c))) r - + let get r i = let rec aux i = function - Empty -> raise Out_of_bounds - | Leaf (lens, s) -> - if i >= 0 && i < lens then UTF8.get s i - else raise Out_of_bounds - | Concat (l, cl, r, cr, _) -> - if i < cl then aux i l - else aux (i - cl) r + Empty -> raise Out_of_bounds + | Leaf (lens, s) -> + if i >= 0 && i < lens then UTF8.get s i + else raise Out_of_bounds + | Concat (l, cl, r, cr, _) -> + if i < cl then aux i l + else aux (i - cl) r in aux i r - + let copy_set us cpos c = let ipos = UTF8.ByteIndex.of_char_idx us cpos in let jpos = UTF8.ByteIndex.next us ipos in @@ -637,55 +946,55 @@ module Text = struct let rec aux i = function Empty -> raise Out_of_bounds | Leaf (lens, s) -> - if i >= 0 && i < lens then - let s = copy_set s i v in - Leaf (lens, s) - else raise Out_of_bounds + if i >= 0 && i < lens then + let s = copy_set s i v in + Leaf (lens, s) + else raise Out_of_bounds | Concat(l, cl, r, cr, _) -> - if i < cl then append (aux i l) r - else append l (aux (i - cl) r) + if i < cl then append (aux i l) r + else append l (aux (i - cl) r) in aux i r module Iter = - struct - - - (* Iterators are used for iterating efficiently over multiple ropes - at the same time *) - - type iterator = { - mutable leaf : UTF8.t; - (* Current leaf in which the iterator is *) - mutable idx : UTF8.ByteIndex.b_idx; - (* Current byte position of the iterator *) - mutable rest : t list; - (* Ropes not yet visited *) - } - - type t = iterator option - - (* Initial iterator state: *) - let make rope = { leaf = UTF8.empty; - idx = UTF8.ByteIndex.first; - rest = if rope = Empty then [] else [rope] } - - let rec next_leaf = function - | Empty :: l -> - next_leaf l - | Leaf(len, str) :: l -> - Some(str, l) - | Concat(left, left_len, right, right_len, height) :: l -> - next_leaf (left :: right :: l) - | [] -> - None - - (* Advance the iterator to the next position, and return current - character: *) - let rec next iter = - if UTF8.ByteIndex.at_end iter.leaf iter.idx then - (* We are at the end of the current leaf, find another one: *) - match next_leaf iter.rest with + struct + + + (* Iterators are used for iterating efficiently over multiple ropes + at the same time *) + + type iterator = { + mutable leaf : UTF8.t; + (* Current leaf in which the iterator is *) + mutable idx : UTF8.ByteIndex.b_idx; + (* Current byte position of the iterator *) + mutable rest : t list; + (* Ropes not yet visited *) + } + + type t = iterator option + + (* Initial iterator state: *) + let make rope = { leaf = UTF8.empty; + idx = UTF8.ByteIndex.first; + rest = if rope = Empty then [] else [rope] } + + let rec next_leaf = function + | Empty :: l -> + next_leaf l + | Leaf(len, str) :: l -> + Some(str, l) + | Concat(left, left_len, right, right_len, height) :: l -> + next_leaf (left :: right :: l) + | [] -> + None + + (* Advance the iterator to the next position, and return current + character: *) + let rec next iter = + if UTF8.ByteIndex.at_end iter.leaf iter.idx then + (* We are at the end of the current leaf, find another one: *) + match next_leaf iter.rest with | None -> None | Some(leaf, rest) -> @@ -695,17 +1004,17 @@ module Text = struct iter.rest <- rest; Some(UTF8.ByteIndex.look leaf UTF8.ByteIndex.first) end - else begin - (* Just advance in the current leaf: *) - let ch = UTF8.ByteIndex.look iter.leaf iter.idx in - iter.idx <- UTF8.ByteIndex.next iter.leaf iter.idx; - Some ch - end - - (* Same thing but map leafs: *) - let rec next_map f iter = - if UTF8.ByteIndex.at_end iter.leaf iter.idx then - match next_leaf iter.rest with + else begin + (* Just advance in the current leaf: *) + let ch = UTF8.ByteIndex.look iter.leaf iter.idx in + iter.idx <- UTF8.ByteIndex.next iter.leaf iter.idx; + Some ch + end + + (* Same thing but map leafs: *) + let rec next_map f iter = + if UTF8.ByteIndex.at_end iter.leaf iter.idx then + match next_leaf iter.rest with | None -> None | Some(leaf, rest) -> @@ -714,27 +1023,27 @@ module Text = struct iter.idx <- UTF8.ByteIndex.next leaf UTF8.ByteIndex.first; iter.rest <- rest; Some(UTF8.ByteIndex.look leaf UTF8.ByteIndex.first) - else begin - let ch = UTF8.ByteIndex.look iter.leaf iter.idx in - iter.idx <- UTF8.ByteIndex.next iter.leaf iter.idx; - Some ch - end - - (* Same thing but in reverse order: *) - - let rec prev_leaf = function - | Empty :: l -> - prev_leaf l - | Leaf(len, str) :: l -> - Some(str, l) - | Concat(left, left_len, right, right_len, height) :: l -> - prev_leaf (right :: left :: l) - | [] -> - None - - let prev iter = - if iter.idx = UTF8.ByteIndex.first then - match prev_leaf iter.rest with + else begin + let ch = UTF8.ByteIndex.look iter.leaf iter.idx in + iter.idx <- UTF8.ByteIndex.next iter.leaf iter.idx; + Some ch + end + + (* Same thing but in reverse order: *) + + let rec prev_leaf = function + | Empty :: l -> + prev_leaf l + | Leaf(len, str) :: l -> + Some(str, l) + | Concat(left, left_len, right, right_len, height) :: l -> + prev_leaf (right :: left :: l) + | [] -> + None + + let prev iter = + if iter.idx = UTF8.ByteIndex.first then + match prev_leaf iter.rest with | None -> None | Some(leaf, rest) -> @@ -742,24 +1051,24 @@ module Text = struct iter.idx <- UTF8.ByteIndex.last leaf; iter.rest <- rest; Some(UTF8.ByteIndex.look leaf iter.idx) - else begin - iter.idx <- UTF8.ByteIndex.prev iter.leaf iter.idx; - Some(UTF8.ByteIndex.look iter.leaf iter.idx) - end - end + else begin + iter.idx <- UTF8.ByteIndex.prev iter.leaf iter.idx; + Some(UTF8.ByteIndex.look iter.leaf iter.idx) + end + end - (* Can be improved? *) + (* Can be improved? *) let compare a b = let ia = Iter.make a and ib = Iter.make b in let rec loop _ = match Iter.next ia, Iter.next ib with - | None, None -> 0 - | None, _ -> -1 - | _, None -> 1 - | Some ca, Some cb -> - match UChar.compare ca cb with - | 0 -> loop () - | n -> n + | None, None -> 0 + | None, _ -> -1 + | _, None -> 1 + | Some ca, Some cb -> + match UChar.compare ca cb with + | 0 -> loop () + | n -> n in loop () diff --git a/src/uCoreLib.mli b/src/uCoreLib.mli index 97c569d..b8bccc8 100644 --- a/src/uCoreLib.mli +++ b/src/uCoreLib.mli @@ -1,5 +1,3 @@ - - (** Unicode characters. This module implements Unicode characters. @@ -39,7 +37,8 @@ (* You can contact the authour by sending email to *) (* yori@users.sourceforge.net *) - +(** Exceptions. In addition, this module could raises Invalid_arg and*) +(** BatNumber.Overflow *) exception Out_of_range exception Malformed_code @@ -246,6 +245,96 @@ module UTF8 : sig end with type buf = Buffer.t end +module Text' : sig + type t + + val empty : t + + val length : t -> int + + val max_length : int + +(** [init len f] + returns a new text which contains [len] Unicode characters. + The i-th Unicode character is initialized by [f i]. Raises + Failure if [len] is minus. *) + val init : len:int -> f:(int -> uchar) -> t + + (** Returns a text which consists of the given single character. *) + val of_uchar : uchar -> t + + (** [of_string s] converts UTF-8 encoded string [s] to Text.t + If [s] is an invalid UTF-8 string, returns None *) + val of_string : string -> t option + (** Same as above but raises Malformed_code instead of returing None *) + val of_string_exn : string -> t + + (** Returns UTF-8 encoded string. *) + val string_of : t -> string + + (** [of_string s] converts Ascii encoded string [s] to Text.t + If [s] is an invalid Ascii string, returns None *) + val of_ascii : string -> t option + (** Same as above but raises Malformed_code instead of returing None *) + val of_ascii_exn : string -> t + + (** [of_string s] converts Latin-a encoded string [s] to Text.t *) + val of_latin1 : string -> t + + (** Append two texts *) + val append : t -> t -> t + + (** Append one Unicode character to the last of the text *) + val append_uchar : t -> uchar -> t + + (** Byte order of texts *) + val compare : t -> t -> int + + (** [get s i] gets [i]-th character of [s] *) + val get : t -> int -> uchar + + (** Iterator. Also behaves like a zipper *) + type iterator + + (** The head of the text *) + val first : t -> iterator + + (** Points the last character of the text *) + val last : t -> iterator + + (** Moving around an iterator *) + val next : iterator -> iterator option + (** Raises Out_of_range if the iterator already locates in the last + character of the underlining text. *) + val next_exn : iterator -> iterator + (** [move i n] returns the iterator which locates [n]-th*) + (** characters from [i]. If such a location does not exist, return*) + (** None. If [n] is negative, move the left. *) + val move : iterator -> int -> iterator option + (** The same as above but raises Out_of_range instead or returning None.*) + val move_exn : iterator -> int -> iterator + (** Move the iterator as much as possible toward the [n]-th + character.*) + val move_as_possible : iterator -> int -> iterator + + (** Returns the value of the location which the iterator points. *) + val value : iterator -> uchar + + (** Returns the underlining text of the give iterator. *) + val base : iterator -> t + + (** Zipper like operations. *) + (** [insert i t] inserts [t] inyo the right of [i]. *) + val insert : iterator -> t -> iterator + (** [delete_left i] deletes the left side of [i]. *) + val delete_left : iterator -> iterator + (** [delete_right i] deletes the right side of [i]. *) + val delete_right : iterator -> iterator + (** [sub i n] creates the iterator which runs over substring which + begins position [i] to [n]-th character from [i].*) + val sub : iterator -> int -> iterator +end + (* Rope: a simple implementation of ropes as described in Boehm, H., Atkinson, R., and Plass, M. 1995. Ropes: an alternative to @@ -253,7 +342,7 @@ strings. Softw. Pract. Exper. 25, 12 (Dec. 1995), 1315-1330. Motivated by Luca de Alfaro's extensible array implementation Vec. -Copyright (C) 2011 Yoriyuki Yamagata <yoriyuki.y@gmail.com> +Copyright (C) 2011, 2013 Yoriyuki Yamagata <yoriyuki.y@gmail.com> 2007 Mauricio Fernandez <mfp@acm.org> http://eigenclass.org From 376b91b4f11869336823f15e782fc5a424b51f78 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Sun, 6 Oct 2013 04:29:59 +0900 Subject: [PATCH 03/22] Text.move --- src/uCoreLib.ml | 299 ++++++++++++++++++++++++++++++++++++++++++++++- src/uCoreLib.mli | 42 +++++-- 2 files changed, 324 insertions(+), 17 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index 4636c5a..61afda7 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -426,8 +426,10 @@ module type BaseStringType = sig val end_pos : t -> index val out_of_range : t -> index -> bool val next : t -> index -> index + val prev : t -> index -> index val move : t -> index -> int -> index val equal_index : t -> index -> index -> bool + val compare_index : t -> index -> index -> int (* Low level functions *) (* bytes of substring *) @@ -443,6 +445,7 @@ module type BaseStringType = sig val copy : t -> t val read : t -> index -> uchar + val write : t -> index -> uchar -> index end module BaseString : BaseStringType = struct @@ -457,14 +460,42 @@ module BaseString : BaseStringType = struct let append = (^) let create = String.create let end_pos s = String.length s - let of_string s = if is_valid s then None else Some s + let of_string s = if is_valid s then None else Some (String.copy s) let of_string_unsafe s = s let of_ascii s = try Some (UTF8.of_ascii s) with Malformed_code -> None let equal_index s i j = i = j let size s i j = j - i let blit s1 i1 i2 s2 j = String.blit s1 i1 s2 j (i2 - i1) let move_by_bytes s i x = i + x - + + let write s i u = + let masq = 0b111111 in + let k = UChar.code u in + if k <= 0x7f then + if i >= String.length s then i else begin + s.[i] <- Char.unsafe_chr k; + i + 1 + end + else if k <= 0x7ff then + if i >= String.length s - 1 then i else begin + s.[i] <- Char.unsafe_chr (0xc0 lor (k lsr 6)); + s.[i+1] <- (Char.unsafe_chr (0x80 lor (k land masq))); + i+2 + end else if k <= 0xffff then + if i >= String.length s - 2 then i else begin + s.[i] <- Char.unsafe_chr (0xe0 lor (k lsr 12)); + s.[i+1] <- Char.unsafe_chr (0x80 lor ((k lsr 6) land masq)); + s.[i+2] <- Char.unsafe_chr (0x80 lor (k land masq)); + i + 3 + end else + if i >= String.length s - 3 then i else + begin + s.[i] <- Char.unsafe_chr (0xf0 + (k lsr 18)); + s.[i+1] <- Char.unsafe_chr (0x80 lor ((k lsr 12) land masq)); + s.[i+2] <- Char.unsafe_chr (0x80 lor ((k lsr 6) land masq)); + s.[i+3] <- Char.unsafe_chr (0x80 lor (k land masq)); + i + 4 + end end module Text' = struct @@ -646,7 +677,6 @@ module Text' = struct else make_concat (Leaf leaf_l) (Leaf leaf_r) (* height = 1 *) - let concat_leaf l leaf_r = match l with | Empty -> Leaf leaf_r @@ -672,14 +702,40 @@ module Text' = struct Empty -> r | _ -> bal_if_needed l r + let new_block_uchar u = + let s = B.create leaf_size in + let i = B.write s (B.first s) u in + let b = {s = s; unused = i} in + Leaf {b = b; i = (B.first s); j = i; len = 1} + + let leaf_append_uchar leaf u = + if is_full_tail leaf then + let k = B.write leaf.b.s leaf.j u in + if B.equal_index leaf.b.s k leaf.j then + make_concat (Leaf leaf) (new_block_uchar u) + else + let b = {leaf.b with unused = k} in + let leaf = {b = b; i = leaf.i; j = k; len = leaf.len + 1} in + Leaf leaf + else + make_concat (Leaf leaf) (new_block_uchar u) + let leaf_of_uchar u = let s = B.of_string_unsafe (UTF8.make 1 u) in let b = {s = s; unused = B.end_pos s} in {b = b; i = B.first b.s; j = B.end_pos b.s; len = 1} - - let append_uchar l u = concat_leaf l (leaf_of_uchar u) let of_uchar u = Leaf (leaf_of_uchar u) + + let append_uchar l u = + match l with + | Empty -> Leaf (leaf_of_uchar u) + | Leaf leaf_l -> leaf_append_uchar leaf_l u + | Concat node -> + match node.right with + Leaf leaf_l -> + Concat {node with right = leaf_append_uchar leaf_l u} + | _ -> bal_if_needed l (Leaf (leaf_of_uchar u)) let init ~len ~f = if len < 0 then failwith "Text.init: The length is minus" else @@ -711,7 +767,238 @@ module Text' = struct None -> raise Malformed_code | Some text -> text - let of_latin1 s = init (String.length s) (fun i -> UChar.unsafe_chr (Char.code s.[i])) + let of_latin1 s = + init (String.length s) (fun i -> + UChar.unsafe_chr (Char.code s.[i])) + + let rec get t n = + match t with + Empty -> None + | Leaf leaf -> + if n >= leaf.len then None else + let i = B.move leaf.b.s leaf.i n in + Some (B.read leaf.b.s i) + | Concat node -> + if n < node.left_length then get node.left n else + let n = n - node.left_length in + get node.right n + + let rec get_exn t n = + match t with + Empty -> invalid_arg "index out of bounds" + | Leaf leaf -> + if n >= leaf.len then invalid_arg "index out of bounds" else + let i = B.move leaf.b.s leaf.i n in + B.read leaf.b.s i + | Concat node -> + if n < node.left_length then get_exn node.left n else + let n = n - node.left_length in + get_exn node.right n + + (* In Left (p, t), p is the path to the parent and t is the right sibling. *) + type path = Top | Left of path * t | Right of t * path + + type iterator = {path : path; leaf : leaf; index : B.index} + + let empty_leaf = + let base = {s = B.empty; unused = B.end_pos B.empty} in + {b = base; i = B.first B.empty; j = B.end_pos B.empty; len = 0} + + let rec first_leaf_sub path = function + Empty -> (Top, empty_leaf) + | Leaf leaf -> (path, leaf) + | Concat node -> + first_leaf_sub (Left (path, node.right)) node.left + + let first_leaf = first_leaf_sub Top + + let first t = + let p, leaf = first_leaf t in + {path = p; leaf = leaf; index = B.first leaf.b.s} + + let rec end_leaf_sub path = function + Empty -> (Top, empty_leaf) + | Leaf leaf -> (path, leaf) + | Concat node -> + end_leaf_sub (Right (node.left, path)) node.right + + let end_leaf = end_leaf_sub Top + + let end_pos t = + let p, leaf = end_leaf t in + {path = p; leaf = leaf; index = B.end_pos leaf.b.s} + + let rec nth_aux p t n = + match t with + Empty -> None + | Leaf leaf -> + if n >= leaf.len then None else + let i = B.move leaf.b.s leaf.i n in + Some {path = p; leaf = leaf; index = i} + | Concat node -> + if n < node.left_length then + nth_aux (Left (p, node.right)) node.left n + else + let n = n - node.left_length in + nth_aux (Right (node.left, p)) node.right n + + let nth t n = nth_aux Top t n + + let rec nth_exn_aux p t n = + match t with + Empty -> invalid_arg "index out of bounds" + | Leaf leaf -> + if n >= leaf.len then invalid_arg "index out of bounds" else + let i = B.move leaf.b.s leaf.i n in + {path = p; leaf = leaf; index = i} + | Concat node -> + if n < node.left_length then + nth_exn_aux (Left (p, node.right)) node.left n + else + let n = n - node.left_length in + nth_exn_aux (Right (node.left, p)) node.right n + + let nth_exn t n = nth_exn_aux Top t n + + let rec next_leaf = function + Top -> None + | Left (p, t) -> + Some (first_leaf_sub p t) + | Right (t, p) -> + next_leaf p + + let next it = + if not (B.equal_index it.leaf.b.s it.index it.leaf.j) then + let i = B.next it.leaf.b.s it.index in + Some {it with index = i} + else match next_leaf it.path with + None -> None + | Some (path, leaf) -> + Some {path = path; leaf = leaf; index = leaf.i} + + let next_exn it = + if not (B.equal_index it.leaf.b.s it.index it.leaf.j) then + let i = B.next it.leaf.b.s it.index in + {it with index = i} + else match next_leaf it.path with + None -> invalid_arg "index out of bounds" + | Some (path, leaf) -> + {path = path; leaf = leaf; index = leaf.i} + + let rec prev_leaf = function + Top -> None + | Left (p, t) -> + prev_leaf p + | Right (t, p) -> + Some (end_leaf_sub p t) + + let prev it = + if not (B.equal_index it.leaf.b.s it.index it.leaf.i) then + let i = B.prev it.leaf.b.s it.index in + Some {it with index = i} + else match prev_leaf it.path with + None -> None + | Some (path, leaf) -> + Some {path = path; leaf = leaf; index = B.prev leaf.b.s leaf.j} + + let prev_exn it = + if not (B.equal_index it.leaf.b.s it.index it.leaf.i) then + let i = B.prev it.leaf.b.s it.index in + {it with index = i} + else match prev_leaf it.path with + None -> invalid_arg "index out of bounds" + | Some (path, leaf) -> + {path = path; leaf = leaf; index = B.prev leaf.b.s leaf.j} + + let rec base_aux path sub = + match path with + Top -> sub + | Left (p, t) -> + base_aux p (make_concat sub t) + | Right (t, p) -> + base_aux p (make_concat t sub) + + let rec base it = base_aux it.path (Leaf it.leaf) + + let move_ahead_leaf it n = + let rec loop i n = + if B.compare_index it.leaf.b.s i it.leaf.j > 0 then + `Out_of_range ({it with index = it.leaf.j}, n) + else if n <= 0 then `Success {it with index = i} else + loop (B.next it.leaf.b.s i) (n - 1) in + loop it.index n + + let rec move_ahead path sub it n = + match sub with + Empty -> `Out_of_range (it, n) + | Leaf leaf -> + let it = + if it.leaf == leaf then it else + {path = path; leaf = leaf; index = leaf.i} in + (match move_ahead_leaf it n with + `Success it as ret -> ret + | `Out_of_range (it, n) -> + (match path with + Top -> `Out_of_range (it, n) + | Left (p, t) -> + move_ahead (Right (sub, p)) t it n + | Right (t, p) -> + let node = make_concat t sub in + move_ahead p node it n)) + | Concat node -> + if node.left_length >= n then + move_ahead (Left (path, node.right)) node.left it n + else + move_ahead (Right (node.left, path)) node.right it (n - node.left_length) + + let move_behind_leaf it n = + let rec loop i n = + if B.compare_index it.leaf.b.s i it.leaf.i < 0 then + `Out_of_range ({it with index = it.leaf.i}, n) + else if n <= 0 then `Success {it with index = i} else + loop (B.prev it.leaf.b.s i) (n-1) in + loop it.index n + + let rec move_behind path sub it n = + match sub with + Empty -> `Out_of_range (it, n) + | Leaf leaf -> + let it = + if it.leaf == leaf then it else + {path = path; leaf = leaf; index = leaf.j} in + (match move_behind_leaf it n with + `Success it as ret -> ret + | `Out_of_range (it, n) -> + (match path with + Top -> `Out_of_range (it, n) + | Right (t, p) -> + move_behind (Left (p, sub)) t it n + | Left (p, t) -> + let node = make_concat sub t in + move_behind p node it n)) + | Concat node -> + if node.right_length >= n then + move_behind (Right (node.left, path)) node.right it n + else + move_ahead (Right (node.right, path)) node.left it (n - node.left_length) + + let move it n = + if n > 0 then move_ahead it.path (Leaf it.leaf) it n else + if n < 0 then move_behind it.path (Leaf it.leaf) it (-n) else + `Success it + + let move_exn it n = + match move it n with + `Success it -> it + | `Out_of_range _ -> invalid_arg "number out of bounds" + + let move_as_possible it n = + match move it n with + `Success it -> it + | `Out_of_range (it, n) -> it + + + let value it = B.read it.leaf.b.s it.index end diff --git a/src/uCoreLib.mli b/src/uCoreLib.mli index b8bccc8..1a4212e 100644 --- a/src/uCoreLib.mli +++ b/src/uCoreLib.mli @@ -258,7 +258,7 @@ module Text' : sig returns a new text which contains [len] Unicode characters. The i-th Unicode character is initialized by [f i]. Raises Failure if [len] is minus. *) - val init : len:int -> f:(int -> uchar) -> t + val init : int -> (int -> uchar) -> t (** Returns a text which consists of the given single character. *) val of_uchar : uchar -> t @@ -291,27 +291,44 @@ module Text' : sig val compare : t -> t -> int (** [get s i] gets [i]-th character of [s] *) - val get : t -> int -> uchar + val get : t -> int -> uchar option + (** Raises Invalid_arg *) + val get_exn : t -> int -> uchar - (** Iterator. Also behaves like a zipper *) + (** Iterator. Also behaves like a zipper. Iterator can point the + location after the last character of the text. *) type iterator (** The head of the text *) val first : t -> iterator - - (** Points the last character of the text *) - val last : t -> iterator + (** Points the end of the text *) + val end_pos : t -> iterator + (** [nth t i] return the iterator which points the begininng of + [i+1]-th character of [t]. *) + val nth : t -> int -> iterator option + (** Raises Invalid_argument "index out of bounds" if the argument is + out of bound. *) + val nth_exn : t -> int -> iterator (** Moving around an iterator *) val next : iterator -> iterator option - (** Raises Out_of_range if the iterator already locates in the last - character of the underlining text. *) + (** Raises Invalid_argument "index out of bounds" if the iterator + already locates after the last character of the underlining text. *) val next_exn : iterator -> iterator + (** Points the privious character *) + val prev : iterator -> iterator option + (** Raises Invalid_argument "index out of bounds" if the iterator + already locates in the first character of the underlining text. *) + val prev_exn : iterator -> iterator (** [move i n] returns the iterator which locates [n]-th*) (** characters from [i]. If such a location does not exist, return*) - (** None. If [n] is negative, move the left. *) - val move : iterator -> int -> iterator option - (** The same as above but raises Out_of_range instead or returning None.*) + (** [`Out_of_range (it, n)]. [it] points the last position which + [move] can success, and [n] is the number of the move to be done. + If [n] is negative, move the left. *) + val move : iterator -> int -> + [`Success of iterator | `Out_of_range of iterator * int] + (** The same as above but raises Invalid_argument "index out of + bounds" instead or returning None.*) val move_exn : iterator -> int -> iterator (** Move the iterator as much as possible toward the [n]-th character.*) @@ -333,6 +350,9 @@ module Text' : sig (** [sub i n] creates the iterator which runs over substring which begins position [i] to [n]-th character from [i].*) val sub : iterator -> int -> iterator + + (** Fold.*) + val fold : t -> init:'a -> f:('a -> uchar -> 'a) -> t -> 'a end (* Rope: a simple implementation of ropes as described in From 3843f9a687e2c236c396987673c2fe3f8d36b638 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Wed, 16 Oct 2013 22:26:43 +0900 Subject: [PATCH 04/22] new Text: passes compilation. --- src/uCoreLib.ml | 140 ++++++++++++++++++++++++++++++++++++++++------- src/uCoreLib.mli | 10 +++- 2 files changed, 126 insertions(+), 24 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index 61afda7..fd2ad07 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -250,18 +250,20 @@ module UTF8 = struct let of_string_unsafe s = s let to_string_unsafe s = s - - let rec length_aux s c i = - if i >= String.length s then c else + + let rec distance_aux s i j c = + if i >= j then c else let n = Char.code (String.unsafe_get s i) in let k = if n < 0x80 then 1 else if n < 0xe0 then 2 else if n < 0xf0 then 3 else 4 in - length_aux s (c + 1) (i + k) - - let length s = length_aux s 0 0 + distance_aux s (i + k) j (c + 1) + + let distance s i j = distance_aux s i j 0 + + let length s = distance s 0 (String.length s) let rec iter_aux proc s i = if i >= String.length s then () else @@ -430,6 +432,7 @@ module type BaseStringType = sig val move : t -> index -> int -> index val equal_index : t -> index -> index -> bool val compare_index : t -> index -> index -> int + val distance : t -> index -> index -> int (* Low level functions *) (* bytes of substring *) @@ -440,9 +443,7 @@ module type BaseStringType = sig (* [move_by_bytes s i x] moves index [i] by [x] bytes.*) val move_by_bytes : t -> index -> int -> index - val append : t -> t -> t - val sub : t -> index -> index -> t - val copy : t -> t + val add_substring : Buffer.t -> t -> index -> index -> unit val read : t -> index -> uchar val write : t -> index -> uchar -> index @@ -467,6 +468,7 @@ module BaseString : BaseStringType = struct let size s i j = j - i let blit s1 i1 i2 s2 j = String.blit s1 i1 s2 j (i2 - i1) let move_by_bytes s i x = i + x + let add_substring b s i j = Buffer.add_substring b s i (j - i) let write s i u = let masq = 0b111111 in @@ -535,7 +537,7 @@ module Text' = struct let int_max (x:int) (y:int) = if x < y then y else x let int_min (x:int) (y:int) = if x < y then x else y - type base_string = {s : B.t; unused : B.index} + type base_string = {s : B.t; mutable unused : B.index} type t = Empty @@ -641,16 +643,16 @@ module Text' = struct | x -> add_forest forest x len (* function or balanced *) let balance r = + if height r < max_height then r else match r with Empty | Leaf _ -> r | _ -> let forest = Array.init max_height (fun _ -> {c = Empty; rlen = 0}) in balance_insert r (length r) forest; concat_forest forest - + let bal_if_needed l r = - let r = make_concat l r in - if height r < max_height then r else balance r + let r = make_concat l r in balance r let is_full_tail leaf = B.equal_index leaf.b.s leaf.j leaf.b.unused @@ -669,9 +671,8 @@ module Text' = struct B.blit leaf_l.b.s leaf_l.i leaf_l.j s (B.first s); B.blit leaf_r.b.s leaf_r.i leaf_r.j s (B.move_by_bytes s (B.first s) size_l); s in - let b = {s = leaf_l.b.s; unused = B.move_by_bytes leaf_l.b.s leaf_l.b.unused size_r} in - let leaf = {b = b; i = leaf_l.i; - j = B.move_by_bytes leaf_l.b.s leaf_l.i size_r; + leaf_l.b.unused <- B.move_by_bytes leaf_l.b.s leaf_l.b.unused size_r; + let leaf = {leaf_l with j = leaf_l.b.unused; len = leaf_l.len + leaf_r.len} in Leaf leaf else @@ -713,10 +714,11 @@ module Text' = struct let k = B.write leaf.b.s leaf.j u in if B.equal_index leaf.b.s k leaf.j then make_concat (Leaf leaf) (new_block_uchar u) - else - let b = {leaf.b with unused = k} in - let leaf = {b = b; i = leaf.i; j = k; len = leaf.len + 1} in + else begin + leaf.b.unused <- k; + let leaf = {leaf with j = k; len = leaf.len + 1} in Leaf leaf + end else make_concat (Leaf leaf) (new_block_uchar u) @@ -737,7 +739,7 @@ module Text' = struct Concat {node with right = leaf_append_uchar leaf_l u} | _ -> bal_if_needed l (Leaf (leaf_of_uchar u)) - let init ~len ~f = + let init len f = if len < 0 then failwith "Text.init: The length is minus" else let s = B.init len f in let b = {s = s; unused = B.end_pos s} in @@ -918,7 +920,14 @@ module Text' = struct | Right (t, p) -> base_aux p (make_concat t sub) - let rec base it = base_aux it.path (Leaf it.leaf) + let base it = balance (base_aux it.path (Leaf it.leaf)) + + let rec pos_path = function + Top -> 0 + | Left (p, t) -> pos_path p + | Right (t, p) -> length t + + let pos it = pos_path it.path + B.distance it.leaf.b.s it.leaf.i it.index let move_ahead_leaf it n = let rec loop i n = @@ -997,9 +1006,98 @@ module Text' = struct `Success it -> it | `Out_of_range (it, n) -> it + let rec delete_left_pos path sub = + match path with + Top -> (path, sub) + | Left (p, t) -> delete_left_pos p t + | Right (t, p) -> (p, sub) + + let delete_left it = + let p, _ = delete_left_pos it.path (Leaf it.leaf) in + let leaf = {it.leaf with i = it.index; + len = B.distance it.leaf.b.s it.index it.leaf.j} in + let it = {it with path = p; leaf = leaf} in + let b = base it in + first b + + let rec delete_right_pos path sub = + match path with + Top -> (path, sub) + | Right (t, p) -> delete_right_pos p t + | Left (p, t) -> (p, sub) + + let delete_right it = + let p, _ = delete_right_pos it.path (Leaf it.leaf) in + let leaf = {it.leaf with j = it.index; + len = B.distance it.leaf.b.s it.leaf.i it.index} in + let it = {it with path = p; leaf = leaf} in + let b = base it in + end_pos b + + let sub it len = + let it = delete_left it in + match move it len with + `Out_of_range _ -> None + | `Success it -> + Some (delete_right it) + + let sub_exn it len = + match sub it len with + None -> invalid_arg "iterator out of bound" + | Some it -> it + + let insert it text = + let n = pos it in + let left = base (delete_right it) in + let right = base (delete_left it) in + let base = (append (append left text) right) in + match nth base n with + None -> assert false + | Some it -> it let value it = B.read it.leaf.b.s it.index + let fold_leaf leaf a f = + let rec loop a i = + if B.compare_index leaf.b.s i leaf.j >= 0 then a else + let a' = f a (B.read leaf.b.s i) in + loop a' (B.next leaf.b.s i) in + loop a leaf.i + + let rec fold t a f = + match t with + Empty -> a + | Leaf leaf -> fold_leaf leaf a f + | Concat node -> + let a' = fold node.left a f in + fold node.right a' f + + let rec compare_iterator it1 it2 = + let u1 = value it1 in + let u2 = value it2 in + let sgn = UChar.compare u1 u2 in + if sgn <> 0 then sgn else + match next it1, next it2 with + None, None -> 0 + | None, _ -> -1 + | _, None -> 1 + | Some it1, Some i2 -> compare_iterator it1 it2 + + let compare t1 t2 = compare_iterator (first t1) (first t2) + + let rec string_of_aux b = function + Empty -> () + | Leaf leaf -> + B.add_substring b leaf.b.s leaf.i leaf.j + | Concat node -> + string_of_aux b node.left; + string_of_aux b node.right + + let string_of t = + let b = Buffer.create 0 in + string_of_aux b t; + Buffer.contents b + end (* diff --git a/src/uCoreLib.mli b/src/uCoreLib.mli index 1a4212e..f832683 100644 --- a/src/uCoreLib.mli +++ b/src/uCoreLib.mli @@ -339,9 +339,11 @@ module Text' : sig (** Returns the underlining text of the give iterator. *) val base : iterator -> t + (** Returns the position of the iterator *) + val pos : iterator -> int (** Zipper like operations. *) - (** [insert i t] inserts [t] inyo the right of [i]. *) + (** [insert i t] inserts [t] into the right of [i]. *) val insert : iterator -> t -> iterator (** [delete_left i] deletes the left side of [i]. *) val delete_left : iterator -> iterator @@ -349,10 +351,12 @@ module Text' : sig val delete_right : iterator -> iterator (** [sub i n] creates the iterator which runs over substring which begins position [i] to [n]-th character from [i].*) - val sub : iterator -> int -> iterator + val sub : iterator -> int -> iterator option + (** Raise invalid_arg "iterator out of bound".*) + val sub_exn : iterator -> int -> iterator (** Fold.*) - val fold : t -> init:'a -> f:('a -> uchar -> 'a) -> t -> 'a + val fold : t -> 'a -> ('a -> uchar -> 'a) -> 'a end (* Rope: a simple implementation of ropes as described in From b8f196517128e9e75c9831d5d041bf15804e919a Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Thu, 17 Oct 2013 23:57:49 +0900 Subject: [PATCH 05/22] Pass the test --- src/uCoreLib.ml | 2 +- test.log | 3 +++ test/test.ml | Bin 24255 -> 26002 bytes 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 test.log diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index fd2ad07..db8201c 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -461,7 +461,7 @@ module BaseString : BaseStringType = struct let append = (^) let create = String.create let end_pos s = String.length s - let of_string s = if is_valid s then None else Some (String.copy s) + let of_string s = if is_valid s then Some (String.copy s) else None let of_string_unsafe s = s let of_ascii s = try Some (UTF8.of_ascii s) with Malformed_code -> None let equal_index s i j = i = j diff --git a/test.log b/test.log new file mode 100644 index 0000000..a8cef8a --- /dev/null +++ b/test.log @@ -0,0 +1,3 @@ +............................................................................................................................................................................................................................................................................................................................................................. +Ran: 349 tests in: 2.03 seconds. +OK \ No newline at end of file diff --git a/test/test.ml b/test/test.ml index 88fac65443c65bc7c0d0eb916cf99bb9e8655c43..9c8f8d40a58392363dd0f8bd6af13c2ebf44521f 100644 GIT binary patch delta 253 zcmdnLmvPc*#trq-8X8&(A*mH5>Izz#T)Fuvr8%hzAqut-F)ps0)Dne~)Z&u(lF9rW zrHmnyJ2(P?<Xw(t&Jexy)ROqrioD4goaUn0AU%3%`8g>H#R>)r8fm3@3fYtQamw*# zYg%(DfI+sl;p78MVvO07C&=nd-Y=WS8={w=7GGRal$n>VP&_$J&SLUHImO9G<<?B* z6t<c?U!R*FNd?%h<MQ?#aEo~ru1xL{s}@F6q)@D>xw%eS$(T`n@&Z4N$*27cCjaxZ OooweHx;e)$SO5U8WK~Q6 delta 19 bcmbPqnsNVL#trq-n~fC}jW-vE$_oGhQs4(G From 0fa54cb4de321373b251f9db8fde6ec7950aa1cc Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Fri, 1 Nov 2013 21:55:01 +0900 Subject: [PATCH 06/22] until get --- src/uCoreLib.ml | 40 +++++++++++++++++++++------------------- test/test.ml | Bin 26002 -> 30122 bytes 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index db8201c..fc930c0 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -537,7 +537,7 @@ module Text' = struct let int_max (x:int) (y:int) = if x < y then y else x let int_min (x:int) (y:int) = if x < y then x else y - type base_string = {s : B.t; mutable unused : B.index} + type base_string = {mutable s : B.t; mutable unused : B.index} type t = Empty @@ -659,23 +659,23 @@ module Text' = struct let leaf_append leaf_l leaf_r = let size_l = B.size leaf_l.b.s leaf_l.i leaf_l.j in let size_r = B.size leaf_r.b.s leaf_r.i leaf_r.j in - if size_l + size_r <= leaf_size then - let s = - if size_l + size_r <= B.length leaf_l.b.s && - is_full_tail leaf_l - then begin - B.blit leaf_r.b.s leaf_r.i leaf_r.j leaf_l.b.s leaf_l.b.unused; - leaf_l.b.s; - end else - let s = B.create leaf_size in - B.blit leaf_l.b.s leaf_l.i leaf_l.j s (B.first s); - B.blit leaf_r.b.s leaf_r.i leaf_r.j s (B.move_by_bytes s (B.first s) size_l); - s in + if size_l + size_r <= leaf_size then begin + if size_l + size_r <= + B.size leaf_l.b.s (B.first leaf_l.b.s) (B.end_pos leaf_l.b.s) + && is_full_tail leaf_l + then begin + B.blit leaf_r.b.s leaf_r.i leaf_r.j leaf_l.b.s leaf_l.b.unused; + end else begin + let s = B.create leaf_size in + B.blit leaf_l.b.s leaf_l.i leaf_l.j s (B.first s); + B.blit leaf_r.b.s leaf_r.i leaf_r.j s (B.move_by_bytes s (B.first s) size_l); + leaf_l.b.s <- s; + end; leaf_l.b.unused <- B.move_by_bytes leaf_l.b.s leaf_l.b.unused size_r; let leaf = {leaf_l with j = leaf_l.b.unused; len = leaf_l.len + leaf_r.len} in Leaf leaf - else + end else make_concat (Leaf leaf_l) (Leaf leaf_r) (* height = 1 *) let concat_leaf l leaf_r = @@ -690,7 +690,7 @@ module Text' = struct let append l = function Empty -> l - | Leaf leaf_r as r -> concat_leaf l leaf_r + | Leaf leaf_r -> concat_leaf l leaf_r | Concat node as r -> match node.left with Leaf leaf_r -> @@ -710,7 +710,7 @@ module Text' = struct Leaf {b = b; i = (B.first s); j = i; len = 1} let leaf_append_uchar leaf u = - if is_full_tail leaf then + if is_full_tail leaf then begin let k = B.write leaf.b.s leaf.j u in if B.equal_index leaf.b.s k leaf.j then make_concat (Leaf leaf) (new_block_uchar u) @@ -719,7 +719,7 @@ module Text' = struct let leaf = {leaf with j = k; len = leaf.len + 1} in Leaf leaf end - else + end else make_concat (Leaf leaf) (new_block_uchar u) let leaf_of_uchar u = @@ -736,8 +736,10 @@ module Text' = struct | Concat node -> match node.right with Leaf leaf_l -> - Concat {node with right = leaf_append_uchar leaf_l u} - | _ -> bal_if_needed l (Leaf (leaf_of_uchar u)) + Concat {node with + right = leaf_append_uchar leaf_l u; + right_length = node.right_length + 1} + | _ -> bal_if_needed l (new_block_uchar u) let init len f = if len < 0 then failwith "Text.init: The length is minus" else diff --git a/test/test.ml b/test/test.ml index 9c8f8d40a58392363dd0f8bd6af13c2ebf44521f..109d19743a680df9083e7512045902ae41a8515a 100644 GIT binary patch delta 3594 zcmbtX&2Jl35SL5T)~+Zi$_FS38E@s-n^unPG!=57Y7(e=X;DRrR8eHsyK85Wy$<VV zlvb5i+>j6l@($b(S2!W%#ElCl{s|;N9N^MhkIcM}{j!s$uu3fNn~yhNznSsjuel%o z&i(k!(rRTDZo9pGH-OH_x0yHahp=iEdM<;&@|{7y#h_xs+px<&FE(n00t)Q9LpFrY zAOP30_aZ?p$B^7&Ew{hV9vx22hLzoF&-J@(4|c2K(1hA%0l*p#-2nMM9a%j<rY_#8 zIG|?2-GT2Ggds|yQbN|CG+D^%Xjsxr&)1YjL<yw_F^Qb1TKoI1?~n%8071)|qpo8n z*`{CWa0P|+Tf<Bl==q)umNLS^o3;Y}tUAm8Sy<t<wLD*1tgHbb9m7a&i6|;{7<TCv zz6%@WIlp&vmVdQ4H>2e|-*FE-zf1AvKP{U4kHylGgV6$fQU(n=t&zQF1^nrm*(<e! zdaZ*GI0L)~pl>lu9t$49`-8p<mHRu$S+(~9@V>sgz;C`(u|$l8DOwYZC|LP)tc3hX z#EEjqFngOB9!jRpF@rjJN*42bmzN7IR6b+@dc<Ej{{~-M&GX-ve<_{qh1!&|u@n z(c|Ui+>EeF)>th0{q-&W+sZW##hJYXgVc*!L=mg_GIVKT)eMENu?&-yA@r4X92D6- z@F_Fr(ckhWqJrn36jDkX(1g-~LEhSf{=OBs7!g)BvFgXpHbu_U(cG;j3=lz0yqD7z zhyGhb+w+un{M^*V(6HLJxIGp_Q#5A_?FPYOVv;f-)LGKZVElRYTVw3Xp2a-Bo{gxu zqi2PJW<0FdYWQC_CV&e`QMnBqT^GVKs-1DWP|xuK^|U}J+KKi=)?)^z3j5hcA{>ab zE2EUC0BR{?JdWE@E-J8u&u=aiej$f>iIc<#6v=)k&6Efp4|a;G+a`iIBot!;3HMO8 z%4Wg?;Xq0d`<29fB+-6h2|Fbbp5_uKC2|@G_p}lhB2PMHUcWlSr{Qdvj<d+@!~`4{ z{#MTlx>%_3=%L>J;S&n*CY0OsVbh00A8r|*d=!U0fNAVEZU+lPj6b9t=79fIc{xNR z;G;V-RZt)Gde9#@upu=7Y>zq}H>g^UBa_+&t6J#`lmG<KjK`U(m12<1st^zo)O91= z^G~)fc9XFw2H0@aefGd-u8)JhooPbIl$tWS%O#>S5-G(@#Bk!@-FmHNU_W5NNI*Uk z8%6oRi}0l_(P6?{z(2l&LXF8nTiZMDynD-#LJCq!=yiT>Wjb7kl9TGgq<w2!NoaAC za0e638skN`YH8=G!5riGGEOwn)KaTQ>@7uP#jp%2BAqZot(aC~9g12ax9-fZ7;R*2 zpUhe}B`Zz<EXWhJHWVv{2+Q_F)^)||A}bbmv?@%n^_pVEu2}Az%(@|1LxXy?A=bTk zL%2Q`<^Ku6ECSUk&<38*%@Y>+7iTZso`Amp1Tg+|s`%yvWrT1Wi%9QUV*y0ydi>`z zbJ(@!E|(lA+4zS8MnWEJQ-4AG3Cc(7#fR&mXe$QXXg0-Csh4kD!VI}XrfBkrh_tY# zNOV91M6OsIQ$&5J7$mftxJWheXF`@!0bj0J)NtfOnuh_d2;JsqR?l<zdmqdv7r=!F zl}l0EXnOucCjY@wm^Y%ynYubURaT1iD8+&5#i3%5%S@I-o)U{3mnk|Ph4|wkrcMY} ya>`~IWhrM>qDHAll1WIl$P}kM+rcKDH?aAnX!x#<K@6-cQEQgDII`~7r~d`2g<g69 delta 85 zcmV-b0IL70>j9F>0kD@Hld3Tulg|(?ldmDglkhT)lYlcuvq>W!0SXFiWpp5PWpi|2 rlYnLxlb;r5lkX%Ivy3lWGn2?=9+Ti<JOOl*AYxRLfMzkXhh-xW1Qa5T From c02acb4d31f19bb90a228a8965fb360ae12cf565 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Fri, 1 Nov 2013 22:05:11 +0900 Subject: [PATCH 07/22] first --- test/test.ml | Bin 30122 -> 30350 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/test/test.ml b/test/test.ml index 109d19743a680df9083e7512045902ae41a8515a..2d6a388d6cfa7b93716c9e2479b17c0c28ecd26b 100644 GIT binary patch delta 313 zcmZ4Wnz8RK;|3Nz*5u0MoYcvRdI|zuTsf&F3OR{I>8bH03bqQ9FKBvC?$>jb04vec z$ko)e=2B2d$<Ip#(TYWr|LMw2{>daadA6SLWO02(PMF@z$p@IkCN~7IO^!Dan4F={ z69_j#Az4!ai32qcVs%MsaY=khd|GBvafyP4rh+X{7UZN%kZVHpAmUIvCOc{hGpQ#| z-k>YuQI?ofnhI2<sR`5o_5{?V)V!4Vg8X7EX2E3OF3tmL-Q2DJ!c>w|32c#)f}NF> N0<x1f7p7VY007vZWbyz2 delta 88 zcmeBs%ed+_;|3PJ$<Os9Ca=&ep4`jEGkJoZ<>Wdop~+uZ<bZ79$p-q0EQ&=6nUfDN niA^rhR}xRu)5z6SNY+$<GPo2JQu6asH-{L!Fx?!UW-b5#ghL&V From 924096bf230e6c4043ff4cf243742ed2b2a3c531 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Sat, 2 Nov 2013 16:56:37 +0900 Subject: [PATCH 08/22] last --- src/uCoreLib.ml | 8 ++++---- src/uCoreLib.mli | 4 ++-- test/test.ml | Bin 30350 -> 30430 bytes 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index fc930c0..0bbde18 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -818,7 +818,7 @@ module Text' = struct let first t = let p, leaf = first_leaf t in - {path = p; leaf = leaf; index = B.first leaf.b.s} + {path = p; leaf = leaf; index = leaf.i} let rec end_leaf_sub path = function Empty -> (Top, empty_leaf) @@ -828,9 +828,9 @@ module Text' = struct let end_leaf = end_leaf_sub Top - let end_pos t = + let last t = let p, leaf = end_leaf t in - {path = p; leaf = leaf; index = B.end_pos leaf.b.s} + {path = p; leaf = leaf; index = B.prev leaf.b.s leaf.j} let rec nth_aux p t n = match t with @@ -1034,7 +1034,7 @@ module Text' = struct len = B.distance it.leaf.b.s it.leaf.i it.index} in let it = {it with path = p; leaf = leaf} in let b = base it in - end_pos b + last b let sub it len = let it = delete_left it in diff --git a/src/uCoreLib.mli b/src/uCoreLib.mli index f832683..e0dbb22 100644 --- a/src/uCoreLib.mli +++ b/src/uCoreLib.mli @@ -301,8 +301,8 @@ module Text' : sig (** The head of the text *) val first : t -> iterator - (** Points the end of the text *) - val end_pos : t -> iterator + (** Points the last element of the text *) + val last : t -> iterator (** [nth t i] return the iterator which points the begininng of [i+1]-th character of [t]. *) val nth : t -> int -> iterator option diff --git a/test/test.ml b/test/test.ml index 2d6a388d6cfa7b93716c9e2479b17c0c28ecd26b..2e4325dd7cf85dc31454868f44f1aab35c6e1746 100644 GIT binary patch delta 83 zcmeBs%XsfC;|5g&rkupd2lYik)J=U^;pEEXoK!sxLjxdC$jwhtFx8yAQCD~}j{*1Q jPy;42QBI|t#NrYq1v@J%g_6|blK7JNoW#w+Y1RS&qZb;` delta 67 zcmccjma*?G;|5g&{nWga_=5am1r1FFTP_8KoYWG9%n}7#g%CZMOip4^dTKmSI1ebl Ox!Zu*Z1eIoa{&Mzg%*<l From acf702d1afe802edf05baa0d1ed662f4bc14abc8 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Tue, 5 Nov 2013 08:14:55 +0900 Subject: [PATCH 09/22] nth --- test/test.ml | Bin 30430 -> 30646 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/test/test.ml b/test/test.ml index 2e4325dd7cf85dc31454868f44f1aab35c6e1746..8d5d418189944838afea2dab0752d869d6d97fb2 100644 GIT binary patch delta 139 zcmccjmT}vA#tpp&d^xEl3MHw<CGjQkc_kT>KQgIFWR@t{Dun0(x$&tLc?vm+Md_*W zB?_674>F1IRVL@8>S<&u<mRU+m}*Yms4F~~$AFh5B|k59vaY(MAW#=vhoOOif#K#Z Z16easP9>m(l7gL;6_Rb6m!???006!FFN6R9 delta 19 bcmdn?p7GvW#tpp&oBa%>%r@7jn+pH{U`+@@ From 7d9e606d4d6cd07e11dc03d316ea8b2785861b24 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Mon, 11 Nov 2013 21:55:43 +0900 Subject: [PATCH 10/22] next --- src/uCoreLib.ml | 8 ++++---- test/test.ml | Bin 30646 -> 31094 bytes 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index 0bbde18..872fee9 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -872,8 +872,8 @@ module Text' = struct next_leaf p let next it = - if not (B.equal_index it.leaf.b.s it.index it.leaf.j) then - let i = B.next it.leaf.b.s it.index in + let i = B.next it.leaf.b.s it.index in + if not (B.equal_index it.leaf.b.s i it.leaf.j) then Some {it with index = i} else match next_leaf it.path with None -> None @@ -881,8 +881,8 @@ module Text' = struct Some {path = path; leaf = leaf; index = leaf.i} let next_exn it = - if not (B.equal_index it.leaf.b.s it.index it.leaf.j) then - let i = B.next it.leaf.b.s it.index in + let i = B.next it.leaf.b.s it.index in + if not (B.equal_index it.leaf.b.s i it.leaf.j) then {it with index = i} else match next_leaf it.path with None -> invalid_arg "index out of bounds" diff --git a/test/test.ml b/test/test.ml index 8d5d418189944838afea2dab0752d869d6d97fb2..b3f16780388525dace895fdf736cec5eb5e4bc32 100644 GIT binary patch delta 369 zcmZ9I&q~8U5XPw%Bt40T_WC7INI=s@P-rB%h{xn&?;#}9W+DIBtXM(uA@U+Zp=aMn z^x)au#elenS$5`|-?uY|kJ0zn=q>hEB?$=XL?V)!b^^A8U;?N#X958@ZD6n64db0a zner_q!U3!M&#}3nT}zpuT<Wr?tR&ZFsG1yCN+B`7AnG1U#p>JTY-~zy|9&*>+r{*? zY%+hIpJU#^3}m!0F$@C?_HB`Jm%1!%C4TU6{^hwQJC1G|MDokZ??(GqsmKbfT|Jz# z#v>DjE&*{L9iQ}8bQW57!z6K2lWW0S$^)i_&$w?Yg6zEr^;Y~qJ`_KfVtMT&@A7(P SSjxmg==*<Hx_{ZOPJaLlLUCgN delta 19 bcmezNiE-O|#tnXko4*<vnr*hvG#3B>Wrhf% From d763e8678911204718f6193e4fd058f7187e2125 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Mon, 7 Apr 2014 03:36:15 +0900 Subject: [PATCH 11/22] Before removing "move" --- src/uCoreLib.ml | 110 ++++++++++++++++++++++++++++-------------------- 1 file changed, 65 insertions(+), 45 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index 872fee9..3f2bde5 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -410,7 +410,7 @@ end (* Interface to String used for Rope implementation *) module type BaseStringType = sig - type t + type t = string val empty : t (* [create n] creates [n]-bytes base string *) @@ -466,9 +466,15 @@ module BaseString : BaseStringType = struct let of_ascii s = try Some (UTF8.of_ascii s) with Malformed_code -> None let equal_index s i j = i = j let size s i j = j - i - let blit s1 i1 i2 s2 j = String.blit s1 i1 s2 j (i2 - i1) + + let blit s1 i1 i2 s2 j = + let endpos = next s1 i2 in + String.blit s1 i1 s2 j (endpos - i1) + let move_by_bytes s i x = i + x - let add_substring b s i j = Buffer.add_substring b s i (j - i) + let add_substring b s i j = + let endpos = next s j in + Buffer.add_substring b s i (endpos - i) let write s i u = let masq = 0b111111 in @@ -545,7 +551,8 @@ module Text' = struct | Leaf of leaf and node = {left : t; left_length : int; right : t; right_length : int; height : int} and leaf = {b : base_string; - i : B.index; (*..............:::::::::::::::::::::.... *) + (* i points the first character, j points the start of last character*) + i : B.index; (*..............::::::::::::::::::::::... *) j : B.index; (* ^ ^ *) len : int} (* i j *) @@ -654,14 +661,16 @@ module Text' = struct let bal_if_needed l r = let r = make_concat l r in balance r - let is_full_tail leaf = B.equal_index leaf.b.s leaf.j leaf.b.unused + let is_full_tail leaf = + let tail = B.next leaf.b.s leaf.j in + B.equal_index leaf.b.s tail leaf.b.unused let leaf_append leaf_l leaf_r = - let size_l = B.size leaf_l.b.s leaf_l.i leaf_l.j in - let size_r = B.size leaf_r.b.s leaf_r.i leaf_r.j in + let size_l = B.size leaf_l.b.s leaf_l.i (B.next leaf_l.b.s leaf_l.j) in + let size_r = B.size leaf_r.b.s leaf_r.i (B.next leaf_r.b.s leaf_r.j) in if size_l + size_r <= leaf_size then begin if size_l + size_r <= - B.size leaf_l.b.s (B.first leaf_l.b.s) (B.end_pos leaf_l.b.s) + B.size leaf_l.b.s leaf_l.i (B.end_pos leaf_l.b.s) && is_full_tail leaf_l then begin B.blit leaf_r.b.s leaf_r.i leaf_r.j leaf_l.b.s leaf_l.b.unused; @@ -672,7 +681,7 @@ module Text' = struct leaf_l.b.s <- s; end; leaf_l.b.unused <- B.move_by_bytes leaf_l.b.s leaf_l.b.unused size_r; - let leaf = {leaf_l with j = leaf_l.b.unused; + let leaf = {leaf_l with j = B.prev leaf_l.b.s leaf_l.b.unused; len = leaf_l.len + leaf_r.len} in Leaf leaf end else @@ -705,27 +714,30 @@ module Text' = struct let new_block_uchar u = let s = B.create leaf_size in - let i = B.write s (B.first s) u in - let b = {s = s; unused = i} in - Leaf {b = b; i = (B.first s); j = i; len = 1} + let k = B.write s (B.first s) u in + let b = {s = s; unused = k} in + Leaf {b = b; i = (B.first s); j = (B.first s); len = 1} + + let is_full leaf = + B.compare_index leaf.b.s + (B.next leaf.b.s leaf.j) + (B.end_pos leaf.b.s) >= 0 let leaf_append_uchar leaf u = - if is_full_tail leaf then begin - let k = B.write leaf.b.s leaf.j u in - if B.equal_index leaf.b.s k leaf.j then - make_concat (Leaf leaf) (new_block_uchar u) - else begin - leaf.b.unused <- k; - let leaf = {leaf with j = k; len = leaf.len + 1} in - Leaf leaf - end - end else + if is_full leaf then + make_concat (Leaf leaf) (new_block_uchar u) + else if is_full_tail leaf then + let k = B.write leaf.b.s leaf.b.unused u in + let leaf = {leaf with j = leaf.b.unused; len = leaf.len + 1} in + leaf.b.unused <- k; + Leaf leaf + else make_concat (Leaf leaf) (new_block_uchar u) let leaf_of_uchar u = let s = B.of_string_unsafe (UTF8.make 1 u) in let b = {s = s; unused = B.end_pos s} in - {b = b; i = B.first b.s; j = B.end_pos b.s; len = 1} + {b = b; i = B.first b.s; j = B.first b.s; len = 1} let of_uchar u = Leaf (leaf_of_uchar u) @@ -745,14 +757,15 @@ module Text' = struct if len < 0 then failwith "Text.init: The length is minus" else let s = B.init len f in let b = {s = s; unused = B.end_pos s} in - Leaf {b = b; i = B.first s; j = B.end_pos s; len = len} + Leaf {b = b; i = B.first s; j = B.prev s (B.end_pos s); len = len} let of_string s = match B.of_string s with None -> None | Some s -> let b = {s = s; unused = B.end_pos s} in - Some (Leaf {b = b; i = B.first b.s; j = B.end_pos b.s; len = B.length s}) + Some (Leaf {b = b; i = B.first b.s; j = B.prev s (B.end_pos s); + len = B.length s}) let of_string_exn s = match of_string s with @@ -764,7 +777,9 @@ module Text' = struct None -> None | Some s -> let b = {s = s; unused = B.end_pos s} in - Some (Leaf {b = b; i = B.first b.s; j = B.end_pos b.s; len = B.length s}) + Some (Leaf {b = b; i = B.first b.s; + j = B.move_by_bytes b.s (B.end_pos b.s) (-1); + len = B.length s}) let of_ascii_exn s = match of_string s with @@ -806,7 +821,7 @@ module Text' = struct let empty_leaf = let base = {s = B.empty; unused = B.end_pos B.empty} in - {b = base; i = B.first B.empty; j = B.end_pos B.empty; len = 0} + {b = base; i = B.first B.empty; j = B.prev B.empty (B.first B.empty); len = 0} let rec first_leaf_sub path = function Empty -> (Top, empty_leaf) @@ -830,7 +845,7 @@ module Text' = struct let last t = let p, leaf = end_leaf t in - {path = p; leaf = leaf; index = B.prev leaf.b.s leaf.j} + {path = p; leaf = leaf; index = leaf.j} let rec nth_aux p t n = match t with @@ -873,7 +888,7 @@ module Text' = struct let next it = let i = B.next it.leaf.b.s it.index in - if not (B.equal_index it.leaf.b.s i it.leaf.j) then + if not (B.compare_index it.leaf.b.s i it.leaf.j > 0) then Some {it with index = i} else match next_leaf it.path with None -> None @@ -882,7 +897,7 @@ module Text' = struct let next_exn it = let i = B.next it.leaf.b.s it.index in - if not (B.equal_index it.leaf.b.s i it.leaf.j) then + if not (B.compare_index it.leaf.b.s i it.leaf.j > 0) then {it with index = i} else match next_leaf it.path with None -> invalid_arg "index out of bounds" @@ -903,7 +918,7 @@ module Text' = struct else match prev_leaf it.path with None -> None | Some (path, leaf) -> - Some {path = path; leaf = leaf; index = B.prev leaf.b.s leaf.j} + Some {path = path; leaf = leaf; index = leaf.j} let prev_exn it = if not (B.equal_index it.leaf.b.s it.index it.leaf.i) then @@ -912,7 +927,7 @@ module Text' = struct else match prev_leaf it.path with None -> invalid_arg "index out of bounds" | Some (path, leaf) -> - {path = path; leaf = leaf; index = B.prev leaf.b.s leaf.j} + {path = path; leaf = leaf; index = leaf.j} let rec base_aux path sub = match path with @@ -934,7 +949,7 @@ module Text' = struct let move_ahead_leaf it n = let rec loop i n = if B.compare_index it.leaf.b.s i it.leaf.j > 0 then - `Out_of_range ({it with index = it.leaf.j}, n) + `Out_of_range ({it with index = i}, n) else if n <= 0 then `Success {it with index = i} else loop (B.next it.leaf.b.s i) (n - 1) in loop it.index n @@ -945,9 +960,9 @@ module Text' = struct | Leaf leaf -> let it = if it.leaf == leaf then it else - {path = path; leaf = leaf; index = leaf.i} in + {it with index = leaf.i} in (match move_ahead_leaf it n with - `Success it as ret -> ret + `Success it as ret -> ret | `Out_of_range (it, n) -> (match path with Top -> `Out_of_range (it, n) @@ -963,11 +978,16 @@ module Text' = struct move_ahead (Right (node.left, path)) node.right it (n - node.left_length) let move_behind_leaf it n = - let rec loop i n = - if B.compare_index it.leaf.b.s i it.leaf.i < 0 then - `Out_of_range ({it with index = it.leaf.i}, n) - else if n <= 0 then `Success {it with index = i} else - loop (B.prev it.leaf.b.s i) (n-1) in + if it.leaf.len < n then + let i = B.prev it.leaf.b.s (B.first it.leaf.b.s) in + `Out_of_range ({it with index = i}, n - it.leaf.len) + else + let rec loop i n = + if n <= 0 then `Success {it with index = i} else + if B.compare_index it.leaf.b.s i it.leaf.i < 0 then + `Out_of_range ({it with index = i}, n) + else + loop (B.prev it.leaf.b.s i) (n-1) in loop it.index n let rec move_behind path sub it n = @@ -988,10 +1008,10 @@ module Text' = struct let node = make_concat sub t in move_behind p node it n)) | Concat node -> - if node.right_length >= n then + if node.right_length > n then move_behind (Right (node.left, path)) node.right it n else - move_ahead (Right (node.right, path)) node.left it (n - node.left_length) + move_behind (Right (node.right, path)) node.left it (n - node.right_length) let move it n = if n > 0 then move_ahead it.path (Leaf it.leaf) it n else @@ -1017,7 +1037,7 @@ module Text' = struct let delete_left it = let p, _ = delete_left_pos it.path (Leaf it.leaf) in let leaf = {it.leaf with i = it.index; - len = B.distance it.leaf.b.s it.index it.leaf.j} in + len = 1 + B.distance it.leaf.b.s it.index it.leaf.j} in let it = {it with path = p; leaf = leaf} in let b = base it in first b @@ -1031,7 +1051,7 @@ module Text' = struct let delete_right it = let p, _ = delete_right_pos it.path (Leaf it.leaf) in let leaf = {it.leaf with j = it.index; - len = B.distance it.leaf.b.s it.leaf.i it.index} in + len = 1 + B.distance it.leaf.b.s it.leaf.i it.index} in let it = {it with path = p; leaf = leaf} in let b = base it in last b @@ -1061,7 +1081,7 @@ module Text' = struct let fold_leaf leaf a f = let rec loop a i = - if B.compare_index leaf.b.s i leaf.j >= 0 then a else + if B.compare_index leaf.b.s i leaf.j > 0 then a else let a' = f a (B.read leaf.b.s i) in loop a' (B.next leaf.b.s i) in loop a leaf.i From e9a8494c275e29d8d41cf6df5046275dd1398b07 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Mon, 7 Apr 2014 04:47:02 +0900 Subject: [PATCH 12/22] New Interface --- src/uCoreLib.ml | 1083 ++++------------------------------------------ src/uCoreLib.mli | 633 +-------------------------- test/test.ml | Bin 31094 -> 26661 bytes 3 files changed, 106 insertions(+), 1610 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index 3f2bde5..f1a48d7 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -1,6 +1,6 @@ -(** ucorelib : a feather weight Unicode library for OCaml *) +(** ucorelib : core Unicode library for OCaml *) -(* Copyright (C) 2011, 2013 Yamagata Yoriyuki. *) +(* Copyright (C) 2011, 2013, 2014 Yamagata Yoriyuki. *) (* This library is free software; you can redistribute it and/or *) (* modify it under the terms of the GNU Lesser General Public License *) @@ -34,7 +34,6 @@ (* You can contact the authour by sending email to *) (* yoriyuki.y@gmail.com *) - (** Unicode (ISO-UCS) characters. This module implements Unicode characters. @@ -506,7 +505,7 @@ module BaseString : BaseStringType = struct end end -module Text' = struct +module Text = struct (* * Rope: Rope: an implementation of the data structure described in * @@ -946,88 +945,6 @@ module Text' = struct let pos it = pos_path it.path + B.distance it.leaf.b.s it.leaf.i it.index - let move_ahead_leaf it n = - let rec loop i n = - if B.compare_index it.leaf.b.s i it.leaf.j > 0 then - `Out_of_range ({it with index = i}, n) - else if n <= 0 then `Success {it with index = i} else - loop (B.next it.leaf.b.s i) (n - 1) in - loop it.index n - - let rec move_ahead path sub it n = - match sub with - Empty -> `Out_of_range (it, n) - | Leaf leaf -> - let it = - if it.leaf == leaf then it else - {it with index = leaf.i} in - (match move_ahead_leaf it n with - `Success it as ret -> ret - | `Out_of_range (it, n) -> - (match path with - Top -> `Out_of_range (it, n) - | Left (p, t) -> - move_ahead (Right (sub, p)) t it n - | Right (t, p) -> - let node = make_concat t sub in - move_ahead p node it n)) - | Concat node -> - if node.left_length >= n then - move_ahead (Left (path, node.right)) node.left it n - else - move_ahead (Right (node.left, path)) node.right it (n - node.left_length) - - let move_behind_leaf it n = - if it.leaf.len < n then - let i = B.prev it.leaf.b.s (B.first it.leaf.b.s) in - `Out_of_range ({it with index = i}, n - it.leaf.len) - else - let rec loop i n = - if n <= 0 then `Success {it with index = i} else - if B.compare_index it.leaf.b.s i it.leaf.i < 0 then - `Out_of_range ({it with index = i}, n) - else - loop (B.prev it.leaf.b.s i) (n-1) in - loop it.index n - - let rec move_behind path sub it n = - match sub with - Empty -> `Out_of_range (it, n) - | Leaf leaf -> - let it = - if it.leaf == leaf then it else - {path = path; leaf = leaf; index = leaf.j} in - (match move_behind_leaf it n with - `Success it as ret -> ret - | `Out_of_range (it, n) -> - (match path with - Top -> `Out_of_range (it, n) - | Right (t, p) -> - move_behind (Left (p, sub)) t it n - | Left (p, t) -> - let node = make_concat sub t in - move_behind p node it n)) - | Concat node -> - if node.right_length > n then - move_behind (Right (node.left, path)) node.right it n - else - move_behind (Right (node.right, path)) node.left it (n - node.right_length) - - let move it n = - if n > 0 then move_ahead it.path (Leaf it.leaf) it n else - if n < 0 then move_behind it.path (Leaf it.leaf) it (-n) else - `Success it - - let move_exn it n = - match move it n with - `Success it -> it - | `Out_of_range _ -> invalid_arg "number out of bounds" - - let move_as_possible it n = - match move it n with - `Success it -> it - | `Out_of_range (it, n) -> it - let rec delete_left_pos path sub = match path with Top -> (path, sub) @@ -1039,8 +956,7 @@ module Text' = struct let leaf = {it.leaf with i = it.index; len = 1 + B.distance it.leaf.b.s it.index it.leaf.j} in let it = {it with path = p; leaf = leaf} in - let b = base it in - first b + base it let rec delete_right_pos path sub = match path with @@ -1053,29 +969,49 @@ module Text' = struct let leaf = {it.leaf with j = it.index; len = 1 + B.distance it.leaf.b.s it.leaf.i it.index} in let it = {it with path = p; leaf = leaf} in - let b = base it in - last b - - let sub it len = - let it = delete_left it in - match move it len with - `Out_of_range _ -> None - | `Success it -> - Some (delete_right it) - - let sub_exn it len = - match sub it len with + base it + + let sub t ~pos ~len = + match nth t pos with + None -> None + | Some it -> + let s = delete_left it in + match nth s len with + None -> None + | Some it -> + Some (delete_right it) + + + let sub_exn t ~pos ~len = + match sub t ~pos ~len with None -> invalid_arg "iterator out of bound" - | Some it -> it + | Some t -> t - let insert it text = - let n = pos it in - let left = base (delete_right it) in - let right = base (delete_left it) in - let base = (append (append left text) right) in - match nth base n with - None -> assert false - | Some it -> it + let insert t pos text = + match nth t pos with + None -> None + | Some it -> + let left = delete_right it in + let right = delete_left it in + Some (append (append left text) right) + + let insert_exn t pos text = + match insert t pos text with + None -> invalid_arg "index out of bound" + | Some t -> t + + let delete t ~pos ~len = + match nth t pos, nth t (pos + len -1) with + None, _ | _, None -> None + | Some it1, Some it2 -> + let left = delete_right it1 in + let right = delete_left it2 in + Some (append left right) + + let delete_exn t ~pos ~len = + match delete t ~pos ~len with + None -> invalid_arg "index out of bound" + | Some t -> t let value it = B.read it.leaf.b.s it.index @@ -1094,6 +1030,10 @@ module Text' = struct let a' = fold node.left a f in fold node.right a' f + let iter t f = + let g () u = f u in + fold t () g + let rec compare_iterator it1 it2 = let u1 = value it1 in let u2 = value it2 in @@ -1119,6 +1059,8 @@ module Text' = struct let b = Buffer.create 0 in string_of_aux b t; Buffer.contents b + + let to_string = string_of end @@ -1150,864 +1092,9 @@ end * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) -module Text = struct - - (**Low-level optimization*) - let int_max (x:int) (y:int) = if x < y then y else x - let int_min (x:int) (y:int) = if x < y then x else y - - let splice s1 off len s2 = - let len1 = String.length s1 and len2 = String.length s2 in - let off = if off < 0 then len1 + off - 1 else off in - let len = int_min (len1 - off) len in - let out_len = len1 - len + len2 in - let s = String.create out_len in - String.blit s1 0 s 0 off; (* s1 before splice point *) - String.blit s2 0 s off len2; (* s2 at splice point *) - String.blit s1 (off+len) s (off+len2) (len1 - (off+len)); (* s1 after off+len *) - s - - exception Invalid_rope - - type t = - Empty (**An empty rope*) - | Concat of t * int * t * int * int (**[Concat l ls r rs h] is the concatenation of - ropes [l] and [r], where [ls] is the total - length of [l], [rs] is the length of [r] - and [h] is the height of the node in the - tree, used for rebalancing. *) - | Leaf of int * UTF8.t (**[Leaf l t] is string [t] with length [l], - measured in number of Unicode characters.*) - - type forest_element = { mutable c : t; mutable len : int } - - let str_append = (^) - let empty_str = "" - let string_of_string_list l = String.concat empty_str l - - - - (* 48 limits max rope size to 220GB on 64 bit, - * ~ 700MB on 32bit (length fields overflow after that) *) - let max_height = 48 - - (* actual size will be that plus 1 word header; - * the code assumes it's an even num. - * 256 gives up to a 50% overhead in the worst case (all leaf nodes near - * half-filled *) - let leaf_size = 256 (* utf-8 characters, not bytes *) - (* =end *) - - (* =begin code *) - - exception Out_of_bounds - - let empty = Empty - - - - (* by construction, there cannot be Empty or Leaf "" leaves *) - let is_empty = function Empty -> true | _ -> false - - let height = function - Empty | Leaf _ -> 0 - | Concat(_,_,_,_,h) -> h - - let length = function - Empty -> 0 - | Leaf (l,_) -> l - | Concat(_,cl,_,cr,_) -> cl + cr - - let make_concat l r = - let hl = height l and hr = height r in - let cl = length l and cr = length r in - Concat(l, cl, r, cr, if hl >= hr then hl + 1 else hr + 1) - - let min_len = - let fib_tbl = Array.make max_height 0 in - let rec fib n = match fib_tbl.(n) with - 0 -> - let last = fib (n - 1) and prev = fib (n - 2) in - let r = last + prev in - let r = if r > last then r else last in (* check overflow *) - fib_tbl.(n) <- r; r - | n -> n - in - fib_tbl.(0) <- leaf_size + 1; fib_tbl.(1) <- 3 * leaf_size / 2 + 1; - Array.init max_height (fun i -> if i = 0 then 1 else fib (i - 1)) - - let max_length = min_len.(Array.length min_len - 1) - - let concat_fast l r = match l with - Empty -> r - | Leaf _ | Concat(_,_,_,_,_) -> - match r with - Empty -> l - | Leaf _ | Concat(_,_,_,_,_) -> make_concat l r - - (* based on Hans-J. Boehm's *) - let add_forest forest rope len = - let i = ref 0 in - let sum = ref empty in - while len > min_len.(!i+1) do - if forest.(!i).c <> Empty then begin - sum := concat_fast forest.(!i).c !sum; - forest.(!i).c <- Empty - end; - incr i - done; - sum := concat_fast !sum rope; - let sum_len = ref (length !sum) in - while !sum_len >= min_len.(!i) do - if forest.(!i).c <> Empty then begin - sum := concat_fast forest.(!i).c !sum; - sum_len := !sum_len + forest.(!i).len; - forest.(!i).c <- Empty; - end; - incr i - done; - decr i; - forest.(!i).c <- !sum; - forest.(!i).len <- !sum_len - - let concat_forest forest = - Array.fold_left (fun s x -> concat_fast x.c s) Empty forest - - let rec balance_insert rope len forest = match rope with - Empty -> () - | Leaf _ -> add_forest forest rope len - | Concat(l,cl,r,cr,h) when h >= max_height || len < min_len.(h) -> - balance_insert l cl forest; - balance_insert r cr forest - | x -> add_forest forest x len (* function or balanced *) - - let balance r = - match r with - Empty | Leaf _ -> r - | _ -> - let forest = Array.init max_height (fun _ -> {c = Empty; len = 0}) in - balance_insert r (length r) forest; - concat_forest forest - - let bal_if_needed l r = - let r = make_concat l r in - if height r < max_height then r else balance r - - let concat_str l = function - Empty | Concat(_,_,_,_,_) -> invalid_arg "concat_str" - | Leaf (lenr, rs) as r -> - match l with - | Empty -> r - | Leaf (lenl, ls) -> - let slen = lenr + lenl in - if slen <= leaf_size then Leaf ((lenl+lenr),(str_append ls rs)) - else make_concat l r (* height = 1 *) - | Concat(ll, cll, Leaf (lenlr ,lrs), clr, h) -> - let slen = clr + lenr in - if clr + lenr <= leaf_size then - Concat(ll, cll, Leaf ((lenlr + lenr),(str_append lrs rs)), slen, h) - else - bal_if_needed l r - | _ -> bal_if_needed l r - - let append_char c r = concat_str r (Leaf (1, (UTF8.make 1 c))) - - let append l = function - Empty -> l - | Leaf _ as r -> concat_str l r - | Concat(Leaf (lenrl,rls),rlc,rr,rc,h) as r -> - (match l with - Empty -> r - | Concat(_,_,_,_,_) -> bal_if_needed l r - | Leaf (lenl, ls) -> - let slen = rlc + lenl in - if slen <= leaf_size then - Concat(Leaf((lenrl+lenl),(str_append ls rls)), slen, rr, rc, h) - else - bal_if_needed l r) - | r -> (match l with Empty -> r | _ -> bal_if_needed l r) - - let ( ^^^ ) = append - - let prepend_char c r = append (Leaf (1,(UTF8.make 1 c))) r - - let get r i = - let rec aux i = function - Empty -> raise Out_of_bounds - | Leaf (lens, s) -> - if i >= 0 && i < lens then UTF8.get s i - else raise Out_of_bounds - | Concat (l, cl, r, cr, _) -> - if i < cl then aux i l - else aux (i - cl) r - in aux i r - - let copy_set us cpos c = - let ipos = UTF8.ByteIndex.of_char_idx us cpos in - let jpos = UTF8.ByteIndex.next us ipos in - let i = UTF8.ByteIndex.to_int ipos - and j = UTF8.ByteIndex.to_int jpos in - splice us i (j-i) (UTF8.of_char c) - - let set r i v = - let rec aux i = function - Empty -> raise Out_of_bounds - | Leaf (lens, s) -> - if i >= 0 && i < lens then - let s = copy_set s i v in - Leaf (lens, s) - else raise Out_of_bounds - | Concat(l, cl, r, cr, _) -> - if i < cl then append (aux i l) r - else append l (aux (i - cl) r) - in aux i r - - - module Iter = - struct - - - (* Iterators are used for iterating efficiently over multiple ropes - at the same time *) - - type iterator = { - mutable leaf : UTF8.t; - (* Current leaf in which the iterator is *) - mutable idx : UTF8.ByteIndex.b_idx; - (* Current byte position of the iterator *) - mutable rest : t list; - (* Ropes not yet visited *) - } - - type t = iterator option - - (* Initial iterator state: *) - let make rope = { leaf = UTF8.empty; - idx = UTF8.ByteIndex.first; - rest = if rope = Empty then [] else [rope] } - - let rec next_leaf = function - | Empty :: l -> - next_leaf l - | Leaf(len, str) :: l -> - Some(str, l) - | Concat(left, left_len, right, right_len, height) :: l -> - next_leaf (left :: right :: l) - | [] -> - None - - (* Advance the iterator to the next position, and return current - character: *) - let rec next iter = - if UTF8.ByteIndex.at_end iter.leaf iter.idx then - (* We are at the end of the current leaf, find another one: *) - match next_leaf iter.rest with - | None -> - None - | Some(leaf, rest) -> - if leaf = "" then None else begin - iter.leaf <- leaf; - iter.idx <- UTF8.ByteIndex.next leaf UTF8.ByteIndex.first; - iter.rest <- rest; - Some(UTF8.ByteIndex.look leaf UTF8.ByteIndex.first) - end - else begin - (* Just advance in the current leaf: *) - let ch = UTF8.ByteIndex.look iter.leaf iter.idx in - iter.idx <- UTF8.ByteIndex.next iter.leaf iter.idx; - Some ch - end - - (* Same thing but map leafs: *) - let rec next_map f iter = - if UTF8.ByteIndex.at_end iter.leaf iter.idx then - match next_leaf iter.rest with - | None -> - None - | Some(leaf, rest) -> - let leaf = f leaf in - iter.leaf <- leaf; - iter.idx <- UTF8.ByteIndex.next leaf UTF8.ByteIndex.first; - iter.rest <- rest; - Some(UTF8.ByteIndex.look leaf UTF8.ByteIndex.first) - else begin - let ch = UTF8.ByteIndex.look iter.leaf iter.idx in - iter.idx <- UTF8.ByteIndex.next iter.leaf iter.idx; - Some ch - end - - (* Same thing but in reverse order: *) - - let rec prev_leaf = function - | Empty :: l -> - prev_leaf l - | Leaf(len, str) :: l -> - Some(str, l) - | Concat(left, left_len, right, right_len, height) :: l -> - prev_leaf (right :: left :: l) - | [] -> - None - - let prev iter = - if iter.idx = UTF8.ByteIndex.first then - match prev_leaf iter.rest with - | None -> - None - | Some(leaf, rest) -> - iter.leaf <- leaf; - iter.idx <- UTF8.ByteIndex.last leaf; - iter.rest <- rest; - Some(UTF8.ByteIndex.look leaf iter.idx) - else begin - iter.idx <- UTF8.ByteIndex.prev iter.leaf iter.idx; - Some(UTF8.ByteIndex.look iter.leaf iter.idx) - end - end - - (* Can be improved? *) - let compare a b = - let ia = Iter.make a and ib = Iter.make b in - let rec loop _ = - match Iter.next ia, Iter.next ib with - | None, None -> 0 - | None, _ -> -1 - | _, None -> 1 - | Some ca, Some cb -> - match UChar.compare ca cb with - | 0 -> loop () - | n -> n - in - loop () - - -let of_ustring ustr = - (* We need fast access to raw bytes: *) - let bytes = UTF8.to_string_unsafe ustr in - let byte_length = String.length bytes in - - (* - [rope] is the accumulator - - [start_byte_idx] is the byte position of the current slice - - [current_byte_idx] is the current byte position - - [slice_size] is the number of unicode characters contained - between [start_byte_idx] and [current_byte_idx] *) - let rec loop rope start_byte_idx current_byte_idx slice_size = - if current_byte_idx = byte_length then begin - - if slice_size = 0 then - rope - else - add_slice rope start_byte_idx current_byte_idx slice_size - - end else begin - let next_byte_idx = UTF8.next ustr current_byte_idx in - - if slice_size = leaf_size then - (* We have enough unicode characters for this slice, extract - it and add a leaf to the rope: *) - loop (add_slice rope start_byte_idx current_byte_idx slice_size) - next_byte_idx next_byte_idx 0 - else - loop rope start_byte_idx next_byte_idx (slice_size + 1) - end - and add_slice rope start_byte_idx end_byte_idx slice_size = - append rope (Leaf(slice_size, - (* This is correct, we are just extracting a - sequence of well-formed UTF-8 encoded unicode - characters: *) - UTF8.of_string_unsafe - (String.sub bytes start_byte_idx (end_byte_idx - start_byte_idx)))) - in - loop Empty 0 0 0 - - let of_string s = - (* Validate + unsafe to avoid an extra copy (it is OK because - of_ustring do not reuse its argument in the resulting rope): *) - UTF8.validate s; - of_ustring (UTF8.of_string_unsafe s) - - let append_us r us = append r (of_ustring us) - - let rec make len c = - let rec concatloop len i r = - if i <= len then - (*TODO: test for sharing among substrings *) - concatloop len (i * 2) (append r r) - else r - in - if len = 0 then Empty - else if len <= leaf_size then Leaf (len, (UTF8.make len c)) - else - let rope = concatloop len 2 (of_ustring (UTF8.make 1 c)) in - append rope (make (len - length rope) c) - - let of_uchar c = make 1 c - let of_char c = of_uchar (UChar.of_char c) - - let sub r start len = - let rec aux start len = function - Empty -> if start <> 0 || len <> 0 then raise Out_of_bounds else Empty - | Leaf (lens, s) -> - if len < 0 || start < 0 || start + len > lens then - raise Out_of_bounds - else if len > 0 then (* Leaf "" cannot happen *) - (try Leaf (len, (UTF8.sub s start len)) with _ -> raise Out_of_bounds) - else Empty - | Concat(l,cl,r,cr,_) -> - if start < 0 || len < 0 || start + len > cl + cr then raise Out_of_bounds; - let left = - if start = 0 then - if len >= cl then - l - else aux 0 len l - else if start > cl then Empty - else if start + len >= cl then - aux start (cl - start) l - else aux start len l in - let right = - if start <= cl then - let upto = start + len in - if upto = cl + cr then r - else if upto < cl then Empty - else aux 0 (upto - cl) r - else aux (start - cl) len r - in - append left right - in aux start len r - - let insert start rope r = - append (append (sub r 0 start) rope) (sub r start (length r - start)) - - let remove start len r = - append (sub r 0 start) (sub r (start + len) (length r - start - len)) - - let to_ustring r = - let rec strings l = function - Empty -> l - | Leaf (_,s) -> s :: l - | Concat(left,_,right,_,_) -> strings (strings l right) left - in - string_of_string_list (strings [] r) - - let rec bulk_iter f = function - Empty -> () - | Leaf (_,s) -> f s - | Concat(l,_,r,_,_) -> bulk_iter f l; bulk_iter f r - - let rec bulk_iteri ?(base=0) f = function - Empty -> () - | Leaf (_,s) -> f base s - | Concat(l,cl,r,_,_) -> - bulk_iteri ~base f l; - bulk_iteri ~base:(base+cl) f r - - let rec iter f = function - Empty -> () - | Leaf (_,s) -> UTF8.iter f s - | Concat(l,_,r,_,_) -> iter f l; iter f r - - - let rec iteri ?(base=0) f = function - Empty -> () - | Leaf (_,s) -> - UTF8.iteri (fun j c -> f (base + j) c) s - | Concat(l,cl,r,_,_) -> iteri ~base f l; iteri ~base:(base + cl) f r - - - let rec bulk_iteri_backwards ~top f = function - Empty -> () - | Leaf (lens,s) -> f (top-lens) s (* gives f the base position, not the top *) - | Concat(l,_,r,cr,_) -> - bulk_iteri_backwards ~top f r; - bulk_iteri_backwards ~top:(top-cr) f l - - let rec range_iter f start len = function - Empty -> if start <> 0 || len <> 0 then raise Out_of_bounds - | Leaf (lens, s) -> - let n = start + len in - if start >= 0 && len >= 0 && n <= lens then - for i = start to n - 1 do - f (UTF8.look s (UTF8.nth s i)) (*TODO: use enum to iterate efficiently*) - done - else raise Out_of_bounds - | Concat(l,cl,r,cr,_) -> - if start < 0 || len < 0 || start + len > cl + cr then raise Out_of_bounds; - if start < cl then begin - let upto = start + len in - if upto <= cl then - range_iter f start len l - else begin - range_iter f start (cl - start) l; - range_iter f 0 (upto - cl) r - end - end else begin - range_iter f (start - cl) len r - end - - let rec range_iteri f ?(base = 0) start len = function - Empty -> if start <> 0 || len <> 0 then raise Out_of_bounds - | Leaf (lens, s) -> - let n = start + len in - if start >= 0 && len >= 0 && n <= lens then - for i = start to n - 1 do - f (base+i) (UTF8.look s (UTF8.nth s i)) - (*TODO: use enum to iterate efficiently*) - done - else raise Out_of_bounds - | Concat(l,cl,r,cr,_) -> - if start < 0 || len < 0 || start + len > cl + cr then raise Out_of_bounds; - if start < cl then begin - let upto = start + len in - if upto <= cl then - range_iteri f ~base start len l - else begin - range_iteri f ~base start (cl - start) l; - range_iteri f ~base:(base + cl - start) 0 (upto - cl) r - end - end else begin - range_iteri f ~base (start - cl) len r - end - - let rec fold f a = function - Empty -> a - | Leaf (_,s) -> - UTF8.fold (fun a c -> f a c) a s - | Concat(l,_,r,_,_) -> fold f (fold f a l) r - - let rec bulk_fold f a = function - | Empty -> a - | Leaf (_, s) -> f a s - | Concat (l, _, r, _, _) -> bulk_fold f (bulk_fold f a l) r - - let to_string t = - (* We use unsafe version to avoid the copy of the non-reachable - temporary string: *) - UTF8.to_string_unsafe (to_ustring t) - - let init len f = Leaf (len, UTF8.init len f) - - let of_string_unsafe s = of_ustring (UTF8.of_string_unsafe s) - let of_int i = of_string_unsafe (string_of_int i) - let of_float f = of_string_unsafe (string_of_float f) - - let to_int r = int_of_string (UTF8.to_string_unsafe (to_ustring r)) - let to_float r = float_of_string (UTF8.to_string_unsafe (to_ustring r)) - - let bulk_map f r = bulk_fold (fun acc s -> append_us acc (f s)) Empty r - let map f r = bulk_map (fun s -> UTF8.map f s) r - - let bulk_filter_map f r = bulk_fold (fun acc s -> match f s with None -> acc | Some r -> append_us acc r) Empty r - let filter_map f r = bulk_map (UTF8.filter_map f) r - - let filter f r = bulk_map (UTF8.filter f) r - - let left r len = sub r 0 len - let right r len = let rlen = length r in sub r (rlen - len) len - let head = left - let tail r pos = sub r pos (length r - pos) - - let index r u = - let i = Iter.make r in - let rec loop n = - match Iter.next i with - | None -> raise Not_found - | Some u' -> - if UChar.eq u u' then n else - loop (n + 1) - in - loop 0 - - - module Return : sig type 'a t - (** A label which may be used to return values of type ['a]*) - - val label : ('a t -> 'a) -> 'a - (** [label f] creates a new label [x] and invokes - [f x]. If, during the execution of [f], [return x v] - is invoked, the execution of [f x] stops - immediately and [label f] returns [v]. - Otherwise, if [f x] terminates normally and - returns [y], [label f] returns [y]. - - Calling [return x v] from outside scope [f] - is a run-time error and causes termination - of the program.*) - val with_label : ('a t -> 'a) -> 'a - (**as [label]*) - - val return : 'a t -> 'a -> _ - (** Return to a label. [return l v] returns - to the point where label [l] was obtained - and produces value [l]. - - Calling [return l v] from outside the scope - of [l] (i.e. the call to function [label] - which produced [l]) is a run-time error - and causes termination of the program.*) -end = struct - type 'a t = 'a option ref - - exception Return - - let return label value = - label := Some value; - raise Return (*(Obj.repr label)*) - - let label f = - let r = ref None in - try f r - with Return when !r <> None -> (*[!r = None] may happen if the user has let the exception escape its scope *) - match !r with (*in that case, we wish the exception to fall-through for debugging purposes*) - | None -> assert false (*Should be impossible*) - | Some x -> - r := None; (*Reset the trap for sanity checks should another exception escape scope *) - x (*(not that this should be possible in that case -- let's just be careful) *) - let with_label = label - end - - let index_from r base item = - Return.with_label (fun label -> - let index_aux i c = - if c = item then Return.return label i - in - range_iteri index_aux ~base base (length r - base) r; - raise Not_found) - - - let rindex r char = - Return.with_label (fun label -> - let index_aux i us = - try - let p = UTF8.rindex us char in - Return.return label (p+i) - with Not_found -> () - in - bulk_iteri_backwards ~top:(length r) index_aux r; - raise Not_found) - - let rindex_from r start char = - let rsub = left r start in - (rindex rsub char) - - let contains r char = - Return.with_label (fun label -> - let contains_aux us = - if UTF8.contains us char then Return.return label true - in - bulk_iter contains_aux r; - false) - - let contains_from r start char = - Return.with_label (fun label -> - let contains_aux c = if c = char then Return.return label true in - range_iter contains_aux start (length r - start) r; - false) - - let rcontains_from = contains_from - - let equals r1 r2 = compare r1 r2 = 0 - - let starts_with r prefix = - let ir = Iter.make r and iprefix = Iter.make prefix in - let rec loop _ = - match Iter.next iprefix with - | None -> true - | Some ch1 -> - match Iter.next ir with - | None -> false - | Some ch2 -> UChar.compare ch1 ch2 = 0 && loop () - in - loop () - - let ends_with r suffix = - let ir = Iter.make r and isuffix = Iter.make suffix in - let rec loop _ = - match Iter.prev isuffix with - | None -> true - | Some ch1 -> - match Iter.prev ir with - | None -> false - | Some ch2 -> UChar.compare ch1 ch2 = 0 && loop () - in - loop () - - (** find [r2] within [r1] or raises Not_found *) - let find_from r1 ofs r2 = - let matchlen = length r2 in - let r2_string = to_ustring r2 in - let check_at pos = r2_string = (to_ustring (sub r1 pos matchlen)) in - (* TODO: inefficient *) - Return.with_label (fun label -> - for i = ofs to length r1 - matchlen do - if check_at i then Return.return label i - done; - raise Not_found) - - let find r1 r2 = find_from r1 0 r2 - - let rfind_from r1 suf r2 = - let matchlen = length r2 in - let r2_string = to_ustring r2 in - let check_at pos = r2_string = (to_ustring (sub r1 pos matchlen)) in - (* TODO: inefficient *) - Return.with_label (fun label -> - for i = suf - (length r1 + 1 ) downto 0 do - if check_at i then Return.return label i - done; - raise Not_found) - - let rfind r1 r2 = rfind_from r1 (length r2 - 1) r2 - - - let exists r_str r_sub = try ignore(find r_str r_sub); true with Not_found -> false - - let strip_default_chars = List.map UChar.of_char [' ';'\t';'\r';'\n'] - let strip ?(chars=strip_default_chars) rope = - let rec strip_left n iter = - match Iter.next iter with - | None -> - Empty - | Some ch when List.mem ch chars -> - strip_left (n + 1) iter - | _ -> - sub rope n (strip_right (length rope - n) (Iter.make rope)) - and strip_right n iter = - match Iter.prev iter with - | None -> - assert false - | Some ch when List.mem ch chars -> - strip_right (n - 1) iter - | _ -> - n - in - strip_left 0 (Iter.make rope) - - let lchop = function - | Empty -> Empty - | str -> sub str 1 (length str - 1) - let rchop = function - | Empty -> Empty - | str -> sub str 0 (length str - 1) - - - -let of_list l = - let e = ref l in - let get_leaf () = - Return.label - (fun label -> - let b = Buffer.create 256 in - for i = 1 to 256 do - match !e with - [] -> Return.return label (false, UTF8.of_string_unsafe (Buffer.contents b)) - | c :: rest -> Buffer.add_string b (UTF8.to_string_unsafe (UTF8.of_char c)); e := rest - done; - (true, UTF8.of_string_unsafe (Buffer.contents b) )) - in - let rec loop r = (* concat 256 characters at a time *) - match get_leaf () with - (true, us) -> loop (append r (of_ustring us)) - | (false, us) -> append r (of_ustring us) - in - loop Empty - - let splice r start len new_sub = - let start = if start >= 0 then start else (length r) + start in - append (left r start) - (append new_sub (tail r (start+len))) - - let fill r start len char = - splice r start len (make len char) - - let blit rsrc offsrc rdst offdst len = - splice rdst offdst len (sub rsrc offsrc len) - - - let list_reduce f = function [] -> invalid_arg "Empty List" - | h::t -> List.fold_left f h t - - let concat sep r_list = - if r_list = [] then empty else - list_reduce (fun r1 r2 -> append r1 (append sep r2)) r_list - - (**T concat - concat (of_string "xyz") [] = empty - **) - - let escaped r = bulk_map UTF8.escaped r - - let replace_chars f r = fold (fun acc s -> append_us acc (f s)) Empty r - - let split r sep = - let i = find r sep in - head r i, tail r (i+length sep) - - let rsplit (r:t) sep = - let i = rfind r sep in - head r i, tail r (i+length sep) - - (** - An implementation of [nsplit] in one pass. - - This implementation traverses the string backwards, hence building the list - of substrings from the end to the beginning, so as to avoid a call to [List.rev]. - *) - let nsplit str sep = - if is_empty str then [] - else let seplen = length sep in - let rec aux acc ofs = match - try Some(rfind_from str ofs sep) - with Invalid_rope -> None - with Some idx -> - (*at this point, [idx] to [idx + seplen] contains the separator, which is useless to us - on the other hand, [idx + seplen] to [ofs] contains what's just after the separator, - which is s what we want*) - let end_of_occurrence = idx + seplen in - if end_of_occurrence >= ofs then aux acc idx (*We may be at the end of the string*) - else aux ( sub str end_of_occurrence ( ofs - end_of_occurrence ) :: acc ) idx - | None -> (sub str 0 ofs)::acc - in - aux [] (length str - 1 ) - - - let join = concat - - let slice ?(first=0) ?(last=max_int) s = - let clip _min _max x = int_max _min (int_min _max x) in - let i = clip 0 (length s) - (if (first<0) then (length s) + first else first) - and j = clip 0 (length s) - (if (last<0) then (length s) + last else last) - in - if i>=j || i=length s then - Empty - else - sub s i (j-i) - - - let replace ~str ~sub ~by = - try - let i = find str sub in - (true, append (slice ~last:i str) (append by - (slice ~first:(i+(length sub)) str))) - with - Invalid_rope -> (false, str) - - - let explode r = fold (fun a u -> u :: a) [] r - - let implode l = of_list l - - type t_alias = t (* fixes [type t = t] bug below *) - - - let of_latin1 s = of_ustring (UTF8.of_latin1 s) - - (* =end *) -end - (** Aliase for Text.t *) type text = Text.t -type cursor = int +type cursor = Text.iterator module CharEncoding = struct @@ -2145,7 +1232,7 @@ module CharEncoding = struct | None -> raise Out_of_range end in try - `Success ((), (Text.iter conv text; Buffer.contents b)) + `Success ((), (Text.iter text conv; Buffer.contents b)) with Out_of_range -> `Error @@ -2191,7 +1278,7 @@ module CharEncoding = struct | None -> raise Out_of_range end in try - `Success ((), (Text.iter conv text; Buffer.contents b)) + `Success ((), (Text.iter text conv; Buffer.contents b)) with Out_of_range -> `Error @@ -2241,11 +1328,11 @@ module CharEncoding = struct if i >= String.length s then (`Start, text) else let n = Char.code s.[i] in if n < 0x80 then - decode_start s (i+1) (Text.append_char (UChar.chr n) text) + decode_start s (i+1) (Text.append_uchar text (UChar.chr n)) else if n >= 0xc2 && n <= 0xf4 then decode_second n s (i+1) text else - decode_start s (i+1) (Text.append_char subst_char text) + decode_start s (i+1) (Text.append_uchar text subst_char) and decode_second a s i text = if i >= String.length s then (`Second a, text) else @@ -2254,49 +1341,49 @@ module CharEncoding = struct if a >= 0xc2 && a <= 0xdf then if (n >= 0x80 && n <= 0xbf) then let n = (a - 0xc0) lsl 6 lor (0x7f land n) in - decode_start s (i+1) (Text.append_char (UChar.chr n) text) + decode_start s (i+1) (Text.append_uchar text (UChar.chr n)) else - decode_start s i (Text.append_char subst_char text) + decode_start s i (Text.append_uchar text subst_char) (* 3-bytes code *) else if (a >= 0xe1 && a <= 0xec) || a = 0xee || a = 0xef then if (n >= 0x80 && n <= 0xbf) then let a = (a - 0xe0) lsl 6 lor (0x7f land n) in decode_trail 1 a s (i+1) text else - decode_start s i (Text.append_char subst_char text) + decode_start s i (Text.append_uchar text subst_char) else if a = 0xe0 then if n >= 0xa0 && n <= 0xbf then let a = 0x7f land n in decode_trail 1 a s (i+1) text else - decode_start s i (Text.append_char subst_char text) + decode_start s i (Text.append_uchar text subst_char) else if a = 0xed then if n >= 0x80 && n <= 0x9f then let a = (a - 0xe0) lsl 6 lor (0x7f land n) in decode_trail 1 a s (i+1) text else - decode_start s i (Text.append_char subst_char text) + decode_start s i (Text.append_uchar text subst_char) (* 4-bytes code *) else if a = 0xf0 then if n >= 0x90 && n <= 0xbf then let a = (a - 0xf0) lsl 6 lor (0x7f land n) in decode_trail 2 a s (i+1) text else - decode_start s i (Text.append_char subst_char text) + decode_start s i (Text.append_uchar text subst_char) else if a >= 0xf1 && a <= 0xf3 then if n >= 0x80 && n <= 0xbf then let a = (a - 0xf0) lsl 6 lor (0x7f land n) in decode_trail 2 a s (i+1) text else - decode_start s i (Text.append_char subst_char text) + decode_start s i (Text.append_uchar text subst_char) else if a = 0xf4 then if n >= 0x80 && n <= 0x8f then let a = (a - 0xf0) lsl 6 lor (0x7f land n) in decode_trail 2 a s (i+1) text else - decode_start s i (Text.append_char subst_char text) + decode_start s i (Text.append_uchar text subst_char) else - decode_start s i (Text.append_char subst_char text) + decode_start s i (Text.append_uchar text subst_char) and decode_trail count a s i text = if i >= String.length s then (`Trail (count, a), text) else (* FIX ME *) @@ -2304,13 +1391,13 @@ module CharEncoding = struct if n >= 0x80 && n <= 0xbf then let a = a lsl 6 lor (0x7f land n) in if count = 1 then - decode_start s (i+1) (Text.append_char (UChar.chr a) text) + decode_start s (i+1) (Text.append_uchar text (UChar.chr a)) else if count = 2 then decode_trail 1 a s (i+1) text else assert false else - decode_start s i (Text.append_char subst_char text) + decode_start s i (Text.append_uchar text subst_char) let decode st s = match st with @@ -2347,7 +1434,7 @@ module CharEncoding = struct Buffer.add_char b (Char.chr (n2 lsr 8)); Buffer.add_char b (Char.chr (n2 land 0xff)); end in - `Success ((), (Text.iter conv text; Buffer.contents b)) + `Success ((), (Text.iter text conv; Buffer.contents b)) let terminate () = "" end @@ -2372,19 +1459,19 @@ module CharEncoding = struct if n >= 0xD800 && n <= 0xDBFF then `Surrogate n, text else if n >= 0xDC00 && n <= 0xDFFF then - `Byte c, Text.append_char subst_char text + `Byte c, Text.append_uchar text subst_char else - `Success, Text.append_char (UChar.of_int n) text + `Success, Text.append_uchar text (UChar.of_int n) | `Surrogate n -> if Char.code c < 0xDC or Char.code c > 0xdf then - `Byte c, Text.append_char subst_char text + `Byte c, Text.append_uchar text subst_char else `Surrogate_with_Byte (n, c), text | `Surrogate_with_Byte (n0, c0) -> let n = Char.code c0 in let n = (n lsl 8) lor (Char.code c) in let n1 = 0x10000 + (n0 - 0xD800) lsl 10 lor (n - 0xDC00) in - `Success, Text.append_char (UChar.of_int n1) text in + `Success, Text.append_uchar text (UChar.of_int n1) in fold_string conv (state, Text.empty) s let terminate = function @@ -2415,7 +1502,7 @@ module CharEncoding = struct Buffer.add_char b (Char.chr (n2 land 0xff)); Buffer.add_char b (Char.chr (n2 lsr 8)); end in - `Success ((), (Text.iter conv text; Buffer.contents b)) + `Success ((), (Text.iter text conv; Buffer.contents b)) let terminate () = "" end @@ -2446,9 +1533,9 @@ module CharEncoding = struct if n >= 0xD800 && n <= 0xDBFF then `Surrogate n, text else if n >= 0xDC00 && n <= 0xDFFF then - `Byte c, Text.append_char subst_char text + `Byte c, Text.append_uchar text subst_char else - `Success, Text.append_char (UChar.of_int n) text + `Success, Text.append_uchar text (UChar.of_int n) | `Surrogate n -> `Surrogate_with_Byte (n, c), text | `Surrogate_with_Byte (n0, c0) -> @@ -2456,9 +1543,9 @@ module CharEncoding = struct let n = (n lsl 8) lor (Char.code c0) in if n >= 0xDC00 && n <= 0xDFFF then let n1 = 0x10000 + (n0 - 0xD800) lsl 10 lor (n - 0xDC00) in - `Success, Text.append_char (UChar.of_int n1) text + `Success, Text.append_uchar text (UChar.of_int n1) else - `Byte c, Text.append_char subst_char text + `Byte c, Text.append_uchar text subst_char in fold_string conv (state, Text.empty) s @@ -2556,7 +1643,7 @@ module CharEncoding = struct Buffer.add_char b (Char.chr ((n lsr 16) land 0xff)); Buffer.add_char b (Char.chr ((n lsr 8) land 0xff)); Buffer.add_char b (Char.chr (n land 0xff)) in - `Success ((), (Text.iter conv text; Buffer.contents b)) + `Success ((), (Text.iter text conv; Buffer.contents b)) let terminate () = "" end @@ -2571,19 +1658,19 @@ module CharEncoding = struct let conv ((n, i), text) c = if i = 0 then if Char.code c > 0 then - (0, 0), Text.append_char subst_char text + (0, 0), Text.append_uchar text subst_char else (0, 1), text else if i = 1 then if Char.code c > 0x10 then - (0, 0), Text.append_char subst_char (Text.append_char subst_char text) + (0, 0), Text.append_uchar (Text.append_uchar text subst_char) subst_char else (Char.code c, 2), text else if i = 2 then (n lsl 8 lor (Char.code c), 3), text else if i = 3 then let n = n lsl 8 lor (Char.code c) in - (0, 0), Text.append_char (UChar.of_int n) text + (0, 0), Text.append_uchar text (UChar.of_int n) else assert false in fold_string conv (state, Text.empty) s @@ -2608,7 +1695,7 @@ module CharEncoding = struct Buffer.add_char b (Char.chr ((n lsr 8) land 0xff)); Buffer.add_char b (Char.chr ((n lsr 16) land 0xff)); Buffer.add_char b (Char.chr (n lsr 24)) in - `Success ((), (Text.iter conv text; Buffer.contents b)) + `Success ((), (Text.iter text conv; Buffer.contents b)) let terminate () = "" end @@ -2627,14 +1714,14 @@ module CharEncoding = struct (n lor (Char.code c lsl 8), 2), text else if i = 2 then if Char.code c > 0x10 then - (Char.code c, 1), Text.append_char subst_char text + (Char.code c, 1), Text.append_uchar text subst_char else (n lor (Char.code c lsl 16), 3), text else if i = 3 then if Char.code c > 0 then - (Char.code c, 1), Text.append_char subst_char text + (Char.code c, 1), Text.append_uchar text subst_char else - (0, 0), Text.append_char (UChar.of_int n) text + (0, 0), Text.append_uchar text (UChar.of_int n) else assert false in fold_string conv (state, Text.empty) s diff --git a/src/uCoreLib.mli b/src/uCoreLib.mli index e0dbb22..0cc7723 100644 --- a/src/uCoreLib.mli +++ b/src/uCoreLib.mli @@ -87,165 +87,8 @@ end (** Aliase for UChar.t *) type uchar = UChar.t -(** UTF-8 encoded Unicode strings. The type is normal string. *) +module Text : sig -(* Copyright (C) 2002, 2003, 2011 Yamagata Yoriyuki. *) - -(* This library is free software; you can redistribute it and/or *) -(* modify it under the terms of the GNU Lesser General Public License *) -(* as published by the Free Software Foundation; either version 2 of *) -(* the License, or (at your option) any later version. *) - -(* As a special exception to the GNU Library General Public License, you *) -(* may link, statically or dynamically, a "work that uses this library" *) -(* with a publicly distributed version of this library to produce an *) -(* executable file containing portions of this library, and distribute *) -(* that executable file under terms of your choice, without any of the *) -(* additional requirements listed in clause 6 of the GNU Library General *) -(* Public License. By "a publicly distributed version of this library", *) -(* we mean either the unmodified Library as distributed by the authors, *) -(* or a modified version of this library that is distributed under the *) -(* conditions defined in clause 3 of the GNU Library General Public *) -(* License. This exception does not however invalidate any other reasons *) -(* why the executable file might be covered by the GNU Library General *) -(* Public License . *) - -(* This library is distributed in the hope that it will be useful, *) -(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) -(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *) -(* Lesser General Public License for more details. *) - -(* You should have received a copy of the GNU Lesser General Public *) -(* License along with this library; if not, write to the Free Software *) -(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *) -(* USA *) - -(* You can contact the authour by sending email to *) -(* yori@users.sourceforge.net *) - -module UTF8 : sig - -(** UTF-8 encoded Unicode strings. The type is normal string. *) - type t = string - -(** [validate s] - successes if s is valid UTF-8, otherwise raises Malformed_code. - Other functions assume strings are valid UTF-8, so it is prudent - to test their validity for strings from untrusted origins. *) - val validate : t -> unit - -(* All functions below assume string are valid UTF-8. If not, - * the result is unspecified. *) - -(** [get s n] returns [n]-th Unicode character of [s]. - The call requires O(n)-time. *) - val get : t -> int -> UChar.t - -(** [init len f] - returns a new string which contains [len] Unicode characters. - The i-th Unicode character is initialized by [f i] *) - val init : int -> (int -> UChar.t) -> t - -(** [length s] returns the number of Unicode characters contained in s *) - val length : t -> int - -(** Positions in the string represented by the number of bytes from the head. - The location of the first character is [0] *) - type index = int - -(** [nth s n] returns the position of the [n]-th Unicode character. - The call requires O(n)-time *) - val nth : t -> int -> index - -(** The position of the head of the first Unicode character. *) - val first : t -> index - -(** The position of the head of the last Unicode character. *) - val last : t -> index - -(** [look s i] - returns the Unicode character of the location [i] in the string [s]. *) - val look : t -> index -> UChar.t - -(** [out_of_range s i] - tests whether [i] is a position inside of [s]. *) - val out_of_range : t -> index -> bool - -(** [compare_index s i1 i2] returns - a value < 0 if [i1] is the position located before [i2], - 0 if [i1] and [i2] points the same location, - a value > 0 if [i1] is the position located after [i2]. *) - val compare_index : t -> index -> index -> int - -(** [next s i] - returns the position of the head of the Unicode character - located immediately after [i]. - If [i] is inside of [s], the function always successes. - If [i] is inside of [s] and there is no Unicode character after [i], - the position outside [s] is returned. - If [i] is not inside of [s], the behaviour is unspecified. *) - val next : t -> index -> index - -(** [prev s i] - returns the position of the head of the Unicode character - located immediately before [i]. - If [i] is inside of [s], the function always successes. - If [i] is inside of [s] and there is no Unicode character before [i], - the position outside [s] is returned. - If [i] is not inside of [s], the behaviour is unspecified. *) - val prev : t -> index -> index - -(** [move s i n] - returns [n]-th Unicode character after [i] if n >= 0, - [n]-th Unicode character before [i] if n < 0. - If there is no such character, the result is unspecified. *) - val move : t -> index -> int -> index - -(** [iter f s] - applies [f] to all Unicode characters in [s]. - The order of application is same to the order - of the Unicode characters in [s]. *) - val iter : (UChar.t -> unit) -> t -> unit - -(** Code point comparison by the lexicographic order. - [compare s1 s2] returns - a positive integer if [s1] > [s2], - 0 if [s1] = [s2], - a negative integer if [s1] < [s2]. *) - val compare : t -> t -> int - -(** Buffer module for UTF-8 strings *) - module Buf : sig - (** Buffers for UTF-8 strings. *) - type buf - - (** [create n] creates the buffer with the initial size [n]-bytes. *) - val create : int -> buf - - (* The rest of functions is similar to the ones of Buffer in stdlib. *) - (** [contents buf] returns the contents of the buffer. *) - val contents : buf -> t - - (** Empty the buffer, - but retains the internal storage which was holding the contents *) - val clear : buf -> unit - - (** Empty the buffer and de-allocate the internal storage. *) - val reset : buf -> unit - - (** Add one Unicode character to the buffer. *) - val add_char : buf -> UChar.t -> unit - - (** Add the UTF-8 string to the buffer. *) - val add_string : buf -> t -> unit - - (** [add_buffer b1 b2] adds the contents of [b2] to [b1]. - The contents of [b2] is not changed. *) - val add_buffer : buf -> buf -> unit - end with type buf = Buffer.t -end - -module Text' : sig type t val empty : t @@ -320,19 +163,6 @@ module Text' : sig (** Raises Invalid_argument "index out of bounds" if the iterator already locates in the first character of the underlining text. *) val prev_exn : iterator -> iterator - (** [move i n] returns the iterator which locates [n]-th*) - (** characters from [i]. If such a location does not exist, return*) - (** [`Out_of_range (it, n)]. [it] points the last position which - [move] can success, and [n] is the number of the move to be done. - If [n] is negative, move the left. *) - val move : iterator -> int -> - [`Success of iterator | `Out_of_range of iterator * int] - (** The same as above but raises Invalid_argument "index out of - bounds" instead or returning None.*) - val move_exn : iterator -> int -> iterator - (** Move the iterator as much as possible toward the [n]-th - character.*) - val move_as_possible : iterator -> int -> iterator (** Returns the value of the location which the iterator points. *) val value : iterator -> uchar @@ -342,451 +172,30 @@ module Text' : sig (** Returns the position of the iterator *) val pos : iterator -> int - (** Zipper like operations. *) - (** [insert i t] inserts [t] into the right of [i]. *) - val insert : iterator -> t -> iterator - (** [delete_left i] deletes the left side of [i]. *) - val delete_left : iterator -> iterator - (** [delete_right i] deletes the right side of [i]. *) - val delete_right : iterator -> iterator - (** [sub i n] creates the iterator which runs over substring which - begins position [i] to [n]-th character from [i].*) - val sub : iterator -> int -> iterator option - (** Raise invalid_arg "iterator out of bound".*) - val sub_exn : iterator -> int -> iterator - - (** Fold.*) + (** Text modifications *) + (** [insert t0 n t] inserts [t] into the left of [n]-th character in + [t0] *) + val insert : t -> int -> t -> t option + (** Raise invalid_arg "index out of bound" if [pos] is not contained in [t]*) + val insert_exn : t -> int -> t -> t + (** [delete t pos len] deletes length [len] text from [pos]-th + character in [t] *) + val delete : t -> pos:int -> len:int -> t option + (** Raise invalid_arg "index out of bound" if [pos]-[pos+len-1] is not + contained in [t] *) + val delete_exn : t -> pos:int -> len:int -> t + (** [sub t pos len] obtains the substring of [t] which starts from + [pos]-th character and has [len] characters *) + val sub : t -> pos:int -> len:int -> t option + (** Raise invalid_arg "index out of bound" if [pos]-[pos+len-1] is not + contained in [t] *) + val sub_exn : t -> pos:int -> len:int -> t + + (** Fold*) val fold : t -> 'a -> ('a -> uchar -> 'a) -> 'a end -(* Rope: a simple implementation of ropes as described in - -Boehm, H., Atkinson, R., and Plass, M. 1995. Ropes: an alternative to -strings. Softw. Pract. Exper. 25, 12 (Dec. 1995), 1315-1330. - -Motivated by Luca de Alfaro's extensible array implementation Vec. - -Copyright (C) 2011, 2013 Yoriyuki Yamagata <yoriyuki.y@gmail.com> - 2007 Mauricio Fernandez <mfp@acm.org> -http://eigenclass.org - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Library General Public -License as published by the Free Software Foundation; either -version 2 of the License, or (at your option) any later version, -with the following special exception: - -You may link, statically or dynamically, a "work that uses the -Library" with a publicly distributed version of the Library to -produce an executable file containing portions of the Library, and -distribute that executable file under terms of your choice, without -any of the additional requirements listed in clause 6 of the GNU -Library General Public License. By "a publicly distributed version -of the Library", we mean either the unmodified Library as -distributed by the author, or a modified version of the Library that is -distributed under the conditions defined in clause 2 of the GNU -Library General Public License. This exception does not however -invalidate any other reasons why the executable file might be -covered by the GNU Library General Public License. - -This library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Library General Public License for more details. - -The GNU Library General Public License is available at -http://www.gnu.org/copyleft/lgpl.html; to obtain it, you can also -write to the Free Software Foundation, Inc., 59 Temple Place - -Suite 330, Boston, MA 02111-1307, USA. -*) - -(** Heavyweight strings ("ropes") - -This module implements ropes as described in -Boehm, H., Atkinson, R., and Plass, M. 1995. Ropes: an alternative to -strings. Softw. Pract. Exper. 25, 12 (Dec. 1995), 1315-1330. - -Ropes are an alternative to strings which support efficient operations: -- determining the length of a rope in constant time -- appending or prepending a small rope to an arbitrarily large one in amortized constant time -- concat, substring, insert, remove operations in amortized logarithmic time -- access to and modification of ropes in logarithmic time - -{8 Functional nature and persistence} - -All operations are non-destructive: the original rope is never modified. When a -new rope is returned as the result of an operation, it will share as much data -as possible with its "parent". For instance, if a rope of length [n] undergoes -[m] operations (assume [n >> m]) like set, append or prepend, the modified -rope will only require [O(m)] space in addition to that taken by the -original one. - -However, Rope is an amortized data structure, and its use in a persistent setting -can easily degrade its amortized time bounds. It is thus mainly intended to be used -ephemerally. In some cases, it is possible to use Rope persistently with the same -amortized bounds by explicitly rebalancing ropes to be reused using [balance]. -Special care must be taken to avoid calling [balance] too frequently; in the limit, -calling [balance] after each modification would defeat the purpose of amortization. - - -{8 Limitations} - -The length of ropes is limited to approximately 700 Mb on 32-bit -architectures, 220 Gb on 64 bit architectures. - -@author Mauricio Fernandez -*) - -module Text : sig - type t - (** The type of the ropes. *) - - exception Out_of_bounds - (** Raised when an operation violates the bounds of the rope. *) - - exception Invalid_rope - (** An exception thrown when some operation required a rope - and received an unacceptable rope.*) - - val max_length : int - (** Maximum length of the rope (number of UTF-8 characters). *) - - (** {6 Creation and conversions} *) - - val empty : t - (** The empty rope. *) - - val of_latin1 : string -> t - (** Constructs a unicode rope from a latin-1 string. *) - - val of_string : string -> t - (** [of_string s] returns a rope corresponding to the UTF-8 encoded string [s].*) - - val to_string : t -> string - (** [to_string t] returns a UTF-8 encoded string representing [t]*) - - val of_uchar : UChar.t -> t - (** [of_uchar c] returns a rope containing exactly character [c].*) - - val of_char : char -> t - (** [of_char c] returns a rope containing exactly Latin-1 character [c].*) - - val make : int -> UChar.t -> t - (** [make i c] returns a rope of length [i] consisting of [c] chars; - it is similar to String.make *) - - val join : t -> t list -> t - (** Same as {!concat} *) - - val explode : t -> UChar.t list - (** [explode s] returns the list of characters in the rope [s]. *) - - val implode : UChar.t list -> t - (** [implode cs] returns a rope resulting from concatenating - the characters in the list [cs]. *) - - - (** {6 Properties } *) - - val is_empty : t -> bool - (** Returns whether the rope is empty or not. *) - - val length : t -> int - (** Returns the length of the rope ([O(1)]). - This is number of UTF-8 characters. *) - - val height : t -> int - (** Returns the height (depth) of the rope. *) - - val balance : t -> t - (** [balance r] returns a balanced copy of the [r] rope. Note that ropes are - automatically rebalanced when their height exceeds a given threshold, but - [balance] allows to invoke that operation explicity. *) - - (** {6 Operations } *) - - val append : t -> t -> t - (** [append r u] concatenates the [r] and [u] ropes. In general, it operates - in [O(log(min n1 n2))] amortized time. - Small ropes are treated specially and can be appended/prepended in - amortized [O(1)] time. *) - - val ( ^^^ ): t -> t -> t - (** As {!append}*) - - val append_char : UChar.t -> t -> t - (** [append_char c r] returns a new rope with the [c] character at the end - in amortized [O(1)] time. *) - - val prepend_char : UChar.t -> t -> t - (** [prepend_char c r] returns a new rope with the [c] character at the - beginning in amortized [O(1)] time. *) - - val get : t -> int -> UChar.t - (** [get r n] returns the (n+1)th character from the rope [r]; i.e. - [get r 0] returns the first character. - Operates in worst-case [O(log size)] time. - Raises Out_of_bounds if a character out of bounds is requested. *) - - val set : t -> int -> UChar.t -> t - (** [set r n c] returns a copy of rope [r] where the (n+1)th character - has been set to [c]. See also {!get}. - Operates in worst-case [O(log size)] time. *) - - val sub : t -> int -> int -> t - (** [sub r m n] returns a sub-rope of [r] containing all characters - whose indexes range from [m] to [m + n - 1] (included). - Raises Out_of_bounds in the same cases as sub. - Operates in worst-case [O(log size)] time. *) - - val insert : int -> t -> t -> t - (** [insert n r u] returns a copy of the [u] rope where [r] has been - inserted between the characters with index [n] and [n + 1] in the - original rope. The length of the new rope is - [length u + length r]. - Operates in amortized [O(log(size r) + log(size u))] time. *) - - val remove : int -> int -> t -> t - (** [remove m n r] returns the rope resulting from deleting the - characters with indexes ranging from [m] to [m + n - 1] (included) - from the original rope [r]. The length of the new rope is - [length r - n]. - Operates in amortized [O(log(size r))] time. *) - - val concat : t -> t list -> t - (** [concat sep sl] concatenates the list of ropes [sl], - inserting the separator rope [sep] between each. *) - - - (** {6 Iteration} *) - - val iter : (UChar.t -> unit) -> t -> unit - (** [iter f r] applies [f] to all the characters in the [r] rope, - in order. *) - - val iteri : ?base:int -> (int -> UChar.t -> unit) -> t -> unit - (** Operates like [iter], but also passes the index of the character - to the given function. *) - - val range_iter : (UChar.t -> unit) -> int -> int -> t -> unit - (** [rangeiter f m n r] applies [f] to all the characters whose - indices [k] satisfy [m] <= [k] < [m + n]. - It is thus equivalent to [iter f (sub m n r)], but does not - create an intermediary rope. [rangeiter] operates in worst-case - [O(n + log m)] time, which improves on the [O(n log m)] bound - from an explicit loop using [get]. - Raises Out_of_bounds in the same cases as [sub]. *) - - val range_iteri : - (int -> UChar.t -> unit) -> ?base:int -> int -> int -> t -> unit - (** As [range_iter], but passes base + index of the character in the - subrope defined by next to arguments. *) - - val fold : ('a -> UChar.t -> 'a ) -> 'a -> t -> 'a - (** [Rope.fold f a r] computes [ f (... (f (f a r0) r1)...) rN-1 ] - where [rn = Rope.get n r ] and [N = length r]. *) - - val init : int -> (int -> UChar.t) -> t - (** [init l f] returns the rope of length [l] with the chars f 0 , f - 1 , f 2 ... f (l-1). *) - - val map : (UChar.t -> UChar.t) -> t -> t - (** [map f s] returns a rope where all characters [c] in [s] have been - replaced by [f c]. **) - - val filter_map : (UChar.t -> UChar.t option) -> t -> t - (** [filter_map f l] calls [(f a0) (f a1).... (f an)] where [a0..an] are - the characters of [l]. It returns the list of elements [bi] such as - [f ai = Some bi] (when [f] returns [None], the corresponding element of - [l] is discarded). *) - - val filter : (UChar.t -> bool) -> t -> t - (** [filter f s] returns a copy of rope [s] in which only - characters [c] such that [f c = true] remain.*) - - - - (** {6 Finding}*) - - val index : t -> UChar.t -> int - (** [Rope.index s c] returns the position of the leftmost - occurrence of character [c] in rope [s]. - Raise [Not_found] if [c] does not occur in [s]. *) - - val index_from : t -> int -> UChar.t -> int - (** Same as {!Rope.index}, but start searching at the character - position given as second argument. [Rope.index s c] is - equivalent to [Rope.index_from s 0 c].*) - - val rindex : t -> UChar.t -> int - (** [Rope.rindex s c] returns the position of the rightmost - occurrence of character [c] in rope [s]. - Raise [Not_found] if [c] does not occur in [s]. *) - - val rindex_from : t -> int -> UChar.t -> int - (** Same as {!rindex}, but start - searching at the character position given as second argument. - [rindex s c] is equivalent to - [rindex_from s (length s - 1) c]. *) - - val contains : t -> UChar.t -> bool - (** [contains s c] tests if character [c] - appears in the rope [s]. *) - - val contains_from : t -> int -> UChar.t -> bool - (** [contains_from s start c] tests if character [c] appears in - the subrope of [s] starting from [start] to the end of [s]. - - @raise Invalid_argument if [start] is not a valid index of [s]. *) - - val rcontains_from : t -> int -> UChar.t -> bool - (** [rcontains_from s stop c] tests if character [c] - appears in the subrope of [s] starting from the beginning - of [s] to index [stop]. - @raise Invalid_argument if [stop] is not a valid index of [s]. *) - - val find : t -> t -> int - (** [find s x] returns the starting index of the first occurrence of - rope [x] within rope [s]. - - {b Note} This implementation is optimized for short ropes. - - @raise Invalid_rope if [x] is not a subrope of [s]. *) - - val find_from : t -> int -> t -> int - (** [find_from s ofs x] behaves as [find s x] but starts searching - at offset [ofs]. [find s x] is equivalent to [find_from s 0 x].*) - - val rfind : t -> t -> int - (** [rfind s x] returns the starting index of the last occurrence - of rope [x] within rope [s]. - - {b Note} This implementation is optimized for short ropes. - - @raise Invalid_rope if [x] is not a subrope of [s]. *) - - val rfind_from : t -> int -> t -> int - (** [rfind_from s ofs x] behaves as [rfind s x] but starts searching - at offset [ofs]. [rfind s x] is equivalent to [rfind_from s (length s - 1) x].*) - - - val starts_with : t -> t -> bool - (** [starts_with s x] returns [true] if [s] is starting with [x], [false] otherwise. *) - - val ends_with : t -> t -> bool - (** [ends_with s x] returns [true] if the rope [s] is ending with [x], [false] otherwise. *) - - val exists : t -> t -> bool - (** [exists str sub] returns true if [sub] is a subrope of [str] or - false otherwise. *) - - val left : t -> int -> t - (**[left r len] returns the rope containing the [len] first characters of [r]*) - - val right : t -> int -> t - (**[left r len] returns the rope containing the [len] last characters of [r]*) - - val head : t -> int -> t - (**as {!left}*) - - val tail : t -> int -> t - (**[tail r pos] returns the rope containing all but the [pos] first characters of [r]*) - - val strip : ?chars:(UChar.t list) -> t -> t - (** Returns the rope without the chars if they are at the beginning or - at the end of the rope. By default chars are " \t\r\n". *) - - val lchop : t -> t - (** Returns the same rope but without the first character. - does nothing if the rope is empty. *) - - val rchop : t -> t - (** Returns the same rope but without the last character. - does nothing if the rope is empty. *) - - val slice : ?first:int -> ?last:int -> t -> t - (** [slice ?first ?last s] returns a "slice" of the rope - which corresponds to the characters [s.[first]], - [s.[first+1]], ..., [s[last-1]]. Note that the character at - index [last] is {b not} included! If [first] is omitted it - defaults to the start of the rope, i.e. index 0, and if - [last] is omitted is defaults to point just past the end of - [s], i.e. [length s]. Thus, [slice s] is equivalent to - [copy s]. - - Negative indexes are interpreted as counting from the end of - the rope. For example, [slice ~last:-2 s] will return the - rope [s], but without the last two characters. - - This function {b never} raises any exceptions. If the - indexes are out of bounds they are automatically clipped. - *) - - val splice : t -> int -> int -> t -> t - (** [splice s off len rep] returns the rope in which the section of [s] - indicated by [off] and [len] has been cut and replaced by [rep]. - - Negative indices are interpreted as counting from the end of the string.*) - - val fill : t -> int -> int -> UChar.t -> t - (** [fill s start len c] returns the rope in which - characters number [start] to [start + len - 1] of [s] has - been replaced by [c]. - - @raise Invalid_argument if [start] and [len] do not - designate a valid subrope of [s]. *) - - val blit : t -> int -> t -> int -> int -> t - (** [blit src srcoff dst dstoff len] returns a copy - of [dst] in which [len] characters have been copied - from rope [src], starting at character number [srcoff], to - rope [dst], starting at character number [dstoff]. It works - correctly even if [src] and [dst] are the same rope, - and the source and destination chunks overlap. - - @raise Invalid_argument if [srcoff] and [len] do not - designate a valid subrope of [src], or if [dstoff] and [len] - do not designate a valid subrope of [dst]. *) - - val concat : t -> t list -> t - (** [concat sep sl] concatenates the list of ropes [sl], - inserting the separator rope [sep] between each. *) - - val replace : str:t -> sub:t -> by:t -> bool * t - (** [replace ~str ~sub ~by] returns a tuple constisting of a boolean - and a rope where the first occurrence of the rope [sub] - within [str] has been replaced by the rope [by]. The boolean - is [true] if a substitution has taken place, [false] otherwise. *) - - (** {6 Splitting around}*) - - val split : t -> t -> t * t - (** [split s sep] splits the rope [s] between the first - occurrence of [sep]. - @raise Invalid_rope if the separator is not found. *) - - val rsplit : t -> t -> t * t - (** [rsplit s sep] splits the rope [s] between the last - occurrence of [sep]. - @raise Invalid_rope if the separator is not found. *) - - val nsplit : t -> t -> t list - (** [nsplit s sep] splits the rope [s] into a list of ropes - which are separated by [sep]. - [nsplit "" _] returns the empty list. *) - - val compare : t -> t -> int - (** The comparison function for ropes, with the same specification as - {!Pervasives.compare}. Along with the type [t], this function [compare] - allows the module [Rope] to be passed as argument to the functors - {!Set.Make} and {!Map.Make}. *) - - (** {6 Boilerplate code}*) -end - -(** Aliase for Text.t *) type text = Text.t -type cursor = int module CharEncoding : sig diff --git a/test/test.ml b/test/test.ml index b3f16780388525dace895fdf736cec5eb5e4bc32..cc14dc1235d4d8f91425289f3274cd0402243fac 100644 GIT binary patch delta 963 zcmZ`&Pe>GD6lZMAW(r$fNutt+gzaeiXBI5cRwygVmA0*2BCw`i_ZuBqXKm&iF%QMw z0;Q<eC6XYbYZM=y@)TW!B|8OOx&$JfGQvxjzTLt7hx?u0{NC^VzTfxWo9#cso3FyV z1wo=@fr3u~o#lniW}I(XM(;o;`aI8Zu|3b0I<^INv~x!ELZ2ef<}yx}KmsC=;C1Z9 z5v`>yo3f^eW;(MRRYUx+zdak`q5E@|VNzlZ`E<tw8~h_+E5N>=h%`y%5^M{0v*Et; zVl9NP14ad5ZNa$UDc1mZ0RsxY_j}Q!?#JcO@d_6|gr2ne)P5+_6^yFm6#`pP`-RFU z7Q($AIZG%|VpEN3a~7G$Tk7GaYzptGT~*8#wd<g-EDL@x6#S?zcGO`YOOk}A!f~-A zwG=*6rBMi<WWNS)h@wUoOgtO8QVQ7P$Zt`yDR0=6mebk8bNiZIM*j5LgQB^~oJmuL zX>*r+Tu!JsfPkLMW?Vo|Wo-hbMyJ8yt(E}{9nwIji3!wl2oTP2(d6iK$_kLXMH#1# z0VO3xu5=`WnM-e3=j97cuA{5hqNU(zY|Qn(75&^Gg=0IMXR*~roULd#K8kzro7RrK z*I(^^2~H$_?4Duk$uAA0S@`B!U5Z_tDAYka@4&6-F&w$w&W(Jky$XNc8F!IjYNIw) zE?!S|;ekmB_m1|lf9aP3^XS1n-BM3+1$tm)XsCL<hWQ+dhvjBATL<~S7lByPG-x<r eIi}|BA=PI2yup`qiV40HS}9V&b_`1RfaL#o15<qf delta 3555 zcmcInUu;ul6i-Jtb_-#f*s;J?&dOl-4p!Q2(r)~5fGh?EB4eU1rrx%<-3{%%<=)#d z9!z7z=#w#iPrMjki1EP$b03|Fe`0{dM2v|~#`t7-lNm#zPoDFAx3}%qDTLUUt=~D{ zIluEe|GwV57kKzb;MQ_L%7d(cqb)n`n}Pf7TjUX#zc@3Rf)RxkHoy0DQFBmgJL!s( zPo*u@$eAU!+O~sz({(9`nkjZLA+wX+iuh9MeyfvCV+GA&e|0CT790gj%R?M=BLw*2 zQr*!kkn<G-DlmGCp9842tyxZ5yHZh$c&^6eK}$L833VfDfu(>g4w`faEhuV+5O8%2 zLN{{SGK^Feen3%VD>W=K$6z>%U_)G~0VAE7hL85@qF&U%FdYz=%!-pX^Tb0z!`p*s zRDqn?Dgu6?8Y`OSWnZ&7CPFlIR4`&-20XRqkh$rcX=sla_*rvFHws?-YP)4tj2!!{ zJIsuOTaEw{<v`iAF)J5%E@Qgk5PLNigofa%fhN4d9wc|XE?Dql;=yA`DD?B>F`?%@ zFL7`d(K^<cR<t!w>ZM~CQE(q+OIyO=(H=Reu5~i3P%#kdQAl{6>Wlz*Rx#@*D)~HC z)<`|;nVJt2#j=*BI$FIJ`9;o1BOlc!LP%^UYF2bZNB%%QZdvLzDsoVp3dkO7(v*qW zQ79{-*q5=`q2VgXW!MkPif@7gs&Y;UubRuHZO5W~W)Os^fDNQ$LdxO##PXm`!_f@K zMiX8Odd-tD%_2l?M=K|yUU_HDQdzB9RN6KfiF<5wT)yLMQBy6*G{6!4>?;uN>>}y- z4f`0ls&!yvO~4)t7dgx{6CtvnI8@_-R9f^xz+ly6xGGYYsCPHkv<^r34XWvo)FNIg zR9#dbh>CBeHQb<9FXSe3nW`w|3<mK8nH_&D0xiuZ*0{f_XwCIy_ylMsHbjF3Mnr@{ zA?#u-dCg{pa2MO!d80o>X<EC2ytcqi)W>!mke@B7PIeJw$GRq9FR4X6N4;(zhB>oD z1$|Y=fqzyl=1r@lVSSo8joR@Xx>eZprf3z%%dwC|-o*H$wIxPT<Er2aO?yIm)FmMS z=mo>Hw59>acgW$8R0Zz0<wBsFd;x|eQ7=;S0wlol9G?_1tlI*uHf-HBTByTo%-5)~ zE)ng_>Ak+I&NF9FY<)3;SR^T%O>Tv?I@LJkym>(+7FFB95yD3Yz^raNvAkK#rHfh~ zw<+y@9dI~$lviqV)7vdNHL1h`@*XyjO7?rr=50w`!5N+{qDivOzG@4zU!fx+;))8- z>*!$l)QG!JdKeg}2dNwD!yKHPU_bPY_i(Bjf>YeyE#+W%n6^vF%vJDRa1lXY#Kk@A z-snniW=r877j}FfWaoDV-B5q3ts@bqk9#JP4YGGTdf27NcJ@<Lul?^9_KXF#awR~M z@QlLZLlJg+XoxgE*!y-n$r6n1ouvc!&%L_?{3y=)hxf5ArDK5H6QX*T&|9wgq4qL6 zkl2rJw2@6HVx{iuoAK|)FElb4`@ddjYr5P9riZup)FC%E_Ra9~lf0#Q2g@86KEAw- zXe<BT(tn3jGZcwe?L-T`A84LqJNd21t<oar3OKt_NFZs5bFfl~UFm&CbO!09*OyEr zIlhhC@zJq2k~8df@@{lZUN$bciKhsxZvgvO3AsJWLXee5&$(ZZJ_t&V>Ba7TKGxYz z=7VIO{k*V~eb_h7<|h~1ISluU$y04?b@mKPpFhn$JwM=ndF12zLFe>o_oeBT`T=UN zdndp3ZyY-Hi~s+fxrqDY%q?Hp9XfrNe{rYJe&?T}lU06Y^~G6s{K9Xw>+YSo-vi>O z!ODDp8%!}Zw~M{7&>4mVJualE_({PBJWAhxb>g6R;-#6IH44Y3rZ(IG>O!oS|2kQp zn2#1t?Bk*qyaN|xEc3Ev*oynf{LxK)R2KlRi4_M?^35BZt<1~MQSJpcPmZcJ_^rY6 uauHW*bxmM5s3HQ(MgI-9{gwOMcBjR3wJJuMPc+9#msH&VyMoOd3jG6t7*~=2 From f69e95b7b63e41927c408e53584dca3fff158df4 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Tue, 15 Apr 2014 02:41:09 +0900 Subject: [PATCH 13/22] test_t_value --- test/test.ml | Bin 26661 -> 27322 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/test/test.ml b/test/test.ml index cc14dc1235d4d8f91425289f3274cd0402243fac..fba789c47e44fece836b37f2b71dba470110cbc2 100644 GIT binary patch delta 338 zcmZ2_fpOPW#tp6tj5eFy6i#wXeyPmEQkIxgI$2U)ZE~H0giuaui9%+Hf~`V`URq{R zafw37<itRU$tx73Ci|;$Nm+9#C?F}xORXr0Pp!xUY5^;IN}Mt!MWRfLCr;TM;*{Nl zD4Tp(QGBwv5+4W1?Lc>HZsybcsISJU1PX>!B?UVxD}|EO;*$81c(4e_#~=`E&84BW TnIrNVABTdLCRlLtvvhj^h-7I$ delta 49 xcmdmWm2v3>#tp6tjJBKI6i#w%ZrA>x?gvDRFh=SE-S<KTkwCs$|L0|2sL52yeD From b678924c1c7e6f1b3b7cfb5dfe473648e2130526 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Tue, 15 Apr 2014 02:45:18 +0900 Subject: [PATCH 14/22] Fix bug in Text.compare --- src/uCoreLib.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index f1a48d7..bb510ef 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -1043,7 +1043,7 @@ module Text = struct None, None -> 0 | None, _ -> -1 | _, None -> 1 - | Some it1, Some i2 -> compare_iterator it1 it2 + | Some it1, Some it2 -> compare_iterator it1 it2 let compare t1 t2 = compare_iterator (first t1) (first t2) From 21a5f159be6a79cba052cdf819ccf043e0aa2d19 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Tue, 15 Apr 2014 02:54:09 +0900 Subject: [PATCH 15/22] test for Text.base --- test/test.ml | Bin 27322 -> 27496 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/test/test.ml b/test/test.ml index fba789c47e44fece836b37f2b71dba470110cbc2..88e97a237a6708525d4071b16dabd9b347f27657 100644 GIT binary patch delta 96 zcmdmWmGQ+j#trS-{9HMyB?={}#U=42@kxorljSv&6-$arQx!Bq^pf*)3lfV`6-q!H lpk%5-W{IYPt%AYkR&8DbF-|3rn395>l@*c+oBN}+`2m8lAcFt^ delta 19 bcmaEHjd9mi#trS-n`Lyk4K_Q(Xz&97T4x7t From 87f312dca376b14147515d97cc883da63e7d950b Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Fri, 18 Apr 2014 03:55:31 +0900 Subject: [PATCH 16/22] insert --- src/uCoreLib.ml | 43 ++++++++++++++++++++++++++----------------- test/test.ml | Bin 27496 -> 27910 bytes 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index bb510ef..f62e846 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -422,7 +422,7 @@ module type BaseStringType = sig val length : t -> int val compare : t -> t -> int - type index + type index = int val first : t -> index val end_pos : t -> index val out_of_range : t -> index -> bool @@ -668,20 +668,28 @@ module Text = struct let size_l = B.size leaf_l.b.s leaf_l.i (B.next leaf_l.b.s leaf_l.j) in let size_r = B.size leaf_r.b.s leaf_r.i (B.next leaf_r.b.s leaf_r.j) in if size_l + size_r <= leaf_size then begin - if size_l + size_r <= - B.size leaf_l.b.s leaf_l.i (B.end_pos leaf_l.b.s) - && is_full_tail leaf_l - then begin - B.blit leaf_r.b.s leaf_r.i leaf_r.j leaf_l.b.s leaf_l.b.unused; - end else begin - let s = B.create leaf_size in - B.blit leaf_l.b.s leaf_l.i leaf_l.j s (B.first s); - B.blit leaf_r.b.s leaf_r.i leaf_r.j s (B.move_by_bytes s (B.first s) size_l); - leaf_l.b.s <- s; - end; - leaf_l.b.unused <- B.move_by_bytes leaf_l.b.s leaf_l.b.unused size_r; - let leaf = {leaf_l with j = B.prev leaf_l.b.s leaf_l.b.unused; - len = leaf_l.len + leaf_r.len} in + let leaf = + if size_l + size_r <= + B.size leaf_l.b.s leaf_l.i (B.end_pos leaf_l.b.s) + && is_full_tail leaf_l + then begin + B.blit leaf_r.b.s leaf_r.i leaf_r.j leaf_l.b.s leaf_l.b.unused; + let b = {leaf_l.b with unused = leaf_l.b.unused + size_r} in + {b = b; + i = leaf_l.i; + j = B.prev b.s b.unused; + len = leaf_l.len + leaf_r.len} + end else begin + let s = B.create leaf_size in + B.blit leaf_l.b.s leaf_l.i leaf_l.j s (B.first s); + B.blit leaf_r.b.s leaf_r.i leaf_r.j s + (B.move_by_bytes s (B.first s) size_l); + let b = {s = s; unused = size_l + size_r} in + {b = b; + i = B.first s; + j = B.prev s (B.move_by_bytes s (B.first s) (size_l + size_r)); + len = leaf_l.len + leaf_r.len} + end in Leaf leaf end else make_concat (Leaf leaf_l) (Leaf leaf_r) (* height = 1 *) @@ -981,17 +989,18 @@ module Text = struct | Some it -> Some (delete_right it) - let sub_exn t ~pos ~len = match sub t ~pos ~len with None -> invalid_arg "iterator out of bound" | Some t -> t let insert t pos text = + if pos = 0 then Some (append text t) else + if pos = length t then Some (append t text) else match nth t pos with None -> None | Some it -> - let left = delete_right it in + let left = delete_right (prev_exn it) in let right = delete_left it in Some (append (append left text) right) diff --git a/test/test.ml b/test/test.ml index 88e97a237a6708525d4071b16dabd9b347f27657..f5ac43070c7e6f21a04907332fc5bdba14936812 100644 GIT binary patch delta 225 zcmaEHjj`<(;|3WWey*I<5`~h~;*$81_=5c6$qJgGlQ(LKNNa@X<)v1X#HUu|DIl{m zOEf1N8j8ppgG7NE6o3L;$cAL*6{i-J0L^ifEm5#l2+@NGfDI~90BS2$P)bivR|1(b zxj|M^0?0{8O;ZAyS6ot*nU@})pQZpbbhEUMs(}%w63`MQ1v@J%Bv)E<aVkNKMUjCB KZMKWi;s*dfBuS$H delta 19 bcmZp>#rWbH;|3X>&273W2Al82YVZR9SW5@^ From c6b5c38c80f068991f0edaeb4de0391513ded385 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Fri, 18 Apr 2014 23:01:03 +0900 Subject: [PATCH 17/22] delete --- src/uCoreLib.ml | 14 ++++++-------- src/uCoreLib.mli | 5 +---- test/test.ml | Bin 27910 -> 28116 bytes 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index f62e846..ac4b7ee 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -1010,17 +1010,15 @@ module Text = struct | Some t -> t let delete t ~pos ~len = - match nth t pos, nth t (pos + len -1) with - None, _ | _, None -> None + let pos, len = if len >= 0 then pos, len else pos + len + 1, -len in + match nth t (pos - 1), nth t (pos + len) with + None, None -> Empty + | None, Some it -> delete_left it + | Some it, None -> delete_right it | Some it1, Some it2 -> let left = delete_right it1 in let right = delete_left it2 in - Some (append left right) - - let delete_exn t ~pos ~len = - match delete t ~pos ~len with - None -> invalid_arg "index out of bound" - | Some t -> t + append left right let value it = B.read it.leaf.b.s it.index diff --git a/src/uCoreLib.mli b/src/uCoreLib.mli index 0cc7723..1e85996 100644 --- a/src/uCoreLib.mli +++ b/src/uCoreLib.mli @@ -180,10 +180,7 @@ module Text : sig val insert_exn : t -> int -> t -> t (** [delete t pos len] deletes length [len] text from [pos]-th character in [t] *) - val delete : t -> pos:int -> len:int -> t option - (** Raise invalid_arg "index out of bound" if [pos]-[pos+len-1] is not - contained in [t] *) - val delete_exn : t -> pos:int -> len:int -> t + val delete : t -> pos:int -> len:int -> t (** [sub t pos len] obtains the substring of [t] which starts from [pos]-th character and has [len] characters *) val sub : t -> pos:int -> len:int -> t option diff --git a/test/test.ml b/test/test.ml index f5ac43070c7e6f21a04907332fc5bdba14936812..8de5c928ed3ea1d4c5f45acd45c982533e9e4523 100644 GIT binary patch delta 115 zcmZp>#dzg5<Ayff$?IjgSyNJTQcEVwYnte$r>Cc+rYR|8=5Z+~00k9F6l@hj^uP*I zft<R6{9-F(g}R*7JS*eLj=DONXUd9hZqto0kmgi^s8Uj}v$9esNi8mkFNuc;ZN3|; G$qxYj*e6Q> delta 19 bcmca|o3ZT{<Ayff%@%qQ2AkK!Y48I8TQ>*_ From bfd7f5175f6c8d563c30f0cd4a8b2d30d5beb312 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Mon, 28 Apr 2014 00:41:17 +0900 Subject: [PATCH 18/22] sub --- src/uCoreLib.ml | 20 +++++++++++--------- src/uCoreLib.mli | 5 +---- test/test.ml | Bin 28116 -> 28320 bytes 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index ac4b7ee..5b8eb25 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -980,19 +980,21 @@ module Text = struct base it let sub t ~pos ~len = + if len = 0 then Empty else + let pos, len = + if len >= 0 then pos, len else + pos + len + 1, -len in + let pos, len = + if pos >= 0 then pos, len else + 0, pos + len in match nth t pos with - None -> None + None -> Empty | Some it -> let s = delete_left it in - match nth s len with - None -> None + match nth s (len - 1) with + None -> s | Some it -> - Some (delete_right it) - - let sub_exn t ~pos ~len = - match sub t ~pos ~len with - None -> invalid_arg "iterator out of bound" - | Some t -> t + delete_right it let insert t pos text = if pos = 0 then Some (append text t) else diff --git a/src/uCoreLib.mli b/src/uCoreLib.mli index 1e85996..ece95b4 100644 --- a/src/uCoreLib.mli +++ b/src/uCoreLib.mli @@ -183,10 +183,7 @@ module Text : sig val delete : t -> pos:int -> len:int -> t (** [sub t pos len] obtains the substring of [t] which starts from [pos]-th character and has [len] characters *) - val sub : t -> pos:int -> len:int -> t option - (** Raise invalid_arg "index out of bound" if [pos]-[pos+len-1] is not - contained in [t] *) - val sub_exn : t -> pos:int -> len:int -> t + val sub : t -> pos:int -> len:int -> t (** Fold*) val fold : t -> 'a -> ('a -> uchar -> 'a) -> 'a diff --git a/test/test.ml b/test/test.ml index 8de5c928ed3ea1d4c5f45acd45c982533e9e4523..ff585dce81b5ed7a35c458dc598034e4201d808e 100644 GIT binary patch delta 102 zcmca|n{mNi#tjyFlh?`eFc+65O;*q}o_tDI9L#>MYss9Rp1xU4uhl@pnu}8js6a`< z&dN%mB(=CCz9b&Vx8A%uZVKOI*-W0vu{k`G?`CjLcF2^OT%9X4xge8+F?Moqt~&r~ Co+N<) delta 39 xcmV+?0NDSa-2v3w0kAkOv*IpuFtcr6ln#@yYYUSfZ48s0VIz|~ZWNPQZbxDp5LN&H From 61d4650607b8c3b865e4f3bf7d1805bb8f00ee6a Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Mon, 28 Apr 2014 06:06:47 +0900 Subject: [PATCH 19/22] fold --- src/uCoreLib.mli | 2 +- test/test.ml | Bin 28320 -> 28565 bytes 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uCoreLib.mli b/src/uCoreLib.mli index ece95b4..e35bb1b 100644 --- a/src/uCoreLib.mli +++ b/src/uCoreLib.mli @@ -185,7 +185,7 @@ module Text : sig [pos]-th character and has [len] characters *) val sub : t -> pos:int -> len:int -> t - (** Fold*) + (** Fold *) val fold : t -> 'a -> ('a -> uchar -> 'a) -> 'a end diff --git a/test/test.ml b/test/test.ml index ff585dce81b5ed7a35c458dc598034e4201d808e..357bfe811a63260876251d0c9d95fc1ccec12267 100644 GIT binary patch delta 185 zcmZ2*mvQQS#to13Ca;s_VM)u+NtvvmX*~IquAqH!YGP4xMtp8!g+i%;LaBl+mjV!E zrYVFvXCxNsCFkcBBo?JY<m_w}3=~Q-Qu9FE)SP0V@Z^ia!WJ43wfSlB$v`~{>gnp5 z3K}7LAbS)_6ri%n8AS>PnhFT}G`TiE(d#l0=TrhIR8p|BvQj8XEiQ>Ki3bU6&WShS F2LPQUJr@7~ delta 22 ecmbPwpK-xm#to13m=qK?AJFeI*qoeT#18;-Gzkd+ From 341e6b6f7af69b2023d6b2940a48e8d5467e88be Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Tue, 29 Apr 2014 03:19:25 +0900 Subject: [PATCH 20/22] Fix bug of next --- src/uCoreLib.ml | 21 +++++++++------------ test/test.ml | Bin 28565 -> 28560 bytes 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index 5b8eb25..863c0db 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -894,22 +894,19 @@ module Text = struct next_leaf p let next it = - let i = B.next it.leaf.b.s it.index in - if not (B.compare_index it.leaf.b.s i it.leaf.j > 0) then + if (B.compare_index it.leaf.b.s it.index it.leaf.j >= 0) then + match next_leaf it.path with + None -> None + | Some (path, leaf) -> + Some {path = path; leaf = leaf; index = leaf.i} + else + let i = B.next it.leaf.b.s it.index in Some {it with index = i} - else match next_leaf it.path with - None -> None - | Some (path, leaf) -> - Some {path = path; leaf = leaf; index = leaf.i} let next_exn it = - let i = B.next it.leaf.b.s it.index in - if not (B.compare_index it.leaf.b.s i it.leaf.j > 0) then - {it with index = i} - else match next_leaf it.path with + match next it with None -> invalid_arg "index out of bounds" - | Some (path, leaf) -> - {path = path; leaf = leaf; index = leaf.i} + | Some it -> it let rec prev_leaf = function Top -> None diff --git a/test/test.ml b/test/test.ml index 357bfe811a63260876251d0c9d95fc1ccec12267..00417e04c9cb09b7c3aa4e08430b5c5ec585232b 100644 GIT binary patch delta 18 acmbPwpK-!{#tk_MoAVML@J$ZPcL4xZ00?LR delta 25 hcmbPmpK<Da#tk_MOd497a}pl#F)L_kPWH)n0RV)#2`c~q From 76342328692dd42733b6e4da89722fa893767f84 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Tue, 29 Apr 2014 03:38:18 +0900 Subject: [PATCH 21/22] Better formatting, remove unnecessary comment --- src/uCoreLib.ml | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index 863c0db..ac682e9 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -471,6 +471,7 @@ module BaseString : BaseStringType = struct String.blit s1 i1 s2 j (endpos - i1) let move_by_bytes s i x = i + x + let add_substring b s i j = let endpos = next s j in Buffer.add_substring b s i (endpos - i) @@ -1070,33 +1071,6 @@ module Text = struct end -(* - * Rope: Rope: an implementation of the data structure described in - * - * Boehm, H., Atkinson, R., and Plass, M. 1995. Ropes: an alternative to - * strings. Softw. Pract. Exper. 25, 12 (Dec. 1995), 1315-1330. - * - * Motivated by Luca de Alfaro's extensible array implementation Vec. - * - * Copyright (C) 2007 Mauricio Fernandez <mfp@acm.org> - * Copyright (C) 2008 Edgar Friendly <thelema314@gmail.com> - * Copyright (C) 2008 David Teller, LIFO, Universite d'Orleans - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version, - * with the special exception on linking described in file LICENSE. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - *) (** Aliase for Text.t *) type text = Text.t From 575586991243041c7db03f6d2f8e0998c3199459 Mon Sep 17 00:00:00 2001 From: yoriyuki <yoriyuki.y@gmail.com> Date: Fri, 9 May 2014 23:49:22 +0900 Subject: [PATCH 22/22] pass all tests! --- src/uCoreLib.ml | 23 +++++++++++++---------- test/test.ml | Bin 28560 -> 29681 bytes 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/uCoreLib.ml b/src/uCoreLib.ml index ac682e9..e7ea257 100644 --- a/src/uCoreLib.ml +++ b/src/uCoreLib.ml @@ -181,6 +181,7 @@ module UTF8 = struct UChar.unsafe_chr n' let next s i = + if i < 0 then 0 else let n = Char.code s.[i] in if n < 0x80 then i + 1 else if n <= 0xdf then i + 2 @@ -445,6 +446,7 @@ module type BaseStringType = sig val add_substring : Buffer.t -> t -> index -> index -> unit val read : t -> index -> uchar + (* If the buffer is too small, [write i u] returns [i]. *) val write : t -> index -> uchar -> index end @@ -458,7 +460,7 @@ module BaseString : BaseStringType = struct let copy = String.copy let read = look let append = (^) - let create = String.create + let create len = String.make len '\255' let end_pos s = String.length s let of_string s = if is_valid s then Some (String.copy s) else None let of_string_unsafe s = s @@ -475,7 +477,7 @@ module BaseString : BaseStringType = struct let add_substring b s i j = let endpos = next s j in Buffer.add_substring b s i (endpos - i) - + let write s i u = let masq = 0b111111 in let k = UChar.code u in @@ -724,7 +726,7 @@ module Text = struct let s = B.create leaf_size in let k = B.write s (B.first s) u in let b = {s = s; unused = k} in - Leaf {b = b; i = (B.first s); j = (B.first s); len = 1} + Leaf {b = b; i = B.first s; j = B.first s; len = 1} let is_full leaf = B.compare_index leaf.b.s @@ -732,13 +734,14 @@ module Text = struct (B.end_pos leaf.b.s) >= 0 let leaf_append_uchar leaf u = - if is_full leaf then - make_concat (Leaf leaf) (new_block_uchar u) - else if is_full_tail leaf then + if not (is_full leaf) && is_full_tail leaf then let k = B.write leaf.b.s leaf.b.unused u in - let leaf = {leaf with j = leaf.b.unused; len = leaf.len + 1} in - leaf.b.unused <- k; - Leaf leaf + if B.equal_index leaf.b.s k leaf.b.unused then + make_concat (Leaf leaf) (new_block_uchar u) + else + let leaf = {leaf with j = leaf.b.unused; len = leaf.len + 1} in + leaf.b.unused <- k; + Leaf leaf else make_concat (Leaf leaf) (new_block_uchar u) @@ -751,7 +754,7 @@ module Text = struct let append_uchar l u = match l with - | Empty -> Leaf (leaf_of_uchar u) + | Empty -> new_block_uchar u | Leaf leaf_l -> leaf_append_uchar leaf_l u | Concat node -> match node.right with diff --git a/test/test.ml b/test/test.ml index 00417e04c9cb09b7c3aa4e08430b5c5ec585232b..6c1145b4ae16eb1c80cdad58bd2e43adff405f70 100644 GIT binary patch delta 726 zcmZvZ&q~8U5XOn9Xi-#p@L!d&AWhJg<fPR^!Gk`4UTui!+6K~1NLHvI>LVz7@*q9< z0zz(r_yQgj!JB&Y>>D_njTpgG$j&$4{N|gB59RAqx$TYd)0NAlH+DX9Rw(#{f!hv3 zn~^R93Jk0QfOw@~fp>_X4cM#sgtk~4tE^3E{-5|UL5)YVE~aDKUM)U4K(YR(V&p0m zR5YFaajgQ?q=(A%WnhL@;0vYVc;wJ#(6N0l7CfrPuU0E(5plr}f>6*@Qd|(AIY<j9 zr=VCDfXF6ylIF<qyRv~&IT|IYtAT77M$HLBLYra+(=$4e3qPq34NE1e_=CC4hb)zE zS><HDbfZjU%tvn9iT;K5{$m#Dn;3Y;ox>JPc_3Rlv+S`&m`%EPIN{)F8mJyDfsq3i z6@FgtpGcw8)kr5~$Ds4)Vv)a@>dyk0`4oR@3?`3r_tVMD>a{YEM#tYxO=<E0t13a< zpc6U~ku!yT1;_5P$fK<qiCrfoO}=R@CNK3?U$SoQlq(=kScZ+YHEfER&1{*joxiZ5 d^KRo{G^?a*+I(TZIQdsA_cwGsd2QT{e*>xq>GuEt delta 54 zcmV-60LlOH=K+x40kEGBvoH{V470T!q5-qJA;=k%st<;<csr;8v#mXY0s%0yFhEBl Mlc-`?vpZw95Rjf0i~s-t