From e5219546de57b7a7a0748ea36201ac58122ad041 Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Sun, 15 Oct 2023 09:13:34 +0200 Subject: [PATCH 1/3] New functions to retrieve location --- src/lib/sedlexing.ml | 66 +++++++++++++++++++++++-------------------- src/lib/sedlexing.mli | 16 +++++++++++ 2 files changed, 52 insertions(+), 30 deletions(-) diff --git a/src/lib/sedlexing.ml b/src/lib/sedlexing.ml index 5f459291..b2268543 100644 --- a/src/lib/sedlexing.ml +++ b/src/lib/sedlexing.ml @@ -261,40 +261,46 @@ let lexeme lexbuf = let lexeme_char lexbuf pos = lexbuf.buf.(lexbuf.start_pos + pos) +let lexing_position_start lexbuf = + { + Lexing.pos_fname = lexbuf.filename; + pos_lnum = lexbuf.start_line; + pos_cnum = lexbuf.start_pos + lexbuf.offset; + pos_bol = lexbuf.start_bol; + } + +let lexing_position_curr lexbuf = + { + Lexing.pos_fname = lexbuf.filename; + pos_lnum = lexbuf.curr_line; + pos_cnum = lexbuf.pos + lexbuf.offset; + pos_bol = lexbuf.curr_bol; + } + let lexing_positions lexbuf = - let start_p = - { - Lexing.pos_fname = lexbuf.filename; - pos_lnum = lexbuf.start_line; - pos_cnum = lexbuf.start_pos + lexbuf.offset; - pos_bol = lexbuf.start_bol; - } - and curr_p = - { - Lexing.pos_fname = lexbuf.filename; - pos_lnum = lexbuf.curr_line; - pos_cnum = lexbuf.pos + lexbuf.offset; - pos_bol = lexbuf.curr_bol; - } - in + let start_p = lexing_position_start lexbuf + and curr_p = lexing_position_curr lexbuf in (start_p, curr_p) +let lexing_bytes_position_start lexbuf = + { + Lexing.pos_fname = lexbuf.filename; + pos_lnum = lexbuf.start_line; + pos_cnum = lexbuf.start_bytes_pos + lexbuf.bytes_offset; + pos_bol = lexbuf.start_bytes_bol; + } + +let lexing_bytes_position_curr lexbuf = + { + Lexing.pos_fname = lexbuf.filename; + pos_lnum = lexbuf.curr_line; + pos_cnum = lexbuf.bytes_pos + lexbuf.bytes_offset; + pos_bol = lexbuf.curr_bytes_bol; + } + let lexing_bytes_positions lexbuf = - let start_p = - { - Lexing.pos_fname = lexbuf.filename; - pos_lnum = lexbuf.start_line; - pos_cnum = lexbuf.start_bytes_pos + lexbuf.bytes_offset; - pos_bol = lexbuf.start_bytes_bol; - } - and curr_p = - { - Lexing.pos_fname = lexbuf.filename; - pos_lnum = lexbuf.curr_line; - pos_cnum = lexbuf.bytes_pos + lexbuf.bytes_offset; - pos_bol = lexbuf.curr_bytes_bol; - } - in + let start_p = lexing_bytes_position_start lexbuf + and curr_p = lexing_bytes_position_curr lexbuf in (start_p, curr_p) let with_tokenizer lexer' lexbuf = diff --git a/src/lib/sedlexing.mli b/src/lib/sedlexing.mli index b129ff17..230148ed 100644 --- a/src/lib/sedlexing.mli +++ b/src/lib/sedlexing.mli @@ -133,12 +133,28 @@ val lexeme_bytes_length : lexbuf -> int by parsers like those generated by [Menhir]. *) val lexing_positions : lexbuf -> Lexing.position * Lexing.position +(** [Sedlexing.lexing_position_start lexbuf] returns the start + position, in code points, of the current token. *) +val lexing_position_start : lexbuf -> Lexing.position + +(** [Sedlexing.lexing_position_curr lexbuf] returns the end + position, in code points, of the current token. *) +val lexing_position_curr : lexbuf -> Lexing.position + (** [Sedlexing.lexing_bytes_positions lexbuf] returns the start and end positions, in bytes, of the current token, using a record of type [Lexing.position]. This is intended for consumption by parsers like those generated by [Menhir]. *) val lexing_bytes_positions : lexbuf -> Lexing.position * Lexing.position +(** [Sedlexing.lexing_bytes_position_start lexbuf] returns the start + position, in bytes, of the current token. *) +val lexing_bytes_position_start : lexbuf -> Lexing.position + +(** [Sedlexing.lexing_bytes_position_curr lexbuf] returns the end + position, in bytes, of the current token. *) +val lexing_bytes_position_curr : lexbuf -> Lexing.position + (** [Sedlexing.new_line lexbuf] increments the line count and sets the beginning of line to the current position, as though a newline character had been encountered in the input. *) From 2c7fde27a6b84071439bd974246958ce116e2520 Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Sun, 15 Oct 2023 17:09:01 +0200 Subject: [PATCH 2/3] Changes --- CHANGES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 3f90d2e9..5c7e21db 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,6 @@ +# dev +- Add API for retriving start and stop positions separatly (#155) + # 3.2 (2023-06-28): - Restore compatibility with OCaml 4.08 - Use `Sedlexing.{Utf8,Utf16}.from_gen` to initialize UTF8 (resp. UTF16) lexing buffers from From f5168c9bf6ca51e5d8b469e8394f5541513f88b8 Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Mon, 16 Oct 2023 09:06:52 +0200 Subject: [PATCH 3/3] typo --- CHANGES.md | 2 +- src/lib/sedlexing.mli | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 5c7e21db..1f6f71e7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,5 @@ # dev -- Add API for retriving start and stop positions separatly (#155) +- Add API for retrieving start and stop positions separately (#155) # 3.2 (2023-06-28): - Restore compatibility with OCaml 4.08 diff --git a/src/lib/sedlexing.mli b/src/lib/sedlexing.mli index 230148ed..2332f7cd 100644 --- a/src/lib/sedlexing.mli +++ b/src/lib/sedlexing.mli @@ -205,7 +205,7 @@ val next : lexbuf -> Uchar.t option lexer buffer and increments to current position. If the input stream is exhausted, the function returns -1. If a ['\n'] is encountered, the tracked line number is incremented. - + This is a private API, it should not be used by code using this module's API and can be removed at any time. *) val __private__next_int : lexbuf -> int