Skip to content

Commit

Permalink
sourcepos is based on bytes instead of characters, so column position…
Browse files Browse the repository at this point in the history
…s for inline code expressions must be adjusted when text contains multibyte characters
  • Loading branch information
yihui committed Jun 17, 2024
1 parent 9361f1d commit 3334596
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: litedown
Type: Package
Title: A Lightweight Version of R Markdown
Version: 0.0.22
Version: 0.0.23
Authors@R: c(
person("Yihui", "Xie", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0003-0645-5666")),
person()
Expand Down
20 changes: 18 additions & 2 deletions R/fuse.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,12 @@ crack = function(input, text = NULL) {
n_start = unlist(lapply(res, function(x) x$lines[1])) # starting line numbers
j = findInterval(m[3, ], n_start) # find which block each inline code belongs to
for (i in seq_len(ncol(m))) {
pos = as.integer(m[3:6, i]); i1 = pos[1]; i2 = pos[3]
b = res[[j[i]]]; l = b$lines
# calculate new position of code after we concatenate all lines of this block by \n
# column position is based on bytes instead of chars; needs to be adjusted to the latter
pos = char_pos(text, as.integer(m[3:6, i]))
i1 = pos[1]; i2 = pos[3]
s = nchar(b$source)
# calculate new position of code after we concatenate all lines of this block by \n
b$col = c(b$col, c(
sum(s[seq_len(i1 - l[1])] + 1) + pos[2],
sum(s[seq_len(i2 - l[1])] + 1) + pos[4]
Expand Down Expand Up @@ -194,6 +196,20 @@ set_error_handler = function(input) {
.env$input = input # store the input name for get_loc()
}

# convert byte position to character position
char_pos = function(x, p) {
x2 = x[p[c(1, 3)]]
# no need to convert if no multibyte chars
if (all(nchar(x2) == nchar(x2, 'bytes'))) return(p)
p2 = p[c(2, 4)]
Encoding(x2) = 'bytes'
x2 = substr(x2, 1, p2 - 1) # go back one char in case current column is multibyte
Encoding(x2) = 'UTF-8'
p[c(2, 4)] = nchar(x2) + 1L # go forward by one char
if (p2[2] == 0) p[4] = 0L # boundary case: \n before the closing backtick
p
}

#' @details For R scripts, text blocks are extracted by removing the leading
#' `#'` tokens. All other lines are treated as R code, which can optionally be
#' separated into chunks by consecutive lines of `#|` comments (chunk options
Expand Down

0 comments on commit 3334596

Please sign in to comment.