Skip to content

Commit

Permalink
Remove individual allocations for each line of source text
Browse files Browse the repository at this point in the history
  • Loading branch information
goto-bus-stop authored and zesterer committed Sep 29, 2023
1 parent 12170aa commit 762a905
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 55 deletions.
147 changes: 95 additions & 52 deletions src/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,32 +30,35 @@ impl<C: Cache<Id>, Id: ?Sized> Cache<Id> for Box<C> {
}

/// A type representing a single line of a [`Source`].
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct Line {
offset: usize,
len: usize,
chars: String,
char_len: usize,
byte_offset: usize,
byte_len: usize,
}

impl Line {
/// Get the offset of this line in the original [`Source`] (i.e., the number of characters that precede it).
pub fn offset(&self) -> usize { self.offset }

/// Get the character length of this line.
pub fn len(&self) -> usize { self.len }
pub fn len(&self) -> usize { self.char_len }

/// Get the offset span of this line in the original [`Source`].
pub fn span(&self) -> Range<usize> { self.offset..self.offset + self.len }
pub fn span(&self) -> Range<usize> { self.offset..self.offset + self.char_len }

/// Return an iterator over the characters in the line, excluding trailing whitespace.
pub fn chars(&self) -> impl Iterator<Item = char> + '_ { self.chars.chars() }
/// Compute the half-open byte range this line occupies in the original [`Source`] text.
///
/// The range starts at `byte_offset` and spans `byte_len` bytes, so it can be used to slice
/// the source text directly.
fn byte_span(&self) -> Range<usize> {
    let start = self.byte_offset;
    start..start + self.byte_len
}
}

/// A type representing a single source that may be referred to by [`Span`]s.
///
/// In most cases, a source is a single input file.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct Source {
/// Owned copy of the complete source text; lines are sliced out of it by byte range.
text: String,
/// One [`Line`] record per line of `text`, in order of appearance.
lines: Vec<Line>,
/// Total length of the source, counted in characters (not bytes).
len: usize,
}
Expand All @@ -65,7 +68,8 @@ impl<S: AsRef<str>> From<S> for Source {
///
/// Note that this function can be expensive for long strings. Use an implementor of [`Cache`] where possible.
fn from(s: S) -> Self {
let mut offset = 0;
let mut char_offset = 0;
let mut byte_offset = 0;
// (Last line, last line ends with CR)
let mut last_line: Option<(Line, bool)> = None;
let mut lines: Vec<Line> = s
Expand All @@ -85,20 +89,24 @@ impl<S: AsRef<str>> From<S> for Source {

if let Some((last, ends_with_cr)) = last_line.as_mut() {
if *ends_with_cr && line == "\n" {
last.len += 1;
offset += 1;
last.char_len += 1;
last.byte_len += 1;
char_offset += 1;
byte_offset += 1;
return replace(&mut last_line, None).map(|(l, _)| l);
}
}

let len = line.chars().count();
let char_len = line.chars().count();
let ends_with_cr = line.ends_with('\r');
let line = Line {
offset,
len,
chars: line.trim_end().to_owned(),
offset: char_offset,
char_len,
byte_offset,
byte_len: line.len(),
};
offset += len;
char_offset += char_len;
byte_offset += line.byte_len;
replace(&mut last_line, Some((line, ends_with_cr))).map(|(l, _)| l)
})
.collect();
Expand All @@ -108,8 +116,9 @@ impl<S: AsRef<str>> From<S> for Source {
}

Self {
text: s.as_ref().to_string(),
lines,
len: offset,
len: char_offset,
}
}
}
Expand All @@ -119,26 +128,30 @@ impl Source {
pub fn len(&self) -> usize { self.len }

/// Return an iterator over the characters in the source.
pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
self.lines.iter().map(|l| l.chars()).flatten()
}
pub fn chars(&self) -> impl Iterator<Item = char> + '_ { self.text.chars() }

/// Get access to a specific, zero-indexed [`Line`].
pub fn line(&self, idx: usize) -> Option<&Line> { self.lines.get(idx) }
pub fn line(&self, idx: usize) -> Option<Line> { self.lines.get(idx).copied() }

/// Return an iterator over the [`Line`]s in this source.
pub fn lines(&self) -> impl ExactSizeIterator<Item = &Line> + '_ { self.lines.iter() }
pub fn lines(&self) -> impl ExactSizeIterator<Item = Line> + '_ { self.lines.iter().copied() }

/// Get the line that the given offset appears on, and the line/column numbers of the offset.
///
/// Note that the line/column numbers are zero-indexed.
pub fn get_offset_line(&self, offset: usize) -> Option<(&Line, usize, usize)> {
pub fn get_offset_line(&self, offset: usize) -> Option<(Line, usize, usize)> {
if offset <= self.len {
let idx = self.lines
let idx = self
.lines
.binary_search_by_key(&offset, |line| line.offset)
.unwrap_or_else(|idx| idx.saturating_sub(1));
let line = &self.lines[idx];
assert!(offset >= line.offset, "offset = {}, line.offset = {}", offset, line.offset);
let line = self.lines[idx];
assert!(
offset >= line.offset,
"offset = {}, line.offset = {}",
offset,
line.offset
);
Some((line, idx, offset - line.offset))
} else {
None
Expand All @@ -151,9 +164,16 @@ impl Source {
/// [`Source::line`]).
pub fn get_line_range<S: Span>(&self, span: &S) -> Range<usize> {
let start = self.get_offset_line(span.start()).map_or(0, |(_, l, _)| l);
let end = self.get_offset_line(span.end().saturating_sub(1).max(span.start())).map_or(self.lines.len(), |(_, l, _)| l + 1);
let end = self
.get_offset_line(span.end().saturating_sub(1).max(span.start()))
.map_or(self.lines.len(), |(_, l, _)| l + 1);
start..end
}

/// Look up the text of `line` in this source, with trailing whitespace removed.
///
/// Returns `None` when the line's byte span cannot be used to slice the source text
/// (for example, a [`Line`] that was produced by a different [`Source`]).
pub fn get_line_text(&self, line: Line) -> Option<&'_ str> {
    let raw = self.text.get(line.byte_span())?;
    Some(raw.trim_end())
}
}

impl Cache<()> for Source {
Expand Down Expand Up @@ -244,38 +264,61 @@ mod tests {

use super::Source;

#[test]
fn source_from() {
fn test(lines: Vec<&str>) {
let source: String = lines.iter().map(|s| *s).collect();
let source = Source::from(source);

assert_eq!(source.lines.len(), lines.len());

let mut offset = 0;
for (source_line, raw_line) in zip(source.lines.into_iter(), lines.into_iter()) {
assert_eq!(source_line.offset, offset);
assert_eq!(source_line.len, raw_line.chars().count());
assert_eq!(source_line.chars, raw_line.trim_end());
offset += source_line.len;
}

assert_eq!(source.len, offset);
fn test_from(lines: Vec<&str>) {
let source: String = lines.iter().map(|s| *s).collect();
let source = Source::from(source);

assert_eq!(source.lines.len(), lines.len());

let mut offset = 0;
for (source_line, raw_line) in zip(source.lines.iter().copied(), lines.into_iter()) {
// dbg!(source_line, &raw_line[source_line.byte_span()]);
assert_eq!(source_line.offset, offset);
assert_eq!(source_line.char_len, raw_line.chars().count());
assert_eq!(
source.get_line_text(source_line).unwrap(),
raw_line.trim_end()
);
offset += source_line.char_len;
}

test(vec![]); // Empty string
assert_eq!(source.len, offset);
}

test(vec!["Single line"]);
test(vec!["Single line with LF\n"]);
test(vec!["Single line with CRLF\r\n"]);
/// A source built from no lines at all, i.e. the empty string.
#[test]
fn source_from_empty() {
    test_from(Vec::new());
}

test(vec!["Two\r\n", "lines\n"]);
test(vec!["Some\n", "more\r\n", "lines"]);
test(vec!["\n", "\r\n", "\n", "Empty Lines"]);
/// Sources consisting of exactly one line, with and without a line terminator.
#[test]
fn source_from_single() {
    let cases = [
        "Single line",
        "Single line with LF\n",
        "Single line with CRLF\r\n",
    ];
    for case in cases {
        test_from(vec![case]);
    }
}

test(vec!["Trailing spaces \n", "are trimmed\t"]);
/// Sources with several lines, mixing LF and CRLF terminators and empty lines.
#[test]
fn source_from_multi() {
    let cases: [Vec<&str>; 3] = [
        vec!["Two\r\n", "lines\n"],
        vec!["Some\n", "more\r\n", "lines"],
        vec!["\n", "\r\n", "\n", "Empty Lines"],
    ];
    for case in cases {
        test_from(case);
    }
}

/// Trailing spaces and tabs must not appear in the text returned for a line.
#[test]
fn source_from_trims_trailing_spaces() {
    let lines = vec!["Trailing spaces \n", "are trimmed\t"];
    test_from(lines);
}

#[test]
fn source_from_alternate_line_endings() {
// Line endings other than LF or CRLF
test(vec!["CR\r", "VT\x0B", "FF\x0C", "NEL\u{0085}", "LS\u{2028}", "PS\u{2029}"]);
test_from(vec![
"CR\r",
"VT\x0B",
"FF\x0C",
"NEL\u{0085}",
"LS\u{2028}",
"PS\u{2029}",
]);
}
}
6 changes: 3 additions & 3 deletions src/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ impl<S: Span> Report<'_, S> {

// Line
if !is_ellipsis {
for (col, c) in line.chars().enumerate() {
for (col, c) in src.get_line_text(line).unwrap().chars().enumerate() {
let color = if let Some(highlight) = get_highlight(col) {
highlight.color
} else {
Expand Down Expand Up @@ -620,7 +620,7 @@ impl<S: Span> Report<'_, S> {
&margin_label,
)?;
// Lines alternate
let mut chars = line.chars();
let mut chars = src.get_line_text(line).unwrap().chars();
for col in 0..arrow_len {
let width =
chars.next().map_or(1, |c| self.config.char_width(c, col).1);
Expand Down Expand Up @@ -674,7 +674,7 @@ impl<S: Span> Report<'_, S> {
&margin_label,
)?;
// Lines
let mut chars = line.chars();
let mut chars = src.get_line_text(line).unwrap().chars();
for col in 0..arrow_len {
let width = chars.next().map_or(1, |c| self.config.char_width(c, col).1);

Expand Down

0 comments on commit 762a905

Please sign in to comment.