Skip to content

Commit

Permalink
Support alternative String types in Source
Browse files Browse the repository at this point in the history
  • Loading branch information
goto-bus-stop authored and zesterer committed Sep 29, 2023
1 parent 762a905 commit 898858f
Showing 1 changed file with 110 additions and 43 deletions.
153 changes: 110 additions & 43 deletions src/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,16 @@ use std::{

/// A trait implemented by [`Source`] caches.
pub trait Cache<Id: ?Sized> {
/// The type used to store the string data for this cache.
///
/// Alternative types other than String can be used, but at the moment, the storage must be
/// contiguous. A primary use case for this is to use a reference-counted string instead of
/// copying the whole contents into a [`Source`].
type Storage: AsRef<str>;

/// Fetch the [`Source`] identified by the given ID, if possible.
// TODO: Don't box
fn fetch(&mut self, id: &Id) -> Result<&Source, Box<dyn fmt::Debug + '_>>;
fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, Box<dyn fmt::Debug + '_>>;

/// Display the given ID as a single inline value.
///
Expand All @@ -20,12 +27,16 @@ pub trait Cache<Id: ?Sized> {
}

impl<'b, C: Cache<Id>, Id: ?Sized> Cache<Id> for &'b mut C {
fn fetch(&mut self, id: &Id) -> Result<&Source, Box<dyn fmt::Debug + '_>> { C::fetch(self, id) }
type Storage = C::Storage;

fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, Box<dyn fmt::Debug + '_>> { C::fetch(self, id) }
fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>> { C::display(self, id) }
}

impl<C: Cache<Id>, Id: ?Sized> Cache<Id> for Box<C> {
fn fetch(&mut self, id: &Id) -> Result<&Source, Box<dyn fmt::Debug + '_>> { C::fetch(self, id) }
type Storage = C::Storage;

fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, Box<dyn fmt::Debug + '_>> { C::fetch(self, id) }
fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>> { C::display(self, id) }
}

Expand Down Expand Up @@ -57,22 +68,22 @@ impl Line {
///
/// In most cases, a source is a single input file.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct Source {
text: String,
pub struct Source<I: AsRef<str>> {
text: I,
lines: Vec<Line>,
len: usize,
}

impl<S: AsRef<str>> From<S> for Source {
impl<I: AsRef<str>> From<I> for Source<I> {
/// Generate a [`Source`] from the given string storage (any type implementing `AsRef<str>`).
///
/// Note that this function can be expensive for long strings. Use an implementor of [`Cache`] where possible.
fn from(s: S) -> Self {
fn from(input: I) -> Self {
let mut char_offset = 0;
let mut byte_offset = 0;
// (Last line, last line ends with CR)
let mut last_line: Option<(Line, bool)> = None;
let mut lines: Vec<Line> = s
let mut lines: Vec<Line> = input
.as_ref()
.split_inclusive([
'\r', // Carriage return
Expand Down Expand Up @@ -116,19 +127,19 @@ impl<S: AsRef<str>> From<S> for Source {
}

Self {
text: s.as_ref().to_string(),
text: input,
lines,
len: char_offset,
}
}
}

impl Source {
impl<I: AsRef<str>> Source<I> {
/// Get the total number of characters in the source.
pub fn len(&self) -> usize { self.len }

/// Return an iterator over the characters in the source.
pub fn chars(&self) -> impl Iterator<Item = char> + '_ { self.text.chars() }
pub fn chars(&self) -> impl Iterator<Item = char> + '_ { self.text.as_ref().chars() }

/// Get access to a specific, zero-indexed [`Line`].
pub fn line(&self, idx: usize) -> Option<Line> { self.lines.get(idx).copied() }
Expand Down Expand Up @@ -172,46 +183,60 @@ impl Source {

/// Get the source text for a line, excluding trailing whitespace.
pub fn get_line_text(&self, line: Line) -> Option<&'_ str> {
self.text.get(line.byte_span()).map(|text| text.trim_end())
self.text.as_ref().get(line.byte_span()).map(|text| text.trim_end())
}
}

impl Cache<()> for Source {
fn fetch(&mut self, _: &()) -> Result<&Source, Box<dyn fmt::Debug + '_>> { Ok(self) }
impl<I: AsRef<str>> Cache<()> for Source<I> {
type Storage = I;

fn fetch(&mut self, _: &()) -> Result<&Source<I>, Box<dyn fmt::Debug + '_>> { Ok(self) }
fn display(&self, _: &()) -> Option<Box<dyn fmt::Display>> { None }
}

impl<Id: fmt::Display + Eq> Cache<Id> for (Id, Source) {
fn fetch(&mut self, id: &Id) -> Result<&Source, Box<dyn fmt::Debug + '_>> {
if id == &self.0 { Ok(&self.1) } else { Err(Box::new(format!("Failed to fetch source '{}'", id))) }
impl<I: AsRef<str>, Id: fmt::Display + Eq> Cache<Id> for (Id, Source<I>) {
type Storage = I;

fn fetch(&mut self, id: &Id) -> Result<&Source<I>, Box<dyn fmt::Debug + '_>> {
if id == &self.0 {
Ok(&self.1)
} else {
Err(Box::new(format!("Failed to fetch source '{}'", id)))
}
}
fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>> { Some(Box::new(id)) }
}

/// A [`Cache`] that fetches [`Source`]s from the filesystem.
#[derive(Default, Debug, Clone)]
pub struct FileCache {
files: HashMap<PathBuf, Source>,
files: HashMap<PathBuf, Source<String>>,
}

impl Cache<Path> for FileCache {
fn fetch(&mut self, path: &Path) -> Result<&Source, Box<dyn fmt::Debug + '_>> {
type Storage = String;

fn fetch(&mut self, path: &Path) -> Result<&Source<String>, Box<dyn fmt::Debug + '_>> {
Ok(match self.files.entry(path.to_path_buf()) { // TODO: Don't allocate here
Entry::Occupied(entry) => entry.into_mut(),
Entry::Vacant(entry) => entry.insert(Source::from(&fs::read_to_string(path).map_err(|e| Box::new(e) as _)?)),
Entry::Vacant(entry) => entry.insert(Source::from(fs::read_to_string(path).map_err(|e| Box::new(e) as _)?)),
})
}
fn display<'a>(&self, path: &'a Path) -> Option<Box<dyn fmt::Display + 'a>> { Some(Box::new(path.display())) }
}

/// A [`Cache`] that fetches [`Source`]s using the provided function.
#[derive(Debug, Clone)]
pub struct FnCache<Id, F> {
sources: HashMap<Id, Source>,
pub struct FnCache<Id, F, I>
where I: AsRef<str>
{
sources: HashMap<Id, Source<I>>,
get: F,
}

impl<Id, F> FnCache<Id, F> {
impl<Id, F, I> FnCache<Id, F, I>
where I: AsRef<str>
{
/// Create a new [`FnCache`] with the given fetch function.
pub fn new(get: F) -> Self {
Self {
Expand All @@ -221,7 +246,7 @@ impl<Id, F> FnCache<Id, F> {
}

/// Pre-insert a selection of [`Source`]s into this cache.
pub fn with_sources(mut self, sources: HashMap<Id, Source>) -> Self
pub fn with_sources(mut self, sources: HashMap<Id, Source<I>>) -> Self
where Id: Eq + Hash
{
self.sources.reserve(sources.len());
Expand All @@ -232,10 +257,13 @@ impl<Id, F> FnCache<Id, F> {
}
}

impl<Id: fmt::Display + Hash + PartialEq + Eq + Clone, F> Cache<Id> for FnCache<Id, F>
where F: for<'a> FnMut(&'a Id) -> Result<String, Box<dyn fmt::Debug>>
impl<Id: fmt::Display + Hash + PartialEq + Eq + Clone, F, I> Cache<Id> for FnCache<Id, F, I>
where I: AsRef<str>,
F: for<'a> FnMut(&'a Id) -> Result<I, Box<dyn fmt::Debug>>,
{
fn fetch(&mut self, id: &Id) -> Result<&Source, Box<dyn fmt::Debug + '_>> {
type Storage = I;

fn fetch(&mut self, id: &Id) -> Result<&Source<I>, Box<dyn fmt::Debug + '_>> {
Ok(match self.sources.entry(id.clone()) {
Entry::Occupied(entry) => entry.into_mut(),
Entry::Vacant(entry) => entry.insert(Source::from((self.get)(id)?)),
Expand All @@ -246,33 +274,32 @@ impl<Id: fmt::Display + Hash + PartialEq + Eq + Clone, F> Cache<Id> for FnCache<

/// Create a [`Cache`] from a collection of ID/strings, where each corresponds to a [`Source`].
pub fn sources<Id, S, I>(iter: I) -> impl Cache<Id>
where
Id: fmt::Display + Hash + PartialEq + Eq + Clone + 'static,
I: IntoIterator<Item = (Id, S)>,
S: AsRef<str>,
where Id: fmt::Display + Hash + PartialEq + Eq + Clone + 'static,
I: IntoIterator<Item = (Id, S)>,
S: AsRef<str>,
{
FnCache::new((move |id| Err(Box::new(format!("Failed to fetch source '{}'", id)) as _)) as fn(&_) -> _)
.with_sources(iter
.into_iter()
.map(|(id, s)| (id, Source::from(s.as_ref())))
.map(|(id, s)| (id, Source::from(s)))
.collect())
}

#[cfg(test)]
mod tests {
use std::iter::zip;
use std::sync::Arc;

use super::Source;

fn test_from(lines: Vec<&str>) {
fn test_with_lines(lines: Vec<&str>) {
let source: String = lines.iter().map(|s| *s).collect();
let source = Source::from(source);

assert_eq!(source.lines.len(), lines.len());

let mut offset = 0;
for (source_line, raw_line) in zip(source.lines.iter().copied(), lines.into_iter()) {
// dbg!(source_line, &raw_line[source_line.byte_span()]);
assert_eq!(source_line.offset, offset);
assert_eq!(source_line.char_len, raw_line.chars().count());
assert_eq!(
Expand All @@ -287,32 +314,32 @@ mod tests {

#[test]
fn source_from_empty() {
test_from(vec![]); // Empty string
test_with_lines(vec![]); // Empty string
}

#[test]
fn source_from_single() {
test_from(vec!["Single line"]);
test_from(vec!["Single line with LF\n"]);
test_from(vec!["Single line with CRLF\r\n"]);
test_with_lines(vec!["Single line"]);
test_with_lines(vec!["Single line with LF\n"]);
test_with_lines(vec!["Single line with CRLF\r\n"]);
}

#[test]
fn source_from_multi() {
test_from(vec!["Two\r\n", "lines\n"]);
test_from(vec!["Some\n", "more\r\n", "lines"]);
test_from(vec!["\n", "\r\n", "\n", "Empty Lines"]);
test_with_lines(vec!["Two\r\n", "lines\n"]);
test_with_lines(vec!["Some\n", "more\r\n", "lines"]);
test_with_lines(vec!["\n", "\r\n", "\n", "Empty Lines"]);
}

#[test]
fn source_from_trims_trailing_spaces() {
test_from(vec!["Trailing spaces \n", "are trimmed\t"]);
test_with_lines(vec!["Trailing spaces \n", "are trimmed\t"]);
}

#[test]
fn source_from_alternate_line_endings() {
// Line endings other than LF or CRLF
test_from(vec![
test_with_lines(vec![
"CR\r",
"VT\x0B",
"FF\x0C",
Expand All @@ -321,4 +348,44 @@ mod tests {
"PS\u{2029}",
]);
}

/// A [`Source`] can be built over a reference-counted string (`Arc<str>`) and
/// reports the same line offsets, lengths and text as the raw input.
#[test]
fn source_from_other_string_types() {
    let raw = r#"A raw string
with multiple
lines behind
an Arc"#;
    let shared: Arc<str> = Arc::from(raw);
    let source = Source::from(shared);

    assert_eq!(source.lines.len(), 4);

    // Pair every parsed line with the matching raw line (terminator included)
    // while tracking the running character offset.
    let parsed_lines = source.lines.iter().copied();
    let raw_lines = raw.split_inclusive('\n');
    let mut expected_offset = 0;
    for (parsed, expected) in zip(parsed_lines, raw_lines) {
        assert_eq!(parsed.offset, expected_offset);
        assert_eq!(parsed.char_len, expected.chars().count());

        let text = source.get_line_text(parsed).unwrap();
        assert_eq!(text, expected.trim_end());

        expected_offset += parsed.char_len;
    }

    // The per-line character counts must add up to the source's total length.
    assert_eq!(source.len, expected_offset);
}

/// A [`Source`] can be built directly over a borrowed `&str`.
#[test]
fn source_from_reference() {
    // The returned source borrows from `input`, so its storage type is `&str`.
    fn borrowed_source(input: &str) -> Source<&str> {
        Source::from(input)
    }

    let raw = r#"A raw string
with multiple
lines"#;

    let line_count = borrowed_source(raw).lines.len();
    assert_eq!(line_count, 3);
}
}

0 comments on commit 898858f

Please sign in to comment.