Skip to content

Commit

Permalink
Add byte spans
Browse files Browse the repository at this point in the history
Closes zesterer#8
Also closes duplicate issues zesterer#71 and zesterer#57
  • Loading branch information
VonTum committed Feb 29, 2024
1 parent 1037cd6 commit 3dfac9a
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 11 deletions.
15 changes: 15 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,15 @@ pub enum CharSet {
Ascii,
}

/// The unit in which the offsets of a label's span are measured.
///
/// Chosen per report through [`Config::with_index_type`]; [`Config`]'s default is
/// [`IndexType::Char`].
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IndexType {
    /// Span offsets are byte offsets into the source text.
    Byte,
    /// Span offsets are character (`char`) offsets into the source text.
    Char,
}

/// A type used to configure a report
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Config {
Expand All @@ -369,6 +378,7 @@ pub struct Config {
color: bool,
tab_width: usize,
char_set: CharSet,
index_type : IndexType,
}

impl Config {
Expand Down Expand Up @@ -406,6 +416,10 @@ impl Config {
///
/// If unspecified, this defaults to [`CharSet::Unicode`].
pub fn with_char_set(mut self, char_set: CharSet) -> Self { self.char_set = char_set; self }
/// In which unit are the offsets of this report's label spans measured: bytes or characters?
///
/// If unspecified, this defaults to [`IndexType::Char`].
pub fn with_index_type(mut self, index_type: IndexType) -> Self {
    self.index_type = index_type;
    self
}

/// Color for error output: red when color is enabled in this config, `None` otherwise.
fn error_color(&self) -> Option<Color> {
    if self.color {
        Some(Color::Red)
    } else {
        None
    }
}
/// Color for warning output: yellow when color is enabled in this config, `None` otherwise.
fn warning_color(&self) -> Option<Color> {
    if self.color {
        Some(Color::Yellow)
    } else {
        None
    }
}
Expand Down Expand Up @@ -441,6 +455,7 @@ impl Default for Config {
color: true,
tab_width: 4,
char_set: CharSet::Unicode,
index_type: IndexType::Char,
}
}
}
Expand Down
24 changes: 24 additions & 0 deletions src/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ pub struct Source<I: AsRef<str> = String> {
text: I,
lines: Vec<Line>,
len: usize,
byte_len : usize
}

impl<I: AsRef<str>> Source<I> {
Expand Down Expand Up @@ -137,6 +138,7 @@ impl<I: AsRef<str>> From<I> for Source<I> {
text: input,
lines,
len: char_offset,
byte_len: byte_offset
}
}
}
Expand Down Expand Up @@ -176,6 +178,28 @@ impl<I: AsRef<str>> Source<I> {
}
}

/// Find the line that contains the given byte offset.
///
/// On success returns the line itself, its line index, and the byte column of the offset within
/// that line. Both numbers are zero-indexed. Returns `None` when the offset lies beyond the
/// source's byte length, or when no line exists at the index that was found.
pub fn get_byte_line(&self, byte_offset: usize) -> Option<(Line, usize, usize)> {
    if byte_offset > self.byte_len {
        return None;
    }

    // Binary search over the lines' starting byte offsets (expects `lines` to be ordered by
    // `byte_offset`). An exact hit is the line starting at the offset; otherwise the containing
    // line is the one just before the insertion point.
    let line_idx = match self
        .lines
        .binary_search_by_key(&byte_offset, |line| line.byte_offset)
    {
        Ok(exact) => exact,
        Err(insert_at) => insert_at.saturating_sub(1),
    };

    let line = self.line(line_idx)?;
    assert!(
        byte_offset >= line.byte_offset,
        "byte_offset = {}, line.byte_offset = {}",
        byte_offset,
        line.byte_offset
    );

    let byte_col = byte_offset - line.byte_offset;
    Some((line, line_idx, byte_col))
}

/// Get the range of lines that this span runs across.
///
/// The resulting range is guaranteed to contain valid line indices (i.e: those that can be used for
Expand Down
103 changes: 92 additions & 11 deletions src/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::borrow::Borrow;
use std::io;
use std::ops::Range;

use crate::LabelDisplay;
use crate::{IndexType, LabelDisplay};

use super::draw::{self, StreamAwareFmt, StreamType};
use super::{Cache, CharSet, LabelAttach, Report, ReportKind, Show, Span, Write};
Expand Down Expand Up @@ -42,40 +42,73 @@ impl<S: Span> Report<'_, S> {
fn get_source_groups(&self, cache: &mut impl Cache<S::SourceId>) -> Vec<SourceGroup<S>> {
let mut groups = Vec::new();
for label in self.labels.iter() {
let src_display = cache.display(label.span.source());
let src = match cache.fetch(label.span.source()) {
let label_source = label.span.source();

let src_display = cache.display(label_source);
let src = match cache.fetch(label_source) {
Ok(src) => src,
Err(e) => {
eprintln!("Unable to fetch source '{}': {:?}", Show(src_display), e);
continue;
}
};

let start_line = src.get_offset_line(label.span.start()).map(|(_, l, _)| l);
let end_line = src
.get_offset_line(label.span.end().saturating_sub(1).max(label.span.start()))
.map(|(_, l, _)| l);
let given_label_span = label.span.start()..label.span.end();

// Translate the label's span into a char span and find the zero-indexed lines that hold its
// first and last positions. Labels whose offsets cannot be located in the source are skipped.
let (label_char_span, start_line, end_line) = match self.config.index_type {
    IndexType::Char => {
        // Offsets are already char offsets; only the line indices need to be looked up.
        let Some(start_line) = src.get_offset_line(given_label_span.start) else {
            continue;
        };
        let end_line = if given_label_span.start >= given_label_span.end {
            // Empty (or inverted) span: it starts and ends on the same line.
            start_line.1
        } else {
            let Some(end_line) = src.get_offset_line(given_label_span.end - 1) else {
                continue;
            };
            end_line.1
        };
        (given_label_span, start_line.1, end_line)
    }
    IndexType::Byte => {
        let Some((start_line_obj, start_line, start_byte_col)) =
            src.get_byte_line(given_label_span.start)
        else {
            continue;
        };
        let line_text = src.get_line_text(start_line_obj).unwrap();

        // Convert the byte column into a char column by counting the chars in front of it.
        // NOTE(review): slicing panics if the offset is not on a char boundary or lies past
        // the text returned by `get_line_text` — confirm callers guarantee this.
        let num_chars_before_start = line_text[..start_byte_col].chars().count();
        let start_char_offset = start_line_obj.offset() + num_chars_before_start;

        if given_label_span.start >= given_label_span.end {
            (start_char_offset..start_char_offset, start_line, start_line)
        } else {
            // We can subtract 1 from end, because get_byte_line doesn't actually index into the text.
            let end_pos = given_label_span.end - 1;
            let Some((end_line_obj, end_line, end_byte_col)) = src.get_byte_line(end_pos)
            else {
                continue;
            };
            // `end_byte_col` is relative to the line the end position is on, so the text of
            // that line (not of the start line) must be sliced; the two differ for labels
            // spanning several lines.
            let end_line_text = src.get_line_text(end_line_obj).unwrap();
            // Have to add 1 back now, so we don't cut a char in two.
            let num_chars_before_end = end_line_text[..end_byte_col + 1].chars().count();
            let end_char_offset = end_line_obj.offset() + num_chars_before_end;

            (start_char_offset..end_char_offset, start_line, end_line)
        }
    }
};

let label_info = LabelInfo {
kind: if start_line == end_line {
LabelKind::Inline
} else {
LabelKind::Multiline
},
char_span: label.span.start()..label.span.end(),
char_span: label_char_span,
display_info: &label.display_info,
};

if let Some(group) = groups
.iter_mut()
.find(|g: &&mut SourceGroup<S>| g.src_id == label.span.source())
.find(|g: &&mut SourceGroup<S>| g.src_id == label_source)
{
group.char_span.start = group.char_span.start.min(label_info.char_span.start);
group.char_span.end = group.char_span.end.max(label_info.char_span.end);
group.labels.push(label_info);
} else {
groups.push(SourceGroup {
src_id: label.span.source(),
src_id: label_source,
char_span: label_info.char_span.clone(),
labels: vec![label_info],
});
Expand Down Expand Up @@ -807,7 +840,7 @@ mod tests {

use insta::assert_snapshot;

use crate::{Cache, CharSet, Config, Label, Report, ReportKind, Source, Span};
use crate::{Cache, CharSet, Config, Label, Report, ReportKind, Source, Span, IndexType};

impl<S: Span> Report<'_, S> {
fn write_to_string<C: Cache<S::SourceId>>(&self, cache: C) -> String {
Expand Down Expand Up @@ -881,6 +914,54 @@ mod tests {
"###);
}

/// Renders a report whose labels use char-indexed spans (`IndexType::Char`) over a source
/// containing multi-byte characters, and compares the output against an inline snapshot.
#[test]
fn multi_byte_chars() {
let source = "äpplë == örängë;";
// Char offsets: 0..5 covers "äpplë" (5 chars) and 9..15 covers "örängë" (6 chars).
let msg = Report::<Range<usize>>::build(ReportKind::Error, (), 0)
.with_config(no_color_and_ascii().with_index_type(IndexType::Char))
.with_message("can't compare äpplës with örängës")
.with_label(Label::new(0..5).with_message("This is an äpplë"))
.with_label(Label::new(9..15).with_message("This is an örängë"))
.finish()
.write_to_string(Source::from(source));
// TODO: it would be nice if these lines didn't cross
assert_snapshot!(msg, @r###"
Error: can't compare äpplës with örängës
,-[<unknown>:1:1]
|
1 | äpplë == örängë;
| ^^|^^ ^^^|^^
| `-------------- This is an äpplë
| |
| `---- This is an örängë
---'
"###);
}

/// Renders a report whose labels use byte-indexed spans (`IndexType::Byte`) over a source
/// containing multi-byte characters, and compares the output against an inline snapshot.
#[test]
fn byte_label() {
let source = "äpplë == örängë;";
// Byte offsets: "ä", "ë" and "ö" take 2 bytes each in UTF-8, so 0..7 covers "äpplë" and
// 11..20 covers "örängë" — the same text that char spans 0..5 and 9..15 would select.
let msg = Report::<Range<usize>>::build(ReportKind::Error, (), 0)
.with_config(no_color_and_ascii().with_index_type(IndexType::Byte))
.with_message("can't compare äpplës with örängës")
.with_label(Label::new(0..7).with_message("This is an äpplë"))
.with_label(Label::new(11..20).with_message("This is an örängë"))
.finish()
.write_to_string(Source::from(source));
// TODO: it would be nice if these lines didn't cross
assert_snapshot!(msg, @r###"
Error: can't compare äpplës with örängës
,-[<unknown>:1:1]
|
1 | äpplë == örängë;
| ^^|^^ ^^^|^^
| `-------------- This is an äpplë
| |
| `---- This is an örängë
---'
"###);
}

#[test]
fn label_at_end_of_long_line() {
let source = format!("{}orange", "apple == ".repeat(100));
Expand Down

0 comments on commit 3dfac9a

Please sign in to comment.