From a061071ad130c235e650dd2aaed9a9a7d15d3d7d Mon Sep 17 00:00:00 2001 From: Lennart Van Hirtum Date: Thu, 29 Feb 2024 21:32:58 +0100 Subject: [PATCH] Add byte spans Closes #8 Closes Duplicate issues #71 and #57 --- src/lib.rs | 15 ++++++++ src/source.rs | 24 ++++++++++++ src/write.rs | 103 ++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 131 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0bc2301..691ce60 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -358,6 +358,15 @@ pub enum CharSet { Ascii, } +/// Possible index types to use when interpreting label spans. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum IndexType { + /// Byte spans. Always results in O(1) lookups + Byte, + /// Char based spans. May incur O(n) lookups + Char, +} + /// A type used to configure a report #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub struct Config { @@ -369,6 +378,7 @@ pub struct Config { color: bool, tab_width: usize, char_set: CharSet, + index_type : IndexType, } impl Config { @@ -406,6 +416,10 @@ impl Config { /// /// If unspecified, this defaults to [`CharSet::Unicode`]. pub fn with_char_set(mut self, char_set: CharSet) -> Self { self.char_set = char_set; self } + /// Should this report use byte spans instead of char spans? 
+ /// + /// If unspecified, this defaults to [`IndexType::Char`]. + pub fn with_index_type(mut self, index_type : IndexType) -> Self { self.index_type = index_type; self } fn error_color(&self) -> Option { Some(Color::Red).filter(|_| self.color) } fn warning_color(&self) -> Option { Some(Color::Yellow).filter(|_| self.color) } @@ -441,6 +455,7 @@ impl Default for Config { color: true, tab_width: 4, char_set: CharSet::Unicode, + index_type: IndexType::Char, } } } diff --git a/src/source.rs b/src/source.rs index 6df2096..f32d085 100644 --- a/src/source.rs +++ b/src/source.rs @@ -72,6 +72,7 @@ pub struct Source = String> { text: I, lines: Vec, len: usize, + byte_len : usize } impl> Source { @@ -137,6 +138,7 @@ impl> From for Source { text: input, lines, len: char_offset, + byte_len: byte_offset } } } @@ -176,6 +178,28 @@ impl> Source { } } + /// Get the line that the given byte offset appears on, and the line/byte column numbers of the offset. + /// + /// Note that the line/column numbers are zero-indexed. + pub fn get_byte_line(&self, byte_offset: usize) -> Option<(Line, usize, usize)> { + if byte_offset <= self.byte_len { + let idx = self + .lines + .binary_search_by_key(&byte_offset, |line| line.byte_offset) + .unwrap_or_else(|idx| idx.saturating_sub(1)); + let line = self.line(idx)?; + assert!( + byte_offset >= line.byte_offset, + "byte_offset = {}, line.byte_offset = {}", + byte_offset, + line.byte_offset + ); + Some((line, idx, byte_offset - line.byte_offset)) + } else { + None + } + } + /// Get the range of lines that this span runs across. 
/// /// The resulting range is guaranteed to contain valid line indices (i.e: those that can be used for diff --git a/src/write.rs b/src/write.rs index 9770925..a153144 100644 --- a/src/write.rs +++ b/src/write.rs @@ -2,7 +2,7 @@ use std::borrow::Borrow; use std::io; use std::ops::Range; -use crate::LabelDisplay; +use crate::{IndexType, LabelDisplay}; use super::draw::{self, StreamAwareFmt, StreamType}; use super::{Cache, CharSet, LabelAttach, Report, ReportKind, Show, Span, Write}; @@ -42,8 +42,10 @@ impl Report<'_, S> { fn get_source_groups(&self, cache: &mut impl Cache) -> Vec> { let mut groups = Vec::new(); for label in self.labels.iter() { - let src_display = cache.display(label.span.source()); - let src = match cache.fetch(label.span.source()) { + let label_source = label.span.source(); + + let src_display = cache.display(label_source); + let src = match cache.fetch(label_source) { Ok(src) => src, Err(e) => { eprintln!("Unable to fetch source '{}': {:?}", Show(src_display), e); @@ -51,10 +53,41 @@ impl Report<'_, S> { } }; - let start_line = src.get_offset_line(label.span.start()).map(|(_, l, _)| l); - let end_line = src - .get_offset_line(label.span.end().saturating_sub(1).max(label.span.start())) - .map(|(_, l, _)| l); + let given_label_span = label.span.start()..label.span.end(); + + let (label_char_span, start_line, end_line) = match self.config.index_type { + IndexType::Char => { + let Some(start_line) = src.get_offset_line(given_label_span.start) else {continue}; + let end_line = if given_label_span.start >= given_label_span.end { + start_line.1 + } else { + let Some(end_line) = src.get_offset_line(given_label_span.end - 1) else {continue}; + end_line.1 + }; + (given_label_span, start_line.1, end_line) + }, + IndexType::Byte => { + let Some((start_line_obj, start_line, start_byte_col)) = src.get_byte_line(given_label_span.start) else {continue;}; + let line_text = src.get_line_text(start_line_obj).unwrap(); + + let num_chars_before_start = 
line_text[..start_byte_col].chars().count(); + let start_char_offset = start_line_obj.offset() + num_chars_before_start; + + if given_label_span.start >= given_label_span.end { + (start_char_offset..start_char_offset, start_line, start_line) + } else { + // We can subtract 1 from end, because get_byte_line doesn't actually index into the text. + let end_pos = given_label_span.end - 1; + let Some((end_line_obj, end_line, end_byte_col)) = src.get_byte_line(end_pos) else {continue}; + let end_line_text = src.get_line_text(end_line_obj).unwrap(); + // Have to add 1 back now, so we don't cut a char in two. + let num_chars_before_end = end_line_text[..end_byte_col+1].chars().count(); + let end_char_offset = end_line_obj.offset() + num_chars_before_end; + + (start_char_offset..end_char_offset, start_line, end_line) + } + } + }; let label_info = LabelInfo { kind: if start_line == end_line { @@ -62,20 +95,20 @@ impl Report<'_, S> { } else { LabelKind::Multiline }, - char_span: label.span.start()..label.span.end(), + char_span: label_char_span, display_info: &label.display_info, }; if let Some(group) = groups .iter_mut() - .find(|g: &&mut SourceGroup| g.src_id == label.span.source()) + .find(|g: &&mut SourceGroup| g.src_id == label_source) { group.char_span.start = group.char_span.start.min(label_info.char_span.start); group.char_span.end = group.char_span.end.max(label_info.char_span.end); group.labels.push(label_info); } else { groups.push(SourceGroup { - src_id: label.span.source(), + src_id: label_source, char_span: label_info.char_span.clone(), labels: vec![label_info], }); @@ -807,7 +840,7 @@ mod tests { use insta::assert_snapshot; - use crate::{Cache, CharSet, Config, Label, Report, ReportKind, Source, Span}; + use crate::{Cache, CharSet, Config, Label, Report, ReportKind, Source, Span, IndexType}; impl Report<'_, S> { fn write_to_string>(&self, cache: C) -> String { @@ -881,6 +914,54 @@ mod tests { "###); } + #[test] + fn multi_byte_chars() { + let source = "äpplë 
== örängë;"; + let msg = Report::>::build(ReportKind::Error, (), 0) + .with_config(no_color_and_ascii().with_index_type(IndexType::Char)) + .with_message("can't compare äpplës with örängës") + .with_label(Label::new(0..5).with_message("This is an äpplë")) + .with_label(Label::new(9..15).with_message("This is an örängë")) + .finish() + .write_to_string(Source::from(source)); + // TODO: it would be nice if these lines didn't cross + assert_snapshot!(msg, @r###" + Error: can't compare äpplës with örängës + ,-[:1:1] + | + 1 | äpplë == örängë; + | ^^|^^ ^^^|^^ + | `-------------- This is an äpplë + | | + | `---- This is an örängë + ---' + "###); + } + + #[test] + fn byte_label() { + let source = "äpplë == örängë;"; + let msg = Report::>::build(ReportKind::Error, (), 0) + .with_config(no_color_and_ascii().with_index_type(IndexType::Byte)) + .with_message("can't compare äpplës with örängës") + .with_label(Label::new(0..7).with_message("This is an äpplë")) + .with_label(Label::new(11..20).with_message("This is an örängë")) + .finish() + .write_to_string(Source::from(source)); + // TODO: it would be nice if these lines didn't cross + assert_snapshot!(msg, @r###" + Error: can't compare äpplës with örängës + ,-[:1:1] + | + 1 | äpplë == örängë; + | ^^|^^ ^^^|^^ + | `-------------- This is an äpplë + | | + | `---- This is an örängë + ---' + "###); + } + #[test] fn label_at_end_of_long_line() { let source = format!("{}orange", "apple == ".repeat(100));