diff --git a/Cargo.toml b/Cargo.toml index ca8cf7f..b73ffef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ rustc-hash = "1.1.0" unicode-ident = "1" once_cell = "1.17.0" indexmap = "2" +winnow = "0.6.7" [dev-dependencies] wgpu = { version = "0.19.0", features = ["naga-ir"] } diff --git a/src/compose/comment_strip_iter.rs b/src/compose/comment_strip_iter.rs index 4d12a5a..430ecad 100644 --- a/src/compose/comment_strip_iter.rs +++ b/src/compose/comment_strip_iter.rs @@ -1,90 +1,178 @@ -use std::{borrow::Cow, str::Lines}; +use std::{borrow::Cow, ops::Range}; + +use winnow::{ + ascii::till_line_ending, + combinator::{cut_err, opt}, + error::StrContext, + token::any, + Located, PResult, Parser, +}; + +struct SourceCode { + /** Sorted pieces of the source code without any gaps */ + parts: Vec, +} + +enum SourceCodePart { + Text(Range), + SingleLineComment(SingleLineComment), + MultiLineComment(MultiLineComment), +} + +impl SourceCodePart { + fn span(&self) -> Range { + match self { + SourceCodePart::Text(span) => span.clone(), + SourceCodePart::SingleLineComment(comment) => comment.span.clone(), + SourceCodePart::MultiLineComment(comment) => comment.span.clone(), + } + } +} -use regex::Regex; +pub struct SingleLineComment { + pub span: Range, +} +pub struct MultiLineComment { + pub span: Range, +} -static RE_COMMENT: once_cell::sync::Lazy = - once_cell::sync::Lazy::new(|| Regex::new(r"(//|/\*|\*/)").unwrap()); +fn parse_source(input: &mut Located<&str>) -> PResult { + let mut parts = Vec::new(); + loop { + if input.is_empty() { + break; + } + if let Some(part) = opt(single_line_comment).parse_next(input)? { + parts.push(SourceCodePart::SingleLineComment(part)); + } else if let Some(part) = opt(multi_line_comment).parse_next(input)? { + parts.push(SourceCodePart::MultiLineComment(part)); + } else { + let text_span = any.span().parse_next(input)?; + if let Some(SourceCodePart::Text(last_span)) = parts.last_mut() { + last_span.end = text_span.end; + } else { + parts.push(SourceCodePart::Text(text_span)); + } + } + } + Ok(SourceCode { parts }) +} + +fn single_line_comment(input: &mut Located<&str>) -> PResult { + let start_span = "//".span().parse_next(input)?; + let text_span = till_line_ending.span().parse_next(input)?; + Ok(SingleLineComment { + span: start_span.start..text_span.end, + }) +} +fn multi_line_comment(input: &mut Located<&str>) -> PResult { + let start_span = "/*".span().parse_next(input)?; + loop { + if let Some(end_span) = opt("*/".span()).parse_next(input)? { + return Ok(MultiLineComment { + span: start_span.start..end_span.end, + }); + } else if let Some(_) = opt(multi_line_comment).parse_next(input)? { + // We found a nested comment, skip it + } else { + // Skip a single character + let _ = cut_err(any) + .context(StrContext::Label("multiline comment")) + .parse_next(input)?; + } + } +} pub struct CommentReplaceIter<'a> { - lines: &'a mut Lines<'a>, - block_depth: usize, + text: &'a str, + text_index: usize, + parsed: SourceCode, + parsed_index: usize, +} + +fn clamp_range(range: Range, min: usize, max: usize) -> Range { + range.start.clamp(min, max)..range.end.clamp(min, max) } impl<'a> Iterator for CommentReplaceIter<'a> { - type Item = Cow<'a, str>; + type Item = (Cow<'a, str>, &'a str); fn next(&mut self) -> Option { - let line_in = self.lines.next()?; - let mut markers = RE_COMMENT - .captures_iter(line_in) - .map(|cap| cap.get(0).unwrap()) - .peekable(); - - // fast path - if self.block_depth == 0 && markers.peek().is_none() { - return Some(Cow::Borrowed(line_in)); + if self.text_index >= self.text.len() { + return None; } - let mut output = String::new(); - let mut section_start = 0; - - loop { - let mut next_marker = markers.next(); - let mut section_end = next_marker.map(|m| m.start()).unwrap_or(line_in.len()); - - // skip partial tokens - while next_marker.is_some() && section_start > section_end { - next_marker = markers.next(); - section_end = next_marker.map(|m| m.start()).unwrap_or(line_in.len()); + let line_start = self.text_index; + let line_end = self.text[line_start..] + .find('\n') // TODO: Handle \r\n + .map(|i| line_start + i + 1) + .unwrap_or_else(|| self.text.len()); + self.text_index = line_end; + + let mut parts = Vec::new(); + for (i, parsed_part) in self.parsed.parts.iter().enumerate().skip(self.parsed_index) { + let span = parsed_part.span(); + if span.start >= line_end { + break; + } + if span.end <= line_start { + self.parsed_index = i + 1; + continue; } + parts.push((parsed_part, clamp_range(span, line_start, line_end))); + } - if self.block_depth == 0 { - output.push_str(&line_in[section_start..section_end]); - } else { - output.extend(std::iter::repeat(' ').take(section_end - section_start)); + assert!(parts.len() > 0); + + // Fast path + if parts.len() == 1 { + match parts.into_iter().next().unwrap() { + (SourceCodePart::Text(_), span) => { + return Some(( + Cow::Borrowed(&self.text[span]), + &self.text[line_start..line_end], + )); + } + ( + SourceCodePart::SingleLineComment(_) | SourceCodePart::MultiLineComment(_), + span, + ) => { + let spaces = " ".repeat(span.len()); + return Some((Cow::Owned(spaces), &self.text[line_start..line_end])); + } } + } - match next_marker { - None => return Some(Cow::Owned(output)), - Some(marker) => { - match marker.as_str() { - "//" => { - // the specs (https://www.w3.org/TR/WGSL/#comment, https://registry.khronos.org/OpenGL/specs/gl/GLSLangSpec.4.60.pdf @ 3.4) state that - // whichever comment-type starts first should cancel parsing of the other type - if self.block_depth == 0 { - output.extend( - std::iter::repeat(' ').take(line_in.len() - marker.start()), - ); - return Some(Cow::Owned(output)); - } - } - "/*" => { - self.block_depth += 1; - } - "*/" => { - self.block_depth = self.block_depth.saturating_sub(1); - } - _ => unreachable!(), - } - output.extend(std::iter::repeat(' ').take(marker.as_str().len())); - section_start = marker.end(); + let mut output = String::new(); + let mut last_end = line_start; + for (part, span) in parts.into_iter() { + output.push_str(&self.text[last_end..span.start]); + last_end = span.end; + match part { + SourceCodePart::Text(_) => { + output.push_str(&self.text[span]); + } + SourceCodePart::SingleLineComment(_) | SourceCodePart::MultiLineComment(_) => { + output.extend(std::iter::repeat(' ').take(span.len())); } } } - } -} -pub trait CommentReplaceExt<'a> { - /// replace WGSL and GLSL comments with whitespace characters - fn replace_comments(&'a mut self) -> CommentReplaceIter; + assert!(last_end == line_end); + Some((Cow::Owned(output), &self.text[line_start..line_end])) + } } -impl<'a> CommentReplaceExt<'a> for Lines<'a> { - fn replace_comments(&'a mut self) -> CommentReplaceIter { - CommentReplaceIter { - lines: self, - block_depth: 0, - } +/// Gives you an iterator that replaces comments in the input text with spaces. +/// The iterator will yield the same lines as the input text, but with comments replaced. +/// Lines will include the newline character at the end! +pub fn replace_comments(input: &str) -> CommentReplaceIter { + let parsed = parse_source(&mut Located::new(input)).unwrap(); + CommentReplaceIter { + text: input, + text_index: 0, + parsed, + parsed_index: 0, } } @@ -108,14 +196,10 @@ not commented "; assert_eq!( - INPUT - .lines() - .replace_comments() - .zip(INPUT.lines()) - .find(|(line, original)| { - (line != "not commented" && !line.chars().all(|c| c == ' ')) - || line.len() != original.len() - }), + replace_comments(INPUT).find(|(line, original)| { + (line.trim_end() != "not commented" && !line.chars().all(|c| c == ' ' || c == '\n')) + || line.len() != original.len() + }), None ); @@ -139,8 +223,7 @@ not commented ]; for &(input, expected) in PARTIAL_TESTS.iter() { - let mut nasty_processed = input.lines(); - let nasty_processed = nasty_processed.replace_comments().next().unwrap(); + let nasty_processed = replace_comments(input).next().unwrap().0; assert_eq!(&nasty_processed, expected); } } diff --git a/src/compose/preprocess.rs b/src/compose/preprocess.rs index 45c3759..72eed17 100644 --- a/src/compose/preprocess.rs +++ b/src/compose/preprocess.rs @@ -4,7 +4,7 @@ use indexmap::IndexMap; use regex::Regex; use super::{ - comment_strip_iter::CommentReplaceExt, + comment_strip_iter::replace_comments, parse_imports::{parse_imports, substitute_identifiers}, ComposerErrorInner, ImportDefWithOffset, ShaderDefValue, }; @@ -245,8 +245,7 @@ impl Preprocessor { let len = shader_str.len(); // this code broadly stolen from bevy_render::ShaderProcessor - let mut lines = shader_str.lines(); - let mut lines = lines.replace_comments().zip(shader_str.lines()).peekable(); + let mut lines = replace_comments(shader_str).peekable(); while let Some((mut line, original_line)) = lines.next() { let mut output = false; @@ -271,8 +270,9 @@ impl Preprocessor { loop { // output spaces for removed lines to keep spans consistent (errors report against substituted_source, which is not preprocessed) - final_string.extend(std::iter::repeat(" ").take(line.len())); - offset += line.len() + 1; + final_string + .extend(std::iter::repeat(" ").take(line.len().saturating_sub(1))); + offset += line.len(); // PERF: Ideally we don't do multiple `match_indices` passes over `line` // in addition to the final pass for the import parse @@ -283,7 +283,6 @@ impl Preprocessor { // let import_lines = &shader_str[initial_offset..offset] // but we need the comments removed, and the iterator approach doesn't make that easy import_lines.push_str(&line); - import_lines.push('\n'); if open_count == 0 || lines.peek().is_none() { break; @@ -356,15 +355,15 @@ impl Preprocessor { final_string.push_str(&item_replaced_line); let diff = line.len().saturating_sub(item_replaced_line.len()); final_string.extend(std::iter::repeat(" ").take(diff)); - offset += original_line.len() + 1; + offset += original_line.len(); output = true; } } if !output { // output spaces for removed lines to keep spans consistent (errors report against substituted_source, which is not preprocessed) - final_string.extend(std::iter::repeat(" ").take(line.len())); - offset += line.len() + 1; + final_string.extend(std::iter::repeat(" ").take(line.len().saturating_sub(1))); + offset += line.len(); } final_string.push('\n'); } @@ -398,10 +397,9 @@ impl Preprocessor { let mut defines = HashMap::default(); let mut effective_defs = HashSet::default(); - let mut lines = shader_str.lines(); - let mut lines = lines.replace_comments().peekable(); + let mut lines = replace_comments(shader_str).peekable(); - while let Some(mut line) = lines.next() { + while let Some((mut line, _)) = lines.next() { let (is_scope, def) = self.check_scope(&HashMap::default(), &line, None, offset)?; if is_scope { @@ -423,7 +421,6 @@ impl Preprocessor { // let import_lines = &shader_str[initial_offset..offset] // but we need the comments removed, and the iterator approach doesn't make that easy import_lines.push_str(&line); - import_lines.push('\n'); if open_count == 0 || lines.peek().is_none() { break; @@ -432,7 +429,7 @@ impl Preprocessor { // output spaces for removed lines to keep spans consistent (errors report against substituted_source, which is not preprocessed) offset += line.len() + 1; - line = lines.next().unwrap(); + line = lines.next().unwrap().0; } parse_imports(import_lines.as_str(), &mut declared_imports).map_err(