Skip to content

Commit

Permalink
Try out winnow
Browse files Browse the repository at this point in the history
  • Loading branch information
stefnotch committed Apr 28, 2024
1 parent 33e57e4 commit 3771d6c
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 90 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ rustc-hash = "1.1.0"
unicode-ident = "1"
once_cell = "1.17.0"
indexmap = "2"
winnow = "0.6.7"

[dev-dependencies]
wgpu = { version = "0.19.0", features = ["naga-ir"] }
Expand Down
235 changes: 159 additions & 76 deletions src/compose/comment_strip_iter.rs
Original file line number Diff line number Diff line change
@@ -1,90 +1,178 @@
use std::{borrow::Cow, str::Lines};
use std::{borrow::Cow, ops::Range};

use winnow::{
ascii::till_line_ending,
combinator::{cut_err, opt},
error::StrContext,
token::any,
Located, PResult, Parser,
};

/// The outcome of `parse_source`: the input split into text and comment parts.
struct SourceCode {
    /// Sorted pieces of the source code without any gaps.
    parts: Vec<SourceCodePart>,
}

/// One contiguous piece of the parsed source, identified by its span in the input.
enum SourceCodePart {
    /// A run of input that is not part of any comment.
    Text(Range<usize>),
    /// A comment introduced by `//`.
    SingleLineComment(SingleLineComment),
    /// A comment delimited by `/*` and `*/`.
    MultiLineComment(MultiLineComment),
}

impl SourceCodePart {
fn span(&self) -> Range<usize> {
match self {
SourceCodePart::Text(span) => span.clone(),
SourceCodePart::SingleLineComment(comment) => comment.span.clone(),
SourceCodePart::MultiLineComment(comment) => comment.span.clone(),
}
}
}

use regex::Regex;
/// A `//` comment found in the source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SingleLineComment {
    /// Span of the whole comment, starting at the `//` marker and ending where
    /// the comment text ends (the line ending itself is not part of the span).
    pub span: Range<usize>,
}
/// A `/* ... */` comment found in the source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiLineComment {
    /// Span of the whole comment, from the opening `/*` through the closing `*/`
    /// that terminates it (any nested comments lie inside this span).
    pub span: Range<usize>,
}

static RE_COMMENT: once_cell::sync::Lazy<Regex> =
once_cell::sync::Lazy::new(|| Regex::new(r"(//|/\*|\*/)").unwrap());
fn parse_source(input: &mut Located<&str>) -> PResult<SourceCode> {
let mut parts = Vec::new();
loop {
if input.is_empty() {
break;
}
if let Some(part) = opt(single_line_comment).parse_next(input)? {
parts.push(SourceCodePart::SingleLineComment(part));
} else if let Some(part) = opt(multi_line_comment).parse_next(input)? {
parts.push(SourceCodePart::MultiLineComment(part));
} else {
let text_span = any.span().parse_next(input)?;
if let Some(SourceCodePart::Text(last_span)) = parts.last_mut() {
last_span.end = text_span.end;
} else {
parts.push(SourceCodePart::Text(text_span));
}
}
}
Ok(SourceCode { parts })
}

/// Parses a `//` comment: the marker followed by everything up to the line ending.
///
/// The resulting span starts at the `//` marker and ends where the text
/// matched by `till_line_ending` ends.
fn single_line_comment(input: &mut Located<&str>) -> PResult<SingleLineComment> {
    ("//", till_line_ending)
        .span()
        .map(|span| SingleLineComment { span })
        .parse_next(input)
}
/// Parses a `/* ... */` comment, including any comments nested inside it.
///
/// After the opening `/*`, the loop looks for the closing `*/`, then for a
/// nested comment (parsed recursively), and otherwise skips one character.
/// If no character is left to skip, the failure is raised as a cut error
/// labelled "multiline comment" rather than a recoverable one.
fn multi_line_comment(input: &mut Located<&str>) -> PResult<MultiLineComment> {
    let opener = "/*".span().parse_next(input)?;
    loop {
        if let Some(closer) = opt("*/".span()).parse_next(input)? {
            break Ok(MultiLineComment {
                span: opener.start..closer.end,
            });
        }
        if opt(multi_line_comment).parse_next(input)?.is_some() {
            // A nested comment was consumed in full; keep scanning after it.
            continue;
        }
        // Ordinary comment content: skip a single character.
        cut_err(any)
            .context(StrContext::Label("multiline comment"))
            .parse_next(input)?;
    }
}

pub struct CommentReplaceIter<'a> {
lines: &'a mut Lines<'a>,
block_depth: usize,
text: &'a str,
text_index: usize,
parsed: SourceCode,
parsed_index: usize,
}

/// Restricts both endpoints of `range` to the inclusive interval `[min, max]`.
///
/// Each endpoint is clamped independently, so a range lying entirely outside
/// the interval collapses to an empty range at the nearest bound.
fn clamp_range(range: Range<usize>, min: usize, max: usize) -> Range<usize> {
    let Range { start, end } = range;
    Range {
        start: start.clamp(min, max),
        end: end.clamp(min, max),
    }
}

impl<'a> Iterator for CommentReplaceIter<'a> {
type Item = Cow<'a, str>;
type Item = (Cow<'a, str>, &'a str);

fn next(&mut self) -> Option<Self::Item> {
let line_in = self.lines.next()?;
let mut markers = RE_COMMENT
.captures_iter(line_in)
.map(|cap| cap.get(0).unwrap())
.peekable();

// fast path
if self.block_depth == 0 && markers.peek().is_none() {
return Some(Cow::Borrowed(line_in));
if self.text_index >= self.text.len() {
return None;
}

let mut output = String::new();
let mut section_start = 0;

loop {
let mut next_marker = markers.next();
let mut section_end = next_marker.map(|m| m.start()).unwrap_or(line_in.len());

// skip partial tokens
while next_marker.is_some() && section_start > section_end {
next_marker = markers.next();
section_end = next_marker.map(|m| m.start()).unwrap_or(line_in.len());
let line_start = self.text_index;
let line_end = self.text[line_start..]
.find('\n') // TODO: Handle \r\n
.map(|i| line_start + i + 1)
.unwrap_or_else(|| self.text.len());
self.text_index = line_end;

let mut parts = Vec::new();
for (i, parsed_part) in self.parsed.parts.iter().enumerate().skip(self.parsed_index) {
let span = parsed_part.span();
if span.start >= line_end {
break;
}
if span.end <= line_start {
self.parsed_index = i + 1;
continue;
}
parts.push((parsed_part, clamp_range(span, line_start, line_end)));
}

if self.block_depth == 0 {
output.push_str(&line_in[section_start..section_end]);
} else {
output.extend(std::iter::repeat(' ').take(section_end - section_start));
assert!(parts.len() > 0);

// Fast path
if parts.len() == 1 {
match parts.into_iter().next().unwrap() {
(SourceCodePart::Text(_), span) => {
return Some((
Cow::Borrowed(&self.text[span]),
&self.text[line_start..line_end],
));
}
(
SourceCodePart::SingleLineComment(_) | SourceCodePart::MultiLineComment(_),
span,
) => {
let spaces = " ".repeat(span.len());
return Some((Cow::Owned(spaces), &self.text[line_start..line_end]));
}
}
}

match next_marker {
None => return Some(Cow::Owned(output)),
Some(marker) => {
match marker.as_str() {
"//" => {
// the specs (https://www.w3.org/TR/WGSL/#comment, https://registry.khronos.org/OpenGL/specs/gl/GLSLangSpec.4.60.pdf @ 3.4) state that
// whichever comment-type starts first should cancel parsing of the other type
if self.block_depth == 0 {
output.extend(
std::iter::repeat(' ').take(line_in.len() - marker.start()),
);
return Some(Cow::Owned(output));
}
}
"/*" => {
self.block_depth += 1;
}
"*/" => {
self.block_depth = self.block_depth.saturating_sub(1);
}
_ => unreachable!(),
}
output.extend(std::iter::repeat(' ').take(marker.as_str().len()));
section_start = marker.end();
let mut output = String::new();
let mut last_end = line_start;
for (part, span) in parts.into_iter() {
output.push_str(&self.text[last_end..span.start]);
last_end = span.end;
match part {
SourceCodePart::Text(_) => {
output.push_str(&self.text[span]);
}
SourceCodePart::SingleLineComment(_) | SourceCodePart::MultiLineComment(_) => {
output.extend(std::iter::repeat(' ').take(span.len()));
}
}
}
}
}

pub trait CommentReplaceExt<'a> {
/// replace WGSL and GLSL comments with whitespace characters
fn replace_comments(&'a mut self) -> CommentReplaceIter;
assert!(last_end == line_end);
Some((Cow::Owned(output), &self.text[line_start..line_end]))
}
}

impl<'a> CommentReplaceExt<'a> for Lines<'a> {
fn replace_comments(&'a mut self) -> CommentReplaceIter {
CommentReplaceIter {
lines: self,
block_depth: 0,
}
/// Gives you an iterator that replaces comments in the input text with spaces.
/// The iterator will yield the same lines as the input text, but with comments replaced.
/// Lines will include the newline character at the end!
pub fn replace_comments(input: &str) -> CommentReplaceIter {
let parsed = parse_source(&mut Located::new(input)).unwrap();
CommentReplaceIter {
text: input,
text_index: 0,
parsed,
parsed_index: 0,
}
}

Expand All @@ -108,14 +196,10 @@ not commented
";

assert_eq!(
INPUT
.lines()
.replace_comments()
.zip(INPUT.lines())
.find(|(line, original)| {
(line != "not commented" && !line.chars().all(|c| c == ' '))
|| line.len() != original.len()
}),
replace_comments(INPUT).find(|(line, original)| {
(line.trim_end() != "not commented" && !line.chars().all(|c| c == ' ' || c == '\n'))
|| line.len() != original.len()
}),
None
);

Expand All @@ -139,8 +223,7 @@ not commented
];

for &(input, expected) in PARTIAL_TESTS.iter() {
let mut nasty_processed = input.lines();
let nasty_processed = nasty_processed.replace_comments().next().unwrap();
let nasty_processed = replace_comments(input).next().unwrap().0;
assert_eq!(&nasty_processed, expected);
}
}
25 changes: 11 additions & 14 deletions src/compose/preprocess.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use indexmap::IndexMap;
use regex::Regex;

use super::{
comment_strip_iter::CommentReplaceExt,
comment_strip_iter::replace_comments,
parse_imports::{parse_imports, substitute_identifiers},
ComposerErrorInner, ImportDefWithOffset, ShaderDefValue,
};
Expand Down Expand Up @@ -245,8 +245,7 @@ impl Preprocessor {
let len = shader_str.len();

// this code broadly stolen from bevy_render::ShaderProcessor
let mut lines = shader_str.lines();
let mut lines = lines.replace_comments().zip(shader_str.lines()).peekable();
let mut lines = replace_comments(shader_str).peekable();

while let Some((mut line, original_line)) = lines.next() {
let mut output = false;
Expand All @@ -271,8 +270,9 @@ impl Preprocessor {

loop {
// output spaces for removed lines to keep spans consistent (errors report against substituted_source, which is not preprocessed)
final_string.extend(std::iter::repeat(" ").take(line.len()));
offset += line.len() + 1;
final_string
.extend(std::iter::repeat(" ").take(line.len().saturating_sub(1)));
offset += line.len();

// PERF: Ideally we don't do multiple `match_indices` passes over `line`
// in addition to the final pass for the import parse
Expand All @@ -283,7 +283,6 @@ impl Preprocessor {
// let import_lines = &shader_str[initial_offset..offset]
// but we need the comments removed, and the iterator approach doesn't make that easy
import_lines.push_str(&line);
import_lines.push('\n');

if open_count == 0 || lines.peek().is_none() {
break;
Expand Down Expand Up @@ -356,15 +355,15 @@ impl Preprocessor {
final_string.push_str(&item_replaced_line);
let diff = line.len().saturating_sub(item_replaced_line.len());
final_string.extend(std::iter::repeat(" ").take(diff));
offset += original_line.len() + 1;
offset += original_line.len();
output = true;
}
}

if !output {
// output spaces for removed lines to keep spans consistent (errors report against substituted_source, which is not preprocessed)
final_string.extend(std::iter::repeat(" ").take(line.len()));
offset += line.len() + 1;
final_string.extend(std::iter::repeat(" ").take(line.len().saturating_sub(1)));
offset += line.len();
}
final_string.push('\n');
}
Expand Down Expand Up @@ -398,10 +397,9 @@ impl Preprocessor {
let mut defines = HashMap::default();
let mut effective_defs = HashSet::default();

let mut lines = shader_str.lines();
let mut lines = lines.replace_comments().peekable();
let mut lines = replace_comments(shader_str).peekable();

while let Some(mut line) = lines.next() {
while let Some((mut line, _)) = lines.next() {
let (is_scope, def) = self.check_scope(&HashMap::default(), &line, None, offset)?;

if is_scope {
Expand All @@ -423,7 +421,6 @@ impl Preprocessor {
// let import_lines = &shader_str[initial_offset..offset]
// but we need the comments removed, and the iterator approach doesn't make that easy
import_lines.push_str(&line);
import_lines.push('\n');

if open_count == 0 || lines.peek().is_none() {
break;
Expand All @@ -432,7 +429,7 @@ impl Preprocessor {
// output spaces for removed lines to keep spans consistent (errors report against substituted_source, which is not preprocessed)
offset += line.len() + 1;

line = lines.next().unwrap();
line = lines.next().unwrap().0;
}

parse_imports(import_lines.as_str(), &mut declared_imports).map_err(
Expand Down

0 comments on commit 3771d6c

Please sign in to comment.