From 172de440d833785b6c1a51fcbdf2a0f9c7e38548 Mon Sep 17 00:00:00 2001
From: robtfm <50659922+robtfm@users.noreply.github.com>
Date: Tue, 9 Apr 2024 01:27:16 +0100
Subject: [PATCH 1/2] don't strip quoted comments

---
 src/compose/comment_strip_iter.rs | 52 +++++++++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 3 deletions(-)
diff --git a/src/compose/comment_strip_iter.rs b/src/compose/comment_strip_iter.rs
index 4d12a5a..f09d328 100644
--- a/src/compose/comment_strip_iter.rs
+++ b/src/compose/comment_strip_iter.rs
@@ -3,7 +3,7 @@ use std::{borrow::Cow, str::Lines};
 use regex::Regex;
 
 static RE_COMMENT: once_cell::sync::Lazy<Regex> =
-    once_cell::sync::Lazy::new(|| Regex::new(r"(//|/\*|\*/)").unwrap());
+    once_cell::sync::Lazy::new(|| Regex::new(r#"(//|/\*|\*/|\")"#).unwrap());
 
 pub struct CommentReplaceIter<'a> {
     lines: &'a mut Lines<'a>,
@@ -32,6 +32,27 @@ impl<'a> Iterator for CommentReplaceIter<'a> {
             let mut next_marker = markers.next();
             let mut section_end = next_marker.map(|m| m.start()).unwrap_or(line_in.len());
 
+            // comment markers in quoted strings should not be processed (unless the quote is in a comment ...)
+            if self.block_depth == 0 && next_marker.map_or(false, |marker| marker.as_str() == "\"")
+            {
+                let mut end_quote = markers.next();
+                while !end_quote.map_or(true, |marker| marker.as_str() == "\"") {
+                    end_quote = markers.next();
+                }
+
+                match end_quote {
+                    Some(end_quote) => {
+                        output.push_str(&line_in[section_start..end_quote.end()]);
+                        section_start = end_quote.end();
+                        continue;
+                    }
+                    None => {
+                        output.push_str(&line_in[section_start..]);
+                        return Some(Cow::Owned(output));
+                    }
+                }
+            }
+
             // skip partial tokens
             while next_marker.is_some() && section_start > section_end {
                 next_marker = markers.next();
@@ -64,6 +85,7 @@ impl<'a> Iterator for CommentReplaceIter<'a> {
                         "*/" => {
                             self.block_depth = self.block_depth.saturating_sub(1);
                         }
+                        "\"" => (),
                         _ => unreachable!(),
                     }
                     output.extend(std::iter::repeat(' ').take(marker.as_str().len()));
@@ -119,7 +141,7 @@ not commented
         None
     );
 
-    const PARTIAL_TESTS: [(&str, &str); 4] = [
+    const PARTIAL_TESTS: [(&str, &str); 10] = [
         (
             "1.0 /* block comment with a partial line comment on the end *// 2.0",
             "1.0                                                           / 2.0",
@@ -136,11 +158,35 @@ not commented
             "1.0 /* block comment with real line comment after */// line comment",
             "1.0                                                                ",
         ),
+        (
+            r#"#import "embedded://file.wgsl""#,
+            r#"#import "embedded://file.wgsl""#,
+        ),
+        (
+            r#"// #import "embedded://file.wgsl""#,
+            r#"                                 "#,
+        ),
+        (
+            r#"/* #import "embedded://file.wgsl" */"#,
+            r#"                                    "#,
+        ),
+        (
+            r#"/* #import "embedded:*/file.wgsl" */"#,
+            r#"                       file.wgsl" */"#,
+        ),
+        (
+            r#"#import "embedded://file.wgsl" // comment"#,
+            r#"#import "embedded://file.wgsl"           "#,
+        ),
+        (
+            r#"#import "embedded:/* */ /* /**/* / / /// * / //*/*/ / */*file.wgsl""#,
+            r#"#import "embedded:/* */ /* /**/* / / /// * / //*/*/ / */*file.wgsl""#,
+        ),
     ];
 
     for &(input, expected) in PARTIAL_TESTS.iter() {
         let mut nasty_processed = input.lines();
         let nasty_processed = nasty_processed.replace_comments().next().unwrap();
-        assert_eq!(&nasty_processed, expected);
+        assert_eq!((input, nasty_processed.as_ref()), (input, expected));
     }
 }

From 8c128237b3a3a5c336cb6a1057b0c49225ac9728 Mon Sep 17 00:00:00 2001
From: robtfm <50659922+robtfm@users.noreply.github.com>
Date: Fri, 26 Apr 2024 01:25:17 +0100
Subject: [PATCH 2/2] use better state management address partial token issues

Co-authored-by: stefnotch <stefnotch@users.noreply.github.com>
---
 src/compose/comment_strip_iter.rs | 180 +++++++++++++++++++++---------
 1 file changed, 125 insertions(+), 55 deletions(-)

diff --git a/src/compose/comment_strip_iter.rs b/src/compose/comment_strip_iter.rs
index f09d328..fdf3cdc 100644
--- a/src/compose/comment_strip_iter.rs
+++ b/src/compose/comment_strip_iter.rs
@@ -2,12 +2,26 @@ use std::{borrow::Cow, str::Lines};
 
 use regex::Regex;
 
-static RE_COMMENT: once_cell::sync::Lazy<Regex> =
-    once_cell::sync::Lazy::new(|| Regex::new(r#"(//|/\*|\*/|\")"#).unwrap());
+// outside of blocks and quotes, change state on //, /* or "
+static RE_NONE: once_cell::sync::Lazy<Regex> =
+    once_cell::sync::Lazy::new(|| Regex::new(r#"(//|/\*|\")"#).unwrap());
+// in blocks, change on /* and */
+static RE_BLOCK: once_cell::sync::Lazy<Regex> =
+    once_cell::sync::Lazy::new(|| Regex::new(r"(/\*|\*/)").unwrap());
+// in quotes, change only on "
+static RE_QUOTE: once_cell::sync::Lazy<Regex> =
+    once_cell::sync::Lazy::new(|| Regex::new(r#"\""#).unwrap());
+
+#[derive(PartialEq, Eq)]
+enum CommentState {
+    None,
+    Block(usize),
+    Quote,
+}
 
 pub struct CommentReplaceIter<'a> {
     lines: &'a mut Lines<'a>,
-    block_depth: usize,
+    state: CommentState,
 }
 
 impl<'a> Iterator for CommentReplaceIter<'a> {
@@ -15,13 +29,9 @@ impl<'a> Iterator for CommentReplaceIter<'a> {
 
     fn next(&mut self) -> Option<Self::Item> {
         let line_in = self.lines.next()?;
-        let mut markers = RE_COMMENT
-            .captures_iter(line_in)
-            .map(|cap| cap.get(0).unwrap())
-            .peekable();
 
         // fast path
-        if self.block_depth == 0 && markers.peek().is_none() {
+        if self.state == CommentState::None && !RE_NONE.is_match(line_in) {
             return Some(Cow::Borrowed(line_in));
         }
 
@@ -29,67 +39,65 @@ impl<'a> Iterator for CommentReplaceIter<'a> {
         let mut section_start = 0;
 
         loop {
-            let mut next_marker = markers.next();
-            let mut section_end = next_marker.map(|m| m.start()).unwrap_or(line_in.len());
-
-            // comment markers in quoted strings should not be processed (unless the quote is in a comment ...)
-            if self.block_depth == 0 && next_marker.map_or(false, |marker| marker.as_str() == "\"")
-            {
-                let mut end_quote = markers.next();
-                while !end_quote.map_or(true, |marker| marker.as_str() == "\"") {
-                    end_quote = markers.next();
-                }
-
-                match end_quote {
-                    Some(end_quote) => {
-                        output.push_str(&line_in[section_start..end_quote.end()]);
-                        section_start = end_quote.end();
-                        continue;
-                    }
-                    None => {
-                        output.push_str(&line_in[section_start..]);
-                        return Some(Cow::Owned(output));
-                    }
-                }
+            let marker = match self.state {
+                CommentState::None => &RE_NONE,
+                CommentState::Block(_) => &RE_BLOCK,
+                CommentState::Quote => &RE_QUOTE,
             }
+            .find(&line_in[section_start..]);
 
-            // skip partial tokens
-            while next_marker.is_some() && section_start > section_end {
-                next_marker = markers.next();
-                section_end = next_marker.map(|m| m.start()).unwrap_or(line_in.len());
-            }
+            let section_end = marker
+                .map(|m| section_start + m.start())
+                .unwrap_or(line_in.len());
 
-            if self.block_depth == 0 {
-                output.push_str(&line_in[section_start..section_end]);
-            } else {
+            if let CommentState::Block(_) = self.state {
                 output.extend(std::iter::repeat(' ').take(section_end - section_start));
+            } else {
+                output.push_str(&line_in[section_start..section_end]);
             }
 
-            match next_marker {
+            match marker {
                 None => return Some(Cow::Owned(output)),
                 Some(marker) => {
                     match marker.as_str() {
+                        // only possible in None state
                         "//" => {
-                            // the specs (https://www.w3.org/TR/WGSL/#comment, https://registry.khronos.org/OpenGL/specs/gl/GLSLangSpec.4.60.pdf @ 3.4) state that
-                            // whichever comment-type starts first should cancel parsing of the other type
-                            if self.block_depth == 0 {
-                                output.extend(
-                                    std::iter::repeat(' ').take(line_in.len() - marker.start()),
-                                );
-                                return Some(Cow::Owned(output));
-                            }
+                            output.extend(
+                                std::iter::repeat(' ')
+                                    .take(line_in.len() - marker.start() - section_start),
+                            );
+                            return Some(Cow::Owned(output));
                         }
+                        // only possible in None or Block state
                         "/*" => {
-                            self.block_depth += 1;
+                            self.state = match self.state {
+                                CommentState::None => CommentState::Block(1),
+                                CommentState::Block(n) => CommentState::Block(n + 1),
+                                _ => unreachable!(),
+                            };
+                            output.push_str("  ");
                         }
+                        // only possible in Block state
                         "*/" => {
-                            self.block_depth = self.block_depth.saturating_sub(1);
+                            self.state = match self.state {
+                                CommentState::Block(1) => CommentState::None,
+                                CommentState::Block(n) => CommentState::Block(n - 1),
+                                _ => unreachable!(),
+                            };
+                            output.push_str("  ");
+                        }
+                        // only possible in None or Quote state
+                        "\"" => {
+                            self.state = match self.state {
+                                CommentState::None => CommentState::Quote,
+                                CommentState::Quote => CommentState::None,
+                                _ => unreachable!(),
+                            };
+                            output.push('"');
                         }
-                        "\"" => (),
                         _ => unreachable!(),
                     }
-                    output.extend(std::iter::repeat(' ').take(marker.as_str().len()));
-                    section_start = marker.end();
+                    section_start += marker.end();
                 }
             }
         }
@@ -105,7 +113,7 @@ impl<'a> CommentReplaceExt<'a> for Lines<'a> {
     fn replace_comments(&'a mut self) -> CommentReplaceIter {
         CommentReplaceIter {
             lines: self,
-            block_depth: 0,
+            state: CommentState::None,
         }
     }
 }
@@ -141,7 +149,7 @@ not commented
         None
     );
 
-    const PARTIAL_TESTS: [(&str, &str); 10] = [
+    const PARTIAL_TESTS: [(&str, &str); 11] = [
         (
             "1.0 /* block comment with a partial line comment on the end *// 2.0",
             "1.0                                                           / 2.0",
@@ -158,6 +166,7 @@ not commented
             "1.0 /* block comment with real line comment after */// line comment",
             "1.0                                                                ",
         ),
+        ("*/", "*/"),
         (
             r#"#import "embedded://file.wgsl""#,
             r#"#import "embedded://file.wgsl""#,
@@ -187,6 +196,67 @@ not commented
     for &(input, expected) in PARTIAL_TESTS.iter() {
         let mut nasty_processed = input.lines();
         let nasty_processed = nasty_processed.replace_comments().next().unwrap();
-        assert_eq!((input, nasty_processed.as_ref()), (input, expected));
+        assert_eq!(&nasty_processed, expected);
+    }
+}
+
+#[test]
+fn multiline_comment_test() {
+    let test_cases = [
+        (
+            // Basic test
+            r"/*
+hoho
+*/",
+            r"  
+    
+  ",
+        ),
+        (
+            // Testing the commenting-out of multiline comments
+            r"///*
+hehe
+//*/",
+            r"    
+hehe
+    ",
+        ),
+        (
+            // Testing the commenting-out of single-line comments
+            r"/* // */ code goes here /*
+Still a comment // */
+/* dummy */",
+            r"         code goes here   
+                     
+           ",
+        ),
+        (
+            // A comment with a nested multiline comment
+            // Notice how the "//" inside the multiline comment doesn't take effect
+            r"/*
+//*
+*/commented
+*/not commented",
+            r"  
+   
+           
+  not commented",
+        ),
+    ];
+
+    for &(input, expected) in test_cases.iter() {
+        for (output_line, expected_line) in input.lines().replace_comments().zip(expected.lines()) {
+            assert_eq!(output_line.as_ref(), expected_line);
+        }
+    }
+}
+
+#[test]
+fn test_comment_becomes_spaces() {
+    let test_cases = [("let a/**/b =3u;", "let a    b =3u;")];
+    for &(input, expected) in test_cases.iter() {
+        for (output_line, expected_line) in input.lines().replace_comments().zip(expected.lines()) {
+            assert_eq!(output_line.as_ref(), expected_line);
+        }
     }
 }