From a5dc026325bc1350b324eb79bfb8728d13dff3aa Mon Sep 17 00:00:00 2001 From: Rob Parrett Date: Mon, 5 Feb 2024 15:31:58 -0700 Subject: [PATCH] Fix lines not being hidden in indented code blocks (#915) --- write-rustdoc-hide-lines/Cargo.lock | 45 ++++++ write-rustdoc-hide-lines/Cargo.toml | 1 + .../src/code_block_definition.rs | 42 +++--- write-rustdoc-hide-lines/src/formatter.rs | 142 +++++++++++++----- write-rustdoc-hide-lines/src/hidden_ranges.rs | 15 +- 5 files changed, 178 insertions(+), 67 deletions(-) diff --git a/write-rustdoc-hide-lines/Cargo.lock b/write-rustdoc-hide-lines/Cargo.lock index 1adac9b69a..83409b5557 100644 --- a/write-rustdoc-hide-lines/Cargo.lock +++ b/write-rustdoc-hide-lines/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + [[package]] name = "anyhow" version = "1.0.68" @@ -14,10 +23,46 @@ version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da2d6f23ffea9d7e76c53eee25dfb67bcd8fde7f1198b0855350698c9f07c780" +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "regex" +version = "1.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + [[package]] name = "write-rustdoc-hide-lines" version = "0.1.0" dependencies = [ "anyhow", "indoc", + "regex", ] diff --git a/write-rustdoc-hide-lines/Cargo.toml b/write-rustdoc-hide-lines/Cargo.toml index 9dbe0fb2d7..43f01bf13b 100644 --- a/write-rustdoc-hide-lines/Cargo.toml +++ b/write-rustdoc-hide-lines/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] anyhow = "1.0" +regex = "1.5" [dev-dependencies] indoc = "1.0" diff --git a/write-rustdoc-hide-lines/src/code_block_definition.rs b/write-rustdoc-hide-lines/src/code_block_definition.rs index c3e77547bf..c374f615fd 100644 --- a/write-rustdoc-hide-lines/src/code_block_definition.rs +++ b/write-rustdoc-hide-lines/src/code_block_definition.rs @@ -1,5 +1,7 @@ use std::ops::Range; +use regex::Regex; + use crate::hidden_ranges::HiddenRanges; #[derive(Debug, PartialEq)] @@ -74,27 +76,24 @@ pub struct CodeBlockDefinition { hide_lines_idx: Option, } -const RUST_CODE_BLOCK_LONG: &str = "```rust"; -const RUST_CODE_BLOCK_SHORT: &str = "```rs"; - impl CodeBlockDefinition { pub fn new(line: &str) -> Option { - let tag: String; + let lang_re = Regex::new(r"(\s*)```(.+)").ok()?; + let captures = lang_re.captures(line)?; - if line.starts_with(RUST_CODE_BLOCK_LONG) { - tag = RUST_CODE_BLOCK_LONG.into(); - } else if line.starts_with(RUST_CODE_BLOCK_SHORT) { - tag = RUST_CODE_BLOCK_SHORT.into(); - } else { + let whitespace = captures.get(1).map(|mat| mat.as_str())?; + let lang = captures.get(2).map(|mat| mat.as_str())?; + + let mut hide_lines_idx = None; + + let mut parts = lang.split(','); + let tag = parts.next()?; + + if tag != "rs" && tag != "rust" { return None; } - let mut hide_lines_idx = None; - let annotations = line - .get(tag.len()..) - .unwrap_or("") - .split(',') - .filter(|a| a.trim() != "") + let annotations = parts .enumerate() .map(|(idx, a)| { let annotation = Annotation::from(a); @@ -108,7 +107,7 @@ impl CodeBlockDefinition { .collect(); Some(CodeBlockDefinition { - tag, + tag: format!("{}```{}", whitespace, tag), annotations, hide_lines_idx, }) @@ -143,12 +142,9 @@ impl CodeBlockDefinition { pub fn set_hidden_ranges(&mut self, hidden_ranges: HiddenRanges) { if hidden_ranges.is_empty() { // Remove - match self.hide_lines_idx { - Some(idx) => { - self.annotations.remove(idx); - self.hide_lines_idx = None; - } - None => (), + if let Some(idx) = self.hide_lines_idx { + self.annotations.remove(idx); + self.hide_lines_idx = None; } } else { // Add @@ -175,7 +171,7 @@ mod tests { for case in cases { let definition = CodeBlockDefinition::new(case); - assert_eq!(definition.is_none(), true); + assert_eq!(definition, None); } } diff --git a/write-rustdoc-hide-lines/src/formatter.rs b/write-rustdoc-hide-lines/src/formatter.rs index 749253434d..ad61c97c87 100644 --- a/write-rustdoc-hide-lines/src/formatter.rs +++ b/write-rustdoc-hide-lines/src/formatter.rs @@ -1,4 +1,5 @@ use anyhow::Result; +use regex::Regex; use std::{ ffi::OsStr, fmt::Write, @@ -7,9 +8,7 @@ use std::{ path::Path, }; -use crate::{ - code_block_definition::CodeBlockDefinition, hidden_ranges::get_hidden_ranges -}; +use crate::{code_block_definition::CodeBlockDefinition, hidden_ranges::get_hidden_ranges}; pub fn run(dir: &Path) -> Result<()> { visit_dir_md_files(dir, &|entry| { @@ -19,8 +18,10 @@ pub fn run(dir: &Path) -> Result<()> { let file = File::open(entry.path())?; let file_size = file.metadata().unwrap().len().try_into().unwrap(); let contents = format_file( - io::BufReader::new(file).lines().map(|line| line.map_err(anyhow::Error::from)), - file_size + io::BufReader::new(file) + .lines() + .map(|line| line.map_err(anyhow::Error::from)), + file_size, )?; // Rewrite file @@ -53,57 +54,71 @@ fn visit_dir_md_files(dir: &Path, cb: &dyn Fn(&DirEntry) -> Result<()>) -> Resul fn format_file(reader: impl Iterator>, file_size: usize) -> Result { let mut contents = String::with_capacity(file_size); - let mut is_inside_rust_code_block = false; let mut rust_block: Vec = vec![]; + let mut is_rust = false; + + let mut inside_code_block = false; + + // Find a code block delimiter and optionally the first specified language + let code_block_delim = Regex::new(r"\s*```(\w*)")?; for line in reader { let line = line?; - let is_code_block_open = line.starts_with("```rust"); - let is_code_block_close = line == "```"; - if is_inside_rust_code_block && is_code_block_open { - panic!("Nested '```rust' code block not allowed"); - } else if is_code_block_open { - is_inside_rust_code_block = true; + let code_block_delim_match = code_block_delim.captures(&line).and_then(|cap| cap.get(1)); + let is_code_block_delim = code_block_delim_match.is_some(); + + if !inside_code_block && is_code_block_delim { + let lang = code_block_delim_match.unwrap().as_str(); + if lang == "rust" || lang == "rs" { + is_rust = true; + } + + inside_code_block = true; + } else if inside_code_block && is_code_block_delim { + inside_code_block = false; } - // Skip the line, save it as is - if !is_inside_rust_code_block { + // Pass through non-rust code block contents and contents outside of code blocks. + if !is_rust { writeln!(&mut contents, "{}", &line)?; continue; } rust_block.push(line); - // Process the `rust` code block - if is_code_block_close { - let code = &rust_block[1..rust_block.len() - 1]; - let real_hidden_ranges = get_hidden_ranges(code); - let mut definition = CodeBlockDefinition::new(&rust_block[0]).unwrap(); - - match definition.get_hidden_ranges() { - Some(annotation_hidden_ranges) => { - if *annotation_hidden_ranges != real_hidden_ranges { - definition.set_hidden_ranges(real_hidden_ranges); - } + if inside_code_block { + continue; + } + + // Process the `rust `code block + let code = &rust_block[1..rust_block.len() - 1]; + let real_hidden_ranges = get_hidden_ranges(code); + let mut definition = CodeBlockDefinition::new(&rust_block[0]).unwrap(); + + match definition.get_hidden_ranges() { + Some(annotation_hidden_ranges) => { + if *annotation_hidden_ranges != real_hidden_ranges { + definition.set_hidden_ranges(real_hidden_ranges); } - None => { - if !real_hidden_ranges.is_empty() { - definition.set_hidden_ranges(real_hidden_ranges); - } + } + None => { + if !real_hidden_ranges.is_empty() { + definition.set_hidden_ranges(real_hidden_ranges); } } + } - // Rewrite code block Zola annotations - rust_block[0] = definition.into_string(); + // Rewrite code block Zola annotations + rust_block[0] = definition.into_string(); - // Write code block - writeln!(&mut contents, "{}", &rust_block.join("\n"))?; + // Write code block + writeln!(&mut contents, "{}", &rust_block.join("\n"))?; - // Reset state - is_inside_rust_code_block = false; - rust_block = vec![]; - } + // Reset state + inside_code_block = false; + rust_block = vec![]; + is_rust = false; } Ok(contents) @@ -111,11 +126,11 @@ fn format_file(reader: impl Iterator>, file_size: usize) - #[cfg(test)] mod tests { - use indoc::indoc; use super::*; + use indoc::indoc; fn lines_iter(code: &str) -> impl Iterator> + '_ { - code.split("\n").map(|line| Ok(String::from(line))) + code.split('\n').map(|line| Ok(String::from(line))) } #[test] @@ -128,6 +143,10 @@ mod tests { } # test 3 + #[derive(Component)] + struct A; + # #[derive(Component)] + struct B; ``` "#}; @@ -136,13 +155,17 @@ mod tests { assert_eq!( contents.unwrap(), indoc! {r#" - ```rust,hide_lines=1-2 6 + ```rust,hide_lines=1-2 6 9 # test # test 2 fn not_hidden() { } # test 3 + #[derive(Component)] + struct A; + # #[derive(Component)] + struct B; ``` "#} @@ -204,4 +227,43 @@ mod tests { "#} ); } + + #[test] + fn indented() { + let markdown = r#" + ```rust + # test + # test 2 + fn not_hidden() { + + } + # test 3 + #[derive(Component)] + struct A; + # #[derive(Component)] + struct B; + ``` +"#; + + let contents = format_file(lines_iter(markdown), markdown.len()); + + assert_eq!( + contents.unwrap(), + r#" + ```rust,hide_lines=1-2 6 9 + # test + # test 2 + fn not_hidden() { + + } + # test 3 + #[derive(Component)] + struct A; + # #[derive(Component)] + struct B; + ``` + +"# + ); + } } diff --git a/write-rustdoc-hide-lines/src/hidden_ranges.rs b/write-rustdoc-hide-lines/src/hidden_ranges.rs index f222c1fc0b..758cf913d6 100644 --- a/write-rustdoc-hide-lines/src/hidden_ranges.rs +++ b/write-rustdoc-hide-lines/src/hidden_ranges.rs @@ -1,5 +1,7 @@ use std::ops::Range; +use regex::Regex; + pub type HiddenRanges = Vec>; // The generic is to allow both `&[String]` (slice of `Vec`) and `&[&str]` (slice of `Vec<&str>`) @@ -8,10 +10,15 @@ pub fn get_hidden_ranges>(code: &[T]) -> HiddenRanges { let mut ranges = vec![]; let mut curr_range: Option> = None; + // Match lines starting with a potentially indented `#` followed by a space or EOL. + let Ok(is_hidden_re) = Regex::new(r"^\s*#(?: |$)") else { + return ranges; + }; + for (idx, line) in code.iter().enumerate() { let n = idx + 1; let line = line.as_ref(); - let is_hidden = line.starts_with("# ") || line == "#"; + let is_hidden = is_hidden_re.is_match(line); if is_hidden { if let Some(range) = curr_range.as_mut() { @@ -41,14 +48,14 @@ mod tests { use indoc::indoc; fn split_lines(code: &str) -> Vec<&str> { - code.split("\n").collect::>() + code.split('\n').collect::>() } #[test] fn empty_block() { let code = split_lines(indoc! {r#""#}); - assert_eq!(get_hidden_ranges(&code), vec![]); + assert!(get_hidden_ranges(&code).is_empty()); } #[test] @@ -61,7 +68,7 @@ mod tests { 5 "#}); - assert_eq!(get_hidden_ranges(&code), vec![]); + assert!(get_hidden_ranges(&code).is_empty()); } #[test]