Skip to content

Commit

Permalink
Merge pull request #45 from cdesaintguilhem/pr-handle-sections
Browse files Browse the repository at this point in the history
Split sectioning commands onto new lines
  • Loading branch information
WGUNDERWOOD authored Oct 31, 2024
2 parents 2a7b1d4 + 8453f71 commit ffa7e38
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 18 deletions.
4 changes: 4 additions & 0 deletions src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::cli::*;
use crate::ignore::*;
use crate::indent::*;
use crate::logging::*;
use crate::regexes::RE_SECTIONING;
use crate::regexes::{ENV_BEGIN, ENV_END, ITEM};
use crate::subs::*;
use crate::verbatim::*;
Expand Down Expand Up @@ -193,6 +194,8 @@ pub struct Pattern {
pub contains_env_end: bool,
/// Whether an item pattern is present
pub contains_item: bool,
/// Whether a sectioning pattern is present
pub contains_sectioning: bool,
}

impl Pattern {
Expand All @@ -202,6 +205,7 @@ impl Pattern {
contains_env_begin: s.contains(ENV_BEGIN),
contains_env_end: s.contains(ENV_END),
contains_item: s.contains(ITEM),
contains_sectioning: RE_SECTIONING.is_match(s),
}
}
}
Expand Down
78 changes: 65 additions & 13 deletions src/regexes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,57 @@ const LISTS: [&str; 5] = [
const VERBATIMS: [&str; 5] =
["verbatim", "Verbatim", "lstlisting", "minted", "comment"];

/// Regex fragments for the non-sectioning commands that must begin a line:
/// environment openers, environment closers and list items.
///
/// The `\item` fragment deliberately ends with a space; do not trim it.
const REQUIRE_NEW_LINE: [&str; 3] = [r"\\begin\{", r"\\end\{", r"\\item "];

/// Regex fragments matching the start of each supported sectioning command.
///
/// Every level from `\part` down to `\subsubsection` is listed in both its
/// plain and starred form, and each fragment ends with the escaped opening
/// brace of the title argument.
const SECTIONING_COMMANDS: [&str; 10] = [
r"\\part\{",
r"\\part\*\{",
r"\\chapter\{",
r"\\chapter\*\{",
r"\\section\{",
r"\\section\*\{",
r"\\subsection\{",
r"\\subsection\*\{",
r"\\subsubsection\{",
r"\\subsubsection\*\{",
];

// Regexes
lazy_static! {
// Regex source for a single parenthesised alternation that matches any one
// of the [`SECTIONING_COMMANDS`].
pub static ref SECTIONING_OR_GROUP: String =
    format!("({})", SECTIONING_COMMANDS.join("|"));
// Every regex fragment before which a line must be split: the
// [`REQUIRE_NEW_LINE`] entries first, followed by the
// [`SECTIONING_COMMANDS`], in their declared order.
pub static ref SPLITTING_COMMANDS: Vec<&'static str> = REQUIRE_NEW_LINE
    .iter()
    .chain(SECTIONING_COMMANDS.iter())
    .copied()
    .collect();
// Regex source for a single parenthesised alternation that matches any one
// of the [`SPLITTING_COMMANDS`].
pub static ref SPLITTING_OR_GROUP: String =
    format!("({})", SPLITTING_COMMANDS.join("|"));
// Matches a run of three or more consecutive `LINE_END` matches: two
// mandatory ones followed by a captured group repeated at least once.
// NOTE(review): `LINE_END` is defined outside this view; presumably it is a
// regex fragment for a single line ending — confirm.
pub static ref RE_NEWLINES: Regex =
Regex::new(&format!(r"{LINE_END}{LINE_END}({LINE_END})+")).unwrap();
pub static ref RE_TRAIL: Regex =
Expand All @@ -52,22 +101,25 @@ lazy_static! {
Regex::new(r"(?P<prev>\S.*?)(?P<env>\\end\{)").unwrap();
// Matches a `\item` command that has non-whitespace text before it on the
// same line; `prev` captures that text and `env` captures the command.
// NOTE(review): this pattern accepts `\item` without a trailing space
// (e.g. `\item[a]`), while the `\\item ` entry in REQUIRE_NEW_LINE requires
// one. A line can therefore match here yet fail the combined splitting
// regex, whose captures are unwrapped in subs.rs — confirm whether this
// mismatch is intended.
pub static ref RE_ITEM_SHARED_LINE: Regex =
Regex::new(r"(?P<prev>\S.*?)(?P<env>\\item)").unwrap();
// Compiled regex that matches any one of the sectioning commands, built
// from [`SECTIONING_OR_GROUP`].
pub static ref RE_SECTIONING: Regex =
    Regex::new(&SECTIONING_OR_GROUP).unwrap();
// Compiled regex that matches a sectioning command preceded on the same
// line by at least one non-whitespace character. The first group lazily
// captures the preceding text; the second captures the command together
// with everything after it.
pub static ref RE_SECTION_SHARED_LINE: Regex = Regex::new(&format!(
    r"(\S.*?)({}.*)",
    SECTIONING_OR_GROUP.as_str()
))
.unwrap();
// Regex that matches any splitting command with non-whitespace
// characters before it and catches the previous text in a group called
// characters before it, catches the previous text in a group called
// "prev" and captures the command itself and the remaining text
// in a group called "env".
pub static ref RE_ENV_ITEM_SHARED_LINE: Regex = Regex::new(
r"(?x) # Enable extended mode
(?P<prev>\S.*?) # <prev>: captures any number of characters starting
# with a non-whitespace character until the start
# of the next group;
(?P<env>( # <env>: captures any LaTeX command before which the
# line should be split
\\begin\{ # start of environments
|\\end\{ # end of environments
|\\item ) # list items (note the space before the closing bracket)
.*) # and any characters that follow the command
"
// Capture groups, in order: 1 = `prev` (text before the command),
// 2 = `env` (the command plus the rest of the line), 3 = the inner
// alternation from [`SPLITTING_OR_GROUP`] (the command alone).
pub static ref RE_ENV_ITEM_SEC_SHARED_LINE: Regex = Regex::new(&format!(
    r"(?P<prev>\S.*?)(?P<env>{}.*)",
    SPLITTING_OR_GROUP.as_str()
))
.unwrap();
}
10 changes: 6 additions & 4 deletions src/subs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,20 @@ pub fn needs_env_new_line(line: &str, pattern: &Pattern) -> bool {
// Check if we should format this line and if we've matched an environment.
let contains_splittable_env = (pattern.contains_env_begin
|| pattern.contains_env_end
|| pattern.contains_item)
|| pattern.contains_item
|| pattern.contains_sectioning)
&& (RE_ENV_BEGIN_SHARED_LINE.is_match(line)
|| RE_ENV_END_SHARED_LINE.is_match(line)
|| RE_ITEM_SHARED_LINE.is_match(line));
|| RE_ITEM_SHARED_LINE.is_match(line)
|| RE_SECTION_SHARED_LINE.is_match(line));

// If we're not ignoring and we've matched an environment ...
if contains_splittable_env {
// ... return `true` if the comment index is `None`
// (which implies the split point must be in text), otherwise
// compare the index of the comment with the split point.
find_comment_index(line).map_or(true, |comment_index| {
if RE_ENV_ITEM_SHARED_LINE
if RE_ENV_ITEM_SEC_SHARED_LINE
.captures(line)
.unwrap() // Matched split point so no panic.
.get(2)
Expand Down Expand Up @@ -74,7 +76,7 @@ pub fn put_env_new_line<'a>(
args: &Cli,
logs: &mut Vec<Log>,
) -> (&'a str, &'a str) {
let captures = RE_ENV_ITEM_SHARED_LINE.captures(line).unwrap();
let captures = RE_ENV_ITEM_SEC_SHARED_LINE.captures(line).unwrap();

let (line, [prev, rest, _]) = captures.extract();

Expand Down
3 changes: 2 additions & 1 deletion src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ fn test_short() {
//"comments.tex",
//"cv.tex",
//"document.tex",
"environment_lines.tex",
// "environment_lines.tex",
//"heavy_wrap.tex",
//"higher_categories_thesis.bib",
//"higher_categories_thesis.tex",
Expand All @@ -102,6 +102,7 @@ fn test_short() {
//"puthesis.cls",
//"quiver.sty",
//"readme.tex",
"sections.tex",
//"short_document.tex",
//"tikz_network.sty",
//"unicode.tex",
Expand Down
7 changes: 7 additions & 0 deletions tests/source/sections.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
\section{Section test}

Sectioning commands should be moved to their own line.\subsection{Result} Even if there are more than one.\subsection{Result 2}

Also \section*{A} unnumbered sectioning commands \subsection*{B} should be split onto their own lines, even if there \subsubsection*{C} are more than one.

All of this \part{D} should also hold \part*{E} for parts \chapter{F} and chapters \chapter*{G}.
16 changes: 16 additions & 0 deletions tests/target/sections.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
\section{Section test}

Sectioning commands should be moved to their own line.
\subsection{Result} Even if there are more than one.
\subsection{Result 2}

Also
\section*{A} unnumbered sectioning commands
\subsection*{B} should be split onto their own lines, even if there
\subsubsection*{C} are more than one.

All of this
\part{D} should also hold
\part*{E} for parts
\chapter{F} and chapters
\chapter*{G}.

0 comments on commit ffa7e38

Please sign in to comment.