Skip to content

Commit

Permalink
Merge pull request #1 from mgdm/ignore-whitespace
Browse files Browse the repository at this point in the history
Allow ignoring whitespace-only text nodes
  • Loading branch information
mgdm authored May 10, 2019
2 parents a8ebc77 + 83467eb commit 7c3ba03
Showing 1 changed file with 35 additions and 6 deletions.
41 changes: 35 additions & 6 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,19 @@ extern crate html5ever;
extern crate kuchiki;

use clap::{App, Arg, ArgMatches};
use kuchiki::NodeRef;
use kuchiki::traits::*;
use kuchiki::NodeRef;
use std::fs::File;
use std::io;
use std::str;

/// Run-time settings for the tool, populated from the parsed command-line
/// arguments.
#[derive(Debug, Clone)]
struct Config {
/// Value of the `filename` argument; falls back to `"-"` when it is absent.
/// NOTE(review): `"-"` presumably means standard input -- confirm where the
/// input is opened.
input_path: String,
/// Value of the `output` argument; falls back to `"-"` when it is absent.
/// NOTE(review): `"-"` presumably means standard output -- confirm where the
/// output is opened.
output_path: String,
/// Selector string used to pick nodes from the document.
/// NOTE(review): how it is built and parsed is not visible in this view.
selector: String,
/// Set when the `text_only` flag is present: selected nodes are written as
/// the result of `serialize_text` rather than as serialized markup.
text_only: bool,
/// Set when the `ignore_whitespace` flag is present; forwarded to
/// `serialize_text` when text is printed.
ignore_whitespace: bool,
/// Optional list of attribute names handed to `select_attributes`.
/// NOTE(review): presumably `None` when no attribute argument was given --
/// confirm against the constructor.
attributes: Option<Vec<String>>,
}

Expand All @@ -32,8 +34,9 @@ impl Config {
input_path: String::from(matches.value_of("filename").unwrap_or("-")),
output_path: String::from(matches.value_of("output").unwrap_or("-")),
text_only: matches.is_present("text_only"),
attributes: attributes,
selector: selector
ignore_whitespace: matches.is_present("ignore_whitespace"),
attributes,
selector,
})
}
}
Expand All @@ -50,6 +53,23 @@ fn select_attributes(node: &NodeRef, attributes: &Vec<String>, output: &mut io::
}
}

/// Concatenates the contents of the text nodes yielded by
/// `node.inclusive_descendants().text_nodes()` into a single `String`.
///
/// * `node` - root of the subtree whose text is collected.
/// * `ignore_whitespace` - when `true`, text nodes that are empty after
///   trimming are skipped and a `'\n'` is appended after every text node that
///   is kept; when `false`, all text nodes are concatenated verbatim with no
///   separator.
///
/// Returns the accumulated text (empty if no text node contributed anything).
fn serialize_text(node: &NodeRef, ignore_whitespace: bool) -> String {
    let mut result = String::new();
    for text_node in node.inclusive_descendants().text_nodes() {
        // Borrow the node's text once and reuse it for both the check and the copy.
        let text = text_node.borrow();
        if ignore_whitespace && text.trim().is_empty() {
            continue;
        }

        result.push_str(&text);

        // Kept text nodes are newline-separated only in ignore-whitespace mode,
        // where the whitespace-only nodes that would have split them are dropped.
        if ignore_whitespace {
            result.push('\n');
        }
    }

    result
}

fn main() {
let matches = App::new("htmlq")
.version("0.0.1")
Expand Down Expand Up @@ -77,6 +97,12 @@ fn main() {
.long("text")
.help("Output only the contents of text nodes inside selected elements"),
)
.arg(
Arg::with_name("ignore_whitespace")
.short("w")
.long("ignore-whitespace")
.help("When printing text nodes, ignore those that consist entirely of whitespace"),
)
.arg(
Arg::with_name("attribute")
.short("a")
Expand Down Expand Up @@ -117,11 +143,14 @@ fn main() {
select_attributes(node, attributes, &mut output);
} else {
if config.text_only {
let content = serialize_text(node, config.ignore_whitespace);
output.write_all(format!("{}\n", content).as_ref()).unwrap();
} else {
let mut content: Vec<u8> = Vec::new();
node.serialize(&mut content).unwrap();
output
.write_all(format!("{}\n", node.text_contents()).as_ref())
.write_all(format!("{}\n", str::from_utf8(&content).unwrap()).as_ref())
.unwrap();
} else {
node.serialize(&mut output).unwrap();
}
}
}
Expand Down

0 comments on commit 7c3ba03

Please sign in to comment.