Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --show-dom and small CSS fixes #179

Merged
merged 5 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion examples/html2text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,14 @@ where
.unwrap();
}
}
if literal {
if flags.show_dom {
let conf = config::plain();
let conf = update_config(conf, &flags);
let dom = conf.parse_html(input).unwrap();
dom.as_dom_string()
} else if flags.show_render {
todo!()
} else if literal {
let conf = config::with_decorator(TrivialDecorator::new());
let conf = update_config(conf, &flags);
conf.string_from_read(input, flags.width).unwrap()
Expand All @@ -148,6 +155,8 @@ struct Flags {
ignore_css_colours: bool,
#[cfg(feature = "css")]
use_only_css: bool,
show_dom: bool,
show_render: bool,
}

fn main() {
Expand All @@ -166,6 +175,8 @@ fn main() {
ignore_css_colours: false,
#[cfg(feature = "css")]
use_only_css: false,
show_dom: false,
show_render: false,
};
let mut literal: bool = false;

Expand Down Expand Up @@ -214,6 +225,16 @@ fn main() {
StoreTrue,
"Don't use default non-CSS colours",
);
ap.refer(&mut flags.show_dom).add_option(
&["--show-dom"],
StoreTrue,
"Show the parsed HTML DOM instead of rendered output",
);
ap.refer(&mut flags.show_render).add_option(
&["--show-render"],
StoreTrue,
"Show the computed render tree instead of the rendered output",
);
ap.parse_args_or_exit();
}

Expand Down
74 changes: 67 additions & 7 deletions src/css/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ pub(crate) fn parse_color_attribute(
text: &str,
) -> Result<Colour, nom::Err<nom::error::Error<&'static str>>> {
let (_rest, value) = parse_value(text).map_err(|_| empty_fail())?;
parse_color(&value)
parse_color(&value.tokens)
}

#[derive(Copy, Clone, PartialEq, Eq, Debug)]
Expand All @@ -404,11 +404,15 @@ pub fn parse_declaration(text: &str) -> IResult<&str, Option<Declaration>> {
))(text)?;
let decl = match prop.0.as_str() {
"background-color" => {
let value = parse_color(&value)?;
let value = parse_color(&value.tokens)?;
Decl::BackgroundColor { value }
}
"background" => match parse_background_color(&value)? {
Some(value) => Decl::BackgroundColor { value },
_ => Decl::Unknown { name: prop },
},
"color" => {
let value = parse_color(&value)?;
let value = parse_color(&value.tokens)?;
Decl::Color { value }
}
"height" => {
Expand Down Expand Up @@ -457,12 +461,12 @@ fn empty_fail() -> nom::Err<nom::error::Error<&'static str>> {
nom::Err::Error(nom::error::Error::new("", ErrorKind::Fail))
}

fn parse_color(value: &RawValue) -> Result<Colour, nom::Err<nom::error::Error<&'static str>>> {
fn parse_color(tokens: &[Token]) -> Result<Colour, nom::Err<nom::error::Error<&'static str>>> {
let fail_error = empty_fail();
if value.tokens.is_empty() {
if tokens.is_empty() {
return Err(fail_error);
}
match &value.tokens[..] {
match tokens {
[Token::Ident(c)] => {
let colour = match c.deref() {
"aqua" => Colour::Rgb(0, 0xff, 0xff),
Expand Down Expand Up @@ -492,7 +496,7 @@ fn parse_color(value: &RawValue) -> Result<Colour, nom::Err<nom::error::Error<&'
use Token::*;
match name.deref() {
"rgb" => {
let rgb_args = &value.tokens[1..value.tokens.len() - 1];
let rgb_args = &tokens[1..tokens.len() - 1];
match rgb_args {
[Number(r), Comma, Number(g), Comma, Number(b)] => {
let r = r.parse().map_err(|_e| empty_fail())?;
Expand Down Expand Up @@ -527,6 +531,22 @@ fn parse_color(value: &RawValue) -> Result<Colour, nom::Err<nom::error::Error<&'
}
}

/// Parse a `background:` shorthand value, extracting only the colour (if present).
///
/// The colour, when given, belongs to the final comma-separated layer of the
/// value.  The token list is flat, so the commas inside a functional colour
/// such as `rgb(1, 2, 3)` are `Token::Comma` entries too; splitting blindly at
/// the last comma would cut such a colour in half.  Instead every suffix that
/// starts just after a comma is tried, shortest first, followed by the whole
/// value, and the first one `parse_color` accepts is used.
///
/// Returns `Ok(Some(colour))` when a colour is found and `Ok(None)` when the
/// value carries no recognisable colour.  This function never returns `Err`;
/// the `Result` wrapper is kept so callers can continue to use `?`.
fn parse_background_color(
    value: &RawValue,
) -> Result<Option<Colour>, nom::Err<nom::error::Error<&'static str>>> {
    let tokens = &value.tokens[..];

    let colour = tokens
        .iter()
        .enumerate()
        .rev()
        .filter(|(_, tok)| **tok == Token::Comma)
        // Candidate suffixes begin immediately after each comma, last comma first...
        .map(|(idx, _)| idx + 1)
        // ...and finally the whole value (no comma, or commas only inside a function).
        .chain(std::iter::once(0))
        .find_map(|start| parse_color(&tokens[start..]).ok());

    Ok(colour)
}

fn parse_integer(text: &str) -> IResult<&str, f32> {
let (rest, digits) = digit1(text)?;
Ok((rest, <f32 as FromStr>::from_str(digits).unwrap()))
Expand Down Expand Up @@ -1000,4 +1020,44 @@ mod test {
))
);
}

#[test]
fn test_background() {
    // Each case pairs a `background:` declaration with the `Decl` it should
    // parse to; every case must consume the whole input and have default
    // importance.
    let cases = vec![
        (
            "background: white",
            Decl::BackgroundColor {
                value: Colour::Rgb(0xff, 0xff, 0xff),
            },
        ),
        (
            "background: url('blah'), white",
            Decl::BackgroundColor {
                value: Colour::Rgb(0xff, 0xff, 0xff),
            },
        ),
        (
            // No recognisable colour in the final layer: the property is kept as unknown.
            "background: url('blah'), foo",
            Decl::Unknown {
                name: PropertyName("background".into()),
            },
        ),
    ];

    for (text, data) in cases.into_iter() {
        assert_eq!(
            super::parse_declaration(text),
            Ok((
                "",
                Some(Declaration {
                    data,
                    important: Importance::Default,
                })
            ))
        );
    }
}
}
39 changes: 39 additions & 0 deletions src/markup5ever_rcdom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,45 @@ pub struct RcDom {
pub quirks_mode: Cell<QuirksMode>,
}

impl RcDom {
    /// Appends an indented debug rendering of `node` and its descendants to `s`.
    ///
    /// Elements are written as `<name>` / `</name>` lines with their children
    /// one level deeper, non-blank text nodes as `Text:...` and doctypes as
    /// `<doctype>`.  Comments and processing instructions produce no output,
    /// and the document node contributes no line or extra indentation of its
    /// own.
    fn add_node_to_string(s: &mut String, node: &Handle, indent: usize) {
        use std::fmt::Write as _;

        // One space of padding per nesting level.
        let pad = " ".repeat(indent);

        // Writing into a `String` cannot fail, hence the `unwrap()`s below.
        match &node.data {
            NodeData::Comment { .. } | NodeData::ProcessingInstruction { .. } => {}
            NodeData::Doctype { .. } => {
                writeln!(s, "{}<doctype>", pad).unwrap();
            }
            NodeData::Text { contents } => {
                let text = contents.borrow().to_string();
                // Whitespace-only text nodes are omitted from the dump.
                if text.trim().is_empty() {
                    return;
                }
                writeln!(s, "{}Text:{}", pad, text).unwrap();
            }
            NodeData::Document => {
                // The document root is transparent: children stay at this level.
                for child in node.children.borrow().iter() {
                    Self::add_node_to_string(s, child, indent);
                }
            }
            NodeData::Element { name, .. } => {
                let tag = &name.local;
                writeln!(s, "{}<{}>", pad, tag).unwrap();
                for child in node.children.borrow().iter() {
                    Self::add_node_to_string(s, child, indent + 1);
                }
                writeln!(s, "{}</{}>", pad, tag).unwrap();
            }
        }
    }

    /// A low-quality debug DOM rendering.
    ///
    /// Walks the tree from the document node and returns one line per
    /// rendered node, indented by nesting depth.
    pub fn as_dom_string(&self) -> String {
        let mut rendered = String::new();
        Self::add_node_to_string(&mut rendered, &self.document, 0);
        rendered
    }
}

impl TreeSink for RcDom {
type Output = Self;

Expand Down
36 changes: 20 additions & 16 deletions src/render/text_renderer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ impl<T: Clone + Eq + Debug + Default> WrappedBlock<T> {
}
}

fn flush_word(&mut self, ws_mode: WhiteSpace, main_tag: &T, wrap_tag: &T) -> Result<(), Error> {
fn flush_word(&mut self, ws_mode: WhiteSpace) -> Result<(), Error> {
use self::TaggedLineElement::Str;

/* Finish the word. */
Expand All @@ -362,18 +362,19 @@ impl<T: Clone + Eq + Debug + Default> WrappedBlock<T> {
self.line.len
);

let mut tag = if self.pre_wrapped { wrap_tag } else { main_tag };
if !self.word.is_empty() {
self.pre_wrapped = false;
let space_in_line = self.width - self.line.len;
let space_needed = self.wslen + self.wordlen;
if space_needed <= space_in_line {
html_trace!("Got enough space");
self.line.push(Str(TaggedString {
s: " ".repeat(self.wslen),
tag: tag.clone(),
}));
self.wslen = 0;
if self.wslen > 0 {
self.line.push(Str(TaggedString {
s: " ".repeat(self.wslen),
tag: self.spacetag.take().unwrap(),
}));
self.wslen = 0;
}

self.line.consume(&mut self.word);
html_trace!("linelen increased by wordlen to {}", self.line.len);
Expand All @@ -386,31 +387,33 @@ impl<T: Clone + Eq + Debug + Default> WrappedBlock<T> {
if self.wslen >= space_in_line {
// Skip the whitespace
self.wslen -= space_in_line;
} else {
self.line.push_ws(self.wslen, tag);
} else if self.wslen > 0 {
self.line
.push_ws(self.wslen, &self.spacetag.take().unwrap());
self.wslen = 0;
}
} else {
// We're word-wrapping, so discard any whitespace.
self.spacetag = None;
self.wslen = 0;
}
/* Start a new line */
self.flush_line();

if ws_mode == WhiteSpace::Pre {
self.pre_wrapped = true;
tag = wrap_tag;
}

// Write any remaining whitespace
while self.wslen > 0 {
let to_copy = self.wslen.min(self.width);
self.line.push_ws(to_copy, tag);
self.line.push_ws(to_copy, self.spacetag.as_ref().unwrap());
if to_copy == self.width {
self.flush_line();
}
self.wslen -= to_copy;
}
self.spacetag = None;

// At this point, either:
// We're word-wrapping, and at the start of the line or
Expand Down Expand Up @@ -506,9 +509,7 @@ impl<T: Clone + Eq + Debug + Default> WrappedBlock<T> {
}

fn flush(&mut self) -> Result<(), Error> {
let tag = self.spacetag.clone().unwrap_or_default();

self.flush_word(WhiteSpace::Normal, &tag, &tag)?;
self.flush_word(WhiteSpace::Normal)?;
self.flush_line();
Ok(())
}
Expand Down Expand Up @@ -548,6 +549,7 @@ impl<T: Clone + Eq + Debug + Default> WrappedBlock<T> {
// We walk character by character.
// 1. First, build up whitespace columns in self.wslen
// - In normal mode self.wslen will always be 0 or 1
// - If wslen > 0, then self.spacetag will always be set.
// 2. Next build up a word (non-whitespace).
// 2a. If the word gets too long for the line
// 2b. If we get to more whitespace, output the first whitespace and the word
Expand All @@ -563,7 +565,7 @@ impl<T: Clone + Eq + Debug + Default> WrappedBlock<T> {
self.line
);
if c.is_whitespace() && self.wordlen > 0 {
self.flush_word(ws_mode, main_tag, wrap_tag)?;
self.flush_word(ws_mode)?;
}

if c.is_whitespace() {
Expand All @@ -575,6 +577,7 @@ impl<T: Clone + Eq + Debug + Default> WrappedBlock<T> {
// the line.
self.force_flush_line();
self.wslen = 0;
self.spacetag = None;
self.pre_wrapped = false;
// Hard new line, so back to main tag.
tag = main_tag;
Expand Down Expand Up @@ -607,6 +610,7 @@ impl<T: Clone + Eq + Debug + Default> WrappedBlock<T> {
} else {
// Manual wrapping, keep the space.
self.wslen += cwidth;
self.spacetag = Some(tag.clone());
self.pre_wrapped = true;
}
} else {
Expand All @@ -619,7 +623,7 @@ impl<T: Clone + Eq + Debug + Default> WrappedBlock<T> {
} else {
// If not preserving whitespace, everything is collapsed,
// and the line won't start with whitespace.
if self.line.len > 0 {
if self.line.len > 0 && self.wslen == 0 {
self.spacetag = Some(tag.clone());
self.wslen = 1;
}
Expand Down
17 changes: 17 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2229,6 +2229,23 @@ There
);
}

// A span styled with a CSS `background-color` and containing two words
// should be rendered as one uninterrupted `<g>...</g>` run, including the
// space between the words, while the surrounding text stays unmarked.
// NOTE(review): `<g>` is presumably the test decorator's marker for the
// green (#00FF00) background, and `20` the render width — confirm against
// `test_html_coloured`.
#[test]
fn test_bgcoloured3() {
test_html_coloured(
br##"
<style>
.but {
background-color:#00FF00;
}
</style>
<p>Test <span class="but">Two words</span> bg</p>
"##,
r#"Test <g>Two words</g> bg
"#,
20,
);
}

#[test]
fn test_coloured_element() {
test_html_coloured(
Expand Down
Loading