From d098f3f435d11c511c6b8e6fecc867eadec75d9a Mon Sep 17 00:00:00 2001 From: Chris Emerson Date: Sat, 30 Nov 2024 14:59:15 +0000 Subject: [PATCH 01/10] Add support for CSS :nth-child() This doesn't include "of ", just the "An+B". --- src/css.rs | 74 ++++++++++++++------ src/css/parser.rs | 141 +++++++++++++++++++++++++++++++++++++++ src/lib.rs | 22 ++++++ src/markup5ever_rcdom.rs | 34 ++++++++++ src/tests.rs | 124 ++++++++++++++++++++++++++++++++++ 5 files changed, 376 insertions(+), 19 deletions(-) diff --git a/src/css.rs b/src/css.rs index c853b9a..b5685eb 100644 --- a/src/css.rs +++ b/src/css.rs @@ -1,5 +1,5 @@ //! Some basic CSS support. -use std::io::Write; +use std::{io::Write, rc::Rc}; use std::ops::Deref; mod parser; @@ -7,8 +7,7 @@ mod parser; use crate::{ css::parser::parse_rules, markup5ever_rcdom::{ - Handle, - NodeData::{self, Comment, Document, Element}, + Handle, NodeData::{self, Comment, Document, Element} }, tree_map_reduce, Colour, ComputedStyle, Result, Specificity, StyleOrigin, TreeMapResult, WhiteSpace, @@ -24,6 +23,12 @@ pub(crate) enum SelectorComponent { Star, CombChild, CombDescendant, + NthChild { + /* An + B [of sel] */ + a: i32, + b: i32, + sel: Selector, + }, } #[derive(Debug, Clone, PartialEq)] @@ -76,31 +81,58 @@ impl Selector { }, SelectorComponent::Star => Self::do_matches(&comps[1..], node), SelectorComponent::CombChild => { - if let Some(parent) = node.parent.take() { - let parent_handle = parent.upgrade(); - node.parent.set(Some(parent)); - if let Some(ph) = parent_handle { - Self::do_matches(&comps[1..], &ph) - } else { - false - } + if let Some(parent) = node.get_parent() { + Self::do_matches(&comps[1..], &parent) } else { false } } SelectorComponent::CombDescendant => { - if let Some(parent) = node.parent.take() { - let parent_handle = parent.upgrade(); - node.parent.set(Some(parent)); - if let Some(ph) = parent_handle { - Self::do_matches(&comps[1..], &ph) || Self::do_matches(comps, &ph) - } else { - false - } + if let 
Some(parent) = node.get_parent() { + Self::do_matches(&comps[1..], &parent) || Self::do_matches(comps, &parent) } else { false } } + SelectorComponent::NthChild { a, b, sel } => { + let parent = if let Some(parent) = node.get_parent() { + parent + } else { + return false; + }; + let mut idx = 0i32; + for child in parent.children.borrow().iter() { + if let Element { .. } = child.data { + if sel.matches(child) { + idx += 1; + if Rc::ptr_eq(child, node) { + break; + } + } else { + if Rc::ptr_eq(child, node) { + return false; + } + } + } + } + if idx == 0 { + // The child wasn't found(?) + return false; + } + /* The selector matches if idx == a*n + b, where + * n >= 0 + */ + let idx_offset = idx - b; + if *a == 0 { + return idx_offset == 0 && Self::do_matches(&comps[1..], &node); + } + if (idx_offset % a) != 0 { + // Not a multiple + return false; + } + let n = idx_offset / a; + n >= 0 && Self::do_matches(&comps[1..], &node) + } }, } } @@ -124,6 +156,10 @@ impl Selector { SelectorComponent::Star => {} SelectorComponent::CombChild => {} SelectorComponent::CombDescendant => {} + SelectorComponent::NthChild { sel, .. 
} => { + result.class += 1; + result += &sel.specificity(); + } } } diff --git a/src/css/parser.rs b/src/css/parser.rs index d0040a3..43c69e9 100644 --- a/src/css/parser.rs +++ b/src/css/parser.rs @@ -752,6 +752,100 @@ fn parse_class(text: &str) -> IResult<&str, SelectorComponent> { Ok((rest, SelectorComponent::Class(classname))) } +#[derive(Eq, PartialEq, Copy, Clone)] +enum Sign { + Plus, + Neg +} + +impl Sign { + fn val(&self) -> i32 { + match self { + Sign::Plus => 1, + Sign::Neg => -1, + } + } +} + +fn opt_sign(text: &str) -> IResult<&str, Sign> { + match text.chars().next() { + Some('-') => Ok((&text[1..], Sign::Neg)), + Some('+') => Ok((&text[1..], Sign::Plus)), + _ => Ok((text, Sign::Plus)), + } +} +fn sign(text: &str) -> IResult<&str, Sign> { + match text.chars().next() { + Some('-') => Ok((&text[1..], Sign::Neg)), + Some('+') => Ok((&text[1..], Sign::Plus)), + _ => fail(text), + } +} + +fn parse_nth_child_args(text: &str) -> IResult<&str, SelectorComponent> { + let (rest, _) = tag("(")(text)?; + let (rest, _) = skip_optional_whitespace(rest)?; + + let (rest, (a, b)) = + alt(( + map( + tag("even"), + |_| (2, 0), + ), + map( + tag("odd"), + |_| (2, 1), + ), + // The case where both a and b are specified + map( + tuple(( + opt_sign, opt(digit1), tag("n"), + skip_optional_whitespace, + sign, digit1)), + |(a_sign, a_opt_val, _, + _, + b_sign, b_val)| { + let a = ::from_str(a_opt_val.unwrap_or("1")).unwrap() * a_sign.val(); + let b = ::from_str(b_val).unwrap() * b_sign.val(); + (a, b) + }), + // Just a + map( + tuple((opt_sign, opt(digit1), tag("n"))), + |(a_sign, a_opt_val, _)| { + let a = ::from_str(a_opt_val.unwrap_or("1")).unwrap() * a_sign.val(); + (a, 0) + }), + // Just b + map( + tuple(( + opt_sign, digit1)), + |(b_sign, b_val)| { + let b = ::from_str(b_val).unwrap() * b_sign.val(); + (0, b) + }), + ))(rest)?; + + let (rest, _) = tuple((skip_optional_whitespace, tag(")")))(rest)?; + + let sel = Selector { + components: vec![SelectorComponent::Star] + }; 
+ Ok((rest, SelectorComponent::NthChild { a, b, sel })) +} + +fn parse_pseudo_class(text: &str) -> IResult<&str, SelectorComponent> { + let (rest, _) = tag(":")(text)?; + let (rest, pseudo_classname) = parse_ident(rest)?; + match pseudo_classname.as_str() { + "nth-child" => { + let (rest, component) = parse_nth_child_args(rest)?; + Ok((rest, component)) + } + _ => fail(text), + } +} + fn parse_hash(text: &str) -> IResult<&str, SelectorComponent> { let (rest, _) = tag("#")(text)?; let (rest, word) = parse_identstring(rest)?; @@ -777,6 +871,7 @@ fn parse_simple_selector_component(text: &str) -> IResult<&str, SelectorComponen parse_class, parse_hash, map(parse_ident, SelectorComponent::Element), + parse_pseudo_class, ))(text) } @@ -1075,4 +1170,50 @@ mod test { )) ); } + + #[test] + fn test_nth_child() { + use SelectorComponent::NthChild; + let (_, sel_all) = super::parse_selector("*").unwrap(); + assert_eq!(super::parse_selector(":nth-child(even)").unwrap(), + ("", Selector { + components: vec![NthChild { a: 2, b: 0, sel: sel_all.clone() }] + })); + assert_eq!(super::parse_selector(":nth-child(odd)").unwrap(), + ("", Selector { + components: vec![NthChild { a: 2, b: 1, sel: sel_all.clone() }] + })); + assert_eq!(super::parse_selector(":nth-child(17)").unwrap(), + ("", Selector { + components: vec![NthChild { a: 0, b: 17, sel: sel_all.clone() }] + })); + assert_eq!(super::parse_selector(":nth-child(17n)").unwrap(), + ("", Selector { + components: vec![NthChild { a: 17, b: 0, sel: sel_all.clone() }] + })); + assert_eq!(super::parse_selector(":nth-child(10n-1)").unwrap(), + ("", Selector { + components: vec![NthChild { a: 10, b: -1, sel: sel_all.clone() }] + })); + assert_eq!(super::parse_selector(":nth-child(10n+9)").unwrap(), + ("", Selector { + components: vec![NthChild { a: 10, b: 9, sel: sel_all.clone() }] + })); + assert_eq!(super::parse_selector(":nth-child(-n+3)").unwrap(), + ("", Selector { + components: vec![NthChild { a: -1, b: 3, sel: sel_all.clone() }] + 
})); + assert_eq!(super::parse_selector(":nth-child(n)").unwrap(), + ("", Selector { + components: vec![NthChild { a: 1, b: 0, sel: sel_all.clone() }] + })); + assert_eq!(super::parse_selector(":nth-child(+n)").unwrap(), + ("", Selector { + components: vec![NthChild { a: 1, b: 0, sel: sel_all.clone() }] + })); + assert_eq!(super::parse_selector(":nth-child(-n)").unwrap(), + ("", Selector { + components: vec![NthChild { a: -1, b: 0, sel: sel_all.clone() }] + })); + } } diff --git a/src/lib.rs b/src/lib.rs index e6f2c7f..c872400 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -158,6 +158,28 @@ impl Specificity { } } +impl std::ops::Add<&Specificity> for &Specificity { + type Output = Specificity; + + fn add(self, rhs: &Specificity) -> Self::Output { + Specificity { + inline: self.inline || rhs.inline, + id: self.id + rhs.id, + class: self.class + rhs.class, + typ: self.typ + rhs.typ, + } + } +} + +impl std::ops::AddAssign<&Specificity> for Specificity { + fn add_assign(&mut self, rhs: &Specificity) { + self.inline = self.inline || rhs.inline; + self.id += rhs.id; + self.class += rhs.class; + self.typ += rhs.typ; + } +} + impl PartialOrd for Specificity { fn partial_cmp(&self, other: &Self) -> Option { match self.inline.partial_cmp(&other.inline) { diff --git a/src/markup5ever_rcdom.rs b/src/markup5ever_rcdom.rs index 92b53dd..75e4514 100644 --- a/src/markup5ever_rcdom.rs +++ b/src/markup5ever_rcdom.rs @@ -47,6 +47,7 @@ use std::io; use std::mem; use std::rc::{Rc, Weak}; +use html5ever::interface::ElemName; use tendril::StrTendril; use markup5ever::interface::tree_builder; @@ -122,6 +123,39 @@ impl Node { children: RefCell::new(Vec::new()), }) } + + pub fn get_parent(&self) -> Option> { + if let Some(parent) = self.parent.take() { + let parent_handle = parent.upgrade(); + self.parent.set(Some(parent)); + parent_handle + } else { + None + } + } + + /// Return the nth child element of this node, or None. 
+ pub fn nth_child(&self, idx: usize) -> Option> { + let mut element_idx = 0; + for child in self.children.borrow().iter() { + if let NodeData::Element { .. } = child.data { + element_idx += 1; + if element_idx == idx { + return Some(child.clone()); + } + } + } + None + } + + /// Return the element type (if an element) + pub fn element_name(&self) -> Option { + if let NodeData::Element { ref name, .. } = self.data { + Some(format!("{}", &*name.local_name())) + } else { + None + } + } } impl Drop for Node { diff --git a/src/tests.rs b/src/tests.rs index d78298f..c06c12c 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -2643,4 +2643,128 @@ at line breaks }, ); } + + #[test] + fn test_nth_child() { + test_html_coloured( + br#" + +
    +
  • One
  • +
  • Two
  • +
  • Three
  • +
  • Four
  • +
  • Five
  • +
"#, + r#"* One +* Two +* Three +* Four +* Five +"#, + 20, + ); + test_html_coloured( + br#" + +
    +
  • One
  • +
  • Two
  • +
  • Three
  • +
  • Four
  • +
  • Five
  • +
"#, + r#"* One +* Two +* Three +* Four +* Five +"#, + 20, + ); + test_html_coloured( + br#" + +
    +
  • One
  • +
  • Two
  • +
  • Three
  • +
  • Four
  • +
  • Five
  • +
"#, + r#"* One +* Two +* Three +* Four +* Five +"#, + 20, + ); + test_html_coloured( + br#" + +
    +
  • One
  • +
  • Two
  • +
  • Three
  • +
  • Four
  • +
  • Five
  • +
"#, + r#"* One +* Two +* Three +* Four +* Five +"#, + 20, + ); + test_html_coloured( + br#" + +
    +
  • One
  • +
  • Two
  • +
  • Three
  • +
  • Four
  • +
  • Five
  • +
  • Six
  • +
  • Seven
  • +
  • Eight
  • +
  • Nine
  • +
  • Ten
  • +
"#, + r#"* One +* Two +* Three +* Four +* Five +* Six +* Seven +* Eight +* Nine +* Ten +"#, + 20, + ); + } } From c098d9766cabd8194a44aa459ce3ed0704f0cf52 Mon Sep 17 00:00:00 2001 From: Chris Emerson Date: Sat, 30 Nov 2024 15:01:13 +0000 Subject: [PATCH 02/10] Fix detection of empty wrapping block. --- src/render/text_renderer.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/render/text_renderer.rs b/src/render/text_renderer.rs index c584ebc..063ba1b 100644 --- a/src/render/text_renderer.rs +++ b/src/render/text_renderer.rs @@ -318,7 +318,6 @@ impl TaggedLine { struct WrappedBlock { width: usize, text: Vec>, - textlen: usize, line: TaggedLine, spacetag: Option, // Tag for the whitespace before the current word word: TaggedLine, // The current word (with no whitespace). @@ -334,7 +333,6 @@ impl WrappedBlock { WrappedBlock { width, text: Vec::new(), - textlen: 0, line: TaggedLine::new(), spacetag: None, word: TaggedLine::new(), @@ -646,7 +644,7 @@ impl WrappedBlock { } fn text_len(&self) -> usize { - self.textlen + self.line.len + self.wordlen + self.text.len() + self.line.len + self.wordlen } fn is_empty(&self) -> bool { From 667f0ff9f5f7a0b24081268c6dbd9bbd3c45668e Mon Sep 17 00:00:00 2001 From: Chris Emerson Date: Sat, 30 Nov 2024 15:03:04 +0000 Subject: [PATCH 03/10] Add CSS extension: "display: x-raw-dom". This is a custom extension (with feature flag css_ext) to allow formatting a subtree of the DOM as the raw DOM tree, for debugging. 
--- Cargo.toml | 1 + src/css.rs | 39 +++++++++++++++++++++++++++++---------- src/css/parser.rs | 4 ++++ src/lib.rs | 25 ++++++++++++++++++++----- src/markup5ever_rcdom.rs | 20 ++++++++++++++++++++ 5 files changed, 74 insertions(+), 15 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ab43638..888634f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ html_trace = ["dep:log"] html_trace_bt = ["html_trace", "dep:backtrace"] default = [] css = [] +css_ext = ["css"] [[example]] name = "html2term" diff --git a/src/css.rs b/src/css.rs index b5685eb..97f800e 100644 --- a/src/css.rs +++ b/src/css.rs @@ -167,11 +167,20 @@ impl Selector { } } +#[derive(Debug, Clone, Copy)] +pub(crate) enum Display { + /// display: none + None, + #[cfg(feature = "css_ext")] + /// Show node as HTML DOM + ExtRawDom, +} + #[derive(Debug, Clone)] pub(crate) enum Style { Colour(Colour), BgColour(Colour), - DisplayNone, + Display(Display), WhiteSpace(WhiteSpace), } @@ -263,11 +272,21 @@ fn styles_from_properties(decls: &[parser::Declaration]) -> Vec { } parser::Decl::Overflow { .. } | parser::Decl::OverflowY { .. 
} => {} parser::Decl::Display { value } => { - if let parser::Display::None = value { - styles.push(StyleDecl { - style: Style::DisplayNone, - importance: decl.important, - }); + match value { + parser::Display::None => { + styles.push(StyleDecl { + style: Style::Display(Display::None), + importance: decl.important, + }); + } + #[cfg(feature = "css_ext")] + parser::Display::RawDom => { + styles.push(StyleDecl { + style: Style::Display(Display::ExtRawDom), + importance: decl.important, + }); + } + _ => (), } } parser::Decl::WhiteSpace { value } => { @@ -285,7 +304,7 @@ fn styles_from_properties(decls: &[parser::Declaration]) -> Vec { // If the height is set to zero and overflow hidden, treat as display: none if height_zero && overflow_hidden { styles.push(StyleDecl { - style: Style::DisplayNone, + style: Style::Display(Display::None), importance: Importance::Default, }); } @@ -442,11 +461,11 @@ impl StyleData { .bg_colour .maybe_update(important, origin, specificity, col); } - Style::DisplayNone => { + Style::Display(disp) => { // We don't have a "not DisplayNone" - we might need to fix this. 
result - .display_none - .maybe_update(important, origin, specificity, true); + .display + .maybe_update(important, origin, specificity, disp); } Style::WhiteSpace(ws) => { result diff --git a/src/css/parser.rs b/src/css/parser.rs index 43c69e9..c6e60d0 100644 --- a/src/css/parser.rs +++ b/src/css/parser.rs @@ -61,6 +61,8 @@ pub(crate) enum Overflow { pub(crate) enum Display { None, Other, + #[cfg(feature = "css_ext")] + RawDom, } #[derive(Debug, PartialEq)] @@ -715,6 +717,8 @@ fn parse_display(value: &RawValue) -> Result return Ok(Display::None), + #[cfg(feature = "css_ext")] + "x-raw-dom" => return Ok(Display::RawDom), _ => (), } } diff --git a/src/lib.rs b/src/lib.rs index c872400..b5f0575 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,6 +64,7 @@ mod macros; pub mod css; pub mod render; +use css::Display; use render::text_renderer::{ RenderLine, RenderOptions, RichAnnotation, SubRenderer, TaggedLine, TextRenderer, }; @@ -270,7 +271,7 @@ pub(crate) struct ComputedStyle { pub(crate) bg_colour: WithSpec, #[cfg(feature = "css")] /// If set, indicates whether `display: none` or something equivalent applies - pub(crate) display_none: WithSpec, + pub(crate) display: WithSpec, /// The CSS white-space property pub(crate) white_space: WithSpec, @@ -830,8 +831,8 @@ impl RenderNode { if let Some(col) = style.bg_colour.val() { write!(f, " bg_colour={:?}", col)?; } - if let Some(val) = style.display_none.val() { - write!(f, " disp_none={:?}", val)?; + if let Some(val) = style.display.val() { + write!(f, " disp={:?}", val)?; } } if let Some(ws) = style.white_space.val() { @@ -1501,8 +1502,22 @@ fn process_dom_node( context .style_data .computed_style(**parent_style, handle, context.use_doc_css); - if let Some(true) = computed.display_none.val() { - return Ok(Nothing); + match computed.display.val() { + Some(Display::None) => return Ok(Nothing), + #[cfg(feature = "css_ext")] + Some(Display::ExtRawDom) => { + let result_text = RcDom::node_as_dom_string(handle); + let mut 
computed = computed; + computed.white_space.maybe_update( + false, + StyleOrigin::Agent, + Default::default(), + WhiteSpace::Pre, + ); + let text = RenderNode::new(RenderNodeInfo::Text(result_text)); + return Ok(Finished(RenderNode::new_styled(RenderNodeInfo::Block(vec![text]), computed))); + } + _ => (), } computed }; diff --git a/src/markup5ever_rcdom.rs b/src/markup5ever_rcdom.rs index 75e4514..dfdccd9 100644 --- a/src/markup5ever_rcdom.rs +++ b/src/markup5ever_rcdom.rs @@ -284,6 +284,26 @@ impl RcDom { Self::add_node_to_string(&mut s, &self.document, 0); s } + + /// A low-quality debug DOM rendering of an individual node + pub fn node_as_dom_string(node: &Handle) -> String { + let mut s = String::new(); + Self::add_node_to_string(&mut s, node, 0); + s + } + + /// Find the node at a child path starting from the root element. At each level, 1 is the + /// first child element, and only elements are counted. + pub fn get_node_by_path(&self, path: &[usize]) -> Option { + let mut node = self.document.clone(); + for idx in path { + node = match node.nth_child(*idx) { + Some(new_node) => new_node, + None => return None, + }; + } + Some(node) + } } impl TreeSink for RcDom { From 6300ab1291b442cf09538ea626e81345b2f91ccf Mon Sep 17 00:00:00 2001 From: Chris Emerson Date: Sat, 30 Nov 2024 15:15:12 +0000 Subject: [PATCH 04/10] Add "inspect" mode in html2term. "I" enters inspect mode, and arrows navigate around the DOM. The current node (and descendants) are shown as raw HTML. 
--- examples/html2term.rs | 166 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 144 insertions(+), 22 deletions(-) diff --git a/examples/html2term.rs b/examples/html2term.rs index 3e9b01c..6d930e0 100644 --- a/examples/html2term.rs +++ b/examples/html2term.rs @@ -4,7 +4,7 @@ extern crate argparse; extern crate unicode_width; #[cfg(unix)] mod top { - use argparse::{ArgumentParser, Store}; + use argparse::{ArgumentParser, Store, StoreFalse}; use html2text::render::{RichAnnotation, TaggedLine, TaggedLineElement}; use std::collections::HashMap; use std::io::{self, Write}; @@ -16,6 +16,7 @@ mod top { use unicode_width::UnicodeWidthStr; fn to_style(tag: &[RichAnnotation]) -> String { + use termion::color::*; let mut style = String::new(); for ann in tag { @@ -27,40 +28,45 @@ mod top { RichAnnotation::Image(_) => { style.push_str(&format!( "{}", - termion::color::Fg(termion::color::LightBlue) + Fg(LightBlue) )); } RichAnnotation::Emphasis => { style.push_str(&format!( "{}", - termion::color::Fg(termion::color::LightGreen) + Fg(LightGreen) )); } RichAnnotation::Strong => { style.push_str(&format!( "{}", - termion::color::Fg(termion::color::LightGreen) + Fg(LightGreen) )); } RichAnnotation::Strikeout => (), RichAnnotation::Code => { style.push_str(&format!( "{}", - termion::color::Fg(termion::color::LightYellow) + Fg(LightYellow) )); } RichAnnotation::Preformat(is_cont) => { if is_cont { style.push_str(&format!( "{}", - termion::color::Fg(termion::color::LightMagenta) + Fg(LightMagenta) )); } else { - style.push_str(&format!("{}", termion::color::Fg(termion::color::Magenta))); + style.push_str(&format!("{}", Fg(Magenta))); } } - // Ignore unhandled annotations - _ => {} + RichAnnotation::Colour(col) => { + style.push_str(&format!("{}", Fg(Rgb(col.r, col.g, col.b)))); + } + RichAnnotation::BgColour(col) => { + style.push_str(&format!("{}", Bg(Rgb(col.r, col.g, col.b)))); + } + _ => todo!(), } } style @@ -132,12 +138,30 @@ mod top { FragMap { start_xy: map } } + 
struct Options { + #[cfg(feature = "css")] + use_css: bool, + } + + impl Options { + fn new() -> Options { + Options { + #[cfg(feature = "css")] + use_css: true, + } + } + } + pub fn main() { let mut filename = String::new(); + let mut options = Options::new(); { let mut ap = ArgumentParser::new(); ap.refer(&mut filename) .add_argument("filename", Store, "Set HTML filename"); + #[cfg(feature = "css")] + ap.refer(&mut options.use_css) + .add_option(&["--no-css"], StoreFalse, "Disable CSS"); ap.parse_args_or_exit(); } @@ -145,16 +169,11 @@ mod top { let (width, height) = (width as usize, height as usize); let mut file = std::fs::File::open(filename).expect("Tried to open file"); - let annotated = - html2text::from_read_rich(&mut file, width).expect("Failed to convert from HTML"); - let link_map = find_links(&annotated); - let frag_map = find_frags(&annotated); + let dom = html2text::config::plain().parse_html(&mut file).expect("Failed to parse HTML"); let mut keys = io::stdin().keys(); - // max_y is the largest (0-based) index of a real document line. - let max_y = annotated.len() - 1; // top_y is the (0-based) index of the document line shown at // the top of the visible screen. let mut top_y = 0; @@ -169,7 +188,17 @@ mod top { .into_alternate_screen() .unwrap(); + let mut annotated = rerender(&dom, &[], width, &options); + + let link_map = find_links(&annotated); + let frag_map = find_frags(&annotated); + + let mut inspect_path = vec![]; + loop { + // max_y is the largest (0-based) index of a real document line. + let max_y = annotated.len() - 1; + // Sanity-check the current screen position. 
max_y should // be small enough that no blank lines beyond the end of // the document are visible on screen (except when the @@ -186,7 +215,10 @@ mod top { top_y = std::cmp::min(top_y, doc_y); let opt_url = link_map.link_at(doc_x, doc_y); - let vis_y_limit = std::cmp::min(top_y + height, max_y + 1); + let mut vis_y_limit = std::cmp::min(top_y + height, max_y + 1); + if !inspect_path.is_empty() { + vis_y_limit -= 1; + } write!(screen, "{}", termion::clear::All).unwrap(); for (i, line) in annotated[top_y..vis_y_limit].iter().enumerate() { write!(screen, "{}", Goto(1, i as u16 + 1)).unwrap(); @@ -202,6 +234,20 @@ mod top { write!(screen, "{}{}{}", style, ts.s, termion::style::Reset).unwrap(); } } + if !inspect_path.is_empty() { + let mut pth = String::from("top "); + let mut node = dom.document.clone(); + + let mut l=1; + for &idx in &inspect_path { + node = node.nth_child(idx).unwrap(); + pth.push_str(&format!("> {}", node.element_name().unwrap())); + + pth.push_str(&format!("[{}]", dom.get_node_by_path(&inspect_path[..l]).unwrap().element_name().unwrap())); + l += 1; + } + write!(screen, "{}{}{:?}", Goto(1, vis_y_limit as u16), pth, &inspect_path).unwrap(); + } // 1-based screen coordinates let cursor_x = (doc_x + 1) as u16; @@ -213,19 +259,52 @@ mod top { match k { Key::Char('q') => break, Key::Char('j') | Key::Down => { - if doc_y < max_y { - doc_y += 1; + if inspect_path.is_empty() { + if doc_y < max_y { + doc_y += 1; + } + } else { + *inspect_path.last_mut().unwrap() += 1; + if dom.get_node_by_path(&inspect_path).is_none() { + // No next node - undo. 
+ *inspect_path.last_mut().unwrap() -= 1; + } else { + annotated = rerender(&dom, &inspect_path, width, &options); + } } } Key::Char('k') | Key::Up => { - doc_y = doc_y.saturating_sub(1); + if inspect_path.is_empty() { + doc_y = doc_y.saturating_sub(1); + } else { + if *inspect_path.last().unwrap() > 1 { + *inspect_path.last_mut().unwrap() -= 1; + annotated = rerender(&dom, &inspect_path, width, &options); + } + } } Key::Char('h') | Key::Left => { - doc_x = doc_x.saturating_sub(1); + if inspect_path.is_empty() { + doc_x = doc_x.saturating_sub(1); + } else { + if inspect_path.len() > 1 { + inspect_path.pop(); + annotated = rerender(&dom, &inspect_path, width, &options); + } + } } Key::Char('l') | Key::Right => { - if doc_x + 1 < width { - doc_x += 1; + if inspect_path.is_empty() { + if doc_x + 1 < width { + doc_x += 1; + } + } else { + inspect_path.push(1); + if dom.get_node_by_path(&inspect_path).is_none() { + inspect_path.pop(); + } else { + annotated = rerender(&dom, &inspect_path, width, &options); + } } } Key::Char(' ') | Key::PageDown => { @@ -266,11 +345,54 @@ mod top { } } } + Key::Char('I') => { + // Enter/leave inspect mode + if inspect_path.is_empty() { + inspect_path.push(1); + } else { + inspect_path.clear(); + } + annotated = rerender(&dom, &inspect_path, width, &options); + } _ => {} } } } } + + fn rerender(dom: &html2text::RcDom, inspect_path: &[usize], width: usize, options: &Options) -> Vec>> { + let config = html2text::config::rich(); + #[cfg(feature = "css")] + let config = if options.use_css { + config.use_doc_css() + .add_agent_css(r#" + img { + color: #77f; + } + "#).unwrap() + } else { + config + }; + if inspect_path.is_empty() { + let render_tree = config.dom_to_render_tree(&dom).expect("Failed to build render tree"); + config.render_to_lines(render_tree, width).expect("Failed to render") + } else { + let mut path_selector = String::new(); + for &idx in &inspect_path[1..] 
{ + path_selector.push_str(&format!(" > :nth-child({})", idx)); + } + let config = config + .add_agent_css(&(format!(r#" + html {} {{ + color: white !important; + background-color: black !important; + display: x-raw-dom; + }} + "#, path_selector))).expect("Invalid CSS"); + let render_tree = config.dom_to_render_tree(&dom).expect("Failed to build render tree"); + config.render_to_lines(render_tree, width).expect("Failed to render") + } + } } #[cfg(not(unix))] From 6d02849be3bd93612ece5d8710e2944b7a22c3e0 Mon Sep 17 00:00:00 2001 From: Chris Emerson Date: Sat, 30 Nov 2024 15:47:54 +0000 Subject: [PATCH 05/10] Add html2text `--show-css` Show the parsed CSS rules from the document. --- examples/html2text.rs | 18 ++++++++++ src/css.rs | 78 +++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 15 +++++++++ 3 files changed, 111 insertions(+) diff --git a/examples/html2text.rs b/examples/html2text.rs index 7879dda..f92b695 100644 --- a/examples/html2text.rs +++ b/examples/html2text.rs @@ -126,6 +126,15 @@ where .unwrap(); } } + #[cfg(feature = "css")] + { + if flags.show_css { + let conf = config::plain(); + let conf = update_config(conf, &flags); + let dom = conf.parse_html(input).unwrap(); + return html2text::dom_to_parsed_style(&dom).expect("Parsing CSS"); + } + } if flags.show_dom { let conf = config::plain(); let conf = update_config(conf, &flags); @@ -162,6 +171,8 @@ struct Flags { use_only_css: bool, show_dom: bool, show_render: bool, + #[cfg(feature = "css")] + show_css: bool, } fn main() { @@ -182,6 +193,8 @@ fn main() { use_only_css: false, show_dom: false, show_render: false, + #[cfg(feature = "css")] + show_css: false, }; let mut literal: bool = false; @@ -240,6 +253,11 @@ fn main() { StoreTrue, "Show the computed render tree instead of the rendered output", ); + ap.refer(&mut flags.show_css).add_option( + &["--show-css"], + StoreTrue, + "Show the parsed CSS instead of rendered output", + ); ap.parse_args_or_exit(); } diff --git a/src/css.rs b/src/css.rs 
index 97f800e..7417b90 100644 --- a/src/css.rs +++ b/src/css.rs @@ -31,12 +31,35 @@ pub(crate) enum SelectorComponent { }, } +impl std::fmt::Display for SelectorComponent { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SelectorComponent::Class(name) => write!(f, ".{}", name), + SelectorComponent::Element(name) => write!(f, "{}", name), + SelectorComponent::Hash(val) => write!(f, "#{}", val), + SelectorComponent::Star => write!(f, " * "), + SelectorComponent::CombChild => write!(f, " > "), + SelectorComponent::CombDescendant => write!(f, " "), + SelectorComponent::NthChild { a, b, .. } => write!(f, ":nth-child({}n+{})", a, b), + } + } +} + #[derive(Debug, Clone, PartialEq)] pub(crate) struct Selector { // List of components, right first so we match from the leaf. components: Vec, } +impl std::fmt::Display for Selector { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for comp in self.components.iter().rev() { + comp.fmt(f)?; + } + Ok(()) + } +} + impl Selector { fn do_matches(comps: &[SelectorComponent], node: &Handle) -> bool { match comps.first() { @@ -190,12 +213,43 @@ pub(crate) struct StyleDecl { importance: Importance, } +impl std::fmt::Display for StyleDecl { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.style { + Style::Colour(col) => write!(f, "color: {}", col)?, + Style::BgColour(col) => write!(f, "background-color: {}", col)?, + Style::Display(Display::None) => write!(f, "display: none")?, + #[cfg(feature = "css_ext")] + Style::Display(Display::ExtRawDom) => write!(f, "display: x-raw-dom")?, + Style::WhiteSpace(_) => todo!(), + } + match self.importance { + Importance::Default => (), + Importance::Important => { + write!(f, " !important")? 
+ } + } + Ok(()) + } +} + #[derive(Debug, Clone)] struct Ruleset { selector: Selector, styles: Vec, } +impl std::fmt::Display for Ruleset { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, " {} {{", self.selector)?; + for decl in &self.styles { + writeln!(f, " {}", decl)?; + } + writeln!(f, " }}")?; + Ok(()) + } +} + /// Stylesheet data which can be used while building the render tree. #[derive(Clone, Default, Debug)] pub(crate) struct StyleData { @@ -476,6 +530,30 @@ impl StyleData { } } +impl std::fmt::Display for StyleData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if !self.agent_rules.is_empty() { + writeln!(f, "Agent rules:")?; + for ruleset in &self.agent_rules { + ruleset.fmt(f)?; + } + } + if !self.user_rules.is_empty() { + writeln!(f, "User rules:")?; + for ruleset in &self.user_rules { + ruleset.fmt(f)?; + } + } + if !self.author_rules.is_empty() { + writeln!(f, "Author rules:")?; + for ruleset in &self.author_rules { + ruleset.fmt(f)?; + } + } + Ok(()) + } +} + fn pending(handle: Handle, f: F) -> TreeMapResult<'static, (), Handle, Vec> where F: Fn(&mut (), Vec>) -> Result>> + 'static, diff --git a/src/lib.rs b/src/lib.rs index b5f0575..e3e112c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -128,6 +128,12 @@ pub struct Colour { pub b: u8, } +impl std::fmt::Display for Colour { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "#{:02x}{:02x}{:02x}", self.r, self.g, self.b) + } +} + #[derive(Debug, Copy, Clone, PartialEq, Eq, Default, PartialOrd)] pub(crate) enum StyleOrigin { #[default] @@ -1366,6 +1372,15 @@ fn dom_to_render_tree_with_context( result } +#[cfg(feature = "css")] +/// Return a string representation of the CSS rules parsed from +/// the DOM document. 
+pub fn dom_to_parsed_style(dom: &RcDom) -> Result { + let handle = dom.document.clone(); + let doc_style_data = css::dom_to_stylesheet(handle, &mut std::io::sink())?; + Ok(doc_style_data.to_string()) +} + fn pending( input: RenderInput, f: F, From 756f23e8ae6519be475927de3f48b51882f84a03 Mon Sep 17 00:00:00 2001 From: Chris Emerson Date: Sat, 30 Nov 2024 16:40:50 +0000 Subject: [PATCH 06/10] Skip over CSS at-rules, rather than abandoning at the first one. --- src/css/parser.rs | 138 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 137 insertions(+), 1 deletion(-) diff --git a/src/css/parser.rs b/src/css/parser.rs index c6e60d0..e5bde97 100644 --- a/src/css/parser.rs +++ b/src/css/parser.rs @@ -933,8 +933,94 @@ fn parse_ruleset(text: &str) -> IResult<&str, RuleSet> { )) } +fn skip_to_end_of_statement(text: &str) -> IResult<&str, ()> { + let mut rest = text; + + let mut bra_stack = vec![]; + loop { + let (remain, tok) = match parse_token(rest) { + Ok(res) => res, + Err(_) => return Ok((rest, ())), + }; + match &tok { + Token::Ident(..) | + Token::AtKeyword(_) | + Token::Hash(_) | + Token::String(_) | + Token::BadString(_) | + Token::Url(_) | + Token::BadUrl(_) | + Token::Delim(_) | + Token::Number(_) | + Token::Dimension(_, _) | + Token::Percentage(_) | + Token::Colon | + Token::Comma => (), + + Token::Function(_) | + Token::OpenRound => { + bra_stack.push(Token::CloseRound); + } + Token::CDO => { + bra_stack.push(Token::CDC); + } + Token::OpenSquare => { + bra_stack.push(Token::CloseSquare); + } + Token::OpenBrace => { + bra_stack.push(Token::CloseBrace); + } + Token::Semicolon => { + if bra_stack.is_empty() { + return Ok((remain, ())); + } + } + Token::CloseBrace if bra_stack.is_empty() => { + // The stack is empty, so don't include the closing brace. 
+ return Ok((rest, ())); + } + // Standard closing brackets + Token::CDC | + Token::CloseSquare | + Token::CloseRound | + Token::CloseBrace => { + if bra_stack.last() == Some(&tok) { + bra_stack.pop(); + + if tok == Token::CloseBrace && bra_stack.is_empty() { + // The rule lasted until the end of the next block; + // eat this closing brace. + return Ok((remain, ())); + } + } else { + // Unbalanced brackets + return fail(rest); + } + } + } + rest = remain; + } +} + +fn parse_at_rule(text: &str) -> IResult<&str, ()> { + let (rest, _) = tuple(( + skip_optional_whitespace, + tag("@"), + skip_optional_whitespace, + parse_ident))(text)?; + + skip_to_end_of_statement(rest) +} + +fn parse_statement(text: &str) -> IResult<&str, Option> { + alt(( + map(parse_ruleset, Some), + map(parse_at_rule, |_| None)))(text) +} + pub(crate) fn parse_stylesheet(text: &str) -> IResult<&str, Vec> { - many0(parse_ruleset)(text) + let (rest, items) = many0(parse_statement)(text)?; + Ok((rest, items.into_iter().flatten().collect())) } #[cfg(test)] @@ -1103,6 +1189,56 @@ mod test { ); } + #[test] + fn test_parse_at_rules() { + assert_eq!( + super::parse_stylesheet( + " + @media paper { + } + + @blah asldfkjasfda; + + @nested { lkasjfd alkdsjfa sldkfjas ( alksjdasfd ) [ alskdjfalskdf] } + + @keyframes foo { + 0% { transform: translateY(0); } + 50% { opacity:0.8; } + 100% { } + } + + + .foo { + color: #112233; + background-color: #332211 !important; + } + " + ), + Ok(( + "", + vec![RuleSet { + selectors: vec![Selector { + components: vec![SelectorComponent::Class("foo".into()),], + },], + declarations: vec![ + Declaration { + data: Decl::Color { + value: Colour::Rgb(0x11, 0x22, 0x33) + }, + important: Importance::Default, + }, + Declaration { + data: Decl::BackgroundColor { + value: Colour::Rgb(0x33, 0x22, 0x11) + }, + important: Importance::Important, + }, + ], + }] + )) + ); + } + #[test] fn test_parse_named_colour() { assert_eq!( From eef5c19056c68ed19d359b5f0d0edcd38c8f455f Mon Sep 17 
00:00:00 2001 From: Chris Emerson Date: Sat, 30 Nov 2024 16:54:37 +0000 Subject: [PATCH 07/10] Remove some debug output. --- examples/html2term.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/examples/html2term.rs b/examples/html2term.rs index 6d930e0..a2c1bf9 100644 --- a/examples/html2term.rs +++ b/examples/html2term.rs @@ -238,13 +238,9 @@ mod top { let mut pth = String::from("top "); let mut node = dom.document.clone(); - let mut l=1; for &idx in &inspect_path { node = node.nth_child(idx).unwrap(); pth.push_str(&format!("> {}", node.element_name().unwrap())); - - pth.push_str(&format!("[{}]", dom.get_node_by_path(&inspect_path[..l]).unwrap().element_name().unwrap())); - l += 1; } write!(screen, "{}{}{:?}", Goto(1, vis_y_limit as u16), pth, &inspect_path).unwrap(); } From 79d6863eeb8713df93ede9746438fd348de5b8cc Mon Sep 17 00:00:00 2001 From: Chris Emerson Date: Sat, 30 Nov 2024 16:54:54 +0000 Subject: [PATCH 08/10] cargo fmt --- examples/html2term.rs | 81 +++++++----- src/css.rs | 41 +++--- src/css/parser.rs | 297 +++++++++++++++++++++++++++--------------- src/lib.rs | 5 +- 4 files changed, 261 insertions(+), 163 deletions(-) diff --git a/examples/html2term.rs b/examples/html2term.rs index a2c1bf9..b50aa35 100644 --- a/examples/html2term.rs +++ b/examples/html2term.rs @@ -26,36 +26,21 @@ mod top { style.push_str(&format!("{}", termion::style::Underline)); } RichAnnotation::Image(_) => { - style.push_str(&format!( - "{}", - Fg(LightBlue) - )); + style.push_str(&format!("{}", Fg(LightBlue))); } RichAnnotation::Emphasis => { - style.push_str(&format!( - "{}", - Fg(LightGreen) - )); + style.push_str(&format!("{}", Fg(LightGreen))); } RichAnnotation::Strong => { - style.push_str(&format!( - "{}", - Fg(LightGreen) - )); + style.push_str(&format!("{}", Fg(LightGreen))); } RichAnnotation::Strikeout => (), RichAnnotation::Code => { - style.push_str(&format!( - "{}", - Fg(LightYellow) - )); + style.push_str(&format!("{}", Fg(LightYellow))); } 
RichAnnotation::Preformat(is_cont) => { if is_cont { - style.push_str(&format!( - "{}", - Fg(LightMagenta) - )); + style.push_str(&format!("{}", Fg(LightMagenta))); } else { style.push_str(&format!("{}", Fg(Magenta))); } @@ -170,7 +155,9 @@ mod top { let mut file = std::fs::File::open(filename).expect("Tried to open file"); - let dom = html2text::config::plain().parse_html(&mut file).expect("Failed to parse HTML"); + let dom = html2text::config::plain() + .parse_html(&mut file) + .expect("Failed to parse HTML"); let mut keys = io::stdin().keys(); @@ -242,7 +229,14 @@ mod top { node = node.nth_child(idx).unwrap(); pth.push_str(&format!("> {}", node.element_name().unwrap())); } - write!(screen, "{}{}{:?}", Goto(1, vis_y_limit as u16), pth, &inspect_path).unwrap(); + write!( + screen, + "{}{}{:?}", + Goto(1, vis_y_limit as u16), + pth, + &inspect_path + ) + .unwrap(); } // 1-based screen coordinates @@ -356,37 +350,60 @@ mod top { } } - fn rerender(dom: &html2text::RcDom, inspect_path: &[usize], width: usize, options: &Options) -> Vec>> { + fn rerender( + dom: &html2text::RcDom, + inspect_path: &[usize], + width: usize, + options: &Options, + ) -> Vec>> { let config = html2text::config::rich(); #[cfg(feature = "css")] let config = if options.use_css { - config.use_doc_css() - .add_agent_css(r#" + config + .use_doc_css() + .add_agent_css( + r#" img { color: #77f; } - "#).unwrap() + "#, + ) + .unwrap() } else { config }; if inspect_path.is_empty() { - let render_tree = config.dom_to_render_tree(&dom).expect("Failed to build render tree"); - config.render_to_lines(render_tree, width).expect("Failed to render") + let render_tree = config + .dom_to_render_tree(&dom) + .expect("Failed to build render tree"); + config + .render_to_lines(render_tree, width) + .expect("Failed to render") } else { let mut path_selector = String::new(); for &idx in &inspect_path[1..] 
{ path_selector.push_str(&format!(" > :nth-child({})", idx)); } let config = config - .add_agent_css(&(format!(r#" + .add_agent_css( + &(format!( + r#" html {} {{ color: white !important; background-color: black !important; display: x-raw-dom; }} - "#, path_selector))).expect("Invalid CSS"); - let render_tree = config.dom_to_render_tree(&dom).expect("Failed to build render tree"); - config.render_to_lines(render_tree, width).expect("Failed to render") + "#, + path_selector + )), + ) + .expect("Invalid CSS"); + let render_tree = config + .dom_to_render_tree(&dom) + .expect("Failed to build render tree"); + config + .render_to_lines(render_tree, width) + .expect("Failed to render") } } } diff --git a/src/css.rs b/src/css.rs index 7417b90..f3207e9 100644 --- a/src/css.rs +++ b/src/css.rs @@ -1,13 +1,14 @@ //! Some basic CSS support. -use std::{io::Write, rc::Rc}; use std::ops::Deref; +use std::{io::Write, rc::Rc}; mod parser; use crate::{ css::parser::parse_rules, markup5ever_rcdom::{ - Handle, NodeData::{self, Comment, Document, Element} + Handle, + NodeData::{self, Comment, Document, Element}, }, tree_map_reduce, Colour, ComputedStyle, Result, Specificity, StyleOrigin, TreeMapResult, WhiteSpace, @@ -225,9 +226,7 @@ impl std::fmt::Display for StyleDecl { } match self.importance { Importance::Default => (), - Importance::Important => { - write!(f, " !important")? - } + Importance::Important => write!(f, " !important")?, } Ok(()) } @@ -325,24 +324,22 @@ fn styles_from_properties(decls: &[parser::Declaration]) -> Vec { overflow_hidden = true; } parser::Decl::Overflow { .. } | parser::Decl::OverflowY { .. 
} => {} - parser::Decl::Display { value } => { - match value { - parser::Display::None => { - styles.push(StyleDecl { - style: Style::Display(Display::None), - importance: decl.important, - }); - } - #[cfg(feature = "css_ext")] - parser::Display::RawDom => { - styles.push(StyleDecl { - style: Style::Display(Display::ExtRawDom), - importance: decl.important, - }); - } - _ => (), + parser::Decl::Display { value } => match value { + parser::Display::None => { + styles.push(StyleDecl { + style: Style::Display(Display::None), + importance: decl.important, + }); } - } + #[cfg(feature = "css_ext")] + parser::Display::RawDom => { + styles.push(StyleDecl { + style: Style::Display(Display::ExtRawDom), + importance: decl.important, + }); + } + _ => (), + }, parser::Decl::WhiteSpace { value } => { styles.push(StyleDecl { style: Style::WhiteSpace(*value), diff --git a/src/css/parser.rs b/src/css/parser.rs index e5bde97..0d64f0c 100644 --- a/src/css/parser.rs +++ b/src/css/parser.rs @@ -759,7 +759,7 @@ fn parse_class(text: &str) -> IResult<&str, SelectorComponent> { #[derive(Eq, PartialEq, Copy, Clone)] enum Sign { Plus, - Neg + Neg, } impl Sign { @@ -790,50 +790,46 @@ fn parse_nth_child_args(text: &str) -> IResult<&str, SelectorComponent> { let (rest, _) = tag("(")(text)?; let (rest, _) = skip_optional_whitespace(rest)?; - let (rest, (a, b)) = - alt(( - map( - tag("even"), - |_| (2, 0), - ), - map( - tag("odd"), - |_| (2, 1), - ), - // The case where both a and b are specified - map( - tuple(( - opt_sign, opt(digit1), tag("n"), - skip_optional_whitespace, - sign, digit1)), - |(a_sign, a_opt_val, _, - _, - b_sign, b_val)| { - let a = ::from_str(a_opt_val.unwrap_or("1")).unwrap() * a_sign.val(); - let b = ::from_str(b_val).unwrap() * b_sign.val(); - (a, b) - }), - // Just a - map( - tuple((opt_sign, opt(digit1), tag("n"))), - |(a_sign, a_opt_val, _)| { - let a = ::from_str(a_opt_val.unwrap_or("1")).unwrap() * a_sign.val(); - (a, 0) - }), - // Just b - map( - tuple(( - opt_sign, 
digit1)), - |(b_sign, b_val)| { - let b = ::from_str(b_val).unwrap() * b_sign.val(); - (0, b) - }), - ))(rest)?; + let (rest, (a, b)) = alt(( + map(tag("even"), |_| (2, 0)), + map(tag("odd"), |_| (2, 1)), + // The case where both a and b are specified + map( + tuple(( + opt_sign, + opt(digit1), + tag("n"), + skip_optional_whitespace, + sign, + digit1, + )), + |(a_sign, a_opt_val, _, _, b_sign, b_val)| { + let a = + ::from_str(a_opt_val.unwrap_or("1")).unwrap() * a_sign.val(); + let b = ::from_str(b_val).unwrap() * b_sign.val(); + (a, b) + }, + ), + // Just a + map( + tuple((opt_sign, opt(digit1), tag("n"))), + |(a_sign, a_opt_val, _)| { + let a = + ::from_str(a_opt_val.unwrap_or("1")).unwrap() * a_sign.val(); + (a, 0) + }, + ), + // Just b + map(tuple((opt_sign, digit1)), |(b_sign, b_val)| { + let b = ::from_str(b_val).unwrap() * b_sign.val(); + (0, b) + }), + ))(rest)?; let (rest, _) = tuple((skip_optional_whitespace, tag(")")))(rest)?; let sel = Selector { - components: vec![SelectorComponent::Star] + components: vec![SelectorComponent::Star], }; Ok((rest, SelectorComponent::NthChild { a, b, sel })) } @@ -943,22 +939,21 @@ fn skip_to_end_of_statement(text: &str) -> IResult<&str, ()> { Err(_) => return Ok((rest, ())), }; match &tok { - Token::Ident(..) | - Token::AtKeyword(_) | - Token::Hash(_) | - Token::String(_) | - Token::BadString(_) | - Token::Url(_) | - Token::BadUrl(_) | - Token::Delim(_) | - Token::Number(_) | - Token::Dimension(_, _) | - Token::Percentage(_) | - Token::Colon | - Token::Comma => (), - - Token::Function(_) | - Token::OpenRound => { + Token::Ident(..) 
+ | Token::AtKeyword(_) + | Token::Hash(_) + | Token::String(_) + | Token::BadString(_) + | Token::Url(_) + | Token::BadUrl(_) + | Token::Delim(_) + | Token::Number(_) + | Token::Dimension(_, _) + | Token::Percentage(_) + | Token::Colon + | Token::Comma => (), + + Token::Function(_) | Token::OpenRound => { bra_stack.push(Token::CloseRound); } Token::CDO => { @@ -980,10 +975,7 @@ fn skip_to_end_of_statement(text: &str) -> IResult<&str, ()> { return Ok((rest, ())); } // Standard closing brackets - Token::CDC | - Token::CloseSquare | - Token::CloseRound | - Token::CloseBrace => { + Token::CDC | Token::CloseSquare | Token::CloseRound | Token::CloseBrace => { if bra_stack.last() == Some(&tok) { bra_stack.pop(); @@ -1004,18 +996,17 @@ fn skip_to_end_of_statement(text: &str) -> IResult<&str, ()> { fn parse_at_rule(text: &str) -> IResult<&str, ()> { let (rest, _) = tuple(( - skip_optional_whitespace, - tag("@"), - skip_optional_whitespace, - parse_ident))(text)?; + skip_optional_whitespace, + tag("@"), + skip_optional_whitespace, + parse_ident, + ))(text)?; skip_to_end_of_statement(rest) } fn parse_statement(text: &str) -> IResult<&str, Option> { - alt(( - map(parse_ruleset, Some), - map(parse_at_rule, |_| None)))(text) + alt((map(parse_ruleset, Some), map(parse_at_rule, |_| None)))(text) } pub(crate) fn parse_stylesheet(text: &str) -> IResult<&str, Vec> { @@ -1315,45 +1306,135 @@ mod test { fn test_nth_child() { use SelectorComponent::NthChild; let (_, sel_all) = super::parse_selector("*").unwrap(); - assert_eq!(super::parse_selector(":nth-child(even)").unwrap(), - ("", Selector { - components: vec![NthChild { a: 2, b: 0, sel: sel_all.clone() }] - })); - assert_eq!(super::parse_selector(":nth-child(odd)").unwrap(), - ("", Selector { - components: vec![NthChild { a: 2, b: 1, sel: sel_all.clone() }] - })); - assert_eq!(super::parse_selector(":nth-child(17)").unwrap(), - ("", Selector { - components: vec![NthChild { a: 0, b: 17, sel: sel_all.clone() }] - })); - 
assert_eq!(super::parse_selector(":nth-child(17n)").unwrap(), - ("", Selector { - components: vec![NthChild { a: 17, b: 0, sel: sel_all.clone() }] - })); - assert_eq!(super::parse_selector(":nth-child(10n-1)").unwrap(), - ("", Selector { - components: vec![NthChild { a: 10, b: -1, sel: sel_all.clone() }] - })); - assert_eq!(super::parse_selector(":nth-child(10n+9)").unwrap(), - ("", Selector { - components: vec![NthChild { a: 10, b: 9, sel: sel_all.clone() }] - })); - assert_eq!(super::parse_selector(":nth-child(-n+3)").unwrap(), - ("", Selector { - components: vec![NthChild { a: -1, b: 3, sel: sel_all.clone() }] - })); - assert_eq!(super::parse_selector(":nth-child(n)").unwrap(), - ("", Selector { - components: vec![NthChild { a: 1, b: 0, sel: sel_all.clone() }] - })); - assert_eq!(super::parse_selector(":nth-child(+n)").unwrap(), - ("", Selector { - components: vec![NthChild { a: 1, b: 0, sel: sel_all.clone() }] - })); - assert_eq!(super::parse_selector(":nth-child(-n)").unwrap(), - ("", Selector { - components: vec![NthChild { a: -1, b: 0, sel: sel_all.clone() }] - })); + assert_eq!( + super::parse_selector(":nth-child(even)").unwrap(), + ( + "", + Selector { + components: vec![NthChild { + a: 2, + b: 0, + sel: sel_all.clone() + }] + } + ) + ); + assert_eq!( + super::parse_selector(":nth-child(odd)").unwrap(), + ( + "", + Selector { + components: vec![NthChild { + a: 2, + b: 1, + sel: sel_all.clone() + }] + } + ) + ); + assert_eq!( + super::parse_selector(":nth-child(17)").unwrap(), + ( + "", + Selector { + components: vec![NthChild { + a: 0, + b: 17, + sel: sel_all.clone() + }] + } + ) + ); + assert_eq!( + super::parse_selector(":nth-child(17n)").unwrap(), + ( + "", + Selector { + components: vec![NthChild { + a: 17, + b: 0, + sel: sel_all.clone() + }] + } + ) + ); + assert_eq!( + super::parse_selector(":nth-child(10n-1)").unwrap(), + ( + "", + Selector { + components: vec![NthChild { + a: 10, + b: -1, + sel: sel_all.clone() + }] + } + ) + ); + assert_eq!( + 
super::parse_selector(":nth-child(10n+9)").unwrap(), + ( + "", + Selector { + components: vec![NthChild { + a: 10, + b: 9, + sel: sel_all.clone() + }] + } + ) + ); + assert_eq!( + super::parse_selector(":nth-child(-n+3)").unwrap(), + ( + "", + Selector { + components: vec![NthChild { + a: -1, + b: 3, + sel: sel_all.clone() + }] + } + ) + ); + assert_eq!( + super::parse_selector(":nth-child(n)").unwrap(), + ( + "", + Selector { + components: vec![NthChild { + a: 1, + b: 0, + sel: sel_all.clone() + }] + } + ) + ); + assert_eq!( + super::parse_selector(":nth-child(+n)").unwrap(), + ( + "", + Selector { + components: vec![NthChild { + a: 1, + b: 0, + sel: sel_all.clone() + }] + } + ) + ); + assert_eq!( + super::parse_selector(":nth-child(-n)").unwrap(), + ( + "", + Selector { + components: vec![NthChild { + a: -1, + b: 0, + sel: sel_all.clone() + }] + } + ) + ); } } diff --git a/src/lib.rs b/src/lib.rs index e3e112c..6a1f5fa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1530,7 +1530,10 @@ fn process_dom_node( WhiteSpace::Pre, ); let text = RenderNode::new(RenderNodeInfo::Text(result_text)); - return Ok(Finished(RenderNode::new_styled(RenderNodeInfo::Block(vec![text]), computed))); + return Ok(Finished(RenderNode::new_styled( + RenderNodeInfo::Block(vec![text]), + computed, + ))); } _ => (), } From 3ea1ae1104ef1ef0006f93623dd5026520b2123c Mon Sep 17 00:00:00 2001 From: Chris Emerson Date: Sat, 30 Nov 2024 17:05:18 +0000 Subject: [PATCH 09/10] Fix builds with different feature flag combinations. 
--- examples/html2term.rs | 48 ++++++++++++++++++++++++++----------------- examples/html2text.rs | 1 + src/lib.rs | 5 ++--- 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/examples/html2term.rs b/examples/html2term.rs index b50aa35..a315ba2 100644 --- a/examples/html2term.rs +++ b/examples/html2term.rs @@ -4,7 +4,9 @@ extern crate argparse; extern crate unicode_width; #[cfg(unix)] mod top { - use argparse::{ArgumentParser, Store, StoreFalse}; + use argparse::{ArgumentParser, Store}; + #[cfg(feature = "css")] + use argparse::StoreFalse; use html2text::render::{RichAnnotation, TaggedLine, TaggedLineElement}; use std::collections::HashMap; use std::io::{self, Write}; @@ -139,6 +141,7 @@ mod top { pub fn main() { let mut filename = String::new(); + #[allow(unused_mut)] let mut options = Options::new(); { let mut ap = ArgumentParser::new(); @@ -335,6 +338,7 @@ mod top { } } } + #[cfg(feature = "css_ext")] Key::Char('I') => { // Enter/leave inspect mode if inspect_path.is_empty() { @@ -354,6 +358,7 @@ mod top { dom: &html2text::RcDom, inspect_path: &[usize], width: usize, + #[allow(unused)] options: &Options, ) -> Vec>> { let config = html2text::config::rich(); @@ -380,30 +385,35 @@ mod top { .render_to_lines(render_tree, width) .expect("Failed to render") } else { - let mut path_selector = String::new(); - for &idx in &inspect_path[1..] { - path_selector.push_str(&format!(" > :nth-child({})", idx)); - } - let config = config - .add_agent_css( - &(format!( - r#" + #[cfg(feature = "css_ext")] + { + let mut path_selector = String::new(); + for &idx in &inspect_path[1..] 
{ + path_selector.push_str(&format!(" > :nth-child({})", idx)); + } + let config = config + .add_agent_css( + &(format!( + r#" html {} {{ color: white !important; background-color: black !important; display: x-raw-dom; }} "#, - path_selector - )), - ) - .expect("Invalid CSS"); - let render_tree = config - .dom_to_render_tree(&dom) - .expect("Failed to build render tree"); - config - .render_to_lines(render_tree, width) - .expect("Failed to render") + path_selector + )), + ) + .expect("Invalid CSS"); + let render_tree = config + .dom_to_render_tree(&dom) + .expect("Failed to build render tree"); + config + .render_to_lines(render_tree, width) + .expect("Failed to render") + } + #[cfg(not(feature = "css_ext"))] + unreachable!() } } } diff --git a/examples/html2text.rs b/examples/html2text.rs index f92b695..b69c288 100644 --- a/examples/html2text.rs +++ b/examples/html2text.rs @@ -253,6 +253,7 @@ fn main() { StoreTrue, "Show the computed render tree instead of the rendered output", ); + #[cfg(feature = "css")] ap.refer(&mut flags.show_css).add_option( &["--show-css"], StoreTrue, diff --git a/src/lib.rs b/src/lib.rs index 6a1f5fa..41bea8a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,7 +64,6 @@ mod macros; pub mod css; pub mod render; -use css::Display; use render::text_renderer::{ RenderLine, RenderOptions, RichAnnotation, SubRenderer, TaggedLine, TextRenderer, }; @@ -1518,9 +1517,9 @@ fn process_dom_node( .style_data .computed_style(**parent_style, handle, context.use_doc_css); match computed.display.val() { - Some(Display::None) => return Ok(Nothing), + Some(css::Display::None) => return Ok(Nothing), #[cfg(feature = "css_ext")] - Some(Display::ExtRawDom) => { + Some(css::Display::ExtRawDom) => { let result_text = RcDom::node_as_dom_string(handle); let mut computed = computed; computed.white_space.maybe_update( From 6c9df621157848f3db2d7479b188e4b70dd57ce8 Mon Sep 17 00:00:00 2001 From: Chris Emerson Date: Sat, 30 Nov 2024 17:09:06 +0000 Subject: [PATCH 10/10] 
cargo fmt --- examples/html2term.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/html2term.rs b/examples/html2term.rs index a315ba2..a1c2937 100644 --- a/examples/html2term.rs +++ b/examples/html2term.rs @@ -4,9 +4,9 @@ extern crate argparse; extern crate unicode_width; #[cfg(unix)] mod top { - use argparse::{ArgumentParser, Store}; #[cfg(feature = "css")] use argparse::StoreFalse; + use argparse::{ArgumentParser, Store}; use html2text::render::{RichAnnotation, TaggedLine, TaggedLineElement}; use std::collections::HashMap; use std::io::{self, Write}; @@ -358,8 +358,7 @@ mod top { dom: &html2text::RcDom, inspect_path: &[usize], width: usize, - #[allow(unused)] - options: &Options, + #[allow(unused)] options: &Options, ) -> Vec>> { let config = html2text::config::rich(); #[cfg(feature = "css")] @@ -394,14 +393,14 @@ mod top { let config = config .add_agent_css( &(format!( - r#" + r#" html {} {{ color: white !important; background-color: black !important; display: x-raw-dom; }} "#, - path_selector + path_selector )), ) .expect("Invalid CSS");