Skip to content

Commit

Permalink
Merge pull request #189 from jugglerchris/html_nth_child_raw_dom
Browse files Browse the repository at this point in the history
Add `:nth-child()` and (as a debug extension) `display: x-raw-dom`.
  • Loading branch information
jugglerchris authored Nov 30, 2024
2 parents 4fcb4f0 + 6c9df62 commit 7291cba
Show file tree
Hide file tree
Showing 9 changed files with 957 additions and 71 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ html_trace = ["dep:log"]
html_trace_bt = ["html_trace", "dep:backtrace"]
default = []
css = []
css_ext = ["css"]

[[example]]
name = "html2term"
Expand Down
216 changes: 180 additions & 36 deletions examples/html2term.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ extern crate argparse;
extern crate unicode_width;
#[cfg(unix)]
mod top {
#[cfg(feature = "css")]
use argparse::StoreFalse;
use argparse::{ArgumentParser, Store};
use html2text::render::{RichAnnotation, TaggedLine, TaggedLineElement};
use std::collections::HashMap;
Expand All @@ -16,6 +18,7 @@ mod top {
use unicode_width::UnicodeWidthStr;

fn to_style(tag: &[RichAnnotation]) -> String {
use termion::color::*;
let mut style = String::new();

for ann in tag {
Expand All @@ -25,42 +28,32 @@ mod top {
style.push_str(&format!("{}", termion::style::Underline));
}
RichAnnotation::Image(_) => {
style.push_str(&format!(
"{}",
termion::color::Fg(termion::color::LightBlue)
));
style.push_str(&format!("{}", Fg(LightBlue)));
}
RichAnnotation::Emphasis => {
style.push_str(&format!(
"{}",
termion::color::Fg(termion::color::LightGreen)
));
style.push_str(&format!("{}", Fg(LightGreen)));
}
RichAnnotation::Strong => {
style.push_str(&format!(
"{}",
termion::color::Fg(termion::color::LightGreen)
));
style.push_str(&format!("{}", Fg(LightGreen)));
}
RichAnnotation::Strikeout => (),
RichAnnotation::Code => {
style.push_str(&format!(
"{}",
termion::color::Fg(termion::color::LightYellow)
));
style.push_str(&format!("{}", Fg(LightYellow)));
}
RichAnnotation::Preformat(is_cont) => {
if is_cont {
style.push_str(&format!(
"{}",
termion::color::Fg(termion::color::LightMagenta)
));
style.push_str(&format!("{}", Fg(LightMagenta)));
} else {
style.push_str(&format!("{}", termion::color::Fg(termion::color::Magenta)));
style.push_str(&format!("{}", Fg(Magenta)));
}
}
// Ignore unhandled annotations
_ => {}
RichAnnotation::Colour(col) => {
style.push_str(&format!("{}", Fg(Rgb(col.r, col.g, col.b))));
}
RichAnnotation::BgColour(col) => {
style.push_str(&format!("{}", Bg(Rgb(col.r, col.g, col.b))));
}
_ => todo!(),
}
}
style
Expand Down Expand Up @@ -132,29 +125,45 @@ mod top {
FragMap { start_xy: map }
}

/// Run-time settings for the terminal viewer, filled in from command-line flags.
struct Options {
    /// Whether CSS should be applied when rendering.
    ///
    /// Only present when the crate is built with the `css` feature.
    #[cfg(feature = "css")]
    use_css: bool,
}

impl Options {
    /// Create the default settings: CSS is on whenever the `css` feature is compiled in.
    fn new() -> Self {
        Self {
            #[cfg(feature = "css")]
            use_css: true,
        }
    }
}

pub fn main() {
let mut filename = String::new();
#[allow(unused_mut)]
let mut options = Options::new();
{
let mut ap = ArgumentParser::new();
ap.refer(&mut filename)
.add_argument("filename", Store, "Set HTML filename");
#[cfg(feature = "css")]
ap.refer(&mut options.use_css)
.add_option(&["--no-css"], StoreFalse, "Disable CSS");
ap.parse_args_or_exit();
}

let (width, height) = termion::terminal_size().unwrap();
let (width, height) = (width as usize, height as usize);

let mut file = std::fs::File::open(filename).expect("Tried to open file");
let annotated =
html2text::from_read_rich(&mut file, width).expect("Failed to convert from HTML");

let link_map = find_links(&annotated);
let frag_map = find_frags(&annotated);
let dom = html2text::config::plain()
.parse_html(&mut file)
.expect("Failed to parse HTML");

let mut keys = io::stdin().keys();

// max_y is the largest (0-based) index of a real document line.
let max_y = annotated.len() - 1;
// top_y is the (0-based) index of the document line shown at
// the top of the visible screen.
let mut top_y = 0;
Expand All @@ -169,7 +178,17 @@ mod top {
.into_alternate_screen()
.unwrap();

let mut annotated = rerender(&dom, &[], width, &options);

let link_map = find_links(&annotated);
let frag_map = find_frags(&annotated);

let mut inspect_path = vec![];

loop {
// max_y is the largest (0-based) index of a real document line.
let max_y = annotated.len() - 1;

// Sanity-check the current screen position. max_y should
// be small enough that no blank lines beyond the end of
// the document are visible on screen (except when the
Expand All @@ -186,7 +205,10 @@ mod top {
top_y = std::cmp::min(top_y, doc_y);

let opt_url = link_map.link_at(doc_x, doc_y);
let vis_y_limit = std::cmp::min(top_y + height, max_y + 1);
let mut vis_y_limit = std::cmp::min(top_y + height, max_y + 1);
if !inspect_path.is_empty() {
vis_y_limit -= 1;
}
write!(screen, "{}", termion::clear::All).unwrap();
for (i, line) in annotated[top_y..vis_y_limit].iter().enumerate() {
write!(screen, "{}", Goto(1, i as u16 + 1)).unwrap();
Expand All @@ -202,6 +224,23 @@ mod top {
write!(screen, "{}{}{}", style, ts.s, termion::style::Reset).unwrap();
}
}
if !inspect_path.is_empty() {
let mut pth = String::from("top ");
let mut node = dom.document.clone();

for &idx in &inspect_path {
node = node.nth_child(idx).unwrap();
pth.push_str(&format!("> {}", node.element_name().unwrap()));
}
write!(
screen,
"{}{}{:?}",
Goto(1, vis_y_limit as u16),
pth,
&inspect_path
)
.unwrap();
}

// 1-based screen coordinates
let cursor_x = (doc_x + 1) as u16;
Expand All @@ -213,19 +252,52 @@ mod top {
match k {
Key::Char('q') => break,
Key::Char('j') | Key::Down => {
if doc_y < max_y {
doc_y += 1;
if inspect_path.is_empty() {
if doc_y < max_y {
doc_y += 1;
}
} else {
*inspect_path.last_mut().unwrap() += 1;
if dom.get_node_by_path(&inspect_path).is_none() {
// No next node - undo.
*inspect_path.last_mut().unwrap() -= 1;
} else {
annotated = rerender(&dom, &inspect_path, width, &options);
}
}
}
Key::Char('k') | Key::Up => {
doc_y = doc_y.saturating_sub(1);
if inspect_path.is_empty() {
doc_y = doc_y.saturating_sub(1);
} else {
if *inspect_path.last().unwrap() > 1 {
*inspect_path.last_mut().unwrap() -= 1;
annotated = rerender(&dom, &inspect_path, width, &options);
}
}
}
Key::Char('h') | Key::Left => {
doc_x = doc_x.saturating_sub(1);
if inspect_path.is_empty() {
doc_x = doc_x.saturating_sub(1);
} else {
if inspect_path.len() > 1 {
inspect_path.pop();
annotated = rerender(&dom, &inspect_path, width, &options);
}
}
}
Key::Char('l') | Key::Right => {
if doc_x + 1 < width {
doc_x += 1;
if inspect_path.is_empty() {
if doc_x + 1 < width {
doc_x += 1;
}
} else {
inspect_path.push(1);
if dom.get_node_by_path(&inspect_path).is_none() {
inspect_path.pop();
} else {
annotated = rerender(&dom, &inspect_path, width, &options);
}
}
}
Key::Char(' ') | Key::PageDown => {
Expand Down Expand Up @@ -266,11 +338,83 @@ mod top {
}
}
}
#[cfg(feature = "css_ext")]
Key::Char('I') => {
// Enter/leave inspect mode
if inspect_path.is_empty() {
inspect_path.push(1);
} else {
inspect_path.clear();
}
annotated = rerender(&dom, &inspect_path, width, &options);
}
_ => {}
}
}
}
}

/// Render `dom` into rich annotated lines laid out for a terminal `width` columns wide.
///
/// The base configuration is `html2text::config::rich()`. When the `css` feature is
/// compiled in and `options.use_css` is set, the document's own CSS is enabled and a
/// small agent stylesheet colouring `img` elements is added.
///
/// A non-empty `inspect_path` selects inspect mode (requires the `css_ext` feature):
/// every path entry after the first becomes one ` > :nth-child(N)` step beneath `html`,
/// and the element matched by that selector is styled white-on-black with
/// `display: x-raw-dom`.
///
/// # Panics
///
/// Panics if an agent stylesheet is rejected, if the render tree cannot be built, or if
/// rendering fails. Also panics (via `unreachable!`) when `inspect_path` is non-empty
/// but the `css_ext` feature is not compiled in.
fn rerender(
    dom: &html2text::RcDom,
    inspect_path: &[usize],
    width: usize,
    #[allow(unused)] options: &Options,
) -> Vec<TaggedLine<Vec<RichAnnotation>>> {
    let config = html2text::config::rich();

    // Optionally layer the document CSS plus our own image colouring on top.
    #[cfg(feature = "css")]
    let config = match options.use_css {
        true => config
            .use_doc_css()
            .add_agent_css(
                r#"
img {
color: #77f;
}
"#,
            )
            .unwrap(),
        false => config,
    };

    // In inspect mode, add a rule which highlights the selected node; otherwise the
    // configuration is used as it stands.
    let config = if inspect_path.is_empty() {
        config
    } else {
        #[cfg(feature = "css_ext")]
        {
            // The first path entry is not turned into a selector step; only the
            // remaining entries become `:nth-child()` steps below `html`.
            let path_selector: String = inspect_path[1..]
                .iter()
                .map(|idx| format!(" > :nth-child({})", idx))
                .collect();
            let inspect_css = format!(
                r#"
html {} {{
color: white !important;
background-color: black !important;
display: x-raw-dom;
}}
"#,
                path_selector
            );
            config.add_agent_css(&inspect_css).expect("Invalid CSS")
        }
        #[cfg(not(feature = "css_ext"))]
        unreachable!()
    };

    // Both modes finish with the same two-step render.
    let render_tree = config
        .dom_to_render_tree(dom)
        .expect("Failed to build render tree");
    config
        .render_to_lines(render_tree, width)
        .expect("Failed to render")
}
}

#[cfg(not(unix))]
Expand Down
19 changes: 19 additions & 0 deletions examples/html2text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,15 @@ where
.unwrap();
}
}
#[cfg(feature = "css")]
{
if flags.show_css {
let conf = config::plain();
let conf = update_config(conf, &flags);
let dom = conf.parse_html(input).unwrap();
return html2text::dom_to_parsed_style(&dom).expect("Parsing CSS");
}
}
if flags.show_dom {
let conf = config::plain();
let conf = update_config(conf, &flags);
Expand Down Expand Up @@ -162,6 +171,8 @@ struct Flags {
use_only_css: bool,
show_dom: bool,
show_render: bool,
#[cfg(feature = "css")]
show_css: bool,
}

fn main() {
Expand All @@ -182,6 +193,8 @@ fn main() {
use_only_css: false,
show_dom: false,
show_render: false,
#[cfg(feature = "css")]
show_css: false,
};
let mut literal: bool = false;

Expand Down Expand Up @@ -240,6 +253,12 @@ fn main() {
StoreTrue,
"Show the computed render tree instead of the rendered output",
);
#[cfg(feature = "css")]
ap.refer(&mut flags.show_css).add_option(
&["--show-css"],
StoreTrue,
"Show the parsed CSS instead of rendered output",
);
ap.parse_args_or_exit();
}

Expand Down
Loading

0 comments on commit 7291cba

Please sign in to comment.