Skip to content

Commit

Permalink
add control flow capabilities
Browse files Browse the repository at this point in the history
  • Loading branch information
aumetra committed Sep 27, 2024
1 parent 78521a0 commit 32ba801
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 11 deletions.
7 changes: 2 additions & 5 deletions crates/kitsune-embed/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use kitsune_http_client::Client as HttpClient;
use lantern_client_sdk::models::EmbedWithExpire;
use schaber::Scraper;
use smol_str::SmolStr;
use std::sync::LazyLock;
use std::{ops::ControlFlow, sync::LazyLock};

pub use lantern_client_sdk::models::{Embed, EmbedType};

Expand All @@ -26,11 +26,8 @@ fn first_link_from_fragment(fragment: &str) -> Option<String> {
let mut link = None;
LINK_SCRAPER
.process(fragment, |element| {
if link.is_some() {
return;
}

link = element.get_attribute("href");
ControlFlow::Break(())
})
.unwrap();

Expand Down
30 changes: 24 additions & 6 deletions lib/schaber/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,26 @@ use lol_html::{
html_content::Element,
ElementContentHandlers, HandlerResult, HtmlRewriter, Selector, Settings,
};
use std::{borrow::Cow, str::FromStr};
use std::{borrow::Cow, ops::ControlFlow, str::FromStr};
use thiserror::Error;

type Result<T, E = Error> = std::result::Result<T, E>;

/// Treats a content-handler error from `lol_html` as a successful early exit.
///
/// Content handlers signal "stop processing" by returning an error, so when
/// the wrapped expression evaluates to
/// `Err(RewritingError::ContentHandlerError(..))` this macro returns `Ok(())`
/// from the *enclosing function*. Every other value (`Ok` or a different
/// error) is yielded unchanged so the caller can still apply `?` to it.
macro_rules! handle_error {
    ($error_expr:expr) => {{
        let outcome = $error_expr;
        if matches!(
            outcome,
            Err(::lol_html::errors::RewritingError::ContentHandlerError(..))
        ) {
            // The handler asked to stop; this is control flow, not a failure.
            return Ok(());
        }
        outcome
    }};
}

/// Data-less marker error handed back to `lol_html` from the element content
/// handler when the user-supplied callback does not ask to continue.
///
/// It is private and is only used as a control-flow signal; it is not meant
/// to surface to callers as a real failure.
#[derive(Debug, Error)]
#[error("small sacrifice for the lol_html gods")]
struct Sacrifice;

#[derive(Debug, Error)]
pub enum Error {
#[error(transparent)]
Expand All @@ -31,7 +46,7 @@ impl Scraper {
pub fn process<I, H>(&self, input: I, mut handler: H) -> Result<()>
where
I: AsRef<[u8]>,
H: FnMut(&Element<'_, '_>),
H: FnMut(&Element<'_, '_>) -> ControlFlow<()>,
{
#[inline(always)]
fn handler_assert<F>(uwu: F) -> F
Expand All @@ -54,17 +69,20 @@ impl Scraper {
element_content_handlers: vec![(
Cow::Borrowed(&self.element_selector),
ElementContentHandlers::default().element(handler_assert(|el| {
handler(el);
Ok(())
if handler(el).is_continue() {
Ok(())
} else {
Err(Box::new(Sacrifice))
}
})),
)],
..Settings::new()
},
sink_assert(|_| {}),
);

rewriter.write(input.as_ref())?;
rewriter.end()?;
handle_error!(rewriter.write(input.as_ref()))?;
handle_error!(rewriter.end())?;

Ok(())
}
Expand Down
2 changes: 2 additions & 0 deletions lib/schaber/tests/basic.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use schaber::Scraper;
use std::ops::ControlFlow;

#[test]
fn select_link() {
Expand All @@ -16,6 +17,7 @@ fn select_link() {
scraper
.process(html, |element| {
link_url = element.get_attribute("href");
ControlFlow::Break(())
})
.unwrap();

Expand Down
56 changes: 56 additions & 0 deletions lib/schaber/tests/control_flow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
use schaber::Scraper;
use std::ops::ControlFlow;

/// Returning `ControlFlow::Break` from the handler must stop the scrape:
/// only the first anchor's `href` may be recorded, because a second
/// invocation would overwrite it with the later link.
#[test]
fn ends_after_break() {
    let scraper = Scraper::new("a").unwrap();
    let document = r#"
<div id="hello">
<a href="http://druckbrudi.lab">
PRINT MORE BLÅHAJ CATEARS!
</a>
<a href="http://evil.com">
This link shall not be seen!
</a>
</div>
"#;

    let mut seen_href = None;
    let outcome = scraper.process(document, |anchor| {
        seen_href = anchor.get_attribute("href");
        ControlFlow::Break(())
    });
    outcome.unwrap();

    assert_eq!(seen_href.as_deref(), Some("http://druckbrudi.lab"));
}

/// Returning `ControlFlow::Continue` from the handler must keep the scrape
/// going: the handler runs for every anchor, so the recorded `href` ends up
/// being the one from the last link in the document.
#[test]
fn continues_after_continue() {
    let scraper = Scraper::new("a").unwrap();
    let document = r#"
<div id="hello">
<a href="http://druckbrudi.lab">
PRINT MORE BLÅHAJ CATEARS!
</a>
<a href="https://good.org">
This link shall be seen!
</a>
</div>
"#;

    let mut seen_href = None;
    let outcome = scraper.process(document, |anchor| {
        seen_href = anchor.get_attribute("href");
        ControlFlow::Continue(())
    });
    outcome.unwrap();

    assert_eq!(seen_href.as_deref(), Some("https://good.org"));
}

0 comments on commit 32ba801

Please sign in to comment.