diff --git a/crates/kitsune-embed/src/lib.rs b/crates/kitsune-embed/src/lib.rs index 1544ef5a7..e484f0d24 100644 --- a/crates/kitsune-embed/src/lib.rs +++ b/crates/kitsune-embed/src/lib.rs @@ -14,7 +14,7 @@ use kitsune_http_client::Client as HttpClient; use lantern_client_sdk::models::EmbedWithExpire; use schaber::Scraper; use smol_str::SmolStr; -use std::sync::LazyLock; +use std::{ops::ControlFlow, sync::LazyLock}; pub use lantern_client_sdk::models::{Embed, EmbedType}; @@ -26,11 +26,8 @@ fn first_link_from_fragment(fragment: &str) -> Option<String> { let mut link = None; LINK_SCRAPER .process(fragment, |element| { - if link.is_some() { - return; - } - link = element.get_attribute("href"); + ControlFlow::Break(()) }) .unwrap(); diff --git a/lib/schaber/src/lib.rs b/lib/schaber/src/lib.rs index b72d8d0fd..0163fd3dc 100644 --- a/lib/schaber/src/lib.rs +++ b/lib/schaber/src/lib.rs @@ -3,11 +3,26 @@ use lol_html::{ html_content::Element, ElementContentHandlers, HandlerResult, HtmlRewriter, Selector, Settings, }; -use std::{borrow::Cow, str::FromStr}; +use std::{borrow::Cow, ops::ControlFlow, str::FromStr}; use thiserror::Error; type Result<T, E = Error> = std::result::Result<T, E>; +/// Ignore any content handler "errors", since we use these errors +/// as our means of communicating control flow +macro_rules! 
handle_error { + ($error_expr:expr) => {{ + match { $error_expr } { + Err(::lol_html::errors::RewritingError::ContentHandlerError(..)) => return Ok(()), + other => other, + } + }}; +} + +#[derive(Debug, Error)] +#[error("small sacrifice for the lol_html gods")] +struct Sacrifice; + #[derive(Debug, Error)] pub enum Error { #[error(transparent)] @@ -31,7 +46,7 @@ impl Scraper { pub fn process<I, H>(&self, input: I, mut handler: H) -> Result<()> where I: AsRef<[u8]>, - H: FnMut(&Element<'_, '_>), + H: FnMut(&Element<'_, '_>) -> ControlFlow<()>, { #[inline(always)] fn handler_assert<F>(uwu: F) -> F where @@ -54,8 +69,11 @@ impl Scraper { element_content_handlers: vec![( Cow::Borrowed(&self.element_selector), ElementContentHandlers::default().element(handler_assert(|el| { - handler(el); - Ok(()) + if handler(el).is_continue() { + Ok(()) + } else { + Err(Box::new(Sacrifice)) + } })), )], ..Settings::new() @@ -63,8 +81,8 @@ impl Scraper { sink_assert(|_| {}), ); - rewriter.write(input.as_ref())?; - rewriter.end()?; + handle_error!(rewriter.write(input.as_ref()))?; + handle_error!(rewriter.end())?; Ok(()) } diff --git a/lib/schaber/tests/basic.rs b/lib/schaber/tests/basic.rs index 66f3bea52..0afbe44c7 100644 --- a/lib/schaber/tests/basic.rs +++ b/lib/schaber/tests/basic.rs @@ -1,4 +1,5 @@ use schaber::Scraper; +use std::ops::ControlFlow; #[test] fn select_link() { @@ -16,6 +17,7 @@ fn select_link() { scraper .process(html, |element| { link_url = element.get_attribute("href"); + ControlFlow::Break(()) }) .unwrap(); diff --git a/lib/schaber/tests/control_flow.rs b/lib/schaber/tests/control_flow.rs new file mode 100644 index 000000000..4199b68d5 --- /dev/null +++ b/lib/schaber/tests/control_flow.rs @@ -0,0 +1,56 @@ +use schaber::Scraper; +use std::ops::ControlFlow; + +#[test] +fn ends_after_break() { + let html = r#" + <a href="http://druckbrudi.lab"></a><a href="https://example.com"></a> + "#; + + let mut link_url = None; + let scraper = Scraper::new("a").unwrap(); + + scraper + .process(html, |element| { + link_url = element.get_attribute("href"); + 
ControlFlow::Break(()) + }) + .unwrap(); + + assert_eq!(link_url.as_deref(), Some("http://druckbrudi.lab")); +} + +#[test] +fn continues_after_continue() { + let html = r#" + <a href="https://example.com"></a><a href="https://good.org"></a> + "#; + + let mut link_url = None; + let scraper = Scraper::new("a").unwrap(); + + scraper + .process(html, |element| { + link_url = element.get_attribute("href"); + ControlFlow::Continue(()) + }) + .unwrap(); + + assert_eq!(link_url.as_deref(), Some("https://good.org")); +}