Skip to content

Commit

Permalink
add repl (#36)
Browse files Browse the repository at this point in the history
* add repl

* doc fixes

* fix
  • Loading branch information
suaviloquence authored Oct 8, 2024
1 parent fba58f3 commit fd8cbae
Show file tree
Hide file tree
Showing 10 changed files with 667 additions and 54 deletions.
80 changes: 79 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ serde = { version = "1.0", features = ["derive", "rc"] }
futures = "0.3"
url = "2.5"
clap = { version = "4.5.16", features = ["derive"] }
ouroboros = "0.18.4"

[workspace]
members = [".", "filter-proc-macro", "filter-types"]
Expand Down
68 changes: 46 additions & 22 deletions filter-types/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,56 @@ pub trait ElementContextView<'ast, 'ctx> {
///
/// Overwrites a previous value bound to that name, if one is present.
///
/// Implementors should not override this; implement [`set_inner`](Self::set_inner) instead.
///
/// # Errors
///
/// Returns an `Err` if `name` cannot be rebound (e.g., it is `"element"`).
fn set(&mut self, name: Cow<'ast, str>, value: EValue<'ctx>) -> Result<()>;
fn set(&mut self, name: Cow<'ast, str>, value: EValue<'ctx>) -> Result<()> {
    // `element` is a special binding: reject any attempt to rebind it.
    if let immutable @ "element" = &*name {
        bail!("assignment to immutable binding `{immutable}`");
    }

    // Every other name is handed to the implementor-provided storage hook.
    self.set_inner(name, value)
}

/// Sets the binding with name `name` to `value` in this context.
///
/// Overwrites a previous value bound to that name, if one is present.
///
/// Implementors should implement this and not `set`.
fn set_inner(&mut self, name: Cow<'ast, str>, value: EValue<'ctx>) -> Result<()>;

/// Gets the binding with name `id`, if it is present. Handles
/// retrieving special bindings like `element`. Looks in this
/// context and all parent contexts, starting innermost first.
///
/// Implementors should not override this; implement [`get_inner`](Self::get_inner) instead.
///
/// # Errors
///
/// Returns an `Err` if a binding with name `id` is not found in this
/// scope or any parent scopes.
fn get(&self, id: &str) -> Result<EValue<'ctx>>;
fn get(&self, id: &str) -> Result<EValue<'ctx>> {
    // The special `element` binding is answered directly from `element()`
    // and never reaches the implementor's lookup.
    if id == "element" {
        return Ok(self.element().into());
    }

    // All other names are resolved by the implementor.
    self.get_inner(id)
}

/// For implementors of [`ElementContext`]. Only needs to get safe
/// (non-special) bindings, looking up in parent scopes as necessary.
///
/// # Errors
///
/// Returns an `Err` if a binding with name `id` is not found in this
/// scope or any parent scopes.
fn get_inner(&self, id: &str) -> Result<EValue<'ctx>>;

/// Returns a [reference](ElementRef) to the root element of this block.
#[must_use]
fn element(&self) -> ElementRef<'_>;
fn element(&self) -> ElementRef<'ctx>;

/// Returns a reference to the URL of the document that this element is in.
#[must_use]
Expand Down Expand Up @@ -167,27 +199,19 @@ impl<'ast, 'ctx> Linked<'ast, 'ctx> {
}

impl<'ast, 'ctx> ElementContextView<'ast, 'ctx> for Linked<'ast, 'ctx> {
fn get(&self, id: &str) -> Result<EValue<'ctx>> {
match id {
"element" => Ok(self.element.into()),
_ => match self.bindings.0.get(id) {
Some(id) => Ok(id.clone()),
None => self
.parent
.with_msg(|| format!("unknown binding `{id}`"))?
.get(id),
},
fn get_inner(&self, id: &str) -> Result<EValue<'ctx>> {
    // A binding stored in this context wins over anything further out.
    if let Some(value) = self.bindings.0.get(id) {
        return Ok(value.clone());
    }

    // Not bound here: continue the lookup in the parent. `with_msg` is
    // defined elsewhere; presumably it turns a missing parent into the
    // "unknown binding" error.
    let parent = self
        .parent
        .with_msg(|| format!("unknown binding `{id}`"))?;
    parent.get(id)
}

fn set(&mut self, name: Cow<'ast, str>, value: EValue<'ctx>) -> Result<()> {
match &*name {
immutable @ "element" => {
bail!("assignment to immutable binding `{immutable}`")
}
_ => self.bindings.0.insert(name, value),
};

#[inline]
fn set_inner(&mut self, name: Cow<'ast, str>, value: EValue<'ctx>) -> Result<()> {
// Store the binding in this context's own `bindings`; whatever `insert`
// returns is intentionally discarded.
self.bindings.0.insert(name, value);
// This implementation has no failure path.
Ok(())
}

Expand All @@ -197,7 +221,7 @@ impl<'ast, 'ctx> ElementContextView<'ast, 'ctx> for Linked<'ast, 'ctx> {
}

#[inline]
fn element(&self) -> ElementRef<'_> {
fn element(&self) -> ElementRef<'ctx> {
self.element
}
}

Check warning on line 227 in filter-types/src/context.rs

View workflow job for this annotation

GitHub Actions / verify formatting and lints

Diff in /home/runner/work/scrapelect/scrapelect/filter-types/src/context.rs
Expand Down
21 changes: 12 additions & 9 deletions filter-types/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ use super::{

pub use scrapelect_filter_proc_macro::{filter_fn, Args};

/// Typed arguments for a [`Filter`]. It can be implemented manually, but you
/// can also use the derive macro [`Args`](scrapelect_filter_proc_macro::Args)
/// if all the fields in the struct implement [`TryFromValue<Element>`](TryFromValue).
/// Typed arguments for a [`Filter`].
///
/// If all the fields in the struct implement [`TryFromValue<Element>`](TryFromValue), it is easier
/// to use the derive macro [`Args`], but this can also be implemented manually if
/// you need to express more custom deserializing logic.
///
/// [`Args`]: scrapelect_filter_proc_macro::Args
pub trait Args<'doc>: Sized {
/// Try to deserialize the typed arguments from the given `args`.
///
Expand Down Expand Up @@ -59,18 +63,17 @@ pub trait Filter {
) -> Result<PValue<'doc>>;
}

#[allow(clippy::doc_lazy_continuation)]
/// An object-safe version of [`Filter`]. All `F: Filter` implement this trait,
/// so prefer implementing `Filter` unless you must:
///
/// 1. Deserialize the input [`PValue`] in a custom way (not using [`TryFromValue`])
/// 2. Use custom arg-deserializing logic (but often you will be able to implement
/// [`Args`] manually instead, and still get the typed guarantees of [`Filter`])
/// [`Args`] manually instead, and still get the typed guarantees of [`Filter`])
/// 3. Use the `&self` reference. This *can* be used to store state with interior
/// mutability (though note that in `scrapelect`, filters must be `Send + Sync`) to
/// register, but it is often not the best idea to have filter state because filters
/// can be called from anywhere in the program, and you will have to reason out the
/// soundness of having the state.
/// mutability (though note that in `scrapelect`, filters must be `Send + Sync`) to
/// register, but it is often not the best idea to have filter state because filters
/// can be called from anywhere in the program, and you will have to reason out the
/// soundness of having the state.
pub trait FilterDyn {
/// Call this filter with the given `value`, `args`, and `ctx`.
///
Expand Down
10 changes: 9 additions & 1 deletion filter-types/src/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,15 @@ pub enum Element<'a> {
impl fmt::Display for Element<'_> {
    /// Writes the element as its opening tag only: `<`, the tag name, each
    /// attribute as ` name="value"`, then `>`.
    ///
    /// Child nodes and the closing tag are not written, and attribute values
    /// are emitted verbatim (no escaping is applied).
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying formatter.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Element(elem) => {
                write!(f, "<{}", elem.value().name())?;

                // The leading space separates each attribute from the tag
                // name and from the previous attribute; without it the output
                // runs together as `<aclass="x"href="y">`.
                for (name, value) in elem.value().attrs() {
                    write!(f, r#" {name}="{value}""#)?;
                }

                f.write_str(">")
            }
        }
    }
}
Expand Down
16 changes: 16 additions & 0 deletions src/frontend/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,4 +221,20 @@ mod selector_display {
Ok(())
}
}

impl Selector<'_> {
    /// Converts this AST selector into a [`scraper::Selector`] by rendering
    /// it with `to_string` and parsing the rendered text.
    ///
    /// # Panics
    ///
    /// Panics if `scraper` rejects the rendered selector text. That is
    /// treated as an internal `scrapelect` bug, not a recoverable error.
    pub fn to_scraper(&self) -> scraper::Selector {
        let rendered = self.to_string();
        // Kept as its own binding on purpose: the original author notes that
        // the borrow checker rejects parsing inline in the `match`.
        let parsed = scraper::Selector::parse(&rendered);
        match parsed {
            Err(error) => unreachable!(
                "failed to parse selector `{selector_str}`.
This is a bug in `scrapelect`, please report it.
`selectors` error: {e}",
                selector_str = rendered,
                e = error,
            ),
            Ok(selector) => selector,
        }
    }
}
}
6 changes: 4 additions & 2 deletions src/frontend/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ impl<'a> Parser<'a> {
Ok(Inline { value, filters })
}

fn parse_value(&mut self) -> Result<Inline<'a>> {
pub(crate) fn parse_value(&mut self) -> Result<Inline<'a>> {
let (span, lx) = self.scanner.peek_non_whitespace();
match lx.token {
Token::Less => self.parse_inline(),
Expand Down Expand Up @@ -230,7 +230,9 @@ impl<'a> Parser<'a> {
let (span, lx) = item;

match lx.token {
Token::BraceOpen | Token::ParenOpen => Ok(None),
// Eof is only allowed in a repl context.
// TODO: investigate if this makes it possible to parse an invalid program anyway.
Token::BraceOpen | Token::ParenOpen | Token::Eof => Ok(None),
// invariant: peek_next_whitespace is one of Id | Hash | Dot | Star
// whitespace is eaten in the above block.
Token::Whitespace => Ok(Some(SelectorCombinator::Descendent)),
Expand Down
Loading

0 comments on commit fd8cbae

Please sign in to comment.