Skip to content

Commit

Permalink
Stabilize filename hashes for fragments
Browse files Browse the repository at this point in the history
  • Loading branch information
bglw committed Dec 17, 2024
1 parent 74c4ed8 commit c933742
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 28 deletions.
58 changes: 58 additions & 0 deletions pagefind/integration_tests/base/stable-output.toolproof.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
name: Base Tests > Stable Output
steps:
- ref: ./background.toolproof.yml
- step: I have a "public/cat/index.html" file with the content {html}
html: >-
<!DOCTYPE html><html lang="en"><head></head><body>
<h1>Hello world</h1>
<p data-pagefind-meta="animal">Cat</p>
<p data-pagefind-filter="species">Feline</p>
<p data-pagefind-sort="order">1</p>
</body></html>
- step: I have a "public/dog/index.html" file with the content {html}
html: >-
<!DOCTYPE html><html lang="en"><head></head><body>
<h1>Dog world</h1>
<p data-pagefind-meta="animal">Dog</p>
<p data-pagefind-filter="species">Canine</p>
<p data-pagefind-sort="order">2</p>
</body></html>
- macro: I run Pagefind
- step: stdout should contain "Running Pagefind"
- step: The file "public/pagefind/pagefind.js" should not be empty
# -----------------------------------------
# TODO: Stabilise the `pf_meta` file hashes
# -----------------------------------------
# - I run "ls public/pagefind"
# - snapshot: stdout
# snapshot_content: |-
# ╎filter
# ╎fragment
# ╎index
# ╎pagefind-entry.json
# ╎pagefind-highlight.js
# ╎pagefind-modular-ui.css
# ╎pagefind-modular-ui.js
# ╎pagefind-ui.css
# ╎pagefind-ui.js
# ╎pagefind.en_3918d9ab34.pf_meta
# ╎pagefind.js
# ╎wasm.en.pagefind
# ╎wasm.unknown.pagefind
# -----------------------------------------
# TODO: Stabilise the `pf_filter` file hashes
# -----------------------------------------
# - I run "ls public/pagefind/filter"
# - snapshot: stdout
# snapshot_content: |-
# ╎en_8d32c48.pf_filter
- I run "ls public/pagefind/fragment"
- snapshot: stdout
snapshot_content: |-
╎en_282213b.pf_fragment
╎en_4375818.pf_fragment
╎en_571daca.pf_fragment
- I run "ls public/pagefind/index"
- snapshot: stdout
snapshot_content: |-
╎en_b2167ad.pf_index
3 changes: 2 additions & 1 deletion pagefind/src/fossick/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use lazy_static::lazy_static;
use pagefind_stem::{Algorithm, Stemmer};
use path_slash::PathExt as _;
use regex::Regex;
use std::collections::BTreeMap;
use std::io::Error;
use std::ops::Mul;
use std::path::{Path, PathBuf};
Expand Down Expand Up @@ -46,7 +47,7 @@ pub struct FossickedData {
pub url: String,
pub fragment: PageFragment,
pub word_data: HashMap<String, Vec<FossickedWord>>,
pub sort: HashMap<String, String>,
pub sort: BTreeMap<String, String>,
pub has_custom_body: bool,
pub force_inclusion: bool,
pub has_html_element: bool,
Expand Down
20 changes: 10 additions & 10 deletions pagefind/src/fossick/parser.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use hashbrown::HashMap;
use lazy_static::lazy_static;
use lol_html::html_content::Element;
use lol_html::{element, text, HtmlRewriter, Settings};
use regex::Regex;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::default::Default;
use std::rc::Rc;

Expand Down Expand Up @@ -53,11 +53,11 @@ pub struct DomParser<'a> {
#[derive(Default, Debug)]
struct DomParserData {
current_node: Rc<RefCell<DomParsingNode>>,
filters: HashMap<String, Vec<String>>,
sort: HashMap<String, String>,
meta: HashMap<String, String>,
default_meta: HashMap<String, String>,
anchor_content: HashMap<String, String>,
filters: BTreeMap<String, Vec<String>>,
sort: BTreeMap<String, String>,
meta: BTreeMap<String, String>,
default_meta: BTreeMap<String, String>,
anchor_content: BTreeMap<String, String>,
language: Option<String>,
has_html_element: bool,
has_old_bundle_reference: bool,
Expand Down Expand Up @@ -104,10 +104,10 @@ struct DomParsingNode {
#[derive(Debug)]
pub struct DomParserResult {
pub digest: String,
pub filters: HashMap<String, Vec<String>>,
pub sort: HashMap<String, String>,
pub meta: HashMap<String, String>,
pub anchor_content: HashMap<String, String>,
pub filters: BTreeMap<String, Vec<String>>,
pub sort: BTreeMap<String, String>,
pub meta: BTreeMap<String, String>,
pub anchor_content: BTreeMap<String, String>,
pub has_custom_body: bool,
pub force_inclusion: bool, // Include this page even if there is no body
pub has_html_element: bool,
Expand Down
6 changes: 3 additions & 3 deletions pagefind/src/fragments/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use hashbrown::HashMap;
use std::collections::BTreeMap;

use serde::Serialize;

Expand All @@ -15,8 +15,8 @@ pub struct PageFragmentData {
pub url: String,
pub content: String,
pub word_count: usize,
pub filters: HashMap<String, Vec<String>>,
pub meta: HashMap<String, String>,
pub filters: BTreeMap<String, Vec<String>>,
pub meta: BTreeMap<String, String>,
pub anchors: Vec<PageAnchorData>,
}

Expand Down
14 changes: 6 additions & 8 deletions pagefind/src/service/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,8 @@
pub use crate::output::SyntheticFile;
use anyhow::{bail, Result};
use hashbrown::HashMap;
use rust_patch::Patch;
use std::path::PathBuf;
use std::{collections::BTreeMap, path::PathBuf};

use crate::{
fossick::{parser::DomParserResult, Fossicker},
Expand All @@ -49,7 +48,7 @@ use crate::{
pub struct IndexedFileResponse {
pub page_word_count: u32,
pub page_url: String,
pub page_meta: HashMap<String, String>,
pub page_meta: BTreeMap<String, String>,
}

pub struct PagefindIndex {
Expand Down Expand Up @@ -126,16 +125,16 @@ impl PagefindIndex {
url: String,
content: String,
language: String,
meta: Option<HashMap<String, String>>,
filters: Option<HashMap<String, Vec<String>>>,
sort: Option<HashMap<String, String>>,
meta: Option<BTreeMap<String, String>>,
filters: Option<BTreeMap<String, Vec<String>>>,
sort: Option<BTreeMap<String, String>>,
) -> Result<IndexedFileResponse> {
let data = DomParserResult {
digest: content,
filters: filters.unwrap_or_default(),
sort: sort.unwrap_or_default(),
meta: meta.unwrap_or_default(),
anchor_content: HashMap::new(),
anchor_content: BTreeMap::new(),
has_custom_body: false,
force_inclusion: true,
has_html_element: true,
Expand Down Expand Up @@ -214,7 +213,6 @@ impl PagefindIndex {
#[cfg(test)]
mod tests {
use super::*;
use tokio;

#[tokio::test]
async fn test_add_file() {
Expand Down
9 changes: 5 additions & 4 deletions pagefind/src/service/requests.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use hashbrown::HashMap;
use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};

use crate::options::PagefindServiceConfig;
Expand Down Expand Up @@ -26,9 +27,9 @@ pub(super) enum RequestAction {
url: String,
content: String,
language: String,
meta: Option<HashMap<String, String>>,
filters: Option<HashMap<String, Vec<String>>>,
sort: Option<HashMap<String, String>>,
meta: Option<BTreeMap<String, String>>,
filters: Option<BTreeMap<String, Vec<String>>>,
sort: Option<BTreeMap<String, String>>,
},
AddDir {
index_id: u32,
Expand Down
5 changes: 3 additions & 2 deletions pagefind/src/service/responses.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use hashbrown::HashMap;
use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};

#[derive(Debug, Deserialize, Serialize)]
Expand All @@ -20,7 +21,7 @@ pub(super) enum ResponseAction {
IndexedFile {
page_word_count: u32,
page_url: String,
page_meta: HashMap<String, String>,
page_meta: BTreeMap<String, String>,
},
IndexedDir {
page_count: u32,
Expand Down

0 comments on commit c933742

Please sign in to comment.