From c9337423d1c1b46f3522df99e3595525ee9ff846 Mon Sep 17 00:00:00 2001
From: Liam Bigelow <40188355+bglw@users.noreply.github.com>
Date: Wed, 18 Dec 2024 09:55:46 +1300
Subject: [PATCH] Stabilize filename hashes for fragments
---
.../base/stable-output.toolproof.yml | 58 +++++++++++++++++++
pagefind/src/fossick/mod.rs | 3 +-
pagefind/src/fossick/parser.rs | 20 +++----
pagefind/src/fragments/mod.rs | 6 +-
pagefind/src/service/api.rs | 14 ++---
pagefind/src/service/requests.rs | 9 +--
pagefind/src/service/responses.rs | 5 +-
7 files changed, 87 insertions(+), 28 deletions(-)
create mode 100644 pagefind/integration_tests/base/stable-output.toolproof.yml
diff --git a/pagefind/integration_tests/base/stable-output.toolproof.yml b/pagefind/integration_tests/base/stable-output.toolproof.yml
new file mode 100644
index 00000000..689e88a4
--- /dev/null
+++ b/pagefind/integration_tests/base/stable-output.toolproof.yml
@@ -0,0 +1,58 @@
+name: Base Tests > Stable Output
+steps:
+ - ref: ./background.toolproof.yml
+ - step: I have a "public/cat/index.html" file with the content {html}
+ html: >-
+
+ Hello world
+ Cat
+ Feline
+ 1
+
+ - step: I have a "public/dog/index.html" file with the content {html}
+ html: >-
+
+ Dog world
+ Dog
+ Canine
+ 2
+
+ - macro: I run Pagefind
+ - step: stdout should contain "Running Pagefind"
+ - step: The file "public/pagefind/pagefind.js" should not be empty
+ # -----------------------------------------
+ # TODO: Stabilise the `pf_meta` file hashes
+ # -----------------------------------------
+ # - I run "ls public/pagefind"
+ # - snapshot: stdout
+ # snapshot_content: |-
+ # ╎filter
+ # ╎fragment
+ # ╎index
+ # ╎pagefind-entry.json
+ # ╎pagefind-highlight.js
+ # ╎pagefind-modular-ui.css
+ # ╎pagefind-modular-ui.js
+ # ╎pagefind-ui.css
+ # ╎pagefind-ui.js
+ # ╎pagefind.en_3918d9ab34.pf_meta
+ # ╎pagefind.js
+ # ╎wasm.en.pagefind
+ # ╎wasm.unknown.pagefind
+ # -----------------------------------------
+ # TODO: Stabilise the `pf_filter` file hashes
+ # -----------------------------------------
+ # - I run "ls public/pagefind/filter"
+ # - snapshot: stdout
+ # snapshot_content: |-
+ # ╎en_8d32c48.pf_filter
+ - I run "ls public/pagefind/fragment"
+ - snapshot: stdout
+ snapshot_content: |-
+ ╎en_282213b.pf_fragment
+ ╎en_4375818.pf_fragment
+ ╎en_571daca.pf_fragment
+ - I run "ls public/pagefind/index"
+ - snapshot: stdout
+ snapshot_content: |-
+ ╎en_b2167ad.pf_index
diff --git a/pagefind/src/fossick/mod.rs b/pagefind/src/fossick/mod.rs
index c23456d3..3828562b 100644
--- a/pagefind/src/fossick/mod.rs
+++ b/pagefind/src/fossick/mod.rs
@@ -8,6 +8,7 @@ use lazy_static::lazy_static;
use pagefind_stem::{Algorithm, Stemmer};
use path_slash::PathExt as _;
use regex::Regex;
+use std::collections::BTreeMap;
use std::io::Error;
use std::ops::Mul;
use std::path::{Path, PathBuf};
@@ -46,7 +47,7 @@ pub struct FossickedData {
pub url: String,
pub fragment: PageFragment,
pub word_data: HashMap>,
- pub sort: HashMap,
+ pub sort: BTreeMap,
pub has_custom_body: bool,
pub force_inclusion: bool,
pub has_html_element: bool,
diff --git a/pagefind/src/fossick/parser.rs b/pagefind/src/fossick/parser.rs
index c5e2b556..d1f609c3 100644
--- a/pagefind/src/fossick/parser.rs
+++ b/pagefind/src/fossick/parser.rs
@@ -1,9 +1,9 @@
-use hashbrown::HashMap;
use lazy_static::lazy_static;
use lol_html::html_content::Element;
use lol_html::{element, text, HtmlRewriter, Settings};
use regex::Regex;
use std::cell::RefCell;
+use std::collections::BTreeMap;
use std::default::Default;
use std::rc::Rc;
@@ -53,11 +53,11 @@ pub struct DomParser<'a> {
#[derive(Default, Debug)]
struct DomParserData {
current_node: Rc>,
- filters: HashMap>,
- sort: HashMap,
- meta: HashMap,
- default_meta: HashMap,
- anchor_content: HashMap,
+ filters: BTreeMap>,
+ sort: BTreeMap,
+ meta: BTreeMap,
+ default_meta: BTreeMap,
+ anchor_content: BTreeMap,
language: Option,
has_html_element: bool,
has_old_bundle_reference: bool,
@@ -104,10 +104,10 @@ struct DomParsingNode {
#[derive(Debug)]
pub struct DomParserResult {
pub digest: String,
- pub filters: HashMap>,
- pub sort: HashMap,
- pub meta: HashMap,
- pub anchor_content: HashMap,
+ pub filters: BTreeMap>,
+ pub sort: BTreeMap,
+ pub meta: BTreeMap,
+ pub anchor_content: BTreeMap,
pub has_custom_body: bool,
pub force_inclusion: bool, // Include this page even if there is no body
pub has_html_element: bool,
diff --git a/pagefind/src/fragments/mod.rs b/pagefind/src/fragments/mod.rs
index d280c66a..0df1fec0 100644
--- a/pagefind/src/fragments/mod.rs
+++ b/pagefind/src/fragments/mod.rs
@@ -1,4 +1,4 @@
-use hashbrown::HashMap;
+use std::collections::BTreeMap;
use serde::Serialize;
@@ -15,8 +15,8 @@ pub struct PageFragmentData {
pub url: String,
pub content: String,
pub word_count: usize,
- pub filters: HashMap>,
- pub meta: HashMap,
+ pub filters: BTreeMap>,
+ pub meta: BTreeMap,
pub anchors: Vec,
}
diff --git a/pagefind/src/service/api.rs b/pagefind/src/service/api.rs
index 5e9605c8..8df40cf3 100644
--- a/pagefind/src/service/api.rs
+++ b/pagefind/src/service/api.rs
@@ -35,9 +35,8 @@
pub use crate::output::SyntheticFile;
use anyhow::{bail, Result};
-use hashbrown::HashMap;
use rust_patch::Patch;
-use std::path::PathBuf;
+use std::{collections::BTreeMap, path::PathBuf};
use crate::{
fossick::{parser::DomParserResult, Fossicker},
@@ -49,7 +48,7 @@ use crate::{
pub struct IndexedFileResponse {
pub page_word_count: u32,
pub page_url: String,
- pub page_meta: HashMap,
+ pub page_meta: BTreeMap,
}
pub struct PagefindIndex {
@@ -126,16 +125,16 @@ impl PagefindIndex {
url: String,
content: String,
language: String,
- meta: Option>,
- filters: Option>>,
- sort: Option>,
+ meta: Option>,
+ filters: Option>>,
+ sort: Option>,
) -> Result {
let data = DomParserResult {
digest: content,
filters: filters.unwrap_or_default(),
sort: sort.unwrap_or_default(),
meta: meta.unwrap_or_default(),
- anchor_content: HashMap::new(),
+ anchor_content: BTreeMap::new(),
has_custom_body: false,
force_inclusion: true,
has_html_element: true,
@@ -214,7 +213,6 @@ impl PagefindIndex {
#[cfg(test)]
mod tests {
use super::*;
- use tokio;
#[tokio::test]
async fn test_add_file() {
diff --git a/pagefind/src/service/requests.rs b/pagefind/src/service/requests.rs
index 5c603157..83654fc0 100644
--- a/pagefind/src/service/requests.rs
+++ b/pagefind/src/service/requests.rs
@@ -1,4 +1,5 @@
-use hashbrown::HashMap;
+use std::collections::BTreeMap;
+
use serde::{Deserialize, Serialize};
use crate::options::PagefindServiceConfig;
@@ -26,9 +27,9 @@ pub(super) enum RequestAction {
url: String,
content: String,
language: String,
- meta: Option>,
- filters: Option>>,
- sort: Option>,
+ meta: Option>,
+ filters: Option>>,
+ sort: Option>,
},
AddDir {
index_id: u32,
diff --git a/pagefind/src/service/responses.rs b/pagefind/src/service/responses.rs
index 843d3676..e021d081 100644
--- a/pagefind/src/service/responses.rs
+++ b/pagefind/src/service/responses.rs
@@ -1,4 +1,5 @@
-use hashbrown::HashMap;
+use std::collections::BTreeMap;
+
use serde::{Deserialize, Serialize};
#[derive(Debug, Deserialize, Serialize)]
@@ -20,7 +21,7 @@ pub(super) enum ResponseAction {
IndexedFile {
page_word_count: u32,
page_url: String,
- page_meta: HashMap,
+ page_meta: BTreeMap,
},
IndexedDir {
page_count: u32,