From 80faf909bfeb5f0042d85b1bd113b7ce167474ac Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 4 Jan 2024 11:49:48 +0100 Subject: [PATCH 01/12] ci(test): upload the binary that was tested It is mighty useful to have the exact binary that was tested in the GitHub workflow, for example to use it in other workflows when introducing features specifically for those other workflows' use case... Signed-off-by: Johannes Schindelin --- .github/workflows/test.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9f6b5ae8..9cd6cb88 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -106,6 +106,12 @@ jobs: working-directory: ./pagefind run: cargo build --release --features extended + - name: Upload Testing Binary + uses: actions/upload-artifact@v4 + with: + name: pagefind-${{ matrix.target }} + path: target/release/pagefind${{ matrix.build == 'windows' && '.exe' || '' }} + - name: Test Lib working-directory: ./pagefind run: cargo test --release --lib --features extended From 36492af42ef2ffc593ee7764817faf4a1239806d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 24 Dec 2023 23:48:16 +0100 Subject: [PATCH 02/12] Add an option to turn off density-weighting When searching, Pagefind applies a heuristic that often works quite well to boost pages with a higher density, i.e. a higher number of hits divided by the number of words on the page. This is called "density weighting". In some instances, it is desirable, though, to just use the number of hits directly, without dividing by the number of words on the page. Let's support this via the search option `use_weighting`, which defaults to `true` to maintain the current behavior. 
Signed-off-by: Johannes Schindelin --- docs/content/docs/api.md | 12 ++++++++++++ pagefind/features/weighting.feature | 28 +++++++++++++++++++++++++++ pagefind_web/src/lib.rs | 6 +++--- pagefind_web/src/search.rs | 22 ++++++++++++++++++++- pagefind_web_js/lib/coupled_search.ts | 5 ++++- pagefind_web_js/types/index.d.ts | 4 ++++ 6 files changed, 72 insertions(+), 5 deletions(-) diff --git a/docs/content/docs/api.md b/docs/content/docs/api.md index 8436df0e..10fcf95e 100644 --- a/docs/content/docs/api.md +++ b/docs/content/docs/api.md @@ -239,6 +239,18 @@ const search = await pagefind.search("static", { See [Sorting using the Pagefind JavaScript API](/docs/js-api-sorting/) for more details and functionality. +## Controlling how search results are ranked + +By default, the results' are sorted using a "balanced score" which is calculated using a sophisticated formula. This formula takes the ratio into account between matching vs total number of words on any given page. To support scenarios where this is not desirable (e.g. on sites where longer articles are better matches than short ones), this can be turned off: + +{{< diffcode >}} +```js +const search = await pagefind.search("term", { ++ ranking: { pageFrequency: 0.0 } +}); +``` +{{< /diffcode >}} + ## Re-initializing the search API In some cases you might need to re-initialize Pagefind. For example, if you dynamically change the language of the page without reloading, Pagefind will need to be re-initialized to reflect this langauge change. 
diff --git a/pagefind/features/weighting.feature b/pagefind/features/weighting.feature index 4de98933..d1cfd0a9 100644 --- a/pagefind/features/weighting.feature +++ b/pagefind/features/weighting.feature @@ -224,3 +224,31 @@ Feature: Word Weighting Then There should be no logs # Treat the bal value here as a snapshot — update the expected value as needed Then The selector "p" should contain "weight:1/bal:82.28572/loc:4" + + Scenario: Density weighting can be turned off + Given I have a "public/single-word.html" file with the body: + """ +

word

+ """ + Given I have a "public/three-words.html" file with the body: + """ +

I have a word and a word and another word

+ """ + When I run my program + Then I should see "Running Pagefind" in stdout + When I serve the "public" directory + When I load "/" + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search(`word`); + let search2 = await pagefind.search(`word`, { ranking: { pageFrequency: 0.0 } }); + let counts = [search, search2].map(s => s.results.map(r => r.words.length)); + document.querySelector('p').innerText = JSON.stringify(counts); + } + """ + Then There should be no logs + # With density weighting, single-word should be the first hit, otherwise three-words + Then The selector "p" should contain "[[1,3],[3,1]]" diff --git a/pagefind_web/src/lib.rs b/pagefind_web/src/lib.rs index f4412e70..5ffbdbf5 100644 --- a/pagefind_web/src/lib.rs +++ b/pagefind_web/src/lib.rs @@ -5,7 +5,7 @@ use std::collections::HashMap; use util::*; use wasm_bindgen::prelude::*; -use crate::search::BalancedWordScore; +use crate::search::{BalancedWordScore, RankingWeights}; mod filter; mod filter_index; @@ -210,7 +210,7 @@ pub fn filters(ptr: *mut SearchIndex) -> String { } #[wasm_bindgen] -pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str, sort: &str, exact: bool) -> String { +pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str, sort: &str, exact: bool, ranking: &RankingWeights) -> String { let search_index = unsafe { Box::from_raw(ptr) }; if let Some(generator_version) = search_index.generator_version.as_ref() { @@ -225,7 +225,7 @@ pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str, sort: &str, exac let (unfiltered_results, mut results) = if exact { search_index.exact_term(query, filter_set) } else { - search_index.search_term(query, filter_set) + search_index.search_term(query, filter_set, ranking) }; let unfiltered_total = unfiltered_results.len(); debug!({ format!("Raw total of {} results", unfiltered_total) }); diff --git a/pagefind_web/src/search.rs 
b/pagefind_web/src/search.rs index 824e7975..4b1c7c3d 100644 --- a/pagefind_web/src/search.rs +++ b/pagefind_web/src/search.rs @@ -3,6 +3,7 @@ use std::{borrow::Cow, cmp::Ordering}; use crate::{util::*, PageWord}; use bit_set::BitSet; use pagefind_stem::Stemmer; +use wasm_bindgen::prelude::wasm_bindgen; use crate::SearchIndex; @@ -34,6 +35,24 @@ pub struct BalancedWordScore { pub word_location: u32, } +#[derive(Debug, Clone)] +#[wasm_bindgen] +pub struct RankingWeights { + pub page_frequency: f32, +} + +#[wasm_bindgen] +impl RankingWeights { + #[wasm_bindgen(constructor)] + pub fn new( + page_frequency: f32, + ) -> RankingWeights { + RankingWeights { + page_frequency, + } + } +} + impl From for BalancedWordScore { fn from( VerboseWordLocation { @@ -175,6 +194,7 @@ impl SearchIndex { &self, term: &str, filter_results: Option, + ranking: &RankingWeights, ) -> (Vec, Vec) { debug!({ format! {"Searching {:?}", term} @@ -318,7 +338,7 @@ impl SearchIndex { .map(|BalancedWordScore { balanced_score, .. }| balanced_score) .sum::() / 24.0) - / page.word_count as f32; + / ((page.word_count as f32).ln() * (*ranking).page_frequency).exp(); let search_result = PageSearchResult { page: page.hash.clone(), diff --git a/pagefind_web_js/lib/coupled_search.ts b/pagefind_web_js/lib/coupled_search.ts index d340971b..f380677a 100644 --- a/pagefind_web_js/lib/coupled_search.ts +++ b/pagefind_web_js/lib/coupled_search.ts @@ -440,10 +440,13 @@ class PagefindInstance { return null; } + let ranking = new this.backend.RankingWeights( + options.ranking?.pageFrequency ?? 
1.0, + ) // pointer may have updated from the loadChunk calls ptr = await this.getPtr(); let searchStart = Date.now(); - let result = this.backend.search(ptr, term, filter_list, sort_list, exact_search) as string; + let result = this.backend.search(ptr, term, filter_list, sort_list, exact_search, ranking) as string; log(`Got the raw search result: ${result}`); let [unfilteredResultCount, all_results, filters, totalFilters] = result.split(/:([^:]*):(.*)__PF_UNFILTERED_DELIM__(.*)$/); let filterObj = this.parseFilters(filters); diff --git a/pagefind_web_js/types/index.d.ts b/pagefind_web_js/types/index.d.ts index aad34393..9d2deb38 100644 --- a/pagefind_web_js/types/index.d.ts +++ b/pagefind_web_js/types/index.d.ts @@ -44,6 +44,10 @@ declare global { filters?: Object, /** The set of sorts to use for this search, instead of relevancy */ sort?: Object, + /** Fine-grained ranking weights (range: 0.0 - 1.0) */ + ranking?: { + pageFrequency?: Number, + }, } /** Filter counts returned from pagefind.filters(), and alongside results from pagefind.search() */ From cb0ffa753243cac67d90fc099bfb3a93d22fb853 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 6 Jan 2024 16:23:38 +0100 Subject: [PATCH 03/12] ranking: add more knobs In addition to controlling how much of a role the "page frequency" plays in ranking pages, let's add more ways to modify the way pages are ranked. 
Signed-off-by: Johannes Schindelin --- docs/content/docs/api.md | 20 ++++++++ pagefind_web/src/search.rs | 67 ++++++++++++++------------- pagefind_web_js/lib/coupled_search.ts | 2 + pagefind_web_js/types/index.d.ts | 2 + 4 files changed, 60 insertions(+), 31 deletions(-) diff --git a/docs/content/docs/api.md b/docs/content/docs/api.md index 10fcf95e..d8a98298 100644 --- a/docs/content/docs/api.md +++ b/docs/content/docs/api.md @@ -251,6 +251,26 @@ const search = await pagefind.search("term", { ``` {{< /diffcode >}} +It is also possible to control how much the site-wide frequency of a given term is taken into account (by default, terms that appear less often have a higher weight): + +{{< diffcode >}} +```js +const search = await pagefind.search("term", { ++ ranking: { siteFrequency: 0.0 } +}); +``` +{{< /diffcode >}} + +Another knob to control the ranking is `wordDistance`, which tells Pagefind how much it should weigh the length difference of the matched word vs the length of the matching search term: + +{{< diffcode >}} +```js +const search = await pagefind.search("term", { ++ ranking: { wordDistance: 0.3 } +}); +``` +{{< /diffcode >}} + ## Re-initializing the search API In some cases you might need to re-initialize Pagefind. For example, if you dynamically change the language of the page without reloading, Pagefind will need to be re-initialized to reflect this langauge change. 
diff --git a/pagefind_web/src/search.rs b/pagefind_web/src/search.rs index 4b1c7c3d..ec9c9eb3 100644 --- a/pagefind_web/src/search.rs +++ b/pagefind_web/src/search.rs @@ -38,6 +38,8 @@ pub struct BalancedWordScore { #[derive(Debug, Clone)] #[wasm_bindgen] pub struct RankingWeights { + pub word_distance: f32, + pub site_frequency: f32, pub page_frequency: f32, } @@ -45,43 +47,46 @@ pub struct RankingWeights { impl RankingWeights { #[wasm_bindgen(constructor)] pub fn new( + word_distance: f32, + site_frequency: f32, page_frequency: f32, ) -> RankingWeights { RankingWeights { + word_distance, + site_frequency, page_frequency, } } } -impl From for BalancedWordScore { - fn from( - VerboseWordLocation { - weight, - length_differential, - word_frequency, - word_location, - }: VerboseWordLocation, - ) -> Self { - let word_length_bonus = if length_differential > 0 { - (2.0 / length_differential as f32).max(0.2) - } else { - 3.0 - }; - - // Starting with the raw user-supplied (or derived) weight of the word, - // we take it to the power of two to make the weight scale non-linear. - // We then multiply it with word_length_bonus, which should be a roughly 0 -> 3 scale of how close - // this was was in length to the target word. - // That result is then multiplied by the word frequency, which is again a roughly 0 -> 2 scale - // of how unique this word is in the entire site. 
(tf-idf ish) - let balanced_score = - ((weight as f32).powi(2) * word_length_bonus) * word_frequency.max(0.5); - - Self { - weight, - balanced_score, - word_location, - } +fn calculate_word_score( + VerboseWordLocation { + weight, + length_differential, + word_frequency, + word_location, + }: VerboseWordLocation, + ranking: &RankingWeights, +) -> BalancedWordScore { + let word_length_bonus = ((if length_differential > 0 { + (2.0 / length_differential as f32).max(0.2) + } else { + 3.0 + }).ln() * (*ranking).word_distance).exp(); + + // Starting with the raw user-supplied (or derived) weight of the word, + // we take it to the power of two to make the weight scale non-linear. + // We then multiply it with word_length_bonus, which should be a roughly 0 -> 3 scale of how close + // this was was in length to the target word. + // That result is then multiplied by the word frequency, which is again a roughly 0 -> 2 scale + // of how unique this word is in the entire site. (tf-idf ish) + let balanced_score = + ((weight as f32).powi(2) * word_length_bonus) * (word_frequency.max(0.5).ln() * (*ranking).site_frequency).exp(); + + BalancedWordScore { + weight, + balanced_score, + word_location, } } @@ -321,11 +326,11 @@ impl SearchIndex { working_word.length_differential = next_word.length_differential; } } else { - unique_word_locations.push(working_word.into()); + unique_word_locations.push(calculate_word_score(working_word, ranking)); working_word = next_word; } } - unique_word_locations.push(working_word.into()); + unique_word_locations.push(calculate_word_score(working_word, ranking)); } let page = &self.pages[page_index]; diff --git a/pagefind_web_js/lib/coupled_search.ts b/pagefind_web_js/lib/coupled_search.ts index f380677a..818c0b38 100644 --- a/pagefind_web_js/lib/coupled_search.ts +++ b/pagefind_web_js/lib/coupled_search.ts @@ -441,6 +441,8 @@ class PagefindInstance { } let ranking = new this.backend.RankingWeights( + options.ranking?.wordDistance ?? 
1.0, + options.ranking?.siteFrequency ?? 1.0, options.ranking?.pageFrequency ?? 1.0, ) // pointer may have updated from the loadChunk calls diff --git a/pagefind_web_js/types/index.d.ts b/pagefind_web_js/types/index.d.ts index 9d2deb38..25ef0dfc 100644 --- a/pagefind_web_js/types/index.d.ts +++ b/pagefind_web_js/types/index.d.ts @@ -46,6 +46,8 @@ declare global { sort?: Object, /** Fine-grained ranking weights (range: 0.0 - 1.0) */ ranking?: { + wordDistance?: Number, + siteFrequency?: Number, pageFrequency?: Number, }, } From e56f662233937b169b6d1486872db656bb3dd3c7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 6 Jan 2024 18:10:21 +0100 Subject: [PATCH 04/12] amend! Add an option to turn off density-weighting Add an option to stop scoring shorter pages higher When searching, Pagefind applies a heuristic that often works quite well to boost pages with a higher density, i.e. a higher number of hits divided by the number of words on the page. This is called "density weighting". In some instances, it is desirable, though, to just use the number of hits directly, without dividing by the number of words on the page. Let's support this via a new search option `ranking`, which as of right now contains a single field to specify how much "denser pages" should be favored. 
Signed-off-by: Johannes Schindelin --- pagefind/features/scoring.feature | 43 +++++++++++++++++++++++++++++ pagefind/features/weighting.feature | 28 ------------------- 2 files changed, 43 insertions(+), 28 deletions(-) diff --git a/pagefind/features/scoring.feature b/pagefind/features/scoring.feature index 90551037..3320518f 100644 --- a/pagefind/features/scoring.feature +++ b/pagefind/features/scoring.feature @@ -54,6 +54,49 @@ Feature: Result Scoring Then The selector "[data-count]" should contain "2 result(s)" Then The selector "[data-result]" should contain "/dog/, /cat/" + Scenario: Ranking can be configured to stop favoring pages with less words + Given I have a "public/index.html" file with the body: + """ +
    +
  • +
+ """ + Given I have a "public/single-word.html" file with the body: + """ +

word

+ """ + Given I have a "public/three-words.html" file with the body: + """ +

I have a word and a word and another word

+ """ + When I run my program + Then I should see "Running Pagefind" in stdout + When I serve the "public" directory + When I load "/" + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search(`word`); + document.querySelector('[data-result]').innerText = search.results.map(r => r.words.length).join(', '); + } + """ + Then There should be no logs + # With density weighting, single-word should be the first hit, otherwise three-words + Then The selector "[data-result]" should contain "1, 3" + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search(`word`, { ranking: { pageFrequency: 0.0 } }); + document.querySelector('[data-result]').innerText = search.results.map(r => r.words.length).join(', '); + } + """ + Then There should be no logs + Then The selector "[data-result]" should contain "3, 1" + @skip Scenario: Search terms in close proximity rank higher in results When I evaluate: diff --git a/pagefind/features/weighting.feature b/pagefind/features/weighting.feature index d1cfd0a9..4de98933 100644 --- a/pagefind/features/weighting.feature +++ b/pagefind/features/weighting.feature @@ -224,31 +224,3 @@ Feature: Word Weighting Then There should be no logs # Treat the bal value here as a snapshot — update the expected value as needed Then The selector "p" should contain "weight:1/bal:82.28572/loc:4" - - Scenario: Density weighting can be turned off - Given I have a "public/single-word.html" file with the body: - """ -

word

- """ - Given I have a "public/three-words.html" file with the body: - """ -

I have a word and a word and another word

- """ - When I run my program - Then I should see "Running Pagefind" in stdout - When I serve the "public" directory - When I load "/" - When I evaluate: - """ - async function() { - let pagefind = await import("/pagefind/pagefind.js"); - - let search = await pagefind.search(`word`); - let search2 = await pagefind.search(`word`, { ranking: { pageFrequency: 0.0 } }); - let counts = [search, search2].map(s => s.results.map(r => r.words.length)); - document.querySelector('p').innerText = JSON.stringify(counts); - } - """ - Then There should be no logs - # With density weighting, single-word should be the first hit, otherwise three-words - Then The selector "p" should contain "[[1,3],[3,1]]" From 41bb37e7c431dd95def49ebf100f67cab72ee8d7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 6 Jan 2024 18:32:43 +0100 Subject: [PATCH 05/12] WIP verify that `siteFrequency` actually does something So far, I seem to be unable to make this work, as it seems that `word_frequency` is always 0 in my tests... 
Signed-off-by: Johannes Schindelin --- pagefind/features/scoring.feature | 32 +++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pagefind/features/scoring.feature b/pagefind/features/scoring.feature index 3320518f..1af65160 100644 --- a/pagefind/features/scoring.feature +++ b/pagefind/features/scoring.feature @@ -54,6 +54,38 @@ Feature: Result Scoring Then The selector "[data-count]" should contain "2 result(s)" Then The selector "[data-result]" should contain "/dog/, /cat/" + Scenario: Results with less-frequent words score higher + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search(`cat dog`); + + document.querySelector('[data-count]').innerText = `${search.results.length} result(s)`; + let data = await Promise.all(search.results.map(result => result.data())); + document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); + } + """ + Then There should be no logs + Then The selector "[data-count]" should contain "2 result(s)" + Then The selector "[data-result]" should contain "/dog/, /cat/" + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search(`cat dog`, { ranking: { siteFrequency: 0.0 } }); + + document.querySelector('[data-count]').innerText = `${search.results.length} result(s)`; + let data = await Promise.all(search.results.map(result => result.data())); + document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); + } + """ + Then There should be no logs + Then The selector "[data-count]" should contain "2 result(s)" + # This currently fails... but why? 
Then The selector "[data-result]" should contain "/cat/, /dog/" + Scenario: Ranking can be configured to stop favoring pages with less words Given I have a "public/index.html" file with the body: """ From ebfef2df20547645040d4a785f99e64e2bec38e6 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 26 Feb 2024 18:48:48 +1300 Subject: [PATCH 06/12] Rename weighting knobs for clarity --- docs/content/docs/api.md | 6 +++--- pagefind/features/scoring.feature | 4 ++-- pagefind_web/src/search.rs | 23 +++++++++++------------ pagefind_web_js/lib/coupled_search.ts | 4 ++-- pagefind_web_js/types/index.d.ts | 7 +++++-- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/docs/content/docs/api.md b/docs/content/docs/api.md index d8a98298..6bc2e25a 100644 --- a/docs/content/docs/api.md +++ b/docs/content/docs/api.md @@ -256,17 +256,17 @@ It is also possible to control how much the site-wide frequency of a given term {{< diffcode >}} ```js const search = await pagefind.search("term", { -+ ranking: { siteFrequency: 0.0 } ++ ranking: { siteRarity: 0.0 } }); ``` {{< /diffcode >}} -Another knob to control the ranking is `wordDistance`, which tells Pagefind how much it should weigh the length difference of the matched word vs the length of the matching search term: +Another knob to control the ranking is `termSimilarity`, which tells Pagefind how much it should weigh the length difference of the matched word vs the length of the matching search term: {{< diffcode >}} ```js const search = await pagefind.search("term", { -+ ranking: { wordDistance: 0.3 } ++ ranking: { termSimilarity: 0.3 } }); ``` {{< /diffcode >}} diff --git a/pagefind/features/scoring.feature b/pagefind/features/scoring.feature index 1af65160..f5357fe0 100644 --- a/pagefind/features/scoring.feature +++ b/pagefind/features/scoring.feature @@ -75,7 +75,7 @@ Feature: Result Scoring async function() { let pagefind = await import("/pagefind/pagefind.js"); - let search = 
await pagefind.search(`cat dog`, { ranking: { siteFrequency: 0.0 } }); + let search = await pagefind.search(`cat dog`, { ranking: { siteRarity: 0.0 } }); document.querySelector('[data-count]').innerText = `${search.results.length} result(s)`; let data = await Promise.all(search.results.map(result => result.data())); @@ -84,7 +84,7 @@ Feature: Result Scoring """ Then There should be no logs Then The selector "[data-count]" should contain "2 result(s)" - # This currently fails... but why? Then The selector "[data-result]" should contain "/cat/, /dog/" + # This currently fails... but why? Then The selector "[data-result]" should contain "/cat/, /dog/" Scenario: Ranking can be configured to stop favoring pages with less words Given I have a "public/index.html" file with the body: diff --git a/pagefind_web/src/search.rs b/pagefind_web/src/search.rs index ec9c9eb3..48920262 100644 --- a/pagefind_web/src/search.rs +++ b/pagefind_web/src/search.rs @@ -38,22 +38,18 @@ pub struct BalancedWordScore { #[derive(Debug, Clone)] #[wasm_bindgen] pub struct RankingWeights { - pub word_distance: f32, - pub site_frequency: f32, + pub term_similarity: f32, + pub site_rarity: f32, pub page_frequency: f32, } #[wasm_bindgen] impl RankingWeights { #[wasm_bindgen(constructor)] - pub fn new( - word_distance: f32, - site_frequency: f32, - page_frequency: f32, - ) -> RankingWeights { + pub fn new(term_similarity: f32, site_rarity: f32, page_frequency: f32) -> RankingWeights { RankingWeights { - word_distance, - site_frequency, + term_similarity, + site_rarity, page_frequency, } } @@ -72,7 +68,10 @@ fn calculate_word_score( (2.0 / length_differential as f32).max(0.2) } else { 3.0 - }).ln() * (*ranking).word_distance).exp(); + }) + .ln() + * (*ranking).term_similarity) + .exp(); // Starting with the raw user-supplied (or derived) weight of the word, // we take it to the power of two to make the weight scale non-linear. 
@@ -80,8 +79,8 @@ fn calculate_word_score( // this was was in length to the target word. // That result is then multiplied by the word frequency, which is again a roughly 0 -> 2 scale // of how unique this word is in the entire site. (tf-idf ish) - let balanced_score = - ((weight as f32).powi(2) * word_length_bonus) * (word_frequency.max(0.5).ln() * (*ranking).site_frequency).exp(); + let balanced_score = ((weight as f32).powi(2) * word_length_bonus) + * (word_frequency.max(0.5).ln() * (*ranking).site_rarity).exp(); BalancedWordScore { weight, diff --git a/pagefind_web_js/lib/coupled_search.ts b/pagefind_web_js/lib/coupled_search.ts index 818c0b38..cff8773a 100644 --- a/pagefind_web_js/lib/coupled_search.ts +++ b/pagefind_web_js/lib/coupled_search.ts @@ -441,8 +441,8 @@ class PagefindInstance { } let ranking = new this.backend.RankingWeights( - options.ranking?.wordDistance ?? 1.0, - options.ranking?.siteFrequency ?? 1.0, + options.ranking?.termSimilarity ?? 1.0, + options.ranking?.siteRarity ?? 1.0, options.ranking?.pageFrequency ?? 
1.0, ) // pointer may have updated from the loadChunk calls diff --git a/pagefind_web_js/types/index.d.ts b/pagefind_web_js/types/index.d.ts index 25ef0dfc..e0b6c907 100644 --- a/pagefind_web_js/types/index.d.ts +++ b/pagefind_web_js/types/index.d.ts @@ -46,8 +46,11 @@ declare global { sort?: Object, /** Fine-grained ranking weights (range: 0.0 - 1.0) */ ranking?: { - wordDistance?: Number, - siteFrequency?: Number, + /* How much to boost words that closely match the search term, over fuzzier matches */ + termSimilarity?: Number, + /* How much to boost unique words in the search term, based on their occurance within the site as a whole */ + siteRarity?: Number, + /* How much to boost results based on density of the search term on the page */ pageFrequency?: Number, }, } From 67fbcd7f7986062446f7f9e92aa174a5bc04031d Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Thu, 28 Mar 2024 18:20:09 +1300 Subject: [PATCH 07/12] Implement BM25, change ranking params, rework ranking option pathway --- .vscode/settings.json | 1 + Cargo.lock | 93 +------ Cargo.toml | 3 +- .../features/multisite/multisite_sort.feature | 16 +- pagefind/features/scoring_custom.feature | 238 ++++++++++++++++ ...oring.feature => scoring_defaults.feature} | 75 ----- pagefind/features/ui/ui_scoring.feature | 73 +++++ pagefind/features/weighting.feature | 26 +- pagefind_web/.vscode/settings.json | 3 + pagefind_web/Cargo.lock | 160 +++++++++++ pagefind_web/Cargo.toml | 2 +- pagefind_web/src/lib.rs | 92 +++++- pagefind_web/src/metadata.rs | 5 + pagefind_web/src/search.rs | 262 +++++++++++------- pagefind_web_js/lib/coupled_search.ts | 29 +- pagefind_web_js/types/index.d.ts | 44 ++- 16 files changed, 820 insertions(+), 302 deletions(-) create mode 100644 pagefind/features/scoring_custom.feature rename pagefind/features/{scoring.feature => scoring_defaults.feature} (53%) create mode 100644 pagefind/features/ui/ui_scoring.feature create mode 100644 
pagefind_web/.vscode/settings.json create mode 100644 pagefind_web/Cargo.lock diff --git a/.vscode/settings.json b/.vscode/settings.json index 7a73a41b..4d9636b5 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,2 +1,3 @@ { + "rust-analyzer.showUnlinkedFileNotification": false } \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 82e17554..fa5b5af5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -409,12 +409,6 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7" -[[package]] -name = "bumpalo" -version = "3.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" - [[package]] name = "byteorder" version = "1.4.3" @@ -1698,12 +1692,6 @@ dependencies = [ "minicbor-derive", ] -[[package]] -name = "minicbor" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d15f4203d71fdf90903c2696e55426ac97a363c67b218488a73b534ce7aca10" - [[package]] name = "minicbor-derive" version = "0.13.0" @@ -1832,7 +1820,7 @@ dependencies = [ "lazy_static", "lexical-core", "lol_html", - "minicbor 0.19.1", + "minicbor", "minifier", "pagefind_stem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "path-slash", @@ -1848,12 +1836,6 @@ dependencies = [ "wax", ] -[[package]] -name = "pagefind_microjson" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f97aa64cde14e78d6274c473f36cec98d401f0b583282055e953d2df907e210" - [[package]] name = "pagefind_stem" version = "0.2.0" @@ -1864,17 +1846,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70b9cf5d3cd867dd32e54385d85ecfda45c6f2f896a9d464426ab564e7391467" -[[package]] -name = "pagefind_web" -version = "0.0.0" -dependencies = [ - "bit-set", - "minicbor 0.20.0", - 
"pagefind_microjson", - "pagefind_stem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "wasm-bindgen", -] - [[package]] name = "parking_lot" version = "0.12.1" @@ -2106,18 +2077,18 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.43" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -2781,60 +2752,6 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" -[[package]] -name = "wasm-bindgen" -version = "0.2.84" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.84" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" -dependencies = [ - "bumpalo", - "log", - "once_cell", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.84" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" -dependencies = [ - "quote", - 
"wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.84" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.84" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" - [[package]] name = "wax" version = "0.5.0" diff --git a/Cargo.toml b/Cargo.toml index 0612ef96..a905d619 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,4 @@ [workspace] -members = ["pagefind", "pagefind_web", "pagefind_stem"] +members = ["pagefind", "pagefind_stem"] +exclude = ["pagefind_web"] diff --git a/pagefind/features/multisite/multisite_sort.feature b/pagefind/features/multisite/multisite_sort.feature index aa3d641b..6b843a0e 100644 --- a/pagefind/features/multisite/multisite_sort.feature +++ b/pagefind/features/multisite/multisite_sort.feature @@ -9,13 +9,21 @@ Feature: Multisite Result Scoring """

my page on the web web

""" - Given I have a "root/website_b/oneweb/index.html" file with the body: + Given I have a "root/website_a/oneweb/index.html" file with the body: """ -

my page on the web

+

my page on the world web

+ """ + Given I have a "root/website_a/longdoc/index.html" file with the body: + """ +

Aenean lacinia bibendum nulla sed consectetur. Duis mollis, est non commodo luctus, nisi erat porttitor ligula, eget lacinia odio sem nec elit.

""" Given I have a "root/website_b/threewebs/index.html" file with the body: """ -

my page on the web web web

+

my web web web page

+ """ + Given I have a "root/website_b/longdoc/index.html" file with the body: + """ +

Aenean lacinia bibendum nulla sed consectetur. Duis mollis, est non commodo luctus, nisi erat porttitor ligula, eget lacinia odio sem nec elit.

""" Scenario: Pages are scored correctly across indexes @@ -42,7 +50,7 @@ Feature: Multisite Result Scoring } """ Then There should be no logs - Then The selector "[data-result]" should contain "/website_b/threewebs/, /website_a/twowebs/, /website_b/oneweb/" + Then The selector "[data-result]" should contain "/website_b/threewebs/, /website_a/twowebs/, /website_a/oneweb/" Scenario: Multiple indexes can be weighted separately When I run my program with the flags: diff --git a/pagefind/features/scoring_custom.feature b/pagefind/features/scoring_custom.feature new file mode 100644 index 00000000..25b3517d --- /dev/null +++ b/pagefind/features/scoring_custom.feature @@ -0,0 +1,238 @@ +Feature: Result Scoring + Background: + Given I have the environment variables: + | PAGEFIND_SITE | public | + Given I have a "public/index.html" file with the body: + """ +
    +
  • +
+ """ + # Create dummy pages to allow BM25 calculations to be effective + Given I have a "public/latin-1/index.html" file with the body: + """ +

Maecenas sed diam eget risus varius blandit sit amet non common

+ """ + Given I have a "public/latin-2/index.html" file with the body: + """ +

Cras justo odio, common ac facilisis in, egestas eget quam.

+ """ + Given I have a "public/latin-3/index.html" file with the body: + """ +

Donec sed odio dui.

+ """ + Given I have a "public/latin-4/index.html" file with the body: + """ +

Vivamus sagittis lacus vel augue laoreet rutrum faucibus dolor auctor.

+ """ + Given I have a "public/latin-5/index.html" file with the body: + """ +

Integer posuere erat a ante venenatis dapibus posuere velit aliquet..

+ """ + + Scenario: Term similarity ranking can be configured + Given I have a "public/similar-term/index.html" file with the body: + """ +

This post talks about abcdef once

+ """ + Given I have a "public/dissimilar-term/index.html" file with the body: + """ +

This post talks about abcdefghijklmnopqrstuv — twice! abcdefghijklmnopqrstuv

+ """ + When I run my program + Then I should see "Running Pagefind" in stdout + When I serve the "public" directory + When I load "/" + # The abcdefghijklmnopqrstuv hits should be pretty useless + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search(`abcdef`); + + let data = await Promise.all(search.results.map(result => result.data())); + document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); + } + """ + Then There should be no logs + Then The selector "[data-result]" should contain "/similar-term/, /dissimilar-term/" + # The abcdefghijklmnopqrstuv hits are just as important, so win due to two of them + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + await pagefind.options({ + ranking: { + termSimilarity: 0.0 + } + }); + + let search = await pagefind.search(`abcdef`); + + let data = await Promise.all(search.results.map(result => result.data())); + document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); + } + """ + Then There should be no logs + Then The selector "[data-result]" should contain "/dissimilar-term/, /similar-term/" + + Scenario: Page length ranking can be configured + Given I have a "public/longer/index.html" file with the body: + """ +

This post is quite long, and talks about terracotta at length.

+

Fusce dapibus, tellus ac cursus commodo, tortor mauris condimentum nibh, ut fermentum terracotta justo sit amet risus. Donec sed odio dui. Aenean eu leo quam. Pellentesque ornare sem lacinia quam venenatis vestibulum. Nulla vitae elit libero, a pharetra augue. Aenean lacinia bibendum nulla sed consectetur. Donec id elit non mi porta gravida at eget metus. Maecenas faucibus mollis interdum.

+

Integer terracotta erat a ante venenatis dapibus posuere velit aliquet. Vivamus sagittis lacus vel augue laoreet rutrum faucibus terracotta auctor. Nullam quis risus eget urna mollis ornare vel eu leo. Aenean lacinia bibendum nulla sed consectetur.

+

Praesent commodo cursus magna, vel scelerisque terracotta consectetur et. Fusce dapibus, tellus ac cursus commodo, tortor mauris condimentum nibh, ut fermentum massa justo sit amet risus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur blandit tempus porttitor.

+ """ + Given I have a "public/shorter/index.html" file with the body: + """ +

This is a shorter terracotta page.

+

Sed posuere consectetur est at lobortis.

+ """ + When I run my program + Then I should see "Running Pagefind" in stdout + When I serve the "public" directory + When I load "/" + # Should prefer documents shorter than the average document + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + await pagefind.options({ + ranking: { + pageLength: 1.0 + } + }); + + let search = await pagefind.search(`terracotta`); + + let data = await Promise.all(search.results.map(result => result.data())); + document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); + } + """ + Then There should be no logs + Then The selector "[data-result]" should contain "/shorter/, /longer/" + # Should care about term frequency more than document length + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + await pagefind.options({ + ranking: { + pageLength: 0.0 + } + }); + + let search = await pagefind.search(`terracotta`); + + let data = await Promise.all(search.results.map(result => result.data())); + document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); + } + """ + Then There should be no logs + Then The selector "[data-result]" should contain "/longer/, /shorter/" + + Scenario: Term frequency vs raw count can be configured + Given I have a "public/longer/index.html" file with the body: + """ +

This post is quite long, and talks about terracotta at length.

+

Fusce dapibus, tellus ac cursus commodo, tortor mauris condimentum nibh, ut fermentum terracotta justo sit amet risus. Donec sed odio dui. Aenean eu leo quam. Pellentesque ornare sem lacinia quam venenatis vestibulum. Nulla vitae elit libero, a pharetra augue. Aenean lacinia bibendum nulla sed consectetur. Donec id elit non mi porta gravida at eget metus. Maecenas faucibus mollis interdum.

+

Integer erat a ante venenatis dapibus posuere velit aliquet. Vivamus sagittis lacus vel augue laoreet rutrum faucibus auctor. Nullam quis risus eget urna mollis ornare vel eu leo. Aenean lacinia bibendum nulla sed consectetur.

+

Praesent commodo cursus magna, vel scelerisque consectetur et. Fusce dapibus, tellus ac cursus commodo, tortor mauris condimentum nibh, ut fermentum massa justo sit amet risus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur blandit tempus porttitor.

+ """ + Given I have a "public/shorter/index.html" file with the body: + """ +

This is a shorter terracotta page.

+ """ + When I run my program + Then I should see "Running Pagefind" in stdout + When I serve the "public" directory + When I load "/" + # Default: should score based on term frequency + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search(`terracotta`); + + let data = await Promise.all(search.results.map(result => result.data())); + document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); + } + """ + Then There should be no logs + Then The selector "[data-result]" should contain "/shorter/, /longer/" + # Flipped: Should pick the page with higher result count, regardless of length + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + await pagefind.options({ + ranking: { + termFrequency: 0.0 + } + }); + + let search = await pagefind.search(`terracotta`); + + let data = await Promise.all(search.results.map(result => result.data())); + document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); + } + """ + Then There should be no logs + Then The selector "[data-result]" should contain "/longer/, /shorter/" + + Scenario: Term saturation can be configured + Given I have a "public/lots/index.html" file with the body: + """ +

post

+

common and common and common and unrelated

+ """ + Given I have a "public/slightly-less-than-lots/index.html" file with the body: + """ +

post

+

common and common and unrelated and unrelated

+ """ + When I run my program + Then I should see "Running Pagefind" in stdout + When I serve the "public" directory + When I load "/" + # More sensitive to term frequency, should pick the more frequent document for "common" + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + await pagefind.options({ + ranking: { + termSaturation: 2.0 + } + }); + + let search = await pagefind.search(`common post`); + + let data = await Promise.all(search.results.map(result => result.data())); + document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); + } + """ + Then There should be no logs + Then The selector "[data-result]" should contain "/slightly-less-than-lots/, /lots/" + # Less sensitive to term frequency of "common", should pick the better document for "post" + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + await pagefind.options({ + ranking: { + termSaturation: 0.1 + } + }); + + let search = await pagefind.search(`common post`); + + let data = await Promise.all(search.results.map(result => result.data())); + document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); + } + """ + Then There should be no logs + Then The selector "[data-result]" should contain "/lots/, /slightly-less-than-lots/" diff --git a/pagefind/features/scoring.feature b/pagefind/features/scoring_defaults.feature similarity index 53% rename from pagefind/features/scoring.feature rename to pagefind/features/scoring_defaults.feature index f5357fe0..90551037 100644 --- a/pagefind/features/scoring.feature +++ b/pagefind/features/scoring_defaults.feature @@ -54,81 +54,6 @@ Feature: Result Scoring Then The selector "[data-count]" should contain "2 result(s)" Then The selector "[data-result]" should contain "/dog/, /cat/" - Scenario: Results with less-frequent words score higher - When I evaluate: - """ - async function() { - let pagefind = await 
import("/pagefind/pagefind.js"); - - let search = await pagefind.search(`cat dog`); - - document.querySelector('[data-count]').innerText = `${search.results.length} result(s)`; - let data = await Promise.all(search.results.map(result => result.data())); - document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); - } - """ - Then There should be no logs - Then The selector "[data-count]" should contain "2 result(s)" - Then The selector "[data-result]" should contain "/dog/, /cat/" - When I evaluate: - """ - async function() { - let pagefind = await import("/pagefind/pagefind.js"); - - let search = await pagefind.search(`cat dog`, { ranking: { siteRarity: 0.0 } }); - - document.querySelector('[data-count]').innerText = `${search.results.length} result(s)`; - let data = await Promise.all(search.results.map(result => result.data())); - document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); - } - """ - Then There should be no logs - Then The selector "[data-count]" should contain "2 result(s)" - # This currently fails... but why? Then The selector "[data-result]" should contain "/cat/, /dog/" - - Scenario: Ranking can be configured to stop favoring pages with less words - Given I have a "public/index.html" file with the body: - """ -
    -
  • -
- """ - Given I have a "public/single-word.html" file with the body: - """ -

word

- """ - Given I have a "public/three-words.html" file with the body: - """ -

I have a word and a word and another word

- """ - When I run my program - Then I should see "Running Pagefind" in stdout - When I serve the "public" directory - When I load "/" - When I evaluate: - """ - async function() { - let pagefind = await import("/pagefind/pagefind.js"); - - let search = await pagefind.search(`word`); - document.querySelector('[data-result]').innerText = search.results.map(r => r.words.length).join(', '); - } - """ - Then There should be no logs - # With density weighting, single-word should be the first hit, otherwise three-words - Then The selector "[data-result]" should contain "1, 3" - When I evaluate: - """ - async function() { - let pagefind = await import("/pagefind/pagefind.js"); - - let search = await pagefind.search(`word`, { ranking: { pageFrequency: 0.0 } }); - document.querySelector('[data-result]').innerText = search.results.map(r => r.words.length).join(', '); - } - """ - Then There should be no logs - Then The selector "[data-result]" should contain "3, 1" - @skip Scenario: Search terms in close proximity rank higher in results When I evaluate: diff --git a/pagefind/features/ui/ui_scoring.feature b/pagefind/features/ui/ui_scoring.feature new file mode 100644 index 00000000..4bcc789f --- /dev/null +++ b/pagefind/features/ui/ui_scoring.feature @@ -0,0 +1,73 @@ +Feature: UI Scoring + Background: + Given I have the environment variables: + | PAGEFIND_SITE | public | + + Scenario: Pagefind UI can customize scoring + Given I have a "public/unrelated/index.html" file with the body: + """ +

unrelated

+

Donec id elit non mi porta gravida at eget metus. Fusce dapibus, tellus ac cursus commodo, tortor mauris condimentum nibh, ut fermentum massa justo sit amet risus. Nullam quis risus eget urna mollis ornare vel eu leo. Cras justo odio, dapibus ac facilisis in, egestas eget quam. Donec sed odio dui. Cras mattis consectetur purus sit amet fermentum.

+ """ + Given I have a "public/longer/index.html" file with the body: + """ +

longer

+

This post is quite long, and talks about terracotta at length.

+

Fusce dapibus, tellus ac cursus commodo, tortor mauris condimentum nibh, ut fermentum terracotta justo sit amet risus. Donec sed odio dui. Aenean eu leo quam. Pellentesque ornare sem lacinia quam venenatis vestibulum. Nulla vitae elit libero, a pharetra augue. Aenean lacinia bibendum nulla sed consectetur. Donec id elit non mi porta gravida at eget metus. Maecenas faucibus mollis interdum.

+

Integer erat a ante venenatis dapibus posuere velit aliquet. Vivamus sagittis lacus vel augue laoreet rutrum faucibus auctor. Nullam quis risus eget urna mollis ornare vel eu leo. Aenean lacinia bibendum nulla sed consectetur.

+

Praesent commodo cursus magna, vel scelerisque consectetur et. Fusce dapibus, tellus ac cursus commodo, tortor mauris condimentum nibh, ut fermentum massa justo sit amet risus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur blandit tempus porttitor.

+ """ + Given I have a "public/shorter/index.html" file with the body: + """ +

shorter

+

This is a shorter terracotta page.

+ """ + Given I have a "public/a.html" file with the body: + """ + + + + + """ + When I run my program + Then I should see "Running Pagefind" in stdout + Then I should see the file "public/pagefind/pagefind.js" + When I serve the "public" directory + When I load "/a.html" + When I evaluate: + """ + async function() { + window.pui.triggerSearch("terracotta"); + await new Promise(r => setTimeout(r, 1500)); // TODO: await el in humane + } + """ + Then There should be no logs + Then The selector ".pagefind-ui__result-link" should contain "shorter" + Given I have a "public/b.html" file with the body: + """ + + + + + """ + When I load "/b.html" + When I evaluate: + """ + async function() { + window.pui.triggerSearch("terracotta"); + await new Promise(r => setTimeout(r, 1500)); // TODO: await el in humane + } + """ + Then There should be no logs + Then The selector ".pagefind-ui__result-link" should contain "longer" diff --git a/pagefind/features/weighting.feature b/pagefind/features/weighting.feature index 4de98933..6feec7e8 100644 --- a/pagefind/features/weighting.feature +++ b/pagefind/features/weighting.feature @@ -17,24 +17,24 @@ Feature: Word Weighting Given I have a "public/r2/index.html" file with the body: """

Antelope

-

Antelope Antelope Antelope

+

Antelope Antelope Antelope Notantelope

Other text again

""" Given I have a "public/r3/index.html" file with the body: """ -
Antelope
-

Antelope Antelope Antelope

+
Antelope
+

Antelope Antelope Antelope Notantelope

Other text again

""" Given I have a "public/r4/index.html" file with the body: """

Antelope

-

Other text

+

Other text, totalling eight words of content

""" Given I have a "public/r5/index.html" file with the body: """

Antelope

-

Other text again

+

Other antelope text, of a similar length

""" When I run my program Then I should see "Running Pagefind" in stdout @@ -129,11 +129,11 @@ Feature: Word Weighting """ Given I have a "public/r2/index.html" file with the body: """ -

Two references to ThreeWordAntelope ThreeWordAntelope

+

Two references to AFourWordAntelope AFourWordAntelope

""" Given I have a "public/r3/index.html" file with the body: """ -

Three of TwoAntelope TwoAntelope TwoAntelope

+

A single reference to TwoAntelope

""" When I run my program Then I should see "Running Pagefind" in stdout @@ -169,13 +169,13 @@ Feature: Word Weighting let search = await pagefind.search(`three`); let data = await search.results[0].data(); - let weights = data.weighted_locations.map(l => `weight:${l.weight}/bal:${l.balanced_score}/loc:${l.location}`).join(' • '); + let weights = data.weighted_locations.map(l => `weight:${l.weight}/bal:${l.balanced_score.toFixed(2)}/loc:${l.location}`).join(' • '); document.querySelector('p').innerText = weights; } """ Then There should be no logs # Treat the bal value here as a snapshot — update the expected value as needed - Then The selector "p" should contain "weight:0.5/bal:18/loc:4" + Then The selector "p" should contain "weight:0.5/bal:128.04/loc:4" Scenario: Compound words sum to a full weight Given I have a "public/r1/index.html" file with the body: @@ -193,13 +193,13 @@ Feature: Word Weighting let search = await pagefind.search(`three antelopes`); let data = await search.results[0].data(); - let weights = data.weighted_locations.map(l => `weight:${l.weight}/bal:${l.balanced_score}/loc:${l.location}`).join(' • '); + let weights = data.weighted_locations.map(l => `weight:${l.weight}/bal:${l.balanced_score.toFixed(2)}/loc:${l.location}`).join(' • '); document.querySelector('p').innerText = weights; } """ Then There should be no logs # Treat the bal value here as a snapshot — update the expected value as needed - Then The selector "p" should contain "weight:1/bal:72/loc:4" + Then The selector "p" should contain "weight:1/bal:512.14/loc:4" Scenario: Compound words matched as full words use the full weight Given I have a "public/r1/index.html" file with the body: @@ -217,10 +217,10 @@ Feature: Word Weighting let search = await pagefind.search(`threea`); let data = await search.results[0].data(); - let weights = data.weighted_locations.map(l => `weight:${l.weight}/bal:${l.balanced_score}/loc:${l.location}`).join(' • '); + let weights = 
data.weighted_locations.map(l => `weight:${l.weight}/bal:${l.balanced_score.toFixed(2)}/loc:${l.location}`).join(' • '); document.querySelector('p').innerText = weights; } """ Then There should be no logs # Treat the bal value here as a snapshot — update the expected value as needed - Then The selector "p" should contain "weight:1/bal:82.28572/loc:4" + Then The selector "p" should contain "weight:1/bal:212.36/loc:4" diff --git a/pagefind_web/.vscode/settings.json b/pagefind_web/.vscode/settings.json new file mode 100644 index 00000000..f49d8f9e --- /dev/null +++ b/pagefind_web/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "rust-analyzer.cargo.target": "wasm32-unknown-unknown", +} \ No newline at end of file diff --git a/pagefind_web/Cargo.lock b/pagefind_web/Cargo.lock new file mode 100644 index 00000000..973ab950 --- /dev/null +++ b/pagefind_web/Cargo.lock @@ -0,0 +1,160 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bumpalo" +version = "3.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "minicbor" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d15f4203d71fdf90903c2696e55426ac97a363c67b218488a73b534ce7aca10" + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "pagefind_microjson" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f97aa64cde14e78d6274c473f36cec98d401f0b583282055e953d2df907e210" + +[[package]] +name = "pagefind_stem" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70b9cf5d3cd867dd32e54385d85ecfda45c6f2f896a9d464426ab564e7391467" + +[[package]] +name = "pagefind_web" +version = "0.0.0" +dependencies = [ + "bit-set", + "minicbor", + "pagefind_microjson", + "pagefind_stem", + "wasm-bindgen", +] + +[[package]] +name = "proc-macro2" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "2.0.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] 
+name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" diff --git a/pagefind_web/Cargo.toml b/pagefind_web/Cargo.toml index ed5e875f..dfe7de4e 100644 --- a/pagefind_web/Cargo.toml +++ b/pagefind_web/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" crate-type = ["cdylib", "rlib"] [dependencies] -wasm-bindgen = "0.2" +wasm-bindgen = "0.2.92" bit-set = "0.5" pagefind_stem = "0.2.0" minicbor = "0.20.0" diff --git a/pagefind_web/src/lib.rs b/pagefind_web/src/lib.rs index 5ffbdbf5..1e696436 100644 --- a/pagefind_web/src/lib.rs +++ b/pagefind_web/src/lib.rs @@ -2,10 +2,11 @@ use std::collections::HashMap; +use pagefind_microjson::JSONValue; use util::*; use 
wasm_bindgen::prelude::*; -use crate::search::{BalancedWordScore, RankingWeights}; +use crate::search::BalancedWordScore; mod filter; mod filter_index; @@ -34,11 +35,53 @@ pub struct SearchIndex { web_version: &'static str, generator_version: Option, pages: Vec, + average_page_length: f32, chunks: Vec, filter_chunks: HashMap, words: HashMap>, filters: HashMap>>, sorts: HashMap>, + ranking_weights: RankingWeights, +} + +#[derive(Debug, Clone)] +pub struct RankingWeights { + /// Controls page ranking based on similarity of terms to the search query (in length). + /// Increasing this number means pages rank higher when they contain works very close to the query, + /// e.g. if searching for `part` then `party` will boost a page higher than one containing `partition`. + /// As this number trends to zero, then `party` and `partition` would be viewed equally. + /// Must be >= 0 + pub term_similarity: f32, + /// Controls how much effect the average page length has on ranking. + /// At 1.0, ranking will strongly favour pages that are shorter than the average page on the site. + /// At 0.0, ranking will exclusively look at term frequency, regardless of how long a document is. + /// Must be clamped to 0..=1 + pub page_length: f32, + /// Controls how quickly a term saturates on the page and reduces impact on the ranking. + /// At 2.0, pages will take a long time to saturate, and pages with very high term frequencies will take over. + /// As this number trends to 0, it does not take many terms to saturate and allow other paramaters to influence the ranking. + /// At 0.0, terms will saturate immediately and results will not distinguish between one term and many. + /// Must be clamped to 0..=2 + pub term_saturation: f32, + /// Controls how much ranking uses term frequency versus raw term count. + /// At 1.0, term frequency fully applies and is the main ranking factor. + /// At 0.0, term frequency does not apply, and pages are ranked based on the raw sum of words and weights. 
+ /// Reducing this number is a good way to boost longer documents in your search results, + /// as they no longer get penalized for having a low term frequency. + /// Numbers between 0.0 and 1.0 will interpolate between the two ranking methods. + /// Must be clamped to 0..=1 + pub term_frequency: f32, +} + +impl Default for RankingWeights { + fn default() -> Self { + Self { + term_similarity: 1.0, + page_length: 0.75, + term_saturation: 1.5, + term_frequency: 1.0, + } + } } #[cfg(debug_assertions)] @@ -61,11 +104,13 @@ pub fn init_pagefind(metadata_bytes: &[u8]) -> *mut SearchIndex { web_version: env!("CARGO_PKG_VERSION"), generator_version: None, pages: Vec::new(), + average_page_length: 0.0, chunks: Vec::new(), filter_chunks: HashMap::new(), words: HashMap::new(), filters: HashMap::new(), sorts: HashMap::new(), + ranking_weights: RankingWeights::default(), }; match search_index.decode_metadata(metadata_bytes) { @@ -78,6 +123,47 @@ pub fn init_pagefind(metadata_bytes: &[u8]) -> *mut SearchIndex { } } +#[wasm_bindgen] +pub fn set_ranking_weights(ptr: *mut SearchIndex, weights: &str) -> *mut SearchIndex { + debug!({ "Loading Ranking Weights" }); + + let Ok(weights) = JSONValue::parse(weights) else { + return ptr; + }; + + let mut search_index = unsafe { Box::from_raw(ptr) }; + + if let Ok(term_similarity) = weights + .get_key_value("term_similarity") + .and_then(|v| v.read_float()) + { + search_index.ranking_weights.term_similarity = term_similarity.max(0.0); + } + + if let Ok(page_length) = weights + .get_key_value("page_length") + .and_then(|v| v.read_float()) + { + search_index.ranking_weights.page_length = page_length.clamp(0.0, 1.0); + } + + if let Ok(term_saturation) = weights + .get_key_value("term_saturation") + .and_then(|v| v.read_float()) + { + search_index.ranking_weights.term_saturation = term_saturation.clamp(0.0, 2.0); + } + + if let Ok(term_frequency) = weights + .get_key_value("term_frequency") + .and_then(|v| v.read_float()) + { + 
search_index.ranking_weights.term_frequency = term_frequency.clamp(0.0, 1.0); + } + + Box::into_raw(search_index) +} + #[wasm_bindgen] pub fn load_index_chunk(ptr: *mut SearchIndex, chunk_bytes: &[u8]) -> *mut SearchIndex { debug!({ "Loading Index Chunk" }); @@ -210,7 +296,7 @@ pub fn filters(ptr: *mut SearchIndex) -> String { } #[wasm_bindgen] -pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str, sort: &str, exact: bool, ranking: &RankingWeights) -> String { +pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str, sort: &str, exact: bool) -> String { let search_index = unsafe { Box::from_raw(ptr) }; if let Some(generator_version) = search_index.generator_version.as_ref() { @@ -225,7 +311,7 @@ pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str, sort: &str, exac let (unfiltered_results, mut results) = if exact { search_index.exact_term(query, filter_set) } else { - search_index.search_term(query, filter_set, ranking) + search_index.search_term(query, filter_set) }; let unfiltered_total = unfiltered_results.len(); debug!({ format!("Raw total of {} results", unfiltered_total) }); diff --git a/pagefind_web/src/metadata.rs b/pagefind_web/src/metadata.rs index b18971a6..2ca80c15 100644 --- a/pagefind_web/src/metadata.rs +++ b/pagefind_web/src/metadata.rs @@ -59,6 +59,11 @@ impl SearchIndex { }); } + if !self.pages.is_empty() { + self.average_page_length = self.pages.iter().map(|p| p.word_count as f32).sum::() + / self.pages.len() as f32; + } + debug!({ "Reading index chunks array" }); let index_chunks = consume_arr_len!(decoder); debug!({ format!("Reading {:#?} index chunks", index_chunks) }); diff --git a/pagefind_web/src/search.rs b/pagefind_web/src/search.rs index 48920262..50132445 100644 --- a/pagefind_web/src/search.rs +++ b/pagefind_web/src/search.rs @@ -1,9 +1,13 @@ -use std::{borrow::Cow, cmp::Ordering}; - -use crate::{util::*, PageWord}; +use std::{ + borrow::Cow, + cmp::Ordering, + collections::HashMap, + ops::{Add, AddAssign, 
Div}, +}; + +use crate::{util::*, PageWord, RankingWeights}; use bit_set::BitSet; use pagefind_stem::Stemmer; -use wasm_bindgen::prelude::wasm_bindgen; use crate::SearchIndex; @@ -14,18 +18,19 @@ pub struct PageSearchResult { pub word_locations: Vec, } -struct ScoredPageWord<'a> { +struct MatchingPageWord<'a> { word: &'a PageWord, - length_differential: u8, - word_frequency: f32, + word_str: &'a str, + length_bonus: f32, + num_pages_matching: usize, } #[derive(Debug, Clone)] -struct VerboseWordLocation { +struct VerboseWordLocation<'a> { + word_str: &'a str, weight: u8, - length_differential: u8, - word_frequency: f32, word_location: u32, + length_bonus: f32, } #[derive(Debug, Clone)] @@ -35,52 +40,75 @@ pub struct BalancedWordScore { pub word_location: u32, } -#[derive(Debug, Clone)] -#[wasm_bindgen] -pub struct RankingWeights { - pub term_similarity: f32, - pub site_rarity: f32, - pub page_frequency: f32, +#[derive(Debug)] +struct BM25Params { + weighted_term_frequency: f32, + document_length: f32, + average_page_length: f32, + total_pages: usize, + pages_containing_term: usize, + length_bonus: f32, } -#[wasm_bindgen] -impl RankingWeights { - #[wasm_bindgen(constructor)] - pub fn new(term_similarity: f32, site_rarity: f32, page_frequency: f32) -> RankingWeights { - RankingWeights { - term_similarity, - site_rarity, - page_frequency, - } - } +/// Returns a score between 0.0 and 1.0 for the given word. +/// 1.0 implies the word is the exact length we need, +/// and that decays as the word becomes longer or shorter than the query word. +/// As `term_similarity_ranking` trends to zero, all output trends to 1.0. +/// As `term_similarity_ranking` increases, the score decays faster as differential grows. 
+fn word_length_bonus(differential: u8, term_similarity_ranking: f32) -> f32 { + let std_dev = 2.0_f32; + let base = (-0.5 * (differential as f32).powi(2) / std_dev.powi(2)).exp(); + let max_value = term_similarity_ranking.exp(); + (base * term_similarity_ranking).exp() / max_value +} + +fn calculate_bm25_word_score( + BM25Params { + weighted_term_frequency, + document_length, + average_page_length, + total_pages, + pages_containing_term, + length_bonus, + }: BM25Params, + ranking: &RankingWeights, +) -> f32 { + let weighted_with_length = weighted_term_frequency * length_bonus; + + let k1 = ranking.term_saturation; + let b = ranking.page_length; + + let idf = (total_pages as f32 - pages_containing_term as f32 + 0.5) + .div(pages_containing_term as f32 + 0.5) + .add(1.0) // Prevent IDF from ever being negative + .ln(); + + let bm25_tf = (k1 + 1.0) * weighted_with_length + / (k1 * (1.0 - b + b * (document_length / average_page_length)) + weighted_with_length); + + // Use ranking.term_frequency to interpolate between only caring about BM25's term frequency, + // and only caring about the original weighted word count on the page. + // Attempting to scale the original weighted word count to roughly the same bounds as the BM25 output (k1 + 1) + let raw_count_scalar = average_page_length / 5.0; + let scaled_raw_count = (weighted_with_length / raw_count_scalar).min(k1 + 1.0); + let tf = (1.0 - ranking.term_frequency) * scaled_raw_count + ranking.term_frequency * bm25_tf; + + debug!({ + format! 
{"TF is {tf:?}, IDF is {idf:?}"} + }); + + idf * tf } -fn calculate_word_score( +fn calculate_individual_word_score( VerboseWordLocation { + word_str: _, weight, - length_differential, - word_frequency, + length_bonus, word_location, }: VerboseWordLocation, - ranking: &RankingWeights, ) -> BalancedWordScore { - let word_length_bonus = ((if length_differential > 0 { - (2.0 / length_differential as f32).max(0.2) - } else { - 3.0 - }) - .ln() - * (*ranking).term_similarity) - .exp(); - - // Starting with the raw user-supplied (or derived) weight of the word, - // we take it to the power of two to make the weight scale non-linear. - // We then multiply it with word_length_bonus, which should be a roughly 0 -> 3 scale of how close - // this was was in length to the target word. - // That result is then multiplied by the word frequency, which is again a roughly 0 -> 2 scale - // of how unique this word is in the entire site. (tf-idf ish) - let balanced_score = ((weight as f32).powi(2) * word_length_bonus) - * (word_frequency.max(0.5).ln() * (*ranking).site_rarity).exp(); + let balanced_score = (weight as f32).powi(2) * length_bonus; BalancedWordScore { weight, @@ -198,7 +226,6 @@ impl SearchIndex { &self, term: &str, filter_results: Option, - ranking: &RankingWeights, ) -> (Vec, Vec) { debug!({ format! 
{"Searching {:?}", term} @@ -208,7 +235,7 @@ impl SearchIndex { let mut unfiltered_results: Vec = vec![]; let mut maps = Vec::new(); - let mut words: Vec = Vec::new(); + let mut words: Vec = Vec::new(); let split_term = stems_from_term(term); for term in split_term.iter() { @@ -217,15 +244,15 @@ impl SearchIndex { let length_differential: u8 = (word.len().abs_diff(term.len()) + 1) .try_into() .unwrap_or(std::u8::MAX); - let word_frequency: f32 = (total_pages - .checked_div(word_index.len()) - .unwrap_or_default() as f32) - .log10(); - words.extend(word_index.iter().map(|pageword| ScoredPageWord { + words.extend(word_index.iter().map(|pageword| MatchingPageWord { word: pageword, - length_differential, - word_frequency, + word_str: &word, + length_bonus: word_length_bonus( + length_differential, + self.ranking_weights.term_similarity, + ), + num_pages_matching: word_index.len(), })); let mut set = BitSet::new(); for page in word_index { @@ -266,42 +293,40 @@ impl SearchIndex { let mut pages: Vec = vec![]; for page_index in results.iter() { - // length diff, word weight, word position - let mut word_locations: Vec = words + let page = &self.pages[page_index]; + + let mut word_locations: Vec<_> = words .iter() - .filter_map( - |ScoredPageWord { - word, - length_differential, - word_frequency, - }| { - if word.page as usize == page_index { - Some( - word.locs - .iter() - .map(|loc| VerboseWordLocation { - weight: loc.0, - length_differential: *length_differential, - word_frequency: *word_frequency, - word_location: loc.1, - }) - .collect::>(), - ) - } else { - None - } - }, - ) + .filter_map(|w| { + if w.word.page as usize == page_index { + Some( + w.word + .locs + .iter() + .map(|(weight, location)| VerboseWordLocation { + word_str: w.word_str, + weight: *weight, + word_location: *location, + length_bonus: w.length_bonus, + }), + ) + } else { + None + } + }) .flatten() .collect(); - debug!({ - format! 
{"Word locations {:?}", word_locations} - }); word_locations .sort_unstable_by_key(|VerboseWordLocation { word_location, .. }| *word_location); + debug!({ + format! {"Found the raw word locations {:?}", word_locations} + }); + let mut unique_word_locations: Vec = Vec::with_capacity(word_locations.len()); + let mut weighted_words: HashMap<&str, usize> = HashMap::with_capacity(words.len()); + if !word_locations.is_empty() { let mut working_word = word_locations[0].clone(); for next_word in word_locations.into_iter().skip(1) { @@ -318,31 +343,66 @@ impl SearchIndex { } // We don't want to do anything if the new word is weighted higher // (Lowest weight wins) - - if next_word.length_differential > working_word.length_differential { - // If the new word is further from target than the working word, - // we want to use that value. (Longest diff wins) - working_word.length_differential = next_word.length_differential; - } } else { - unique_word_locations.push(calculate_word_score(working_word, ranking)); + weighted_words + .entry(working_word.word_str) + .or_default() + .add_assign(working_word.weight as usize); + + unique_word_locations.push(calculate_individual_word_score(working_word)); working_word = next_word; } } - unique_word_locations.push(calculate_word_score(working_word, ranking)); + weighted_words + .entry(working_word.word_str) + .or_default() + .add_assign(working_word.weight as usize); + + unique_word_locations.push(calculate_individual_word_score(working_word)); } - let page = &self.pages[page_index]; debug!({ - format! {"Sorted word locations {:?}, {:?} word(s)", unique_word_locations, page.word_count} + format! {"Coerced to unique locations {:?}", unique_word_locations} + }); + debug!({ + format! {"Words have the final weights {:?}", weighted_words} }); - let page_score = (unique_word_locations - .iter() - .map(|BalancedWordScore { balanced_score, .. 
}| balanced_score) - .sum::() - / 24.0) - / ((page.word_count as f32).ln() * (*ranking).page_frequency).exp(); + let word_scores = + weighted_words + .into_iter() + .map(|(word_str, weighted_term_frequency)| { + let matched_word = words + .iter() + .find(|w| w.word_str == word_str) + .expect("word should be in the initial set"); + + let params = BM25Params { + weighted_term_frequency: (weighted_term_frequency as f32) / 24.0, + document_length: page.word_count as f32, + average_page_length: self.average_page_length, + total_pages, + pages_containing_term: matched_word.num_pages_matching, + length_bonus: matched_word.length_bonus, + }; + + debug!({ + format! {"Calculating BM25 with the params {:?}", params} + }); + debug!({ + format! {"And the weights {:?}", self.ranking_weights} + }); + + let score = calculate_bm25_word_score(params, &self.ranking_weights); + + debug!({ + format! {"BM25 gives us the score {:?}", score} + }); + + score + }); + + let page_score = word_scores.sum(); let search_result = PageSearchResult { page: page.hash.clone(), @@ -358,7 +418,7 @@ impl SearchIndex { pages.push(search_result); } - debug!({ "Sorting by word frequency" }); + debug!({ "Sorting by score" }); pages.sort_unstable_by(|a, b| { b.page_score .partial_cmp(&a.page_score) diff --git a/pagefind_web_js/lib/coupled_search.ts b/pagefind_web_js/lib/coupled_search.ts index cff8773a..23fde350 100644 --- a/pagefind_web_js/lib/coupled_search.ts +++ b/pagefind_web_js/lib/coupled_search.ts @@ -22,6 +22,7 @@ class PagefindInstance { indexWeight: number; excerptLength: number; mergeFilter: Object; + ranking?: PagefindRankingWeights; highlightParam: string | null; loaded_chunks: Record>; @@ -61,6 +62,7 @@ class PagefindInstance { this.indexWeight = opts.indexWeight ?? 1; this.excerptLength = opts.excerptLength ?? 30; this.mergeFilter = opts.mergeFilter ?? {}; + this.ranking = opts.ranking; this.highlightParam = opts.highlightParam ?? 
null; this.loaded_chunks = {}; @@ -90,12 +92,14 @@ class PagefindInstance { } async options(options: PagefindIndexOptions) { - const opts = ["basePath", "baseUrl", "indexWeight", "excerptLength", "mergeFilter", "highlightParam"]; + const opts = ["basePath", "baseUrl", "indexWeight", "excerptLength", "mergeFilter", "highlightParam", "ranking"]; for (const [k, v] of Object.entries(options)) { if (k === "mergeFilter") { let filters = this.stringifyFilters(v); let ptr = await this.getPtr(); this.raw_ptr = this.backend.add_synthetic_filter(ptr, filters); + } else if (k === "ranking") { + await this.set_ranking(options.ranking); } else if (opts.includes(k)) { if (k === "basePath" && typeof v === "string") this.basePath = v; if (k === "baseUrl" && typeof v === "string") this.baseUrl = v; @@ -123,6 +127,19 @@ class PagefindInstance { return data.slice(12); } + async set_ranking(ranking?: PagefindRankingWeights) { + if (!ranking) return; + + let rankingWeights = { + term_similarity: ranking.termSimilarity ?? null, + page_length: ranking.pageLength ?? null, + term_saturation: ranking.termSaturation ?? null, + term_frequency: ranking.termFrequency ?? null, + }; + let ptr = await this.getPtr(); + this.raw_ptr = this.backend.set_ranking_weights(ptr, JSON.stringify(rankingWeights)); + } + async init(language: string, opts: { load_wasm: boolean }) { await this.loadEntry(); let index = this.findIndex(language); @@ -140,6 +157,9 @@ class PagefindInstance { let ptr = await this.getPtr(); this.raw_ptr = this.backend.add_synthetic_filter(ptr, filters); } + if (this.ranking) { + await this.set_ranking(this.ranking); + } } async loadEntry() { @@ -440,15 +460,10 @@ class PagefindInstance { return null; } - let ranking = new this.backend.RankingWeights( - options.ranking?.termSimilarity ?? 1.0, - options.ranking?.siteRarity ?? 1.0, - options.ranking?.pageFrequency ?? 
1.0, - ) // pointer may have updated from the loadChunk calls ptr = await this.getPtr(); let searchStart = Date.now(); - let result = this.backend.search(ptr, term, filter_list, sort_list, exact_search, ranking) as string; + let result = this.backend.search(ptr, term, filter_list, sort_list, exact_search) as string; log(`Got the raw search result: ${result}`); let [unfilteredResultCount, all_results, filters, totalFilters] = result.split(/:([^:]*):(.*)__PF_UNFILTERED_DELIM__(.*)$/); let filterObj = this.parseFilters(filters); diff --git a/pagefind_web_js/types/index.d.ts b/pagefind_web_js/types/index.d.ts index e0b6c907..42ce498b 100644 --- a/pagefind_web_js/types/index.d.ts +++ b/pagefind_web_js/types/index.d.ts @@ -32,8 +32,43 @@ declare global { * This is set for you automatically, so it is unlikely you should set this directly. */ primary?: boolean, + /** + * Provides the ability to fine tune Pagefind's ranking algorithm to better suit your dataset. + */ + ranking?: PagefindRankingWeights, }; + type PagefindRankingWeights = { + /** + Controls page ranking based on similarity of terms to the search query (in length). + Increasing this number means pages rank higher when they contain works very close to the query, + e.g. if searching for `part` then `party` will boost a page higher than one containing `partition`. + Minimum value is 0.0, where `party` and `partition` would be viewed equally. + */ + termSimilarity?: Number, + /** + Controls how much effect the average page length has on ranking. + Maximum value is 1.0, where ranking will strongly favour pages that are shorter than the average page on the site. + Minimum value is 0.0, where ranking will exclusively look at term frequency, regardless of how long a document is. + */ + pageLength?: Number, + /** + Controls how quickly a term saturates on the page and reduces impact on the ranking. 
+ Maximum value is 2.0, where pages will take a long time to saturate, and pages with very high term frequencies will take over. + As this number trends to 0, it does not take many terms to saturate and allow other paramaters to influence the ranking. + Minimum value is 0.0, where terms will saturate immediately and results will not distinguish between one term and many. + */ + termSaturation?: Number, + /** + Controls how much ranking uses term frequency versus raw term count. + Maximum value is 1.0, where term frequency fully applies and is the main ranking factor. + Minimum value is 0.0, where term frequency does not apply, and pages are ranked based on the raw sum of words and weights. + Values between 0.0 and 1.0 will interpolate between the two ranking methods. + Reducing this number is a good way to boost longer documents in your search results, as they no longer get penalized for having a low term frequency. + */ + termFrequency?: Number + } + /** Options that can be passed to pagefind.search() */ type PagefindSearchOptions = { /** If set, this call will load all assets but return before searching. 
Prefer using pagefind.preload() instead */ @@ -44,15 +79,6 @@ declare global { filters?: Object, /** The set of sorts to use for this search, instead of relevancy */ sort?: Object, - /** Fine-grained ranking weights (range: 0.0 - 1.0) */ - ranking?: { - /* How much to boost words that closely match the search term, over fuzzier matches */ - termSimilarity?: Number, - /* How much to boost unique words in the search term, based on their occurance within the site as a whole */ - siteRarity?: Number, - /* How much to boost results based on density of the search term on the page */ - pageFrequency?: Number, - }, } /** Filter counts returned from pagefind.filters(), and alongside results from pagefind.search() */ From 7df6442b363c4b1a907422bdf5dc35a6a7514f78 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Thu, 28 Mar 2024 23:28:44 +1300 Subject: [PATCH 08/12] Rework ranking documentation --- docs/content/docs/api.md | 32 ----- docs/content/docs/ranking.md | 120 ++++++++++++++++++ docs/content/docs/search-config.md | 4 + .../features/multisite/multisite_sort.feature | 6 +- pagefind_web/src/lib.rs | 4 +- pagefind_web_js/types/index.d.ts | 2 +- 6 files changed, 130 insertions(+), 38 deletions(-) create mode 100644 docs/content/docs/ranking.md diff --git a/docs/content/docs/api.md b/docs/content/docs/api.md index 6bc2e25a..8436df0e 100644 --- a/docs/content/docs/api.md +++ b/docs/content/docs/api.md @@ -239,38 +239,6 @@ const search = await pagefind.search("static", { See [Sorting using the Pagefind JavaScript API](/docs/js-api-sorting/) for more details and functionality. -## Controlling how search results are ranked - -By default, the results' are sorted using a "balanced score" which is calculated using a sophisticated formula. This formula takes the ratio into account between matching vs total number of words on any given page. To support scenarios where this is not desirable (e.g. 
on sites where longer articles are better matches than short ones), this can be turned off: - -{{< diffcode >}} -```js -const search = await pagefind.search("term", { -+ ranking: { pageFrequency: 0.0 } -}); -``` -{{< /diffcode >}} - -It is also possible to control how much the site-wide frequency of a given term is taken into account (by default, terms that appear less often have a higher weight): - -{{< diffcode >}} -```js -const search = await pagefind.search("term", { -+ ranking: { siteRarity: 0.0 } -}); -``` -{{< /diffcode >}} - -Another knob to control the ranking is `termSimilarity`, which tells Pagefind how much it should weigh the length difference of the matched word vs the length of the matching search term: - -{{< diffcode >}} -```js -const search = await pagefind.search("term", { -+ ranking: { termSimilarity: 0.3 } -}); -``` -{{< /diffcode >}} - ## Re-initializing the search API In some cases you might need to re-initialize Pagefind. For example, if you dynamically change the language of the page without reloading, Pagefind will need to be re-initialized to reflect this langauge change. diff --git a/docs/content/docs/ranking.md b/docs/content/docs/ranking.md new file mode 100644 index 00000000..738f4d54 --- /dev/null +++ b/docs/content/docs/ranking.md @@ -0,0 +1,120 @@ +--- +title: "Customize Pagefind's result ranking" +nav_title: "Customize ranking" +nav_section: Searching +weight: 90 +--- + +Pagefind's default search algorithm is a great choice for most circumstances, but some datasets might be improved by changing the way results are ranked. + +A good example is sites with a mix of long and short pages, where the long pages tend to be the preferred result. In this case, tweaking the `pageLength` and/or `termFrequency` parameters can improve the search relevance for the specific content. + +Ranking parameters are configured within the `ranking` option passed to Pagefind, which can optionally contain any or all of the available parameters. 
+ +## Configuring ranking parameters via the JavaScript API + +{{< diffcode >}} +```javascript +const pagefind = await import("/pagefind/pagefind.js"); +await pagefind.options({ ++ ranking: { ++ termFrequency: 1.0 ++ } +}); +``` +{{< /diffcode >}} + +## Configuring ranking parameters via the Default UI + +{{< diffcode >}} +```javascript +new PagefindUI({ + element: "#search", ++ ranking: { ++ termFrequency: 1.0 ++ } +}); +``` +{{< /diffcode >}} + +## Configuring Term Frequency + +{{< diffcode >}} +```javascript +await pagefind.options({ ++ ranking: { ++ termFrequency: 1.0 // default value ++ } +}); +``` +{{< /diffcode >}} + +`termFrequency` changes the ranking balance between frequency of the term relative to document length, versus weighted term count. + +As an example, if we were querying `search` in the sentence **"Pagefind is a search tool that can search websites"**, the term frequency of `search` is 0.22 (2 / 9 words), while the weighted term count of `search` is 2. This latter number would also include any [content with custom weights](/docs/weighting/). + +- The maximum value is `1.0`, where term frequency fully applies and is the main ranking factor. +- The minimum value is `0.0`, where term frequency does not apply, and pages are ranked based on the raw sum of words and weights. +- Values between `0.0` and `1.0` will interpolate between the two ranking methods. + +Reducing the `termFrequency` parameter is a good way to boost longer documents in your search results, as they no longer get penalized for having a low term frequency, and instead get promoted for having many instances of the search term. + +## Configuring Term Similarity + +{{< diffcode >}} +```javascript +await pagefind.options({ ++ ranking: { ++ termSimilarity: 1.0 // default value ++ } +}); +``` +{{< /diffcode >}} + +`termSimilarity` changes the ranking based on similarity of terms to the search query. Currently this only takes the length of the term into account. 
+ +Increasing this number means pages rank higher when they contain words very close to the query, +e.g. if searching for `part`, a result of `party` will boost a page higher than one containing `partition`. + +The minimum value is `0.0`, where `party` and `partition` would be viewed equally. + +Increasing the `termSimilarity` parameter is a good way to suppress pages that are ranking well for long extensions of search terms. + +## Configuring Page Length + +{{< diffcode >}} +```javascript +await pagefind.options({ ++ ranking: { ++ pageLength: 0.75 // default value ++ } +}); +``` +{{< /diffcode >}} + +`pageLength` changes the way ranking compares page lengths with the average page lengths on your site. + +- The maximum value is `1.0`, where ranking will strongly favour pages that are shorter than the average page on the site, even if longer documents exist with a higher term frequency. +- The minimum value is `0.0`, where ranking will exclusively look at term frequency, regardless of how long a document is. + +Decreasing the `pageLength` parameter is a good way to suppress very short pages that are undesirably ranking higher than longer pages. + +## Configuring Term Saturation + +{{< diffcode >}} +```javascript +await pagefind.options({ ++ ranking: { ++ termSaturation: 1.4 // default value ++ } +}); +``` +{{< /diffcode >}} + +`termSaturation` controls how quickly a term "saturates" on a page. Once a term has appeared on a page many times, further appearances have a reduced impact on the page rank. + +- The maximum value is `2.0`, where pages will take a long time to saturate, giving pages with very high term frequencies a boost in ranking. +- As this value trends to 0, it does not take many terms to saturate and allow other paramaters to influence the ranking. +- The minimum value is `0.0`, where terms will saturate immediately and results will not distinguish between one term and many. 
+ +Decreasing the `termSaturation` parameter is a good way to suppress pages that are ranking well due to an extremely high number of search terms existing in their content. diff --git a/docs/content/docs/search-config.md b/docs/content/docs/search-config.md index 7fb114e9..9227c254 100644 --- a/docs/content/docs/search-config.md +++ b/docs/content/docs/search-config.md @@ -79,6 +79,10 @@ If set, Pagefind will add the search term as a query parameter under the same na If using the [Pagefind highlight script](/docs/highlighting/), make sure this is configured to match. +### Ranking + +See [customize ranking](/docs/ranking/) + ### Index weight See [multisite search > weighting](/docs/multisite/#changing-the-weighting-of-individual-indexes) diff --git a/pagefind/features/multisite/multisite_sort.feature b/pagefind/features/multisite/multisite_sort.feature index 6b843a0e..9466baee 100644 --- a/pagefind/features/multisite/multisite_sort.feature +++ b/pagefind/features/multisite/multisite_sort.feature @@ -66,8 +66,8 @@ Feature: Multisite Result Scoring When I evaluate: """ async function() { - let pagefind = await import("/website_a/pagefind/pagefind.js"); - await pagefind.mergeIndex("/website_b/pagefind/", { + let pagefind = await import("/website_b/pagefind/pagefind.js"); + await pagefind.mergeIndex("/website_a/pagefind/", { indexWeight: 20 }); @@ -78,4 +78,4 @@ Feature: Multisite Result Scoring } """ Then There should be no logs - Then The selector "[data-result]" should contain "/website_b/threewebs/, /website_b/oneweb/, /website_a/twowebs/" + Then The selector "[data-result]" should contain "/website_a/twowebs/, /website_a/oneweb/, /website_b/threewebs/" diff --git a/pagefind_web/src/lib.rs b/pagefind_web/src/lib.rs index 1e696436..75949289 100644 --- a/pagefind_web/src/lib.rs +++ b/pagefind_web/src/lib.rs @@ -47,7 +47,7 @@ pub struct SearchIndex { #[derive(Debug, Clone)] pub struct RankingWeights { /// Controls page ranking based on similarity of terms to the 
search query (in length). - /// Increasing this number means pages rank higher when they contain works very close to the query, + /// Increasing this number means pages rank higher when they contain words very close to the query, /// e.g. if searching for `part` then `party` will boost a page higher than one containing `partition`. /// As this number trends to zero, then `party` and `partition` would be viewed equally. /// Must be >= 0 @@ -78,7 +78,7 @@ impl Default for RankingWeights { Self { term_similarity: 1.0, page_length: 0.75, - term_saturation: 1.5, + term_saturation: 1.4, term_frequency: 1.0, } } diff --git a/pagefind_web_js/types/index.d.ts b/pagefind_web_js/types/index.d.ts index 42ce498b..cd7de666 100644 --- a/pagefind_web_js/types/index.d.ts +++ b/pagefind_web_js/types/index.d.ts @@ -41,7 +41,7 @@ declare global { type PagefindRankingWeights = { /** Controls page ranking based on similarity of terms to the search query (in length). - Increasing this number means pages rank higher when they contain works very close to the query, + Increasing this number means pages rank higher when they contain words very close to the query, e.g. if searching for `part` then `party` will boost a page higher than one containing `partition`. Minimum value is 0.0, where `party` and `partition` would be viewed equally. */ From 5e1b422f80341fdbcae2a48512048cbd81d53106 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Thu, 28 Mar 2024 23:38:05 +1300 Subject: [PATCH 09/12] Resolve non-deterministic test --- pagefind/features/weighting.feature | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pagefind/features/weighting.feature b/pagefind/features/weighting.feature index 6feec7e8..916982d2 100644 --- a/pagefind/features/weighting.feature +++ b/pagefind/features/weighting.feature @@ -33,8 +33,8 @@ Feature: Word Weighting """ Given I have a "public/r5/index.html" file with the body: """ -

Antelope

-

Other antelope text, of a similar length

+

Antelope

+

Other antelope antelope text, of a similar length

""" When I run my program Then I should see "Running Pagefind" in stdout From 0433bc9138bf8aa4c023819b382c81fcf8eba33a Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Fri, 29 Mar 2024 09:41:34 +1300 Subject: [PATCH 10/12] cargo update --- Cargo.lock | 1389 ++++++++++++++++++++++++++++------------------------ 1 file changed, 756 insertions(+), 633 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fa5b5af5..2c9f639b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,33 +4,32 @@ version = 3 [[package]] name = "actix-codec" -version = "0.5.0" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a7559404a7f3573127aab53c08ce37a6c6a315c374a31070f3c91cd1b4a7fe" +checksum = "5f7b0a21988c1bf877cf4759ef5ddaac04c1c9fe808c9142ecb78ba97d97a28a" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.5.0", "bytes", "futures-core", "futures-sink", - "log", "memchr", "pin-project-lite", "tokio", "tokio-util", + "tracing", ] [[package]] name = "actix-files" -version = "0.6.2" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d832782fac6ca7369a70c9ee9a20554623c5e51c76e190ad151780ebea1cf689" +checksum = "bf0bdd6ff79de7c9a021f5d9ea79ce23e108d8bfc9b49b5b4a2cf6fad5a35212" dependencies = [ "actix-http", "actix-service", "actix-utils", "actix-web", - "askama_escape", - "bitflags 1.3.2", + "bitflags 2.5.0", "bytes", "derive_more", "futures-core", @@ -40,21 +39,22 @@ dependencies = [ "mime_guess", "percent-encoding", "pin-project-lite", + "v_htmlescape", ] [[package]] name = "actix-http" -version = "3.2.2" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c83abf9903e1f0ad9973cc4f7b9767fd5a03a583f51a5b7a339e07987cd2724" +checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743" dependencies = [ "actix-codec", "actix-rt", "actix-service", "actix-utils", - "ahash 0.7.6", - "base64 0.13.0", - 
"bitflags 1.3.2", + "ahash", + "base64", + "bitflags 2.5.0", "brotli", "bytes", "bytestring", @@ -66,7 +66,7 @@ dependencies = [ "http", "httparse", "httpdate", - "itoa 1.0.3", + "itoa 1.0.11", "language-tags", "local-channel", "mime", @@ -75,25 +75,27 @@ dependencies = [ "rand 0.8.5", "sha1", "smallvec", + "tokio", + "tokio-util", "tracing", "zstd", ] [[package]] name = "actix-macros" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465a6172cf69b960917811022d8f29bc0b7fa1398bc4f78b3c466673db1213b6" +checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" dependencies = [ "quote", - "syn", + "syn 2.0.55", ] [[package]] name = "actix-router" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66ff4d247d2b160861fa2866457e85706833527840e4133f8f49aa423a38799" +checksum = "d22475596539443685426b6bdadb926ad0ecaefdfc5fb05e5e3441f15463c511" dependencies = [ "bytestring", "http", @@ -104,9 +106,9 @@ dependencies = [ [[package]] name = "actix-rt" -version = "2.7.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ea16c295198e958ef31930a6ef37d0fb64e9ca3b6116e6b93a8bdae96ee1000" +checksum = "28f32d40287d3f402ae0028a9d54bef51af15c8769492826a69d28f81893151d" dependencies = [ "futures-core", "tokio", @@ -114,9 +116,9 @@ dependencies = [ [[package]] name = "actix-server" -version = "2.1.1" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0da34f8e659ea1b077bb4637948b815cd3768ad5a188fdcd74ff4d84240cd824" +checksum = "3eb13e7eef0423ea6eab0e59f6c72e7cb46d33691ad56a726b3cd07ddec2c2d4" dependencies = [ "actix-rt", "actix-service", @@ -124,7 +126,6 @@ dependencies = [ "futures-core", "futures-util", "mio", - "num_cpus", "socket2", "tokio", "tracing", @@ -143,9 +144,9 @@ dependencies = [ [[package]] name = "actix-utils" -version = "3.0.0" 
+version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e491cbaac2e7fc788dfff99ff48ef317e23b3cf63dbaf7aaab6418f40f92aa94" +checksum = "88a1dcdff1466e3c2488e1cb5c36a71822750ad43839937f85d2f4d9f8b705d8" dependencies = [ "local-waker", "pin-project-lite", @@ -153,9 +154,9 @@ dependencies = [ [[package]] name = "actix-web" -version = "4.2.1" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d48f7b6534e06c7bfc72ee91db7917d4af6afe23e7d223b51e68fffbb21e96b9" +checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984" dependencies = [ "actix-codec", "actix-http", @@ -166,7 +167,7 @@ dependencies = [ "actix-service", "actix-utils", "actix-web-codegen", - "ahash 0.7.6", + "ahash", "bytes", "bytestring", "cfg-if", @@ -175,8 +176,7 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "http", - "itoa 1.0.3", + "itoa 1.0.11", "language-tags", "log", "mime", @@ -194,49 +194,49 @@ dependencies = [ [[package]] name = "actix-web-codegen" -version = "4.1.0" +version = "4.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa9362663c8643d67b2d5eafba49e4cb2c8a053a29ed00a0bea121f17c76b13" +checksum = "eb1f50ebbb30eca122b188319a4398b3f7bb4a8cdf50ecfb73bfc6a3c3ce54f5" dependencies = [ "actix-router", "proc-macro2", "quote", - "syn", + "syn 2.0.55", ] [[package]] -name = "adler" -version = "1.0.2" +name = "addr2line" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] [[package]] -name = "ahash" -version = "0.7.6" +name = "adler" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - 
"getrandom 0.2.7", - "once_cell", - "version_check", -] +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "ahash" -version = "0.8.2" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", + "getrandom 0.2.12", "once_cell", "version_check", + "zerocopy", ] [[package]] name = "aho-corasick" -version = "0.7.19" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -257,28 +257,76 @@ dependencies = [ ] [[package]] -name = "anyhow" -version = "1.0.65" +name = "allocator-api2" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" [[package]] -name = "arrayvec" -version = "0.7.2" +name = "anstream" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + 
+[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + +[[package]] +name = "anyhow" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" +checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" [[package]] -name = "askama_escape" -version = "0.10.3" +name = "arrayvec" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "619743e34b5ba4e9703bba34deac3427c72507c7159f5fd030aea8cac0cfe341" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "async-compression" -version = "0.4.0" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0122885821398cc923ece939e24d1056a2384ee719432397fa9db87230ff11" +checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c" dependencies = [ "flate2", "futures-core", @@ -287,34 +335,32 @@ dependencies = [ "tokio", ] -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - [[package]] name = "autocfg" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = 
"f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" [[package]] -name = "base64" -version = "0.13.0" +name = "backtrace" +version = "0.3.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] [[package]] name = "base64" -version = "0.21.0" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "bincode" @@ -348,24 +394,24 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.0.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "487f1e0fcbe47deb8b0574e646def1c903389d95241dd1bbcc6ce4a715dfc0c1" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "block-buffer" -version = "0.10.3" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" dependencies = [ "generic-array", ] [[package]] name = "brotli" -version = "3.3.4" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" +checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -374,9 +420,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.3.2" +version = "2.5.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -399,51 +445,45 @@ checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" dependencies = [ "lazy_static", "memchr", - "regex-automata", - "serde", + "regex-automata 0.1.10", ] -[[package]] -name = "build_const" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7" - [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.2.1" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "bytestring" -version = "1.1.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b6a75fd3048808ef06af5cd79712be8111960adaf89d90250974b38fc3928a" +checksum = "74d80203ea6b29df88012294f62733de21cfeab47f17b41af3a38bc30a03ee72" dependencies = [ "bytes", ] [[package]] name = "cc" -version = "1.0.73" +version = "1.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" dependencies = [ "jobserver", + "libc", ] [[package]] name = "cedarwood" -version = "0.4.5" +version = "0.4.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa312498f9f41452998d984d3deb84c84f86aeb8a2499d7505bb8106d78d147d" +checksum = "6d910bedd62c24733263d0bed247460853c9d22e8956bd4cd964302095e04e90" dependencies = [ "smallvec", ] @@ -456,14 +496,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "charabia" -version = "0.7.0" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b57f9571f611796ea38e5a9c12e5ce37476f70397b032757f8dfe0c7b9bc5637" +checksum = "413155d93157bff9130895c3bd83970ac7f35659ca57226a96aa35cf1e8e102c" dependencies = [ "cow-utils", "csv", "deunicode", + "finl_unicode", "fst", + "irg-kvariants", "jieba-rs", "lindera", "once_cell", @@ -477,79 +519,49 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.22" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86447ad904c7fb335a790c9d7fe3d0d971dc523b8ccd1561a520de9a85302750" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" dependencies = [ - "atty", - "bitflags 1.3.2", - "clap_derive 3.2.18", - "clap_lex 0.2.4", - "indexmap", - "once_cell", - "strsim", - "termcolor", - "textwrap", + "clap_builder", + "clap_derive", ] [[package]] -name = "clap" -version = "4.1.11" +name = "clap_builder" +version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42dfd32784433290c51d92c438bb72ea5063797fc3cc9a21a8c4346bebbb2098" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" dependencies = [ - "bitflags 2.0.2", - "clap_derive 4.1.9", - "clap_lex 0.3.3", - "is-terminal", - "once_cell", + "anstream", + "anstyle", + "clap_lex", "strsim", - "termcolor", ] [[package]] name = "clap_derive" -version = "3.2.18" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" 
+checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" dependencies = [ - "heck", - "proc-macro-error", + "heck 0.5.0", "proc-macro2", "quote", - "syn", -] - -[[package]] -name = "clap_derive" -version = "4.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fddf67631444a3a3e3e5ac51c36a5e01335302de677bd78759eaa90ab1f46644" -dependencies = [ - "heck", - "proc-macro-error", - "proc-macro2", - "quote", - "syn", + "syn 2.0.55", ] [[package]] name = "clap_lex" -version = "0.2.4" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" -dependencies = [ - "os_str_bytes", -] +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] -name = "clap_lex" -version = "0.3.3" +name = "colorchoice" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "033f6b7a4acb1f358c742aaca805c939ee73b4c6209ae4318ec7aca81c42e646" -dependencies = [ - "os_str_bytes", -] +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "config-derive" @@ -557,40 +569,39 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7329955b015b82dbcf7bf217f85cbcc016a1a825bf3b074093cd39a5c071a60c" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "console" -version = "0.15.1" +version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89eab4d20ce20cea182308bca13088fecea9c05f6776cf287205d41a0ed3c847" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" dependencies = [ "encode_unicode", + "lazy_static", "libc", - "once_cell", - "terminal_size", "unicode-width", - "winapi", + "windows-sys 0.52.0", ] [[package]] name = "const_format" -version = "0.2.26" +version = "0.2.32" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "939dc9e2eb9077e0679d2ce32de1ded8531779360b003b4a972a7a39ec263495" +checksum = "e3a214c7af3d04997541b18d432afaff4c455e79e2029079647e72fc2bd27673" dependencies = [ "const_format_proc_macros", ] [[package]] name = "const_format_proc_macros" -version = "0.2.22" +version = "0.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef196d5d972878a48da7decb7686eded338b4858fbabeed513d63a7c98b2b82d" +checksum = "c7f6ff08fd20f4f299298a28e2dfa8a8ba1036e6cd2460ac1de7b425d76f2500" dependencies = [ "proc-macro2", "quote", @@ -614,9 +625,9 @@ dependencies = [ [[package]] name = "cookie" -version = "0.16.0" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94d4706de1b0fa5b132270cddffa8585166037822e260a944fe161acd137ca05" +checksum = "e859cd57d0710d9e06c381b550c06e76992472a8c6d527aecd2fc673dcc231fb" dependencies = [ "percent-encoding", "time", @@ -625,33 +636,24 @@ dependencies = [ [[package]] name = "cow-utils" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79bb3adfaf5f75d24b01aee375f7555907840fa2800e5ec8fa3b9e2031830173" +checksum = "417bef24afe1460300965a25ff4a24b8b45ad011948302ec221e8a0a81eb2c79" [[package]] name = "cpufeatures" -version = "0.2.5" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" dependencies = [ "libc", ] -[[package]] -name = "crc" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" -dependencies = [ - "build_const", -] - [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] @@ -680,41 +682,49 @@ dependencies = [ "proc-macro2", "quote", "smallvec", - "syn", + "syn 1.0.109", ] [[package]] name = "cssparser-macros" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote", - "syn", + "syn 2.0.55", ] [[package]] name = "csv" -version = "1.1.6" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" dependencies = [ - "bstr", "csv-core", - "itoa 0.4.8", + "itoa 1.0.11", "ryu", "serde", ] [[package]] name = "csv-core" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" dependencies = [ "memchr", ] +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive_more" version = "0.99.17" @@ -725,20 +735,20 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn", + "syn 1.0.109", ] [[package]] name = "deunicode" -version = "1.3.2" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"08ff6a4480d42625e59bc4e8b5dc3723279fd24d83afe8aa20df217276261cd6" +checksum = "b6e854126756c496b8c81dec88f9a706b15b875c5849d4097a3854476b9fdf94" [[package]] name = "digest" -version = "0.10.5" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adfbc57365a37acbd2ebf2b64d7e69bb766e2fea813521ed536f5d0520dcf86c" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", @@ -746,24 +756,24 @@ dependencies = [ [[package]] name = "dtoa" -version = "0.4.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0" +checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" [[package]] name = "dtoa-short" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bde03329ae10e79ede66c9ce4dc930aa8599043b0743008548680f25b91502d6" +checksum = "dbaceec3c6e4211c79e7b1800fb9680527106beb2f9c51904a3210c03a448c74" dependencies = [ "dtoa", ] [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "emojis" @@ -846,9 +856,9 @@ checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" [[package]] name = "encoding_rs" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b" +checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" dependencies = [ "cfg-if", ] @@ -864,12 +874,12 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.9.1" +version = "0.10.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c90bf5f19754d10198ccb95b70664fc925bd1fc090a0fd9a6ebc54acc8cd6272" +checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" dependencies = [ - "atty", "humantime", + "is-terminal", "log", "regex", "termcolor", @@ -885,43 +895,44 @@ dependencies = [ ] [[package]] -name = "errno" -version = "0.3.0" +name = "equivalent" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" -dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys 0.45.0", -] +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] -name = "errno-dragonfly" -version = "0.1.2" +name = "errno" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ - "cc", "libc", + "windows-sys 0.52.0", ] [[package]] name = "filetime" -version = "0.2.17" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94a7bbaa59354bc20dd75b67f23e2797b4490e9d6928203fb105c79e448c86c" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" dependencies = [ "cfg-if", "libc", "redox_syscall", - "windows-sys 0.36.1", + "windows-sys 0.52.0", ] +[[package]] +name = "finl_unicode" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6" + [[package]] name = "flate2" -version = "1.0.24" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" 
dependencies = [ "crc32fast", "miniz_oxide", @@ -935,9 +946,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "form_urlencoded" -version = "1.1.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ "percent-encoding", ] @@ -950,9 +961,9 @@ checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" [[package]] name = "futures" -version = "0.3.24" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f21eda599937fba36daeb58a22e8f5cee2d14c4a17b5b7739c7c8e5e3b8230c" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" dependencies = [ "futures-channel", "futures-core", @@ -965,9 +976,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.24" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30bdd20c28fadd505d0fd6712cdfcb0d4b5648baf45faef7f852afb2399bb050" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", "futures-sink", @@ -975,15 +986,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.24" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e5aa3de05362c3fb88de6531e6296e85cde7739cccad4b9dfeeb7f6ebce56bf" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-executor" -version = "0.3.24" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ff63c23854bee61b6e9cd331d523909f238fc7636290b96826e9cfa5faa00ab" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" dependencies = [ "futures-core", "futures-task", @@ 
-992,38 +1003,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.24" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbf4d2a7a308fd4578637c0b17c7e1c7ba127b8f6ba00b29f717e9655d85eb68" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] name = "futures-macro" -version = "0.3.24" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42cd15d1c7456c04dbdf7e88bcd69760d74f3a798d6444e16974b505b0e62f17" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.55", ] [[package]] name = "futures-sink" -version = "0.3.24" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b20ba5a92e727ba30e72834706623d94ac93a725410b6a6b6fbc1b07f7ba56" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.24" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6508c467c73851293f390476d4491cf4d227dbabcd4170f3bb6044959b294f1" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-util" -version = "0.3.24" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44fb6cb1be61cc1d2e43b262516aafcf63b241cffdb1d3fa115f91d9c7b09c90" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-channel", "futures-core", @@ -1048,9 +1059,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.6" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ 
"typenum", "version_check", @@ -1069,26 +1080,32 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.7" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "libc", "wasi 0.11.0+wasi-snapshot-preview1", ] +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + [[package]] name = "glob" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.17" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66b91535aa35fea1523ad1b86cb6b53c28e0ae566ba4a460f4457e936cad7c6f" +checksum = "4fbd2820c5e49886948654ab546d0688ff24530286bdcf8fca3cefb16d4618eb" dependencies = [ "bytes", "fnv", @@ -1096,7 +1113,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap", + "indexmap 2.2.6", "slab", "tokio", "tokio-util", @@ -1105,68 +1122,66 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.11.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = 
"43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ - "ahash 0.7.6", + "ahash", + "serde", ] [[package]] name = "hashbrown" -version = "0.13.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ - "ahash 0.8.2", - "serde", + "ahash", + "allocator-api2", ] [[package]] name = "heck" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] -name = "hermit-abi" -version = "0.1.19" +name = "heck" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" -version = "0.3.1" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "html-escape" -version = "0.2.11" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e7479fa1ef38eb49fb6a42c426be515df2d063f06cb8efd3e50af073dbc26c" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" dependencies = [ "utf8-width", ] [[package]] name = "http" -version = "0.2.8" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" +checksum = 
"601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" dependencies = [ "bytes", "fnv", - "itoa 1.0.3", + "itoa 1.0.11", ] [[package]] @@ -1183,9 +1198,9 @@ checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" [[package]] name = "httpdate" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "humantime" @@ -1195,9 +1210,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "idna" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -1205,18 +1220,18 @@ dependencies = [ [[package]] name = "include_dir" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "482a2e29200b7eed25d7fdbd14423326760b7f6658d21a4cf12d55a50713c69f" +checksum = "18762faeff7122e89e0857b02f7ce6fcc0d101d5e9ad2ad7846cc01d61b7f19e" dependencies = [ "include_dir_macros", ] [[package]] name = "include_dir_macros" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e074c19deab2501407c91ba1860fa3d6820bfde307db6d8cb851b55a10be89b" +checksum = "b139284b5cf57ecfa712bcc66950bb635b31aff41c188e8a4cfc758eca374a3f" dependencies = [ "proc-macro2", "quote", @@ -1230,35 +1245,44 @@ checksum = "0cfe9645a18782869361d9c8732246be7b410ad4e919d3609ebabdac00ba12c3" [[package]] name = "indexmap" -version = "1.9.1" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", ] [[package]] -name = "io-lifetimes" -version = "1.0.9" +name = "indexmap" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ - "hermit-abi 0.3.1", - "libc", - "windows-sys 0.45.0", + "equivalent", + "hashbrown 0.14.3", +] + +[[package]] +name = "irg-kvariants" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c73214298363629cf9dbfc93b426808865ee3c121029778cb31b1284104fdf78" +dependencies = [ + "csv", + "once_cell", + "serde", ] [[package]] name = "is-terminal" -version = "0.4.6" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "256017f749ab3117e93acb91063009e1f1bb56d03965b14c2c8df4eb02c524d8" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ - "hermit-abi 0.3.1", - "io-lifetimes", - "rustix", - "windows-sys 0.45.0", + "hermit-abi", + "libc", + "windows-sys 0.52.0", ] [[package]] @@ -1278,30 +1302,30 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" [[package]] name = "itoa" -version = "1.0.3" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jieba-rs" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c7e12f50325401dde50c29ca32cff44bae20873135b39f4e19ecf305226dd80" +checksum = 
"93f0c1347cd3ac8d7c6e3a2dc33ac496d365cf09fc0831aa61111e1a6738983e" dependencies = [ "cedarwood", "fxhash", - "hashbrown 0.11.2", + "hashbrown 0.14.3", "lazy_static", - "phf 0.10.1", - "phf_codegen 0.10.0", + "phf 0.11.2", + "phf_codegen 0.11.2", "regex", ] [[package]] name = "jobserver" -version = "0.1.25" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b" +checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" dependencies = [ "libc", ] @@ -1312,6 +1336,15 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72167d68f5fce3b8655487b8038691a3c9984ee769590f93f2a631f4ad64e4f5" +[[package]] +name = "kanaria" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f9d9652540055ac4fded998a73aca97d965899077ab1212587437da44196ff" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "language-tags" version = "0.3.2" @@ -1396,43 +1429,46 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.133" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "lindera" -version = "0.17.0" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "082ca91ac4d1557028ace9bfb8cee1500d156a4574dda93cfcdcf4caaebb9bd7" +checksum = "72be283281bec2768687b1784be03a678609b51f2f90f6f9d9b4f07953e6dd25" dependencies = [ "anyhow", "bincode", "byteorder", "encoding", + "kanaria", "lindera-cc-cedict-builder", "lindera-core", "lindera-dictionary", - "lindera-ipadic", + "lindera-filter", "lindera-ipadic-builder", - "lindera-ko-dic", "lindera-ko-dic-builder", "lindera-unidic-builder", + "regex", "serde", "serde_json", 
"thiserror", + "unicode-blocks", + "unicode-normalization", + "yada", ] [[package]] name = "lindera-cc-cedict-builder" -version = "0.17.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8967615a6d85320ec2755e1435c36165467ba01a79026adc3f86dad1b668df3" +checksum = "10fbafd37adab44ccc2668a40fba2dbc4e665cb3c36018c15dfe2e2b830e28ce" dependencies = [ "anyhow", "bincode", "byteorder", - "clap 3.2.22", "csv", "encoding", "env_logger", @@ -1443,17 +1479,29 @@ dependencies = [ "yada", ] +[[package]] +name = "lindera-compress" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed9196bf5995503f6878a090dfee6114ba86430c72f67ef3624246b564869937" +dependencies = [ + "anyhow", + "flate2", + "lindera-decompress", +] + [[package]] name = "lindera-core" -version = "0.17.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e8ed3cea13f73557a4574a179b1518670a3b70bfdad120521313b03cc89380e" +checksum = "e5f0baa9932f682e9c5b388897330f155d3c40de80016e60125897fde5e0e246" dependencies = [ "anyhow", "bincode", "byteorder", "encoding_rs", "log", + "once_cell", "serde", "thiserror", "yada", @@ -1461,38 +1509,64 @@ dependencies = [ [[package]] name = "lindera-decompress" -version = "0.17.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2badb41828f89cfa6452db0a66da77897c0a04478304de26c8b2b36613e08d43" +checksum = "a6e63fa6ef0bc3ce2c26d372aa6185b7a316194494a84f81678f5da2893bf4a2" dependencies = [ "anyhow", - "lzma-rs", + "flate2", "serde", ] [[package]] name = "lindera-dictionary" -version = "0.17.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e219722c9f56b920c231210e7c25d8b5d35b508e7a2fd69d368916c4b1c926f6" +checksum = "fd765c36166016de87a1f447ea971573e4c63e334836c46ad0020f0408c88bfc" dependencies = [ "anyhow", "bincode", "byteorder", 
"lindera-core", + "lindera-ipadic", + "lindera-ko-dic", + "serde", +] + +[[package]] +name = "lindera-filter" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5345e37fb9521ab3cee19283bed135d46b3521dc1fd13a49fa0992379056203" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "kanaria", + "lindera-core", + "lindera-dictionary", + "once_cell", + "regex", + "serde", + "serde_json", + "unicode-blocks", + "unicode-normalization", + "unicode-segmentation", + "yada", ] [[package]] name = "lindera-ipadic" -version = "0.17.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c8e87c8362c724e8188fb7d9b6d184cac15d01369295e9bff7812b630d57e3b" +checksum = "60eeb356295f784e7db4cfd2c6772f2bd059e565a7744e246642a07bc333a88a" dependencies = [ "bincode", "byteorder", "encoding", "flate2", "lindera-core", + "lindera-decompress", "lindera-ipadic-builder", "once_cell", "tar", @@ -1500,19 +1574,19 @@ dependencies = [ [[package]] name = "lindera-ipadic-builder" -version = "0.17.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1439e95852e444a116424086dc64d709c90e8af269ff7d2c2c4020f666f8dfab" +checksum = "0a16a2a88db9d956f5086bc976deb9951ca2dbbfef41a002df0a7bfb2c845aab" dependencies = [ "anyhow", "bincode", "byteorder", - "clap 3.2.22", "csv", "encoding_rs", "encoding_rs_io", "env_logger", "glob", + "lindera-compress", "lindera-core", "lindera-decompress", "log", @@ -1522,15 +1596,16 @@ dependencies = [ [[package]] name = "lindera-ko-dic" -version = "0.17.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb15f949220da45872d774b7831bb030855ec083435c907499782f8558c8a203" +checksum = "abb479b170a841b8cfbe602d772e30849ffe0562b219190a378368968b8c8f66" dependencies = [ "bincode", "byteorder", "encoding", "flate2", "lindera-core", + "lindera-decompress", "lindera-ko-dic-builder", 
"once_cell", "tar", @@ -1538,18 +1613,18 @@ dependencies = [ [[package]] name = "lindera-ko-dic-builder" -version = "0.17.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fde5a7352f4754be4f741e90bf4dff38a12a6572ab3880d0cf688e1166b8d82b" +checksum = "9b9b58213552560717c48e7833444a20d2d7fe26a6e565f7ce0cbbf85784c7cf" dependencies = [ "anyhow", "bincode", "byteorder", - "clap 3.2.22", "csv", "encoding", "env_logger", "glob", + "lindera-compress", "lindera-core", "lindera-decompress", "log", @@ -1558,14 +1633,13 @@ dependencies = [ [[package]] name = "lindera-unidic-builder" -version = "0.17.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1451b2ed8a7184a5f815d84f99d358c1d67297305831453dfdc0eb5d08e22b5" +checksum = "6858147cdaf4a7b564c08a247449d3aca38e9b4812499651af08afbf85324596" dependencies = [ "anyhow", "bincode", "byteorder", - "clap 3.2.22", "csv", "encoding", "env_logger", @@ -1584,33 +1658,32 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "linux-raw-sys" -version = "0.3.1" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "local-channel" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f303ec0e94c6c54447f84f3b0ef7af769858a9c4ef56ef2a986d3dcd4c3fc9c" +checksum = "b6cbc85e69b8df4b8bb8b89ec634e7189099cea8927a276b7384ce5488e53ec8" dependencies = [ "futures-core", "futures-sink", - "futures-util", "local-waker", ] [[package]] name = "local-waker" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e34f76eb3611940e0e7d53a9aaa4e6a3151f69541a282fd0dad5571420c53ff1" +checksum = 
"4d873d7c67ce09b42110d801813efbc9364414e356be9935700d368351657487" [[package]] name = "lock_api" -version = "0.4.9" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ "autocfg", "scopeguard", @@ -1618,12 +1691,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.17" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "lol_html" @@ -1631,11 +1701,11 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1610d7994d67a05bb35861cd733b069b1171de8693bc8452849c59361a1bb87b" dependencies = [ - "bitflags 2.0.2", + "bitflags 2.5.0", "cfg-if", "cssparser", "encoding_rs", - "hashbrown 0.13.1", + "hashbrown 0.13.2", "lazy_static", "lazycell", "memchr", @@ -1645,33 +1715,23 @@ dependencies = [ "thiserror", ] -[[package]] -name = "lzma-rs" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aba8ecb0450dfabce4ad72085eed0a75dffe8f21f7ada05638564ea9db2d7fb1" -dependencies = [ - "byteorder", - "crc", -] - [[package]] name = "matches" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" +checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" [[package]] name = "memchr" -version = "2.5.0" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = 
"6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "mime" -version = "0.3.16" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "mime_guess" @@ -1700,14 +1760,14 @@ checksum = "1154809406efdb7982841adb6311b3d095b46f78342dd646736122fe6b19e267" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "minifier" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eb022374af2f446981254e6bf9efb6e2c9e1a53176d395fca02792fd4435729" +checksum = "5394aa376422b4b2b6c02fd9cfcb657e4ec544ae98e43d7d5d785fd0d042fd6d" [[package]] name = "minimal-lexical" @@ -1717,23 +1777,23 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.5.4" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] [[package]] name = "mio" -version = "0.8.4" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.36.1", + "windows-sys 0.48.0", ] [[package]] @@ -1744,9 +1804,9 @@ checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" [[package]] name = "nom" -version = "7.1.1" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" dependencies = [ "memchr", "minimal-lexical", @@ -1765,36 +1825,36 @@ dependencies = [ "nom", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num_cpus" -version = "1.13.1" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.1.19", + "hermit-abi", "libc", ] [[package]] -name = "num_threads" -version = "0.1.6" +name = "object" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ - "libc", + "memchr", ] [[package]] name = "once_cell" -version = "1.15.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" - -[[package]] -name = "os_str_bytes" -version = "6.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "pagefind" @@ -1804,17 +1864,17 @@ dependencies = [ "actix-web", "anyhow", "async-compression", - "base64 0.21.0", + "base64", "bit-set", "charabia", - "clap 4.1.11", + "clap", "console", "convert_case 0.6.0", "either", "emojis", "flate2", "futures", - "hashbrown 0.13.1", + "hashbrown 0.13.2", "html-escape", "include_dir", 
"lazy_static", @@ -1858,22 +1918,22 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.3" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-sys 0.36.1", + "windows-targets 0.48.5", ] [[package]] name = "paste" -version = "1.0.9" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1de2e551fb905ac83f73f7aedf2f0cb4a0da7e35efa24a202a936269f1f18e1" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" [[package]] name = "path-slash" @@ -1883,9 +1943,9 @@ checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42" [[package]] name = "percent-encoding" -version = "2.2.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "phf" @@ -1898,15 +1958,6 @@ dependencies = [ "proc-macro-hack", ] -[[package]] -name = "phf" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" -dependencies = [ - "phf_shared 0.10.0", -] - [[package]] name = "phf" version = "0.11.2" @@ -1928,12 +1979,12 @@ dependencies = [ [[package]] name = "phf_codegen" -version = "0.10.0" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" dependencies = [ - "phf_generator 0.10.0", - "phf_shared 0.10.0", + "phf_generator 0.11.2", + 
"phf_shared 0.11.2", ] [[package]] @@ -1948,11 +1999,11 @@ dependencies = [ [[package]] name = "phf_generator" -version = "0.10.0" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" dependencies = [ - "phf_shared 0.10.0", + "phf_shared 0.11.2", "rand 0.8.5", ] @@ -1967,7 +2018,7 @@ dependencies = [ "proc-macro-hack", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1979,15 +2030,6 @@ dependencies = [ "siphasher", ] -[[package]] -name = "phf_shared" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" -dependencies = [ - "siphasher", -] - [[package]] name = "phf_shared" version = "0.11.2" @@ -1999,9 +2041,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.9" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" [[package]] name = "pin-utils" @@ -2015,6 +2057,12 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3bd12336e3afa34152e002f57df37a7056778daa59ea542b3473b87f5fb260c4" +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + [[package]] name = "pori" version = "0.0.0" @@ -2033,11 +2081,17 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = 
"ppv-lite86" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "precomputed-hash" @@ -2054,7 +2108,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "version_check", ] @@ -2071,9 +2125,9 @@ dependencies = [ [[package]] name = "proc-macro-hack" -version = "0.5.19" +version = "0.5.20+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" @@ -2153,7 +2207,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.7", + "getrandom 0.2.12", ] [[package]] @@ -2176,21 +2230,22 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.16" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ "bitflags 1.3.2", ] [[package]] name = "regex" -version = "1.6.0" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", + "regex-automata 0.4.6", "regex-syntax", ] @@ -2200,11 +2255,22 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + [[package]] name = "regex-syntax" -version = "0.6.27" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "rust-patch" @@ -2224,9 +2290,15 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + [[package]] name = "rustc_version" version = "0.4.0" @@ -2238,23 +2310,22 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.3" +version = "0.38.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b24138615de35e32031d041a09032ef3487a616d901ca4db224e7d557efae2" +checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.5.0", "errno", - "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.45.0", + "windows-sys 0.52.0", ] [[package]] name = "ryu" -version = "1.0.11" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "safemem" @@ -2273,9 +2344,9 @@ dependencies = [ [[package]] name = "scopeguard" -version = "1.1.0" +version = "1.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "selectors" @@ -2299,37 +2370,37 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.14" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4" +checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" [[package]] name = "serde" -version = "1.0.145" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.145" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.55", ] [[package]] name = "serde_json" -version = "1.0.85" +version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" dependencies = [ - "itoa 1.0.3", + "itoa 1.0.11", "ryu", "serde", ] @@ -2341,7 +2412,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ "form_urlencoded", - "itoa 1.0.3", + "itoa 1.0.11", "ryu", "serde", ] @@ -2352,7 +2423,7 @@ version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b" dependencies = [ - "indexmap", + "indexmap 1.9.3", "ryu", "serde", "yaml-rust", @@ -2370,9 +2441,9 @@ dependencies = [ [[package]] name = "sha-1" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "028f48d513f9678cda28f6e4064755b3fbb2af6acd672f2c209b62323f7aea0f" +checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c" dependencies = [ "cfg-if", "cpufeatures", @@ -2381,9 +2452,9 @@ dependencies = [ [[package]] name = "sha1" -version = "0.10.5" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", "cpufeatures", @@ -2392,48 +2463,48 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" dependencies = [ "libc", ] [[package]] name = "siphasher" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" [[package]] name = "slab" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" dependencies = [ "autocfg", ] [[package]] name = "slice-group-by" -version = "0.3.0" +version = "0.3.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "03b634d87b960ab1a38c4fe143b508576f075e7c978bfad18217645ebfdfa2ec" +checksum = "826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" [[package]] name = "smallvec" -version = "1.9.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "socket2" -version = "0.4.7" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" +checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" dependencies = [ "libc", - "winapi", + "windows-sys 0.52.0", ] [[package]] @@ -2450,15 +2521,26 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" + +[[package]] +name = "syn" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] [[package]] name = "syn" -version = "1.0.100" +version = "2.0.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52205623b1b0f064a4e71182c3b18ae902267282930c6d5462c91b859668426e" +checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0" dependencies = [ "proc-macro2", "quote", @@ -2467,9 +2549,9 @@ dependencies = [ [[package]] name = "tar" -version = "0.4.38" +version = "0.4.40" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b55807c0344e1e6c04d7c965f5289c39a8d94ae23ed5c0b57aabac549f871c6" +checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb" dependencies = [ "filetime", "libc", @@ -2478,29 +2560,13 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.1.3" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" dependencies = [ "winapi-util", ] -[[package]] -name = "terminal_size" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "textwrap" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "949517c0cf1bf4ee812e2e07e08ab448e3ae0d23472aee8a06c985f0c8815b16" - [[package]] name = "thin-slice" version = "0.1.1" @@ -2509,41 +2575,54 @@ checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" [[package]] name = "thiserror" -version = "1.0.35" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c53f98874615aea268107765aa1ed8f6116782501d18e53d08b471733bea6c85" +checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.35" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8b463991b4eab2d801e724172285ec4195c650e8ec79b149e6c2a8e6dd3f783" +checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.55", ] [[package]] name = "time" -version = "0.3.14" +version = "0.3.34" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c3f9a28b618c3a6b9251b6908e9c99e04b9e5c02e6581ccbb67d59c34ef7f9b" +checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" dependencies = [ - "itoa 1.0.3", - "libc", - "num_threads", + "deranged", + "itoa 1.0.11", + "num-conv", + "powerfmt", + "serde", + "time-core", "time-macros", ] +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + [[package]] name = "time-macros" -version = "0.2.4" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" +checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" +dependencies = [ + "num-conv", + "time-core", +] [[package]] name = "tinyvec" @@ -2556,20 +2635,19 @@ dependencies = [ [[package]] name = "tinyvec_macros" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.24.2" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a12a59981d9e3c38d216785b0c37399f6e415e8d0712047620f189371b0bb" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ - "autocfg", + "backtrace", "bytes", "libc", - "memchr", "mio", "num_cpus", "parking_lot", @@ -2577,25 +2655,25 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.42.0", + "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "1.8.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.55", ] [[package]] name = "tokio-util" -version = "0.7.4" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740" +checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" dependencies = [ "bytes", "futures-core", @@ -2607,20 +2685,19 @@ dependencies = [ [[package]] name = "toml" -version = "0.5.9" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" dependencies = [ "serde", ] [[package]] name = "tracing" -version = "0.1.36" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fce9567bd60a67d08a16488756721ba392f24f29006402881e43b19aac64307" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if", "log", "pin-project-lite", "tracing-core", @@ -2628,9 +2705,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.29" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeea4303076558a00714b823f9ad67d58a3bbda1df83d8827d21193156e22f7" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", ] @@ -2641,7 +2718,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f6b76f0d5feab6eeb6a36900c5e1f6867f5061ce87917acc3d1c2d985db5212" dependencies = [ - "clap 4.1.11", + "clap", "config-derive", "envy", "log", @@ -2654,51 +2731,57 @@ dependencies = [ [[package]] name = "typenum" -version = "1.15.0" +version = 
"1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "unicase" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" +checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" dependencies = [ "version_check", ] [[package]] name = "unicode-bidi" -version = "0.3.8" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + +[[package]] +name = "unicode-blocks" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" +checksum = "6b12e05d9e06373163a9bb6bb8c263c261b396643a99445fe6b9811fd376581b" [[package]] name = "unicode-ident" -version = "1.0.4" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" dependencies = [ "tinyvec", ] [[package]] name = "unicode-segmentation" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" 
[[package]] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "unicode-xid" @@ -2708,9 +2791,9 @@ checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" [[package]] name = "url" -version = "2.3.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", "idna", @@ -2719,9 +2802,21 @@ dependencies = [ [[package]] name = "utf8-width" -version = "0.1.6" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" + +[[package]] +name = "utf8parse" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "v_htmlescape" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e8257fbc510f0a46eb602c10215901938b5c2a7d5e70fc11483b1d3c9b5b18c" [[package]] name = "version_check" @@ -2731,12 +2826,11 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.3.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", - "winapi", "winapi-util", ] @@ -2772,11 
+2866,11 @@ dependencies = [ [[package]] name = "whatlang" -version = "0.16.2" +version = "0.16.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c531a2dc4c462b833788be2c07eef4e621d0e9edbd55bf280cc164c1c1aa043" +checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0" dependencies = [ - "hashbrown 0.12.3", + "hashbrown 0.14.3", "once_cell", ] @@ -2798,9 +2892,9 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" dependencies = [ "winapi", ] @@ -2813,142 +2907,152 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" -version = "0.36.1" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows_aarch64_msvc 0.36.1", - "windows_i686_gnu 0.36.1", - "windows_i686_msvc 0.36.1", - "windows_x86_64_gnu 0.36.1", - "windows_x86_64_msvc 0.36.1", + "windows-targets 0.48.5", ] [[package]] name = "windows-sys" -version = "0.42.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc 0.42.2", + "windows-targets 0.52.4", ] [[package]] -name = 
"windows-sys" -version = "0.45.0" +name = "windows-targets" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows-targets", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] name = "windows-targets" -version = "0.42.2" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc 0.42.2", + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", ] [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.2" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" -version = "0.36.1" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.42.2" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" -version = "0.36.1" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.42.2" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" -version = "0.36.1" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.42.2" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" -version = "0.36.1" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = 
"windows_x86_64_gnu" -version = "0.42.2" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.2" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" -version = "0.36.1" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.42.2" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" [[package]] name = "xattr" -version = "0.2.3" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" dependencies = [ "libc", + "linux-raw-sys", + "rustix", ] [[package]] name = "yada" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6d12cb7a57bbf2ab670ed9545bae3648048547f9039279a89ce000208e585c1" +checksum = 
"aed111bd9e48a802518765906cbdadf0b45afb72b9c81ab049a3b86252adffdd" [[package]] name = "yaml-rust" @@ -2959,31 +3063,50 @@ dependencies = [ "linked-hash-map", ] +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.55", +] + [[package]] name = "zstd" -version = "0.11.2+zstd.1.5.2" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "5.0.2+zstd.1.5.2" +version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" dependencies = [ - "libc", "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.1+zstd.1.5.2" +version = "2.0.10+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" +checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" dependencies = [ "cc", - "libc", + "pkg-config", ] From a6c40e49c64a71903ef1301af6326239058adf6a Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Fri, 29 Mar 2024 10:22:09 +1300 Subject: [PATCH 11/12] Fix non-deterministic test --- pagefind/features/weighting.feature | 7 ++++++- 1 file changed, 
6 insertions(+), 1 deletion(-) diff --git a/pagefind/features/weighting.feature b/pagefind/features/weighting.feature index 916982d2..3bb611bb 100644 --- a/pagefind/features/weighting.feature +++ b/pagefind/features/weighting.feature @@ -133,7 +133,7 @@ Feature: Word Weighting """ Given I have a "public/r3/index.html" file with the body: """ -

A single reference to TwoAntelope

+

A single reference to the TwoAntelope

""" When I run my program Then I should see "Running Pagefind" in stdout @@ -143,6 +143,11 @@ Feature: Word Weighting """ async function() { let pagefind = await import("/pagefind/pagefind.js"); + await pagefind.options({ + ranking: { + termFrequency: 0.0 + } + }); let search = await pagefind.search(`antelope`); From 136842f3dd7c128ad6169dc82f0b00264b0417fa Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Fri, 29 Mar 2024 10:37:09 +1300 Subject: [PATCH 12/12] Again, resolve non-deterministic test --- pagefind/features/weighting.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pagefind/features/weighting.feature b/pagefind/features/weighting.feature index 3bb611bb..0b525478 100644 --- a/pagefind/features/weighting.feature +++ b/pagefind/features/weighting.feature @@ -129,7 +129,7 @@ Feature: Word Weighting """ Given I have a "public/r2/index.html" file with the body: """ -

Two references to AFourWordAntelope AFourWordAntelope

+

Two references to SomeLongFiveWordAntelope SomeLongFiveWordAntelope

""" Given I have a "public/r3/index.html" file with the body: """