From 70a12c05da1eeed9096121b063cdda02773af46d Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Tue, 26 Sep 2023 21:37:10 +1300 Subject: [PATCH] Fix sub results for headings containing non-ascii text --- pagefind/features/edge_cases.feature | 61 +++++++++++++++++++++++++--- pagefind_web_js/lib/sub_results.ts | 2 +- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/pagefind/features/edge_cases.feature b/pagefind/features/edge_cases.feature index 7fe7e736..d0370023 100644 --- a/pagefind/features/edge_cases.feature +++ b/pagefind/features/edge_cases.feature @@ -88,12 +88,12 @@ Feature: Graceful Pagefind Errors """ Given I have a "public/ja/index.html" file with the content: """ - - -
-Hello 👋
- - + + + +Hello 👋
+ + """ When I run my program Then I should see "Running Pagefind" in stdout @@ -113,3 +113,52 @@ Feature: Graceful Pagefind Errors """ Then There should be no logs Then The selector "[data-url]" should contain "/ja/" + + # Previously, headings whose text had no ASCII word characters (nothing matching \w) were filtered out of sub results + Scenario: Pagefind multilingual sub-results + Given I have a "public/index.html" file with the content: + """ + + + +Nothing
+ + + """ + Given I have a "public/test/index.html" file with the content: + """ + + + +هزار سال پس از ماجرای گمشدنت
+ +از پیالهای چای سیاه پررنگ
+ +بیرون نه میروی از من
+ + + """ + When I run my program + Then I should see "Running Pagefind" in stdout + Then I should see the file "public/pagefind/pagefind.js" + When I serve the "public" directory + When I load "/" + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search("از"); + let results = await Promise.all(search.results.map(r => r.data())); + let result = results[0]; + + let subs = result.sub_results.map(s => s.url).sort().join(', '); + + document.querySelector('[data-url]').innerText = subs; + } + """ + Then There should be no logs + Then The selector "[data-url]" should contain "/test/#%D8%A7%D8%B2, /test/#_top, /test/#rtl-content" diff --git a/pagefind_web_js/lib/sub_results.ts b/pagefind_web_js/lib/sub_results.ts index 1cc707a3..41aa8742 100644 --- a/pagefind_web_js/lib/sub_results.ts +++ b/pagefind_web_js/lib/sub_results.ts @@ -6,7 +6,7 @@ export const calculate_sub_results = ( ): PagefindSubResult[] => { const anchors = fragment.anchors .filter( - (a) => /h\d/i.test(a.element) && a.text?.length && /\w/.test(a.text) + (a) => /h\d/i.test(a.element) && a.text?.length && /\S/.test(a.text) ) .sort((a, b) => a.location - b.location); const results: PagefindSubResult[] = [];