From 4c58626b51dd038dbbebb09aeb2754b18feaf790 Mon Sep 17 00:00:00 2001 From: Lala Sabathil Date: Mon, 12 Aug 2024 14:56:55 +0200 Subject: [PATCH] fix: try fixing search --- docs/_static/language_data.js | 283 ++++++++++++++++++++++++++++++++++ docs/conf.py | 2 +- 2 files changed, 284 insertions(+), 1 deletion(-) create mode 100644 docs/_static/language_data.js diff --git a/docs/_static/language_data.js b/docs/_static/language_data.js new file mode 100644 index 0000000000..9b5aee6c60 --- /dev/null +++ b/docs/_static/language_data.js @@ -0,0 +1,283 @@ +/* + * language_data.js + * ~~~~~~~~~~~~~~~~ + * + * This script contains the language-specific data used by searchtools.js, + * namely the list of stopwords, stemmer, scorer and splitter. + * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; + + +/* Non-minified version is copied as a separate JS file, if available */ + +/** + * Porter Stemmer + */ +var Stemmer = function() { + + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; + + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; + + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence + + var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" + v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if (re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + + +"use-strict"; + +let queryBeingDone = null; +let pattern = null; + +const escapedRegex = /[-\/\\^$*+?.()|[\]{}]/g; +function escapeRegex(e) { + return e.replace(escapedRegex, "\\$&"); +} + +// for some reason Sphinx shows some entries twice +// if something has been scored already I'd rather sort it to the bottom +const beenScored = new Set(); + +function __score(haystack, regex) { + let match = regex.exec(haystack); + if (match == null) { + return Number.MAX_VALUE; + } + let subLength = match[0].length; + let start = match.index; + return (subLength * 1000 + start) / 1000.0; +} + +// unused for now +function __cleanNamespaces(query) { + return query.replace(/(discord\.(ext\.)?)?(.+)/, "$3"); +} + +Scorer = { + // Implement the following function to further tweak the score for each result + // The function takes a result array [filename, title, anchor, descr, score] + // and returns the new score. + score: (result) => { + // only inflate the score of things that are actual API reference things + const [, title, , , score] = result; + + if (pattern !== null && title.startsWith("discord.")) { + let _score = __score(title, pattern); + if (_score === Number.MAX_VALUE) { + return score; + } + if (beenScored.has(title)) { + return 0; + } + beenScored.add(title); + let newScore = 100 + queryBeingDone.length - _score; + // console.log(`${title}: ${score} -> ${newScore} (${_score})`); + return newScore; + } + return score; + }, + + // query matches the full name of an object + objNameMatch: 15, + // or matches in the last dotted part of the object name + objPartialMatch: 11, + // Additive scores depending on the priority of the object + objPrio: { + 0: 15, // used to be importantResults + 1: 7, // used to be objectResults + 2: -5, // used to be unimportantResults + }, + // Used when the priority is not in the mapping. + objPrioDefault: 0, + + // query found in title + title: 15, + partialTitle: 7, + // query found in terms + term: 5, + partialTerm: 2, +}; + +document.addEventListener("DOMContentLoaded", () => { + const params = new URLSearchParams(window.location.search); + queryBeingDone = params.get("q"); + if (queryBeingDone) { + let pattern = Array.from(queryBeingDone).map(escapeRegex).join(".*?"); + pattern = new RegExp(pattern, "i"); + } +}); + diff --git a/docs/conf.py b/docs/conf.py index 372e67c2e6..fa0f2da368 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -326,7 +326,7 @@ def write_new(): # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] html_css_files = ["css/custom.css"] -html_js_files = ["js/custom.js", "js/scorer.js"] +html_js_files = ["js/custom.js", "js/scorer.js", "language_data.js"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied