Skip to content

Commit

Permalink
Add a helper script to run Pagefind
Browse files Browse the repository at this point in the history
Nominally, `pagefind.js` can only be used on a website.

But with a few hacks, as described in
CloudCannon/pagefind#175 (reply in thread),
it can also be run via a node.js script on the command-line.

This script will be invaluable in the CI builds, to guarantee that
certain search results appear on top (such as `docs/git-log` for the
search term "log").

Signed-off-by: Johannes Schindelin <[email protected]>
  • Loading branch information
dscho committed Sep 11, 2024
1 parent 5773968 commit 7646102
Showing 1 changed file with 158 additions and 0 deletions.
158 changes: 158 additions & 0 deletions script/run-pagefind.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#!/usr/bin/env node

(async () => {
const options = {
basePath: {
option: '--base-path',
value: 'public/pagefind/',
},
language: {
option: '--language',
value: 'en',
},
limit: {
option: '--limit',
type: 'int',
value: 10
},
termFrequency: {
option: '--term-frequency',
type: 'float',
value: 0.1 // do not favor short pages
},
termSimilarity: {
option: '--term-similarity',
type: 'float',
value: 9
},
termSaturation: {
option: '--term-saturation',
type: 'float',
value: 2
},
pageLength: {
option: '--page-length',
type: 'float',
value: 0.1
},
json: {
option: '--json',
type: 'boolean',
value: false
}
}
let terms = []

// Parse command-line options
const handleValue = (option, value) => {
if (option.type === 'int') option.value = Number.parseInt(value)
else if (option.type === 'float') option.value = Number.parseFloat(value)
else option.value = value
}

const args = process.argv.slice(2)
optionLoop:
for (let i = 2; i < process.argv.length; i++) {
for (const key of Object.keys(options)) {
const option = options[key]
if (option.type === 'boolean') {
if (process.argv[i] === option.option) option.value = true
else if (process.argv[i] === `--no-${option.option}`) option.value = false
else continue
} else if (process.argv[i] === option.option) {
if (++i >= process.argv.length) throw new Error(`\`${option.option}\` requires an argument`)
handleValue(option, process.argv[i])
} else if (process.argv[i].startsWith(`${option.option}=`)) {
const value = process.argv[i].slice(option.option.length + 1)
handleValue(option, value)
} else continue
continue optionLoop
}
if (process.argv[i].startsWith('-')) throw new Error(`Unknown option: ${process.argv[i]}`)
terms.push(...process.argv.slice(i))
break
}

if (terms.length === 0) throw new Error('No search terms provided')

// Make `basePath` an absolute path
const path = await import('path')
if (!path.isAbsolute(options.basePath.value)) options.basePath.value = `${path.resolve(process.cwd(), options.basePath.value)}/`

const pagefindPath = path.join(options.basePath.value, 'pagefind.js')
const url = await import('url')
const pagefindUrl = url.pathToFileURL(pagefindPath).href

// Cannot use `await import('public/pagefind/pagefind.js')` because
// `pagefind.js` does not have the `.mjs` file extension yet is an ESM
// module. This would elicit the warning and error:
//
// Warning: To load an ES module, set "type": "module" in the package.json or use the .mjs extension.
// [...]
// SyntaxError: Cannot use 'import.meta' outside a module
//
// Instead, we load the file contents and dynamically import them via a data URL.
const fs = await import('fs')
const contents = await fs.promises.readFile(pagefindPath)
const moduleUrl = `data:application/javascript;base64,${contents.toString('base64')}`
// Dynamically import the module from the blob URL
const pagefind = await import(moduleUrl)

// Emulate the globals that the `pagefind.js` script expects to find in the
// browser environment.
Object.assign(globalThis, {
window: {
location: {
origin: ''
}
},
document: {
querySelector: () => {
return {
getAttribute: () => {
return 'en'
}
}
}
},
location: {
href: pagefindUrl
},
fetch: async url => {
const match = url.match(/(.*)\?.*$/)
const contents = await fs.promises.readFile(match ? match[1] : url)
return {
...contents,
arrayBuffer: () => contents,
json: () => JSON.parse(contents),
}
}
})

const pagefindOptions = {
basePath: options.basePath.value,
ranking: ['termFrequency', 'termSaturation', 'termSimilarity', 'pageLength'].reduce((v, c) => {
if (options[c].value !== undefined) v[c] = options[c].value
return v
}, {})
}

pagefind.options(pagefindOptions)
await pagefind.init(options.language.value)

for (const term of terms) {
let results = await pagefind.debouncedSearch(term)
results = options.limit.value < 1 ? results.results : results.results.slice(0, options.limit.value)
await Promise.all(results.map(async (item, index) => {
item.index = index
item.data = await item.data()
}))
console.log(`Results for ${term}:\n${options.json.value
? JSON.stringify(results, null, 2)
: results.map(item => `${item.index + 1}. ${item.data.raw_url} (${item.score})`).join('\n')
}`)
}
})().catch(e => {
process.stderr.write(`${e}\n`)
process.exit(1)
})

0 comments on commit 7646102

Please sign in to comment.