From 19de72c19660c0cbd5ad31bac5e5ba74f31a3ef7 Mon Sep 17 00:00:00 2001 From: Lars Yencken Date: Tue, 26 Mar 2024 11:32:05 +0000 Subject: [PATCH 1/4] :hammer: Add "make bench.search" to evaluate search performance It fetches a dataset of synthetic queries and evaluates the extent to which we surface good articles for the given queries. The scoring algorithm chosen for articles is `precision@4`, meaning the proportion of the first four results that are relevant, averaged over a ton of queries. This is chosen since at most four articles are presented un-collapsed, and the value of getting those four right is much much higher than getting any right further down in the ranking. It does not yet score chart or explorer search. --- Makefile | 5 ++ settings/serverSettings.ts | 4 + site/search/evaluateSearch.ts | 149 ++++++++++++++++++++++++++++++++++ 3 files changed, 158 insertions(+) create mode 100644 site/search/evaluateSearch.ts diff --git a/Makefile b/Makefile index 36b0003383b..f9687a9bf07 100644 --- a/Makefile +++ b/Makefile @@ -40,6 +40,7 @@ help: @echo ' make refresh.full do a full MySQL update of both wordpress and grapher' @echo ' make sync-images sync all images from the remote master' @echo ' make reindex reindex (or initialise) search in Algolia' + @echo ' make bench.search run search benchmarks' @echo @echo ' OPS (staff-only)' @echo ' make deploy Deploy your local site to production' @@ -354,5 +355,9 @@ reindex: itsJustJavascript node --enable-source-maps itsJustJavascript/baker/algolia/indexChartsToAlgolia.js node --enable-source-maps itsJustJavascript/baker/algolia/indexExplorersToAlgolia.js +bench.search: itsJustJavascript + @echo '==> Running search benchmarks' + @node --enable-source-maps itsJustJavascript/site/search/evaluateSearch.js + clean: rm -rf node_modules itsJustJavascript diff --git a/settings/serverSettings.ts b/settings/serverSettings.ts index 9e24863df3f..33ae460f918 100644 --- a/settings/serverSettings.ts +++ 
b/settings/serverSettings.ts @@ -205,3 +205,7 @@ export const OPENAI_API_KEY: string = serverSettings.OPENAI_API_KEY ?? "" export const SLACK_BOT_OAUTH_TOKEN: string = serverSettings.SLACK_BOT_OAUTH_TOKEN ?? "" + +// search evaluation +export const SEARCH_EVAL_URL: string = + "https://pub-ec761fe0df554b02bc605610f3296000.r2.dev" diff --git a/site/search/evaluateSearch.ts b/site/search/evaluateSearch.ts new file mode 100644 index 00000000000..c234ed4784d --- /dev/null +++ b/site/search/evaluateSearch.ts @@ -0,0 +1,149 @@ +/** + * Simulate searches against our Algolia index and evaluate the results. + */ + +import { + ALGOLIA_ID, + ALGOLIA_SEARCH_KEY, +} from "../../settings/clientSettings.js" +import { SEARCH_EVAL_URL } from "../../settings/serverSettings.js" +import { getIndexName } from "./searchClient.js" +import algoliasearch from "algoliasearch" + +/* eslint-disable no-console */ + +// this many articles are displayed un-collapsed, only score this many results +const N_ARTICLES_DISPLAYED = 4 + +const CONCURRENT_QUERIES = 10 + +type QueryDataset = { + name: string + queries: Query[] +} + +type Query = { + query: string + slugs: string[] +} + +type ScoredQuery = { + query: string + expected: string[] + actual: string[] + precision: number +} + +type SearchResults = { + name: string + scope: "articles" | "charts" | "all" + meanPrecision: number + numQueries: number +} + +const QUERY_FILES = { + single: "synthetic-queries-single-2024-03-25.json", + multi: "synthetic-queries-2024-03-25.json", +} + +const main = async (): Promise => { + await evaluateAndPrint(QUERY_FILES.single) + await evaluateAndPrint(QUERY_FILES.multi) +} + +const evaluateAndPrint = async (name: string): Promise => { + const results = await evaluateArticleSearch(name) + console.log(JSON.stringify(results, null, 2)) +} + +const evaluateArticleSearch = async (name: string): Promise => { + const ds = await fetchQueryDataset(name) + const indexName = getIndexName("pages") + + // make a search client + 
const client = getClient() + const index = client.initIndex(indexName) + + // run the evaluation + const results = await simulateQueries(index, ds.queries) + const meanPrecision = + results.map((r) => r.precision).reduce((a, b) => a + b) / results.length + + // print the results to two decimal places + return { + name: ds.name, + scope: "articles", + meanPrecision: parseFloat(meanPrecision.toFixed(3)), + numQueries: ds.queries.length, + } +} + +const getClient = (): any => { + const client = algoliasearch(ALGOLIA_ID, ALGOLIA_SEARCH_KEY) + return client +} + +const fetchQueryDataset = async (name: string): Promise => { + const url: string = `${SEARCH_EVAL_URL}/${name}` + const resp = await fetch(url) + const jsonData = await resp.json() + return { name, queries: jsonData } +} + +const simulateQuery = async ( + index: any, + query: Query +): Promise => { + const { hits } = await index.search(query.query) + const actual = hits.map((h: any) => h.slug) + const precision = calculatePrecision(query.slugs, actual) + return { query: query.query, expected: query.slugs, actual, precision } +} + +const calculatePrecision = (expected: string[], actual: string[]): number => { + const actualTruncated = actual.slice(0, N_ARTICLES_DISPLAYED) + const n = actualTruncated.length + if (n === 0) { + return 0 + } + const correct = actualTruncated.filter((a) => expected.includes(a)).length + return correct / n +} + +const simulateQueries = async ( + index: any, + queries: Query[] +): Promise => { + // NOTE: should be a rate-limited version of: + // + // const scores = await Promise.all( + // queries.map((query) => simulateQuery(index, query)) + // ) + + let activeQueries = 0 + let i = 0 + const scores: ScoredQuery[] = [] + + const next = async () => { + if (i >= queries.length) return + const query = queries[i++] + activeQueries++ + const score = await simulateQuery(index, query) + scores.push(score) + activeQueries-- + if (i < queries.length) { + await next() + } + } + + const promises = 
[] + while (activeQueries < CONCURRENT_QUERIES && i < queries.length) { + promises.push(next()) + } + + await Promise.all(promises) + + return scores +} + +main() From e5e1e85b83438f3429577015b2cf32fa0a47ee13 Mon Sep 17 00:00:00 2001 From: Lars Yencken Date: Tue, 26 Mar 2024 11:41:21 +0000 Subject: [PATCH 2/4] :hammer: Only evaluate article search using the multi set --- site/search/evaluateSearch.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/search/evaluateSearch.ts b/site/search/evaluateSearch.ts index c234ed4784d..7d52e24f85f 100644 --- a/site/search/evaluateSearch.ts +++ b/site/search/evaluateSearch.ts @@ -47,7 +47,7 @@ const QUERY_FILES = { } const main = async (): Promise => { - await evaluateAndPrint(QUERY_FILES.single) + // only do the multi, since it contains the single-word set as well await evaluateAndPrint(QUERY_FILES.multi) } From cb50a2b646e25039ea65787a0eaa4d8de452e6e5 Mon Sep 17 00:00:00 2001 From: Lars Yencken Date: Tue, 26 Mar 2024 11:43:33 +0000 Subject: [PATCH 3/4] :hammer: Tag search evaluations with their algolia app and index --- site/search/evaluateSearch.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/site/search/evaluateSearch.ts b/site/search/evaluateSearch.ts index 7d52e24f85f..11d9aec45a4 100644 --- a/site/search/evaluateSearch.ts +++ b/site/search/evaluateSearch.ts @@ -39,6 +39,8 @@ type SearchResults = { scope: "articles" | "charts" | "all" meanPrecision: number numQueries: number + algoliaApp: string + algoliaIndex: string } const QUERY_FILES = { @@ -75,6 +77,8 @@ const evaluateArticleSearch = async (name: string): Promise => { scope: "articles", meanPrecision: parseFloat(meanPrecision.toFixed(3)), numQueries: ds.queries.length, + algoliaApp: ALGOLIA_ID, + algoliaIndex: indexName, } } From c637c9256c4f0477a68a59b14fa430e72229bc49 Mon Sep 17 00:00:00 2001 From: Lars Yencken Date: Tue, 26 Mar 2024 13:44:31 +0000 Subject: [PATCH 4/4] :hammer: Calculate precision@2 when benchmarking search The 
`precision@2` score reflects that we return two articles in the instant search results, so we want to know if we make that better or worse. --- site/search/evaluateSearch.ts | 48 ++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/site/search/evaluateSearch.ts b/site/search/evaluateSearch.ts index 11d9aec45a4..b4837874b07 100644 --- a/site/search/evaluateSearch.ts +++ b/site/search/evaluateSearch.ts @@ -13,7 +13,8 @@ import algoliasearch from "algoliasearch" /* eslint-disable no-console */ // this many articles are displayed un-collapsed, only score this many results -const N_ARTICLES_DISPLAYED = 4 +const N_ARTICLES_QUICK_RESULTS = 2 +const N_ARTICLES_LONG_RESULTS = 4 const CONCURRENT_QUERIES = 10 @@ -22,6 +23,8 @@ type QueryDataset = { queries: Query[] } +type Scores = { [key: string]: number } + type Query = { query: string slugs: string[] @@ -31,13 +34,13 @@ type ScoredQuery = { query: string expected: string[] actual: string[] - precision: number + scores: Scores } type SearchResults = { name: string scope: "articles" | "charts" | "all" - meanPrecision: number + scores: Scores numQueries: number algoliaApp: string algoliaIndex: string @@ -68,14 +71,19 @@ const evaluateArticleSearch = async (name: string): Promise => { // run the evaluation const results = await simulateQueries(index, ds.queries) - const meanPrecision = - results.map((r) => r.precision).reduce((a, b) => a + b) / results.length + const scores: Scores = {} + for (const scoreName of Object.keys(results[0].scores)) { + const mean = + results.map((r) => r.scores[scoreName]).reduce((a, b) => a + b) / + results.length + scores[scoreName] = parseFloat(mean.toFixed(3)) + } // print the results to two decimal places return { name: ds.name, scope: "articles", - meanPrecision: parseFloat(meanPrecision.toFixed(3)), + scores: scores, numQueries: ds.queries.length, algoliaApp: ALGOLIA_ID, algoliaIndex: indexName, @@ -100,18 +108,28 @@ const simulateQuery = async ( 
): Promise => { const { hits } = await index.search(query.query) const actual = hits.map((h: any) => h.slug) - const precision = calculatePrecision(query.slugs, actual) - return { query: query.query, expected: query.slugs, actual, precision } + const scores = scoreResults(query.slugs, actual) + return { query: query.query, expected: query.slugs, actual, scores } } -const calculatePrecision = (expected: string[], actual: string[]): number => { - const actualTruncated = actual.slice(0, N_ARTICLES_DISPLAYED) - const n = actualTruncated.length - if (n === 0) { - return 0 +const scoreResults = (relevant: string[], actual: string[]): Scores => { + const scores: Scores = {} + + for (const k of [N_ARTICLES_QUICK_RESULTS, N_ARTICLES_LONG_RESULTS]) { + const key = `precision@${k}` + const actualTruncated = actual.slice(0, k) + const n = actualTruncated.length + if (n === 0) { + scores[key] = 0 + continue + } + + const correct = actualTruncated.filter((a) => + relevant.includes(a) + ).length + scores[key] = correct / n } - const correct = actualTruncated.filter((a) => expected.includes(a)).length - return correct / n + return scores } const simulateQueries = async (