From 19de72c19660c0cbd5ad31bac5e5ba74f31a3ef7 Mon Sep 17 00:00:00 2001
From: Lars Yencken <lars@yencken.org>
Date: Tue, 26 Mar 2024 11:32:05 +0000
Subject: [PATCH 1/4] :hammer: Add "make bench.search" to evaluate search
 performance

It fetches a dataset of synthetic queries and evaluates the extent to
which we surface good articles for the given queries.

The scoring algorithm chosen for articles is `precision@4`, meaning the
proportion of the first four results that are relevant, averaged over
all queries in the dataset.

This is chosen since at most four articles are presented un-collapsed,
and the value of getting those four right is much, much higher than
getting any right further down in the ranking.

It does not yet score chart or explorer search.
---
 Makefile                      |   5 ++
 settings/serverSettings.ts    |   4 +
 site/search/evaluateSearch.ts | 149 ++++++++++++++++++++++++++++++++++
 3 files changed, 158 insertions(+)
 create mode 100644 site/search/evaluateSearch.ts
diff --git a/Makefile b/Makefile
index 36b0003383b..f9687a9bf07 100644
--- a/Makefile
+++ b/Makefile
@@ -40,6 +40,7 @@ help:
 	@echo '  make refresh.full      do a full MySQL update of both wordpress and grapher'
 	@echo '  make sync-images       sync all images from the remote master'
 	@echo '  make reindex			reindex (or initialise) search in Algolia'
+	@echo '  make bench.search      run search benchmarks'
 	@echo
 	@echo '  OPS (staff-only)'
 	@echo '  make deploy            Deploy your local site to production'
@@ -354,5 +355,9 @@ reindex: itsJustJavascript
 	node --enable-source-maps itsJustJavascript/baker/algolia/indexChartsToAlgolia.js
 	node --enable-source-maps itsJustJavascript/baker/algolia/indexExplorersToAlgolia.js
 
+bench.search: itsJustJavascript
+	@echo '==> Running search benchmarks'
+	@node --enable-source-maps itsJustJavascript/site/search/evaluateSearch.js
+
 clean:
 	rm -rf node_modules itsJustJavascript
diff --git a/settings/serverSettings.ts b/settings/serverSettings.ts
index 9e24863df3f..33ae460f918 100644
--- a/settings/serverSettings.ts
+++ b/settings/serverSettings.ts
@@ -205,3 +205,7 @@ export const OPENAI_API_KEY: string = serverSettings.OPENAI_API_KEY ?? ""
 
 export const SLACK_BOT_OAUTH_TOKEN: string =
     serverSettings.SLACK_BOT_OAUTH_TOKEN ?? ""
+
+// search evaluation
+export const SEARCH_EVAL_URL: string =
+    "https://pub-ec761fe0df554b02bc605610f3296000.r2.dev"
diff --git a/site/search/evaluateSearch.ts b/site/search/evaluateSearch.ts
new file mode 100644
index 00000000000..c234ed4784d
--- /dev/null
+++ b/site/search/evaluateSearch.ts
@@ -0,0 +1,149 @@
+/**
+ * Simulate searches against our Algolia index and evaluate the results.
+ */
+
+import {
+    ALGOLIA_ID,
+    ALGOLIA_SEARCH_KEY,
+} from "../../settings/clientSettings.js"
+import { SEARCH_EVAL_URL } from "../../settings/serverSettings.js"
+import { getIndexName } from "./searchClient.js"
+import algoliasearch from "algoliasearch"
+
+/* eslint-disable no-console */
+
+// this many articles are displayed un-collapsed, only score this many results
+const N_ARTICLES_DISPLAYED = 4
+
+const CONCURRENT_QUERIES = 10
+
+type QueryDataset = {
+    name: string
+    queries: Query[]
+}
+
+type Query = {
+    query: string
+    slugs: string[]
+}
+
+type ScoredQuery = {
+    query: string
+    expected: string[]
+    actual: string[]
+    precision: number
+}
+
+type SearchResults = {
+    name: string
+    scope: "articles" | "charts" | "all"
+    meanPrecision: number
+    numQueries: number
+}
+
+const QUERY_FILES = {
+    single: "synthetic-queries-single-2024-03-25.json",
+    multi: "synthetic-queries-2024-03-25.json",
+}
+
+const main = async (): Promise<void> => {
+    await evaluateAndPrint(QUERY_FILES.single)
+    await evaluateAndPrint(QUERY_FILES.multi)
+}
+
+const evaluateAndPrint = async (name: string): Promise<void> => {
+    const results = await evaluateArticleSearch(name)
+    console.log(JSON.stringify(results, null, 2))
+}
+
+const evaluateArticleSearch = async (name: string): Promise<SearchResults> => {
+    const ds = await fetchQueryDataset(name)
+    const indexName = getIndexName("pages")
+
+    // make a search client
+    const client = getClient()
+    const index = client.initIndex(indexName)
+
+    // run the evaluation
+    const results = await simulateQueries(index, ds.queries)
+    const meanPrecision =
+        results.map((r) => r.precision).reduce((a, b) => a + b) / results.length
+
+    // print the results to two decimal places
+    return {
+        name: ds.name,
+        scope: "articles",
+        meanPrecision: parseFloat(meanPrecision.toFixed(3)),
+        numQueries: ds.queries.length,
+    }
+}
+
+const getClient = (): any => {
+    const client = algoliasearch(ALGOLIA_ID, ALGOLIA_SEARCH_KEY)
+    return client
+}
+
+const fetchQueryDataset = async (name: string): Promise<QueryDataset> => {
+    const url: string = `${SEARCH_EVAL_URL}/${name}`
+    const resp = await fetch(url)
+    const jsonData = await resp.json()
+    return { name, queries: jsonData }
+}
+
+const simulateQuery = async (
+    index: any,
+    query: Query
+): Promise<ScoredQuery> => {
+    const { hits } = await index.search(query.query)
+    const actual = hits.map((h: any) => h.slug)
+    const precision = calculatePrecision(query.slugs, actual)
+    return { query: query.query, expected: query.slugs, actual, precision }
+}
+
+const calculatePrecision = (expected: string[], actual: string[]): number => {
+    const actualTruncated = actual.slice(0, N_ARTICLES_DISPLAYED)
+    const n = actualTruncated.length
+    if (n === 0) {
+        return 0
+    }
+    const correct = actualTruncated.filter((a) => expected.includes(a)).length
+    return correct / n
+}
+
+const simulateQueries = async (
+    index: any,
+    queries: Query[]
+): Promise<ScoredQuery[]> => {
+    // NOTE: should be a rate-limited version of:
+    //
+    // const scores = await Promise.all(
+    //     queries.map((query) => simulateQuery(index, query))
+    // )
+
+    let activeQueries = 0
+    let i = 0
+    const scores: ScoredQuery[] = []
+
+    const next = async () => {
+        if (i >= queries.length) return
+        const query = queries[i++]
+        activeQueries++
+        const score = await simulateQuery(index, query)
+        scores.push(score)
+        activeQueries--
+        if (i < queries.length) {
+            await next()
+        }
+    }
+
+    const promises = []
+    while (activeQueries < CONCURRENT_QUERIES && i < queries.length) {
+        promises.push(next())
+    }
+
+    await Promise.all(promises)
+
+    return scores
+}
+
+main()

From e5e1e85b83438f3429577015b2cf32fa0a47ee13 Mon Sep 17 00:00:00 2001
From: Lars Yencken <lars@yencken.org>
Date: Tue, 26 Mar 2024 11:41:21 +0000
Subject: [PATCH 2/4] :hammer: Only evaluate article search using the multi set

---
 site/search/evaluateSearch.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/site/search/evaluateSearch.ts b/site/search/evaluateSearch.ts
index c234ed4784d..7d52e24f85f 100644
--- a/site/search/evaluateSearch.ts
+++ b/site/search/evaluateSearch.ts
@@ -47,7 +47,7 @@ const QUERY_FILES = {
 }
 
 const main = async (): Promise<void> => {
-    await evaluateAndPrint(QUERY_FILES.single)
+    // only do the multi, since it contains the single-word set as well
     await evaluateAndPrint(QUERY_FILES.multi)
 }
 

From cb50a2b646e25039ea65787a0eaa4d8de452e6e5 Mon Sep 17 00:00:00 2001
From: Lars Yencken <lars@yencken.org>
Date: Tue, 26 Mar 2024 11:43:33 +0000
Subject: [PATCH 3/4] :hammer: Tag search evaluations with their algolia app
 and index

---
 site/search/evaluateSearch.ts | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/site/search/evaluateSearch.ts b/site/search/evaluateSearch.ts
index 7d52e24f85f..11d9aec45a4 100644
--- a/site/search/evaluateSearch.ts
+++ b/site/search/evaluateSearch.ts
@@ -39,6 +39,8 @@ type SearchResults = {
     scope: "articles" | "charts" | "all"
     meanPrecision: number
     numQueries: number
+    algoliaApp: string
+    algoliaIndex: string
 }
 
 const QUERY_FILES = {
@@ -75,6 +77,8 @@ const evaluateArticleSearch = async (name: string): Promise<SearchResults> => {
         scope: "articles",
         meanPrecision: parseFloat(meanPrecision.toFixed(3)),
         numQueries: ds.queries.length,
+        algoliaApp: ALGOLIA_ID,
+        algoliaIndex: indexName,
     }
 }
 

From c637c9256c4f0477a68a59b14fa430e72229bc49 Mon Sep 17 00:00:00 2001
From: Lars Yencken <lars@yencken.org>
Date: Tue, 26 Mar 2024 13:44:31 +0000
Subject: [PATCH 4/4] :hammer: Calculate precision@2 when benchmarking search

The `precision@2` score reflects that we return two articles in the
instant search results, so we want to know if we make that better or
worse.
---
 site/search/evaluateSearch.ts | 48 ++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/site/search/evaluateSearch.ts b/site/search/evaluateSearch.ts
index 11d9aec45a4..b4837874b07 100644
--- a/site/search/evaluateSearch.ts
+++ b/site/search/evaluateSearch.ts
@@ -13,7 +13,8 @@ import algoliasearch from "algoliasearch"
 /* eslint-disable no-console */
 
 // this many articles are displayed un-collapsed, only score this many results
-const N_ARTICLES_DISPLAYED = 4
+const N_ARTICLES_QUICK_RESULTS = 2
+const N_ARTICLES_LONG_RESULTS = 4
 
 const CONCURRENT_QUERIES = 10
 
@@ -22,6 +23,8 @@ type QueryDataset = {
     queries: Query[]
 }
 
+type Scores = { [key: string]: number }
+
 type Query = {
     query: string
     slugs: string[]
@@ -31,13 +34,13 @@ type ScoredQuery = {
     query: string
     expected: string[]
     actual: string[]
-    precision: number
+    scores: Scores
 }
 
 type SearchResults = {
     name: string
     scope: "articles" | "charts" | "all"
-    meanPrecision: number
+    scores: Scores
     numQueries: number
     algoliaApp: string
     algoliaIndex: string
@@ -68,14 +71,19 @@ const evaluateArticleSearch = async (name: string): Promise<SearchResults> => {
 
     // run the evaluation
     const results = await simulateQueries(index, ds.queries)
-    const meanPrecision =
-        results.map((r) => r.precision).reduce((a, b) => a + b) / results.length
+    const scores: Scores = {}
+    for (const scoreName of Object.keys(results[0].scores)) {
+        const mean =
+            results.map((r) => r.scores[scoreName]).reduce((a, b) => a + b) /
+            results.length
+        scores[scoreName] = parseFloat(mean.toFixed(3))
+    }
 
     // print the results to two decimal places
     return {
         name: ds.name,
         scope: "articles",
-        meanPrecision: parseFloat(meanPrecision.toFixed(3)),
+        scores: scores,
         numQueries: ds.queries.length,
         algoliaApp: ALGOLIA_ID,
         algoliaIndex: indexName,
@@ -100,18 +108,28 @@ const simulateQuery = async (
 ): Promise<ScoredQuery> => {
     const { hits } = await index.search(query.query)
     const actual = hits.map((h: any) => h.slug)
-    const precision = calculatePrecision(query.slugs, actual)
-    return { query: query.query, expected: query.slugs, actual, precision }
+    const scores = scoreResults(query.slugs, actual)
+    return { query: query.query, expected: query.slugs, actual, scores }
 }
 
-const calculatePrecision = (expected: string[], actual: string[]): number => {
-    const actualTruncated = actual.slice(0, N_ARTICLES_DISPLAYED)
-    const n = actualTruncated.length
-    if (n === 0) {
-        return 0
+const scoreResults = (relevant: string[], actual: string[]): Scores => {
+    const scores: Scores = {}
+
+    for (const k of [N_ARTICLES_QUICK_RESULTS, N_ARTICLES_LONG_RESULTS]) {
+        const key = `precision@${k}`
+        const actualTruncated = actual.slice(0, k)
+        const n = actualTruncated.length
+        if (n === 0) {
+            scores[key] = 0
+            continue
+        }
+
+        const correct = actualTruncated.filter((a) =>
+            relevant.includes(a)
+        ).length
+        scores[key] = correct / n
     }
-    const correct = actualTruncated.filter((a) => expected.includes(a)).length
-    return correct / n
+    return scores
 }
 
 const simulateQueries = async (