From e782a7ff4780ea9a78492201e1ff6384c6c0c0c8 Mon Sep 17 00:00:00 2001 From: Marcel Gerber Date: Wed, 10 Apr 2024 14:19:12 +0200 Subject: [PATCH] refactor(algolia): remove old explorer indexing code --- Makefile | 1 - baker/algolia/configureAlgolia.ts | 21 -- baker/algolia/indexExplorerViewsToAlgolia.ts | 10 +- baker/algolia/indexExplorersToAlgolia.ts | 209 ------------------- site/search/searchTypes.ts | 11 - 5 files changed, 9 insertions(+), 243 deletions(-) delete mode 100644 baker/algolia/indexExplorersToAlgolia.ts diff --git a/Makefile b/Makefile index fc3bc5644e0..3eac17b6a23 100644 --- a/Makefile +++ b/Makefile @@ -357,7 +357,6 @@ reindex: itsJustJavascript node --enable-source-maps itsJustJavascript/baker/algolia/configureAlgolia.js node --enable-source-maps itsJustJavascript/baker/algolia/indexToAlgolia.js node --enable-source-maps itsJustJavascript/baker/algolia/indexChartsToAlgolia.js - node --enable-source-maps itsJustJavascript/baker/algolia/indexExplorersToAlgolia.js node --enable-source-maps itsJustJavascript/baker/algolia/indexExplorerViewsToAlgolia.js clean: diff --git a/baker/algolia/configureAlgolia.ts b/baker/algolia/configureAlgolia.ts index c83ae4d15b0..9fa27d01f94 100644 --- a/baker/algolia/configureAlgolia.ts +++ b/baker/algolia/configureAlgolia.ts @@ -130,24 +130,6 @@ export const configureAlgolia = async () => { disablePrefixOnAttributes: ["content"], }) - const explorersIndex = client.initIndex( - getIndexName(SearchIndexName.Explorers) - ) - - await explorersIndex.setSettings({ - ...baseSettings, - searchableAttributes: [ - "unordered(slug)", - "unordered(title)", - "unordered(subtitle)", - "unordered(text)", - ], - customRanking: ["desc(views_7d)"], - attributeForDistinct: "slug", - attributesForFaceting: [], - disableTypoToleranceOnAttributes: ["text"], - }) - const explorerViewsIndex = client.initIndex( getIndexName(SearchIndexName.ExplorerViews) ) @@ -334,9 +316,6 @@ export const configureAlgolia = async () => { await chartsIndex.saveSynonyms(algoliaSynonyms, { replaceExistingSynonyms: true, }) - await explorersIndex.saveSynonyms(algoliaSynonyms, { - replaceExistingSynonyms: true, - }) await explorerViewsIndex.saveSynonyms(algoliaSynonyms, { replaceExistingSynonyms: true, }) diff --git a/baker/algolia/indexExplorerViewsToAlgolia.ts b/baker/algolia/indexExplorerViewsToAlgolia.ts index b5288e3d27d..6039a6e1b21 100644 --- a/baker/algolia/indexExplorerViewsToAlgolia.ts +++ b/baker/algolia/indexExplorerViewsToAlgolia.ts @@ -1,5 +1,4 @@ import * as db from "../../db/db.js" -import { ExplorerBlockGraphers } from "./indexExplorersToAlgolia.js" import { DecisionMatrix } from "../../explorer/ExplorerDecisionMatrix.js" import { tsvFormat } from "d3-dsv" import { @@ -15,6 +14,15 @@ import { SearchIndexName } from "../../site/search/searchTypes.js" import { groupBy, keyBy, orderBy } from "lodash" import { MarkdownTextWrap } from "@ourworldindata/components" +export type ExplorerBlockGraphers = { + type: "graphers" + block: { + title?: string + subtitle?: string + grapherId?: number + }[] +} + interface ExplorerViewEntry { viewTitle: string viewSubtitle: string diff --git a/baker/algolia/indexExplorersToAlgolia.ts b/baker/algolia/indexExplorersToAlgolia.ts deleted file mode 100644 index 9085cf1dab9..00000000000 --- a/baker/algolia/indexExplorersToAlgolia.ts +++ /dev/null @@ -1,209 +0,0 @@ -import cheerio from "cheerio" -import { isArray } from "lodash" -import { match } from "ts-pattern" -import { - GrapherInterface, - DbRawChart, - checkIsPlainObjectWithGuard, - identity, - keyBy, - parseChartConfig, -} from "@ourworldindata/utils" -import { getAlgoliaClient } from "./configureAlgolia.js" -import * as db from "../../db/db.js" -import { ALGOLIA_INDEXING } from "../../settings/serverSettings.js" -import { getAnalyticsPageviewsByUrlObj } from "../../db/model/Pageview.js" -import { chunkParagraphs } from "../chunk.js" -import { SearchIndexName } from "../../site/search/searchTypes.js" -import { getIndexName } from "../../site/search/searchClient.js" - -type ExplorerBlockColumns = { - type: "columns" - block: { name: string; additionalInfo?: string }[] -} - -export type ExplorerBlockGraphers = { - type: "graphers" - block: { - title?: string - subtitle?: string - grapherId?: number - }[] -} - -type ExplorerEntry = { - slug: string - title: string - subtitle: string - views_7d: number - blocks: string // (ExplorerBlockLineChart | ExplorerBlockColumns | ExplorerBlockGraphers)[] -} - -type ExplorerRecord = { - slug: string - title: string - subtitle: string - views_7d: number - text: string -} - -function extractTextFromExplorer( - blocksString: string, - graphersUsedInExplorers: Record -): string { - const blockText = new Set() - const blocks = JSON.parse(blocksString) - - if (isArray(blocks)) { - for (const block of blocks) { - if (checkIsPlainObjectWithGuard(block) && "type" in block) { - match(block) - .with( - { type: "columns" }, - (columns: ExplorerBlockColumns) => { - columns.block.forEach( - ({ name = "", additionalInfo = "" }) => { - blockText.add(name) - blockText.add(additionalInfo) - } - ) - } - ) - .with( - { type: "graphers" }, - (graphers: ExplorerBlockGraphers) => { - graphers.block.forEach( - ({ - title = "", - subtitle = "", - grapherId = undefined, - }) => { - blockText.add(title) - blockText.add(subtitle) - - if (grapherId !== undefined) { - const chartConfig = - graphersUsedInExplorers[grapherId] - - if (chartConfig) { - blockText.add( - chartConfig.title ?? "" - ) - blockText.add( - chartConfig.subtitle ?? "" - ) - } - } - } - ) - } - ) - .otherwise(() => { - // type: "tables" - // do nothing - }) - } - } - } - - return [...blockText].filter(identity).join(" ") -} - -function getNullishJSONValueAsPlaintext(value: string): string { - return value !== "null" ? cheerio.load(value)("body").text() : "" -} - -const getExplorerRecords = async ( - knex: db.KnexReadonlyTransaction -): Promise => { - const pageviews = await getAnalyticsPageviewsByUrlObj(knex) - - // Fetch info about all charts used in explorers, as linked by the explorer_charts table - const graphersUsedInExplorers = await db - .knexRaw>( - knex, - `-- sql - SELECT config FROM charts - INNER JOIN ( - SELECT DISTINCT chartId AS id FROM explorer_charts - ) AS ec - USING (id) - ` - ) - .then((charts) => charts.map((c) => parseChartConfig(c.config))) - .then((charts) => keyBy(charts, "id")) - - const explorerRecords = await db - .knexRaw>( - knex, - `-- sql - SELECT slug, - COALESCE(config->>"$.explorerSubtitle", "null") AS subtitle, - COALESCE(config->>"$.explorerTitle", "null") AS title, - COALESCE(config->>"$.blocks", "null") AS blocks - FROM explorers - WHERE isPublished = true - ` - ) - .then((results) => - results.flatMap(({ slug, title, subtitle, blocks }) => { - const textFromExplorer = extractTextFromExplorer( - blocks, - graphersUsedInExplorers - ) - const uniqueTextTokens = new Set([ - ...textFromExplorer.split(" "), - ]) - const textChunks = chunkParagraphs( - [...uniqueTextTokens].join(" "), - 1000 - ) - - // In case we don't have any text for this explorer, we still want to index it - const textChunksForIteration = textChunks.length - ? textChunks - : [""] - - const formattedTitle = `${getNullishJSONValueAsPlaintext( - title - )} Data Explorer` - - return textChunksForIteration.map((chunk, i) => ({ - slug, - title: formattedTitle, - subtitle: getNullishJSONValueAsPlaintext(subtitle), - views_7d: pageviews[`/explorers/${slug}`]?.views_7d ?? 0, - text: chunk, - objectID: `${slug}-${i}`, - })) - }) - ) - - return explorerRecords -} - -const indexExplorersToAlgolia = async () => { - if (!ALGOLIA_INDEXING) return - - const client = getAlgoliaClient() - if (!client) { - console.error( - `Failed indexing explorers (Algolia client not initialized)` - ) - return - } - - try { - const index = client.initIndex(getIndexName(SearchIndexName.Explorers)) - - const records = await db.knexReadonlyTransaction( - getExplorerRecords, - db.TransactionCloseMode.Close - ) - await index.replaceAllObjects(records) - } catch (e) { - console.log("Error indexing explorers to Algolia: ", e) - } -} - -void indexExplorersToAlgolia() diff --git a/site/search/searchTypes.ts b/site/search/searchTypes.ts index a7dec624360..0d4a71ad430 100644 --- a/site/search/searchTypes.ts +++ b/site/search/searchTypes.ts @@ -52,15 +52,6 @@ export type IExplorerViewHit = Hit & { viewTitleIndexWithinExplorer: number } -export type IExplorerHit = Hit & { - objectID: string - slug: string - subtitle: string - text: string - title: string - views_7d: number -} - export interface ChartRecord { objectID: string chartId: number @@ -83,7 +74,6 @@ export interface ChartRecord { export type IChartHit = Hit & ChartRecord export enum SearchIndexName { - Explorers = "explorers", ExplorerViews = "explorer-views", Charts = "charts", Pages = "pages", @@ -101,6 +91,5 @@ export const searchCategoryFilters: [string, SearchCategoryFilter][] = [ export const indexNameToSubdirectoryMap: Record = { [SearchIndexName.Pages]: "", [SearchIndexName.Charts]: "/grapher", - [SearchIndexName.Explorers]: "/explorers", [SearchIndexName.ExplorerViews]: "/explorers", }