From e6b1669ef266566d823cbdd2618afbfff754956a Mon Sep 17 00:00:00 2001 From: Marcel Gerber Date: Tue, 19 Mar 2024 17:59:26 +0100 Subject: [PATCH 1/7] chore(search): make country name variants one-way synonyms --- baker/algolia/configureAlgolia.ts | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/baker/algolia/configureAlgolia.ts b/baker/algolia/configureAlgolia.ts index 25e9abca2b1..b7b1603185d 100644 --- a/baker/algolia/configureAlgolia.ts +++ b/baker/algolia/configureAlgolia.ts @@ -9,7 +9,7 @@ import { ALGOLIA_INDEXING, ALGOLIA_SECRET_KEY, } from "../../settings/serverSettings.js" -import { countries } from "@ourworldindata/utils" +import { countries, excludeUndefined } from "@ourworldindata/utils" import { SearchIndexName } from "../../site/search/searchTypes.js" import { getIndexName } from "../../site/search/searchClient.js" @@ -308,12 +308,6 @@ export const configureAlgolia = async () => { ["solar", "photovoltaic", "photovoltaics", "pv"], ] - // Send all our country variant names to algolia as synonyms - for (const country of countries) { - if (country.variantNames) - synonyms.push([country.name].concat(country.variantNames)) - } - const algoliaSynonyms = synonyms.map((s) => { return { objectID: s.join("-"), @@ -322,6 +316,21 @@ export const configureAlgolia = async () => { } as Synonym }) + // Send all our country variant names to algolia as one-way synonyms + for (const country of countries) { + const alternatives = excludeUndefined([ + country.shortName, + ...(country.variantNames ?? []), + ]) + for (const alternative of alternatives) + algoliaSynonyms.push({ + objectID: `${alternative}->${country.name}`, + type: "oneWaySynonym", + input: alternative, + synonyms: [country.name], + }) + } + await pagesIndex.saveSynonyms(algoliaSynonyms, { replaceExistingSynonyms: true, }) From 36d36b1c324a369fd65c5bc16905ccb979d6b3be Mon Sep 17 00:00:00 2001 From: Marcel Gerber Date: Thu, 21 Mar 2024 14:10:09 +0100 Subject: [PATCH 2/7] feat(search): match geographic entities within search --- site/search/Search.scss | 19 +++++++++++++ site/search/SearchPanel.tsx | 57 ++++++++++++++++++++++++++++++++++--- site/search/SearchUtils.tsx | 38 +++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 4 deletions(-) diff --git a/site/search/Search.scss b/site/search/Search.scss index 1c2f1454fb9..149d4760cb7 100644 --- a/site/search/Search.scss +++ b/site/search/Search.scss @@ -320,6 +320,25 @@ } } +.search-results__chart-hit-entities { + display: flex; + flex-wrap: wrap; + gap: 4px; + list-style: none; + font-size: 0.7em; + + li { + background-color: $blue-10; + padding: 4px 8px; + border-radius: 12px; + color: $blue-90; + + svg { + margin-right: 4px; + } + } +} + /* * Tabs / Filtering **/ diff --git a/site/search/SearchPanel.tsx b/site/search/SearchPanel.tsx index e768da341d7..fbe49d30375 100644 --- a/site/search/SearchPanel.tsx +++ b/site/search/SearchPanel.tsx @@ -1,5 +1,5 @@ import ReactDOM from "react-dom" -import React, { useCallback, useEffect, useState } from "react" +import React, { useCallback, useEffect, useMemo, useState } from "react" import cx from "classnames" import { keyBy, @@ -7,6 +7,8 @@ import { get, mapValues, isElementHidden, + EntityName, + Url, } from "@ourworldindata/utils" import { InstantSearch, @@ -39,7 +41,11 @@ import { } from "./searchTypes.js" import { EXPLORERS_ROUTE_FOLDER } from "../../explorer/ExplorerConstants.js" import { FontAwesomeIcon } from "@fortawesome/react-fontawesome/index.js" -import { faHeartBroken, faSearch } from "@fortawesome/free-solid-svg-icons" +import { + faHeartBroken, + faLocationDot, + faSearch, +} from "@fortawesome/free-solid-svg-icons" import { DEFAULT_SEARCH_PLACEHOLDER, getIndexName, @@ -52,7 +58,9 @@ import { import { DEFAULT_GRAPHER_HEIGHT, DEFAULT_GRAPHER_WIDTH, + setSelectedEntityNamesParam, } from "@ourworldindata/grapher" +import { pickEntitiesForChartHit } from "./SearchUtils.js" import { SiteAnalytics } from "../SiteAnalytics.js" const siteAnalytics = new SiteAnalytics() @@ -84,13 +92,43 @@ function PagesHit({ hit }: { hit: IPageHit }) { ) } +const getChartQueryStr = (slug: string, entities: EntityName[]) => { + if (entities.length === 0) return "" + else { + return setSelectedEntityNamesParam( + Url.fromQueryParams({ + tab: "chart", + }), + entities + ).queryStr + } +} + function ChartHit({ hit }: { hit: IChartHit }) { const [imgLoaded, setImgLoaded] = useState(false) const [imgError, setImgError] = useState(false) + const entities = useMemo( + () => pickEntitiesForChartHit(hit), + // eslint-disable-next-line react-hooks/exhaustive-deps + [hit._highlightResult?.availableEntities] + ) + const queryStr = useMemo( + () => getChartQueryStr(hit.slug, entities), + [hit.slug, entities] + ) + const previewUrl = queryStr + ? `/grapher/thumbnail/${hit.slug}${queryStr}` + : `${BAKED_GRAPHER_URL}/exports/${hit.slug}.svg` + + useEffect(() => { + setImgLoaded(false) + setImgError(false) + }, [previewUrl]) + return ( )} setImgLoaded(true)} onError={() => setImgError(true)} /> @@ -121,6 +160,16 @@ function ChartHit({ hit }: { hit: IChartHit }) { {hit.variantName} + {entities.length > 0 && ( +
    + {entities.map((entity) => ( +
  • + + {entity} +
  • + ))} +
+ )}
) } diff --git a/site/search/SearchUtils.tsx b/site/search/SearchUtils.tsx index 2587d56e6f7..6ffd144c390 100644 --- a/site/search/SearchUtils.tsx +++ b/site/search/SearchUtils.tsx @@ -1,3 +1,6 @@ +import { HitAttributeHighlightResult } from "instantsearch.js" +import { IChartHit } from "./searchTypes.js" +import { EntityName } from "@ourworldindata/types" import { Region, getRegionByNameOrVariantName, @@ -22,3 +25,38 @@ export const extractRegionNamesFromSearchQuery = (query: string) => { if (regionNames.length === 0) return null return regionNames.map(getRegionByNameOrVariantName) as Region[] } + +const removeHighlightTags = (text: string) => + text.replace(/<\/?(mark|strong)>/g, "") + +export function pickEntitiesForChartHit(hit: IChartHit): EntityName[] { + const availableEntitiesHighlighted = hit._highlightResult + ?.availableEntities as HitAttributeHighlightResult[] | undefined + + const pickedEntities = availableEntitiesHighlighted + ?.filter((highlightEntry) => { + // Keep the highlight if it is fully highlighted + if (highlightEntry.fullyHighlighted) return true + if (highlightEntry.matchLevel === "none") return false + + // Remove any trailing parentheses, e.g. "Africa (UN)" -> "Africa" + const withoutTrailingParens = removeHighlightTags( + highlightEntry.value + ).replace(/\s?\(.*\)$/, "") + + const matchedWordsLowerCase = highlightEntry.matchedWords.map( + (mw) => mw.toLowerCase() + ) + + // Keep the highlight if every word (except for trailing parens) is fully highlighted + // This will also highlight "Central African Republic" when searching for "african central republic", + // but that's probably okay + return withoutTrailingParens + .toLowerCase() + .split(" ") + .every((w) => matchedWordsLowerCase.includes(w)) + }) + .map((highlightEntry) => removeHighlightTags(highlightEntry.value)) + + return pickedEntities ?? [] +} From 87fc67531ffb75b81b86e3df8b3ccb452f9882bf Mon Sep 17 00:00:00 2001 From: Marcel Gerber Date: Mon, 25 Mar 2024 13:59:37 +0100 Subject: [PATCH 3/7] fix(algolia): sort entity names with variant names first, so Algolia synonyms can work --- baker/algolia/indexChartsToAlgolia.ts | 33 ++++++++++++++++++++- packages/@ourworldindata/utils/src/Util.ts | 6 ++++ packages/@ourworldindata/utils/src/index.ts | 1 + site/search/SearchUtils.tsx | 7 +++-- 4 files changed, 43 insertions(+), 4 deletions(-) diff --git a/baker/algolia/indexChartsToAlgolia.ts b/baker/algolia/indexChartsToAlgolia.ts index 1fa897c455c..aec341e7fe5 100644 --- a/baker/algolia/indexChartsToAlgolia.ts +++ b/baker/algolia/indexChartsToAlgolia.ts @@ -8,6 +8,9 @@ import { OwidGdocLinkType, excludeNullish, isNil, + countries, + orderBy, + removeTrailingParenthetical, } from "@ourworldindata/utils" import { MarkdownTextWrap } from "@ourworldindata/components" import { getAnalyticsPageviewsByUrlObj } from "../../db/model/Pageview.js" @@ -20,6 +23,33 @@ const computeScore = (record: Omit): number => { return numRelatedArticles * 500 + views_7d } +const processAvailableEntities = (availableEntities: string[] | null) => { + if (!availableEntities) return [] + + const countriesWithVariantNames = countries + .filter((country) => country.variantNames?.length || country.shortName) + .map((country) => country.name) + + // Algolia is a bit weird with synonyms: + // If we have a synonym "USA" -> "United States", and we search for "USA", + // then it seems that Algolia can only find that within `availableEntities` + // if "USA" is within the first 100-or-so entries of the array. + // So, the easy solution is to sort the entities to ensure that countries + // with variant names are at the top. + // - @marcelgerber, 2024-03-25 + return orderBy( + availableEntities, + [ + (entityName) => + countriesWithVariantNames.includes( + removeTrailingParenthetical(entityName) + ), + (entityName) => entityName, + ], + ["desc", "asc"] + ) +} + const getChartsRecords = async ( knex: db.KnexReadonlyTransaction ): Promise => { @@ -81,7 +111,7 @@ const getChartsRecords = async ( if (c.entityNames.length < 12000) c.entityNames = excludeNullish( JSON.parse(c.entityNames as string) as (string | null)[] - ) + ) as string[] else { console.info( `Chart ${c.id} has too many entities, skipping its entities` @@ -89,6 +119,7 @@ const getChartsRecords = async ( c.entityNames = [] } } + c.entityNames = processAvailableEntities(c.entityNames) c.tags = JSON.parse(c.tags) c.keyChartForTags = JSON.parse(c.keyChartForTags as string).filter( diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index 1f892bafee0..794d202962e 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -1856,6 +1856,12 @@ export function cartesian(matrix: T[][]): T[][] { ) } +// Remove any parenthetical content from _the end_ of a string +// E.g. "Africa (UN)" -> "Africa" +export function removeTrailingParenthetical(str: string): string { + return str.replace(/\s*\(.*\)$/, "") +} + export function isElementHidden(element: Element | null): boolean { if (!element) return false const computedStyle = window.getComputedStyle(element) diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index fccb5e5b961..9e585bbab97 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -120,6 +120,7 @@ export { checkIsDataInsight, checkIsAuthor, cartesian, + removeTrailingParenthetical, isElementHidden, } from "./Util.js" diff --git a/site/search/SearchUtils.tsx b/site/search/SearchUtils.tsx index 6ffd144c390..53ccc001371 100644 --- a/site/search/SearchUtils.tsx +++ b/site/search/SearchUtils.tsx @@ -6,6 +6,7 @@ import { getRegionByNameOrVariantName, regions, escapeRegExp, + removeTrailingParenthetical, } from "@ourworldindata/utils" const allCountryNamesAndVariants = regions.flatMap((c) => [ @@ -40,9 +41,9 @@ export function pickEntitiesForChartHit(hit: IChartHit): EntityName[] { if (highlightEntry.matchLevel === "none") return false // Remove any trailing parentheses, e.g. "Africa (UN)" -> "Africa" - const withoutTrailingParens = removeHighlightTags( - highlightEntry.value - ).replace(/\s?\(.*\)$/, "") + const withoutTrailingParens = removeTrailingParenthetical( + removeHighlightTags(highlightEntry.value) + ) const matchedWordsLowerCase = highlightEntry.matchedWords.map( (mw) => mw.toLowerCase() From ae12f282b698462637e4a0c48e669e026dbbbbe4 Mon Sep 17 00:00:00 2001 From: Marcel Gerber Date: Mon, 25 Mar 2024 16:02:09 +0100 Subject: [PATCH 4/7] enhance(search): refine entity-picking logic --- site/search/SearchUtils.tsx | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/site/search/SearchUtils.tsx b/site/search/SearchUtils.tsx index 53ccc001371..9a7de694ae7 100644 --- a/site/search/SearchUtils.tsx +++ b/site/search/SearchUtils.tsx @@ -5,6 +5,7 @@ import { Region, getRegionByNameOrVariantName, regions, + countries, escapeRegExp, removeTrailingParenthetical, } from "@ourworldindata/utils" @@ -36,26 +37,38 @@ export function pickEntitiesForChartHit(hit: IChartHit): EntityName[] { const pickedEntities = availableEntitiesHighlighted ?.filter((highlightEntry) => { - // Keep the highlight if it is fully highlighted - if (highlightEntry.fullyHighlighted) return true if (highlightEntry.matchLevel === "none") return false // Remove any trailing parentheses, e.g. "Africa (UN)" -> "Africa" - const withoutTrailingParens = removeTrailingParenthetical( + const entityNameWithoutTrailingParens = removeTrailingParenthetical( removeHighlightTags(highlightEntry.value) ) - const matchedWordsLowerCase = highlightEntry.matchedWords.map( - (mw) => mw.toLowerCase() + // The sequence of words that Algolia matched; could be something like ["arab", "united", "republic"] + // which we want to check against the entity name + const matchedSequenceLowerCase = highlightEntry.matchedWords + .join(" ") + .toLowerCase() + + // Pick entity if the matched sequence contains the full entity name + if ( + matchedSequenceLowerCase.includes( + entityNameWithoutTrailingParens.toLowerCase() + ) ) + return true - // Keep the highlight if every word (except for trailing parens) is fully highlighted - // This will also highlight "Central African Republic" when searching for "african central republic", - // but that's probably okay - return withoutTrailingParens - .toLowerCase() - .split(" ") - .every((w) => matchedWordsLowerCase.includes(w)) + const country = countries.find( + (c) => c.name === entityNameWithoutTrailingParens + ) + if (country?.variantNames) { + // Pick entity if the matched sequence contains any of the variant names + return country.variantNames.some((variant) => + matchedSequenceLowerCase.includes(variant.toLowerCase()) + ) + } + + return false }) .map((highlightEntry) => removeHighlightTags(highlightEntry.value)) From e0b5f37ef209e17f8c7de7c13378f0fbc26be798 Mon Sep 17 00:00:00 2001 From: Marcel Gerber Date: Mon, 25 Mar 2024 16:22:12 +0100 Subject: [PATCH 5/7] enhance(search): sort entity names --- site/search/SearchUtils.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/site/search/SearchUtils.tsx b/site/search/SearchUtils.tsx index 9a7de694ae7..c41821ddb55 100644 --- a/site/search/SearchUtils.tsx +++ b/site/search/SearchUtils.tsx @@ -71,6 +71,7 @@ export function pickEntitiesForChartHit(hit: IChartHit): EntityName[] { return false }) .map((highlightEntry) => removeHighlightTags(highlightEntry.value)) + .sort() return pickedEntities ?? [] } From 250bc9135fcbe81cccc22d665fd41f0a85fec837 Mon Sep 17 00:00:00 2001 From: Marcel Gerber Date: Tue, 26 Mar 2024 09:23:24 +0100 Subject: [PATCH 6/7] perf(algolia): optimize chart indexing code a bit --- baker/algolia/indexChartsToAlgolia.ts | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/baker/algolia/indexChartsToAlgolia.ts b/baker/algolia/indexChartsToAlgolia.ts index aec341e7fe5..8d2475ec7d7 100644 --- a/baker/algolia/indexChartsToAlgolia.ts +++ b/baker/algolia/indexChartsToAlgolia.ts @@ -23,12 +23,14 @@ const computeScore = (record: Omit): number => { return numRelatedArticles * 500 + views_7d } -const processAvailableEntities = (availableEntities: string[] | null) => { - if (!availableEntities) return [] - - const countriesWithVariantNames = countries +const countriesWithVariantNames = new Set( + countries .filter((country) => country.variantNames?.length || country.shortName) .map((country) => country.name) +) + +const processAvailableEntities = (availableEntities: string[] | null) => { + if (!availableEntities) return [] // Algolia is a bit weird with synonyms: // If we have a synonym "USA" -> "United States", and we search for "USA", @@ -41,7 +43,7 @@ const processAvailableEntities = (availableEntities: string[] | null) => { availableEntities, [ (entityName) => - countriesWithVariantNames.includes( + countriesWithVariantNames.has( removeTrailingParenthetical(entityName) ), (entityName) => entityName, From d9e4bbadeafa87a6ea1464afd7f3885f736c799b Mon Sep 17 00:00:00 2001 From: Marcel Gerber Date: Wed, 10 Apr 2024 17:20:07 +0200 Subject: [PATCH 7/7] enhance(search): show entities as comma-separated list --- site/search/Search.scss | 20 ++++++++++---------- site/search/SearchPanel.tsx | 13 +++---------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/site/search/Search.scss b/site/search/Search.scss index 149d4760cb7..44bf89ea1e1 100644 --- a/site/search/Search.scss +++ b/site/search/Search.scss @@ -321,20 +321,20 @@ } .search-results__chart-hit-entities { - display: flex; - flex-wrap: wrap; - gap: 4px; + gap: 3px; list-style: none; - font-size: 0.7em; + font-size: 0.8em; li { - background-color: $blue-10; - padding: 4px 8px; - border-radius: 12px; - color: $blue-90; + display: inline; + color: $blue-50; - svg { - margin-right: 4px; + &::after { + content: ", "; + } + + &:last-child::after { + content: ""; } } } diff --git a/site/search/SearchPanel.tsx b/site/search/SearchPanel.tsx index fbe49d30375..5145e5f14d2 100644 --- a/site/search/SearchPanel.tsx +++ b/site/search/SearchPanel.tsx @@ -41,11 +41,7 @@ import { } from "./searchTypes.js" import { EXPLORERS_ROUTE_FOLDER } from "../../explorer/ExplorerConstants.js" import { FontAwesomeIcon } from "@fortawesome/react-fontawesome/index.js" -import { - faHeartBroken, - faLocationDot, - faSearch, -} from "@fortawesome/free-solid-svg-icons" +import { faHeartBroken, faSearch } from "@fortawesome/free-solid-svg-icons" import { DEFAULT_SEARCH_PLACEHOLDER, getIndexName, @@ -118,7 +114,7 @@ function ChartHit({ hit }: { hit: IChartHit }) { [hit.slug, entities] ) const previewUrl = queryStr - ? `/grapher/thumbnail/${hit.slug}${queryStr}` + ? `/grapher/thumbnail/${hit.slug}${queryStr}` // TODO extract to .env : `${BAKED_GRAPHER_URL}/exports/${hit.slug}.svg` useEffect(() => { @@ -163,10 +159,7 @@ function ChartHit({ hit }: { hit: IChartHit }) { {entities.length > 0 && (
    {entities.map((entity) => ( -
  • - - {entity} -
  • +
  • {entity}
  • ))}
)}