diff --git a/baker/algolia/configureAlgolia.ts b/baker/algolia/configureAlgolia.ts index 9fa27d01f94..3c87cfc70a9 100644 --- a/baker/algolia/configureAlgolia.ts +++ b/baker/algolia/configureAlgolia.ts @@ -9,7 +9,7 @@ import { ALGOLIA_INDEXING, ALGOLIA_SECRET_KEY, } from "../../settings/serverSettings.js" -import { countries, regions } from "@ourworldindata/utils" +import { countries, regions, excludeUndefined } from "@ourworldindata/utils" import { SearchIndexName } from "../../site/search/searchTypes.js" import { getIndexName } from "../../site/search/searchClient.js" @@ -296,12 +296,6 @@ export const configureAlgolia = async () => { ["solar", "photovoltaic", "photovoltaics", "pv"], ] - // Send all our country variant names to algolia as synonyms - for (const country of countries) { - if (country.variantNames) - synonyms.push([country.name].concat(country.variantNames)) - } - const algoliaSynonyms = synonyms.map((s) => { return { objectID: s.join("-"), @@ -310,6 +304,21 @@ export const configureAlgolia = async () => { } as Synonym }) + // Send all our country variant names to algolia as one-way synonyms + for (const country of countries) { + const alternatives = excludeUndefined([ + country.shortName, + ...(country.variantNames ?? []), + ]) + for (const alternative of alternatives) + algoliaSynonyms.push({ + objectID: `${alternative}->${country.name}`, + type: "oneWaySynonym", + input: alternative, + synonyms: [country.name], + }) + } + await pagesIndex.saveSynonyms(algoliaSynonyms, { replaceExistingSynonyms: true, }) diff --git a/baker/algolia/indexChartsToAlgolia.ts b/baker/algolia/indexChartsToAlgolia.ts index 1fa897c455c..8d2475ec7d7 100644 --- a/baker/algolia/indexChartsToAlgolia.ts +++ b/baker/algolia/indexChartsToAlgolia.ts @@ -8,6 +8,9 @@ import { OwidGdocLinkType, excludeNullish, isNil, + countries, + orderBy, + removeTrailingParenthetical, } from "@ourworldindata/utils" import { MarkdownTextWrap } from "@ourworldindata/components" import { getAnalyticsPageviewsByUrlObj } from "../../db/model/Pageview.js" @@ -20,6 +23,35 @@ const computeScore = (record: Omit): number => { return numRelatedArticles * 500 + views_7d } +const countriesWithVariantNames = new Set( + countries + .filter((country) => country.variantNames?.length || country.shortName) + .map((country) => country.name) +) + +const processAvailableEntities = (availableEntities: string[] | null) => { + if (!availableEntities) return [] + + // Algolia is a bit weird with synonyms: + // If we have a synonym "USA" -> "United States", and we search for "USA", + // then it seems that Algolia can only find that within `availableEntities` + // if "USA" is within the first 100-or-so entries of the array. + // So, the easy solution is to sort the entities to ensure that countries + // with variant names are at the top. + // - @marcelgerber, 2024-03-25 + return orderBy( + availableEntities, + [ + (entityName) => + countriesWithVariantNames.has( + removeTrailingParenthetical(entityName) + ), + (entityName) => entityName, + ], + ["desc", "asc"] + ) +} + const getChartsRecords = async ( knex: db.KnexReadonlyTransaction ): Promise => { @@ -81,7 +113,7 @@ const getChartsRecords = async ( if (c.entityNames.length < 12000) c.entityNames = excludeNullish( JSON.parse(c.entityNames as string) as (string | null)[] - ) + ) as string[] else { console.info( `Chart ${c.id} has too many entities, skipping its entities` @@ -89,6 +121,7 @@ const getChartsRecords = async ( c.entityNames = [] } } + c.entityNames = processAvailableEntities(c.entityNames) c.tags = JSON.parse(c.tags) c.keyChartForTags = JSON.parse(c.keyChartForTags as string).filter( diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index 1f892bafee0..794d202962e 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -1856,6 +1856,12 @@ export function cartesian(matrix: T[][]): T[][] { ) } +// Remove any parenthetical content from _the end_ of a string +// E.g. "Africa (UN)" -> "Africa" +export function removeTrailingParenthetical(str: string): string { + return str.replace(/\s*\(.*\)$/, "") +} + export function isElementHidden(element: Element | null): boolean { if (!element) return false const computedStyle = window.getComputedStyle(element) diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index fccb5e5b961..9e585bbab97 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -120,6 +120,7 @@ export { checkIsDataInsight, checkIsAuthor, cartesian, + removeTrailingParenthetical, isElementHidden, } from "./Util.js" diff --git a/site/search/Search.scss b/site/search/Search.scss index 54dcbd53bf3..66262986d59 100644 --- a/site/search/Search.scss +++ b/site/search/Search.scss @@ -431,6 +431,25 @@ $reset-button-margin: 16px; } } +.search-results__chart-hit-entities { + gap: 3px; + list-style: none; + font-size: 0.8em; + + li { + display: inline; + color: $blue-50; + + &::after { + content: ", "; + } + + &:last-child::after { + content: ""; + } + } +} + /* * Tabs / Filtering **/ diff --git a/site/search/SearchPanel.tsx b/site/search/SearchPanel.tsx index 6abaa0ba910..a7216b969a3 100644 --- a/site/search/SearchPanel.tsx +++ b/site/search/SearchPanel.tsx @@ -7,11 +7,11 @@ import { get, mapValues, isElementHidden, + EntityName, + Url, sortBy, groupBy, uniqBy, - EntityName, - Url, Region, } from "@ourworldindata/utils" import { @@ -66,9 +66,12 @@ import { DEFAULT_GRAPHER_WIDTH, setSelectedEntityNamesParam, } from "@ourworldindata/grapher" +import { + pickEntitiesForChartHit, + extractRegionNamesFromSearchQuery, +} from "./SearchUtils.js" import type { SearchResults as AlgoliaSearchResultsType } from "algoliasearch-helper" import { SiteAnalytics } from "../SiteAnalytics.js" -import { extractRegionNamesFromSearchQuery } from "./SearchUtils.js" const siteAnalytics = new SiteAnalytics() @@ -119,9 +122,24 @@ function ChartHit({ hit }: { hit: IChartHit }) { const [imgLoaded, setImgLoaded] = useState(false) const [imgError, setImgError] = useState(false) + const entities = useMemo( + () => pickEntitiesForChartHit(hit), + // eslint-disable-next-line react-hooks/exhaustive-deps + [hit._highlightResult?.availableEntities] + ) + const queryStr = useMemo(() => getEntityQueryStr(entities), [entities]) + const previewUrl = queryStr + ? `/grapher/thumbnail/${hit.slug}${queryStr}` // TODO extract to .env + : `${BAKED_GRAPHER_URL}/exports/${hit.slug}.svg` + + useEffect(() => { + setImgLoaded(false) + setImgError(false) + }, [previewUrl]) + return ( )} setImgLoaded(true)} onError={() => setImgError(true)} /> @@ -153,6 +172,13 @@ function ChartHit({ hit }: { hit: IChartHit }) { {hit.variantName} + {entities.length > 0 && ( +
    + {entities.map((entity) => ( +
  • {entity}
  • + ))} +
+ )}
) diff --git a/site/search/SearchUtils.tsx b/site/search/SearchUtils.tsx index 2587d56e6f7..c41821ddb55 100644 --- a/site/search/SearchUtils.tsx +++ b/site/search/SearchUtils.tsx @@ -1,8 +1,13 @@ +import { HitAttributeHighlightResult } from "instantsearch.js" +import { IChartHit } from "./searchTypes.js" +import { EntityName } from "@ourworldindata/types" import { Region, getRegionByNameOrVariantName, regions, + countries, escapeRegExp, + removeTrailingParenthetical, } from "@ourworldindata/utils" const allCountryNamesAndVariants = regions.flatMap((c) => [ @@ -22,3 +27,51 @@ export const extractRegionNamesFromSearchQuery = (query: string) => { if (regionNames.length === 0) return null return regionNames.map(getRegionByNameOrVariantName) as Region[] } + +const removeHighlightTags = (text: string) => + text.replace(/<\/?(mark|strong)>/g, "") + +export function pickEntitiesForChartHit(hit: IChartHit): EntityName[] { + const availableEntitiesHighlighted = hit._highlightResult + ?.availableEntities as HitAttributeHighlightResult[] | undefined + + const pickedEntities = availableEntitiesHighlighted + ?.filter((highlightEntry) => { + if (highlightEntry.matchLevel === "none") return false + + // Remove any trailing parentheses, e.g. "Africa (UN)" -> "Africa" + const entityNameWithoutTrailingParens = removeTrailingParenthetical( + removeHighlightTags(highlightEntry.value) + ) + + // The sequence of words that Algolia matched; could be something like ["arab", "united", "republic"] + // which we want to check against the entity name + const matchedSequenceLowerCase = highlightEntry.matchedWords + .join(" ") + .toLowerCase() + + // Pick entity if the matched sequence contains the full entity name + if ( + matchedSequenceLowerCase.includes( + entityNameWithoutTrailingParens.toLowerCase() + ) + ) + return true + + const country = countries.find( + (c) => c.name === entityNameWithoutTrailingParens + ) + if (country?.variantNames) { + // Pick entity if the matched sequence contains any of the variant names + return country.variantNames.some((variant) => + matchedSequenceLowerCase.includes(variant.toLowerCase()) + ) + } + + return false + }) + .map((highlightEntry) => removeHighlightTags(highlightEntry.value)) + .sort() + + return pickedEntities ?? [] +}