From 5e6c4acca88a20593fc31d17933066e3d6258f57 Mon Sep 17 00:00:00 2001 From: Ike Saunders Date: Fri, 12 Jul 2024 21:54:33 +0000 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=8E=89=20add=20parent=20tags=20from?= =?UTF-8?q?=20tag=20graph=20to=20algolia=20chart=20records?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- baker/algolia/indexChartsToAlgolia.ts | 10 +++++- db/db.ts | 47 +++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/baker/algolia/indexChartsToAlgolia.ts b/baker/algolia/indexChartsToAlgolia.ts index 0813dddf929..86fad186d83 100644 --- a/baker/algolia/indexChartsToAlgolia.ts +++ b/baker/algolia/indexChartsToAlgolia.ts @@ -11,6 +11,7 @@ import { countries, orderBy, removeTrailingParenthetical, + uniq, } from "@ourworldindata/utils" import { MarkdownTextWrap } from "@ourworldindata/components" import { getAnalyticsPageviewsByUrlObj } from "../../db/model/Pageview.js" @@ -161,6 +162,8 @@ const getChartsRecords = async ( const pageviews = await getAnalyticsPageviewsByUrlObj(knex) + const parentTagsByChildName = await db.getParentTagsByChildName(knex) + const records: ChartRecord[] = [] for (const c of parsedRows) { // Our search currently cannot render explorers, so don't index them because @@ -181,6 +184,11 @@ const getChartsRecords = async ( fontSize: 10, // doesn't matter, but is a mandatory field }).plaintext + const parentTags = c.tags.flatMap( + // a chart can be tagged with a tag that isn't in the tag graph + (tag) => parentTagsByChildName[tag] || [] + ) + const record = { objectID: c.id.toString(), chartId: c.id, @@ -192,7 +200,7 @@ const getChartsRecords = async ( numDimensions: parseInt(c.numDimensions), publishedAt: c.publishedAt, updatedAt: c.updatedAt, - tags: c.tags as any as string[], + tags: uniq([...c.tags, ...parentTags]), keyChartForTags: c.keyChartForTags as string[], titleLength: c.title.length, // Number of references to this chart in all our posts and pages diff --git a/db/db.ts b/db/db.ts index f08670e38a1..190e3d259d0 100644 --- a/db/db.ts +++ b/db/db.ts @@ -7,7 +7,7 @@ import { GRAPHER_DB_PORT, } from "../settings/serverSettings.js" import { registerExitHandler } from "./cleanup.js" -import { keyBy } from "@ourworldindata/utils" +import { createTagGraph, keyBy } from "@ourworldindata/utils" import { DbChartTagJoin, ImageMetadata, @@ -26,8 +26,10 @@ import { DbPlainPostGdocLink, OwidGdocLinkType, OwidGdoc, + DbPlainTag, + TagGraphNode, } from "@ourworldindata/types" -import { groupBy } from "lodash" +import { groupBy, uniq } from "lodash" import { gdocFromJSON } from "./model/Gdoc/GdocFactory.js" // Return the first match from a mysql query @@ -525,6 +527,47 @@ export async function getFlatTagGraph(knex: KnexReadonlyTransaction): Promise< return { ...tagGraphByParentId, __rootId: tagGraphRootIdResult.id } } +// DFS through the tag graph and create a map of parent tags for each child tag +// e.g. { "Child": [ "Parent", "Grandparent" ], "Parent": [ "Grandparent" ] } +// parent tags are listed in no particular order +export async function getParentTagsByChildName( + trx: KnexReadonlyTransaction +): Promise> { + const { __rootId, ...flatTagGraph } = await getFlatTagGraph(trx) + const tagGraph = createTagGraph(flatTagGraph, __rootId) + + const tagsById = await knexRaw>( + trx, + `-- sql + SELECT id, name FROM tags` + ).then((tags) => keyBy(tags, "id")) + + const parentTagsByChildName: Record< + DbPlainTag["name"], + DbPlainTag["name"][] + > = {} + + function trackParents(node: TagGraphNode): void { + for (const child of node.children) { + trackParents(child) + } + + const preexistingParents = parentTagsByChildName[node.name] ?? [] + // node.path is an array of tag ids from the root to the current node + // slice to remove the root node and the current node, then map them into tag names + const newParents = node.path.slice(1, -1).map((id) => tagsById[id].name) + + parentTagsByChildName[node.name] = uniq([ + ...preexistingParents, + ...newParents, + ]) + } + + trackParents(tagGraph) + + return parentTagsByChildName +} + export async function updateTagGraph( knex: KnexReadWriteTransaction, tagGraph: FlatTagGraph From 6c031451d08324fd70a0efe5d39b3ac84d50c9da Mon Sep 17 00:00:00 2001 From: Ike Saunders Date: Fri, 12 Jul 2024 22:01:11 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=E2=9C=A8=20use=20knex=20query=20builder?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- db/db.ts | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/db/db.ts b/db/db.ts index 190e3d259d0..0803a183708 100644 --- a/db/db.ts +++ b/db/db.ts @@ -536,11 +536,9 @@ export async function getParentTagsByChildName( const { __rootId, ...flatTagGraph } = await getFlatTagGraph(trx) const tagGraph = createTagGraph(flatTagGraph, __rootId) - const tagsById = await knexRaw>( - trx, - `-- sql - SELECT id, name FROM tags` - ).then((tags) => keyBy(tags, "id")) + const tagsById = await trx("tags") + .select("id", "name") + .then((tags) => keyBy(tags, "id")) const parentTagsByChildName: Record< DbPlainTag["name"],