Skip to content

Commit

Permalink
Fix multidimensional data pages baking logic
Browse files Browse the repository at this point in the history
* Don't delete mdims in GrapherBaker
* Delete old mdims in MultiDimBaker
* Improve the console output
* Document issue with TSConfig and getting ESNext types
  • Loading branch information
rakyi committed Jan 27, 2025
1 parent 6063099 commit 5d083fe
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 32 deletions.
43 changes: 14 additions & 29 deletions baker/GrapherBaker.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import { renderToHtmlPage } from "../baker/siteRenderers.js"
import {
excludeUndefined,
urlToSlug,
without,
uniq,
keyBy,
compact,
Expand All @@ -17,7 +16,6 @@ import {
BAKED_GRAPHER_URL,
} from "../settings/serverSettings.js"
import * as db from "../db/db.js"
import { glob } from "glob"
import { isPathRedirectedToExplorer } from "../explorerAdminServer/ExplorerRedirects.js"
import {
getPostIdFromSlug,
Expand Down Expand Up @@ -50,9 +48,10 @@ import {
import { getAllImages } from "../db/model/Image.js"
import { logErrorAndMaybeCaptureInSentry } from "../serverUtils/errorLog.js"

import { getTagToSlugMap } from "./GrapherBakingUtils.js"
import { deleteOldGraphers, getTagToSlugMap } from "./GrapherBakingUtils.js"
import { knexRaw } from "../db/db.js"
import { getRelatedChartsForVariable } from "../db/model/Chart.js"
import { getAllMultiDimDataPageSlugs } from "../db/model/MultiDimDataPage.js"
import pMap from "p-map"

const renderDatapageIfApplicable = async (
Expand Down Expand Up @@ -286,28 +285,6 @@ const bakeGrapherPage = async (
imageMetadataDictionary
)
)
console.log(outPath)
}

const deleteOldGraphers = async (bakedSiteDir: string, newSlugs: string[]) => {
// Delete any that are missing from the database
const oldSlugs = glob
.sync(`${bakedSiteDir}/grapher/*.html`)
.map((slug) =>
slug.replace(`${bakedSiteDir}/grapher/`, "").replace(".html", "")
)
const toRemove = without(oldSlugs, ...newSlugs)
// do not delete grapher slugs redirected to explorers
.filter((slug) => !isPathRedirectedToExplorer(`/grapher/${slug}`))
for (const slug of toRemove) {
const path = `${bakedSiteDir}/grapher/${slug}.html`
console.log(`DELETING ${path}`)
fs.unlink(path, (err) =>
err
? console.error(`Error deleting ${path}`, err)
: console.log(`Deleted ${path}`)
)
}
}

export interface BakeSingleGrapherChartArguments {
Expand Down Expand Up @@ -363,7 +340,8 @@ export const bakeAllChangedGrapherPagesAndDeleteRemovedGraphers = async (
ORDER BY cc.slug ASC`
)

const newSlugs = chartsToBake.map((row) => row.slug)
chartsToBake.splice(20) // TODO

await fs.mkdirp(bakedSiteDir + "/grapher")

// Prefetch imageMetadata instead of each grapher page fetching
Expand All @@ -382,7 +360,7 @@ export const bakeAllChangedGrapherPagesAndDeleteRemovedGraphers = async (
}))

const progressBar = new ProgressBar(
"bake grapher page [:bar] :current/:total :elapseds :rate/s :etas :name\n",
"bake grapher page [:bar] :current/:total :elapseds :rate/s :name\n",
{
width: 20,
total: chartsToBake.length + 1,
Expand All @@ -401,11 +379,18 @@ export const bakeAllChangedGrapherPagesAndDeleteRemovedGraphers = async (
async (knex) => await bakeSingleGrapherChart(job, knex),
db.TransactionCloseMode.KeepOpen
)
progressBar.tick({ name: `slug ${job.slug}` })
progressBar.tick({ name: job.slug })
},
{ concurrency: 10 }
)

await deleteOldGraphers(bakedSiteDir, excludeUndefined(newSlugs))
// Multi-dim data pages are baked into the same directory as graphers
// and they are handled separately.
const multiDimSlugs = await getAllMultiDimDataPageSlugs(knex)
const newSlugs = excludeUndefined([
...chartsToBake.map((row) => row.slug),
...multiDimSlugs,
])
await deleteOldGraphers(bakedSiteDir, newSlugs)
progressBar.tick({ name: `✅ Deleted old graphers` })
}
27 changes: 26 additions & 1 deletion baker/GrapherBakingUtils.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import fs from "fs-extra"
import { glob } from "glob"
import * as lodash from "lodash"

import * as db from "../db/db.js"
import md5 from "md5"
import { DbPlainTag, Url } from "@ourworldindata/utils"
import { DbPlainTag, Url, without } from "@ourworldindata/utils"
import { isPathRedirectedToExplorer } from "../explorerAdminServer/ExplorerRedirects.js"

// Splits a grapher URL like https://ourworldindata.org/grapher/soil-lifespans?tab=chart
// into its slug (soil-lifespans) and queryStr (?tab=chart)
Expand Down Expand Up @@ -74,3 +77,25 @@ export async function getSlugForTopicTag(

return slug
}

export async function deleteOldGraphers(
bakedSiteDir: string,
newSlugs: string[]
) {
// Delete any that are missing from the database
const oldSlugs = glob
.sync(`${bakedSiteDir}/grapher/*.html`)
.map((slug) =>
slug.replace(`${bakedSiteDir}/grapher/`, "").replace(".html", "")
)
const toRemove = without(oldSlugs, ...newSlugs)
// do not delete grapher slugs redirected to explorers
.filter((slug) => !isPathRedirectedToExplorer(`/grapher/${slug}`))
for (const slug of toRemove) {
const path = `${bakedSiteDir}/grapher/${slug}.html`
console.log(`DELETING ${path}`)
fs.unlink(path, (err) => {
if (err) console.error(`Error deleting ${path}`, err)
})
}
}
18 changes: 17 additions & 1 deletion baker/MultiDimBaker.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import fs from "fs-extra"
import path from "path"
import ProgressBar from "progress"
import {
ImageMetadata,
MultiDimDataPageConfigPreProcessed,
Expand All @@ -23,7 +24,7 @@ import {
BAKED_BASE_URL,
BAKED_GRAPHER_URL,
} from "../settings/serverSettings.js"
import { getTagToSlugMap } from "./GrapherBakingUtils.js"
import { deleteOldGraphers, getTagToSlugMap } from "./GrapherBakingUtils.js"
import { getVariableMetadata } from "../db/model/Variable.js"
import pMap from "p-map"
import {
Expand All @@ -32,6 +33,7 @@ import {
resolveFaqsForVariable,
} from "./DatapageHelpers.js"
import { logErrorAndMaybeCaptureInSentry } from "../serverUtils/errorLog.js"
import { getAllPublishedChartSlugs } from "../db/model/Chart.js"
import {
getAllMultiDimDataPages,
getMultiDimDataPageBySlug,
Expand Down Expand Up @@ -225,6 +227,14 @@ export const bakeAllMultiDimDataPages = async (
imageMetadata: Record<string, ImageMetadata>
) => {
const multiDimsBySlug = await getAllMultiDimDataPages(knex)
const progressBar = new ProgressBar(
"bake multi-dim page [:bar] :current/:total :elapseds :rate/s :name\n",
{
width: 20,
total: multiDimsBySlug.size + 1,
renderThrottle: 0,
}
)
for (const [slug, row] of multiDimsBySlug.entries()) {
await bakeMultiDimDataPage(
knex,
Expand All @@ -233,5 +243,11 @@ export const bakeAllMultiDimDataPages = async (
row.config,
imageMetadata
)
progressBar.tick({ name: slug })
}
const publishedSlugs = Object.keys(multiDimsBySlug)
const chartSlugs = await getAllPublishedChartSlugs(knex)
const newSlugs = [...publishedSlugs, ...chartSlugs]
await deleteOldGraphers(bakedSiteDir, newSlugs)
progressBar.tick({ name: `✅ Deleted old multi-dim pages` })
}
17 changes: 17 additions & 0 deletions db/model/Chart.ts
Original file line number Diff line number Diff line change
Expand Up @@ -616,3 +616,20 @@ export const getRedirectsByChartId = async (
ORDER BY id ASC`,
[chartId]
)

export async function getAllPublishedChartSlugs(
knex: db.KnexReadonlyTransaction
): Promise<string[]> {
const rows = await db.knexRaw<{
slug: string
}>(
knex,
`-- sql
SELECT cc.slug
FROM charts c
JOIN chart_configs cc ON c.configId = cc.id
WHERE cc.full->>"$.isPublished" = "true"
`
)
return rows.map((row) => row.slug)
}
9 changes: 9 additions & 0 deletions db/model/MultiDimDataPage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ export async function getAllLinkedPublishedMultiDimDataPages(
return rows.map(enrichRow)
}

export async function getAllMultiDimDataPageSlugs(
knex: KnexReadonlyTransaction
): Promise<string[]> {
const rows = await knex<DbPlainMultiDimDataPage>(
MultiDimDataPagesTableName
).select("slug")
return rows.map((row) => row.slug)
}

export const getMultiDimDataPageBySlug = async (
knex: KnexReadonlyTransaction,
slug: string,
Expand Down
18 changes: 17 additions & 1 deletion devTools/tsconfigs/tsconfig.base.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,23 @@
"declaration": true,
"declarationMap": true,

"lib": ["dom", "dom.iterable", "es2020", "es2021", "es2022", "es2023"],
// To get newer APIs like Set.prototype.intersection(), we need to use
// ESNext lib on Node 22 and current version of TypeScript (5.7.2).
// However, using the ESNext option doesn't work with
// @types/[email protected], which we are currently pinned to, to fix an
// unrelated bug with types in Cloudflare Functions, thus we only use
// ESNext.Collection.
// https://github.com/microsoft/TypeScript/issues/59919
// https://developers.cloudflare.com/workers/languages/typescript/#transitive-loading-of-typesnode-overrides-cloudflareworkers-types
"lib": [
"dom",
"dom.iterable",
"es2020",
"es2021",
"es2022",
"es2023",
"ESNext.Collection"
],
// Using es2022 as a `target` caused the following error in wrangler:
// "Uncaught TypeError: PointVector is not a constructor".
// This seems to be related to a change in how classes are compiled in
Expand Down

0 comments on commit 5d083fe

Please sign in to comment.