Skip to content

Commit

Permalink
refactor: bake embeds from content graph
Browse files Browse the repository at this point in the history
  • Loading branch information
mlbrgl committed Feb 19, 2024
1 parent aceadfd commit b940bbc
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 19 deletions.
26 changes: 8 additions & 18 deletions baker/SiteBaker.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import fs from "fs-extra"
import path from "path"
import { glob } from "glob"
import { keyBy, without, uniq, mapValues, pick } from "lodash"
import cheerio from "cheerio"
import ProgressBar from "progress"
import * as wpdb from "../db/wpdb.js"
import * as db from "../db/db.js"
Expand Down Expand Up @@ -86,7 +85,10 @@ import { GdocPost } from "../db/model/Gdoc/GdocPost.js"
import { Image } from "../db/model/Image.js"
import { generateEmbedSnippet } from "../site/viteUtils.js"
import { logErrorAndMaybeSendToBugsnag } from "../serverUtils/errorLog.js"
import { Chart } from "../db/model/Chart.js"
import {
Chart,
getChartEmbedUrlsInPublishedWordpressPosts,
} from "../db/model/Chart.js"
import {
BAKED_BASE_URL,
BAKED_GRAPHER_EXPORTS_BASE_URL,
Expand Down Expand Up @@ -185,23 +187,11 @@ export class SiteBaker {

private async bakeEmbeds() {
if (!this.bakeSteps.has("embeds")) return
// Find all grapher urls used as embeds in all posts on the site
const rows = await wpdb.singleton.query(
`SELECT post_content FROM wp_posts WHERE (post_type='page' OR post_type='post' OR post_type='wp_block') AND post_status='publish'`

// Find all grapher urls used as embeds in all Wordpress posts on the site
const grapherUrls = uniq(
await getChartEmbedUrlsInPublishedWordpressPosts()
)
let grapherUrls = []
for (const row of rows) {
const $ = cheerio.load(row.post_content)
grapherUrls.push(
...$("iframe")
.toArray()
.filter((el) =>
(el.attribs["src"] || "").match(/\/grapher\//)
)
.map((el) => el.attribs["src"].trim())
)
}
grapherUrls = uniq(grapherUrls)

await bakeGrapherUrls(grapherUrls)

Expand Down
56 changes: 55 additions & 1 deletion db/model/Chart.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ import {
RelatedChart,
} from "@ourworldindata/types"
import { OpenAI } from "openai"
import { OPENAI_API_KEY } from "../../settings/serverSettings.js"
import {
BAKED_BASE_URL,
OPENAI_API_KEY,
} from "../../settings/serverSettings.js"

// XXX hardcoded filtering to public parent tags
export const PUBLIC_TAG_PARENT_IDS = [
Expand Down Expand Up @@ -379,3 +382,54 @@ export const getRelatedChartsForVariable = async (
ORDER BY title ASC
`)
}

/**
 * Lists the grapher embed URLs referenced by published Wordpress posts.
 *
 * Selects `posts_links` rows with link type `grapher` and component type
 * `src`, restricted to posts that are published, are not `wp_block`s, and
 * whose slug is not also used by a published, non-fragment gdoc. Each row is
 * turned into `${BAKED_BASE_URL}/<target><queryString>`.
 *
 * The result is not de-duplicated: the query has no DISTINCT, so the same URL
 * is returned once per matching link row.
 *
 * @returns One URL string per matching link row.
 */
export const getChartEmbedUrlsInPublishedWordpressPosts = async (): Promise<
    string[]
> => {
    // String literals in the query use single quotes throughout: double-quoted
    // strings are a MySQL extension and are read as identifiers when the
    // ANSI_QUOTES sql_mode is enabled.
    const rawResult = await db.knexInstance().raw(
        `
        SELECT
            pl.target,
            pl.queryString
        FROM
            posts_links pl
            JOIN posts p ON p.id = pl.sourceId
        WHERE
            pl.linkType = 'grapher'
            AND pl.componentType = 'src'
            AND p.status = 'publish'
            AND p.type != 'wp_block'
            AND p.slug NOT IN (
                -- We want to exclude the slugs of published gdocs, since they override the Wordpress posts
                -- published under the same slugs.
                SELECT
                    slug from posts_gdocs pg
                WHERE
                    pg.slug = p.slug
                    AND pg.content ->> '$.type' <> 'fragment'
                    AND pg.published = 1
            )
        -- Commenting this out since we currently don't do anything with the baked embeds in gdocs posts
        -- see https://github.com/owid/owid-grapher/issues/2992#issuecomment-1934690219
        -- Rename to getChartEmbedUrlsInPublishedPosts if we decide to use this
        -- UNION
        -- SELECT
        --     pgl.target,
        --     pgl.queryString
        -- FROM
        --     posts_gdocs_links pgl
        --     JOIN posts_gdocs pg on pg.id = pgl.sourceId
        -- WHERE
        --     pgl.linkType = 'grapher'
        --     AND pgl.componentType = 'chart'
        --     AND pg.content ->> '$.type' <> 'fragment'
        --     AND pg.published = 1
        `
    )

    // NOTE(review): element 0 of the raw result is taken to be the row array
    // (the usual [rows, fields] shape of the MySQL driver) — confirm.
    const linkRows: { target: string; queryString: string }[] = rawResult[0]

    return linkRows.map(
        ({ target, queryString }) =>
            `${BAKED_BASE_URL}/${target}${queryString}`
    )
}

0 comments on commit b940bbc

Please sign in to comment.