Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix data pages embedding all image metadata #3451

Merged
merged 7 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions adminSiteClient/gdocsValidation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
OwidGdocErrorMessageType,
OwidGdocType,
checkIsOwidGdocType,
traverseEnrichedBlocks,
traverseEnrichedBlock,
OwidGdocErrorMessageProperty,
OwidGdoc,
checkIsGdocPost,
Expand Down Expand Up @@ -58,7 +58,7 @@ function validateBody(gdoc: OwidGdoc, errors: OwidGdocErrorMessage[]) {
errors.push(getMissingContentPropertyError("body"))
} else {
for (const block of gdoc.content.body) {
traverseEnrichedBlocks(block, (block) => {
traverseEnrichedBlock(block, (block) => {
errors.push(
...block.parseErrors.map((parseError) => ({
message: parseError.message,
Expand Down Expand Up @@ -86,7 +86,7 @@ function validateRefs(
if (gdoc.content.refs.definitions) {
Object.values(gdoc.content.refs.definitions).map((definition) => {
definition.content.map((block) => {
traverseEnrichedBlocks(block, (node) => {
traverseEnrichedBlock(block, (node) => {
if (node.parseErrors.length) {
for (const parseError of node.parseErrors) {
errors.push({
Expand Down
42 changes: 19 additions & 23 deletions baker/GrapherBaker.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import {
keyBy,
mergePartialGrapherConfigs,
compact,
merge,
partition,
} from "@ourworldindata/utils"
import fs from "fs-extra"
Expand All @@ -38,7 +37,6 @@ import {
DimensionProperty,
OwidVariableWithSource,
OwidChartDimensionInterface,
OwidGdocPostInterface,
EnrichedFaq,
FaqEntryData,
FaqDictionary,
Expand Down Expand Up @@ -73,6 +71,15 @@ const renderDatapageIfApplicable = async (

if (!variable) return undefined

// When baking from `bakeSingleGrapherChart`, we cache imageMetadata to avoid fetching every image for every chart
// But when rendering a datapage from the mockSiteRouter we want to be able to fetch imageMetadata on the fly
// And this function is the point in the two paths where it makes sense to do so
if (!imageMetadataDictionary) {
imageMetadataDictionary = await getAllImages(knex).then((images) =>
keyBy(images, "filename")
)
}
ikesau marked this conversation as resolved.
Show resolved Hide resolved

return await renderDataPageV2(
{
variableId: variable.id,
Expand Down Expand Up @@ -168,23 +175,6 @@ export async function renderDataPageV2(
gdocIdToFragmentIdToBlock[gdoc.id] = faqs.faqs
})

const linkedCharts: OwidGdocPostInterface["linkedCharts"] = merge(
{},
...compact(gdocs.map((gdoc) => gdoc?.linkedCharts))
)
const linkedDocuments: OwidGdocPostInterface["linkedDocuments"] = merge(
{},
...compact(gdocs.map((gdoc) => gdoc?.linkedDocuments))
)
const imageMetadata: OwidGdocPostInterface["imageMetadata"] = merge(
{},
imageMetadataDictionary,
...compact(gdocs.map((gdoc) => gdoc?.imageMetadata))
)
const relatedCharts: OwidGdocPostInterface["relatedCharts"] = gdocs.flatMap(
(gdoc) => gdoc?.relatedCharts ?? []
)

const resolvedFaqsResults: EnrichedFaqLookupResult[] = variableMetadata
.presentation?.faqs
? variableMetadata.presentation.faqs.map((faq) => {
Expand Down Expand Up @@ -219,10 +209,6 @@ export async function renderDataPageV2(
}

const faqEntries: FaqEntryData = {
linkedCharts,
linkedDocuments,
imageMetadata,
relatedCharts,
faqs: resolvedFaqs?.flatMap((faq) => faq.enrichedFaq.content) ?? [],
}

Expand Down Expand Up @@ -300,6 +286,15 @@ export async function renderDataPageV2(
datapageData.relatedResearch =
await getRelatedResearchAndWritingForVariable(knex, variableId)

const relatedResearchFilenames = datapageData.relatedResearch
.map((r) => r.imageUrl)
.filter((f): f is string => !!f)

const imageMetadata = lodash.pick(
imageMetadataDictionary,
uniq(relatedResearchFilenames)
)

const tagToSlugMap = await getTagToSlugMap(knex)

return renderToHtmlPage(
Expand All @@ -309,6 +304,7 @@ export async function renderDataPageV2(
baseUrl={BAKED_BASE_URL}
baseGrapherUrl={BAKED_GRAPHER_URL}
isPreviewing={isPreviewing}
imageMetadata={imageMetadata}
faqEntries={faqEntries}
tagToSlugMap={tagToSlugMap}
/>
Expand Down
58 changes: 10 additions & 48 deletions db/model/Gdoc/GdocBase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@ import {
OwidGdocErrorMessage,
OwidGdocErrorMessageType,
excludeNullish,
traverseEnrichedBlocks,
traverseEnrichedBlock,
OwidEnrichedGdocBlock,
Span,
EnrichedBlockResearchAndWritingLink,
traverseEnrichedSpan,
uniq,
identity,
Expand Down Expand Up @@ -42,6 +41,7 @@ import {
import { EXPLORERS_ROUTE_FOLDER } from "../../../explorer/ExplorerConstants.js"
import { match, P } from "ts-pattern"
import {
extractFilenamesFromBlock,
extractUrl,
getAllLinksFromResearchAndWritingBlock,
spansToSimpleString,
Expand Down Expand Up @@ -137,45 +137,7 @@ export class GdocBase implements OwidGdocBaseInterface {

for (const enrichedBlockSource of this.enrichedBlockSources) {
enrichedBlockSource.forEach((block) =>
traverseEnrichedBlocks(block, (item) => {
if ("type" in item) {
if ("filename" in item && item.filename) {
filenames.add(item.filename)
}
if (item.type === "image" && item.smallFilename) {
filenames.add(item.smallFilename)
}
if (item.type === "prominent-link" && item.thumbnail) {
filenames.add(item.thumbnail)
}
if (item.type === "research-and-writing") {
const allLinks =
getAllLinksFromResearchAndWritingBlock(item)
allLinks.forEach(
(link: EnrichedBlockResearchAndWritingLink) => {
if (link.value.filename) {
filenames.add(link.value.filename)
}
}
)
}
if (item.type === "key-insights") {
item.insights.forEach((insight) => {
if (insight.filename) {
filenames.add(insight.filename)
}
})
}
if (item.type === "homepage-intro") {
item.featuredWork.forEach((featuredWork) => {
if (featuredWork.filename) {
filenames.add(featuredWork.filename)
}
})
}
}
return item
})
traverseEnrichedBlock(block, extractFilenamesFromBlock)
)
}

Expand All @@ -187,7 +149,7 @@ export class GdocBase implements OwidGdocBaseInterface {

for (const enrichedBlockSource of this.enrichedBlockSources) {
enrichedBlockSource.forEach((block) =>
traverseEnrichedBlocks(
traverseEnrichedBlock(
block,
(x) => x,
(span) => {
Expand Down Expand Up @@ -220,7 +182,7 @@ export class GdocBase implements OwidGdocBaseInterface {

for (const enrichedBlockSource of this.enrichedBlockSources) {
enrichedBlockSource.forEach((block) =>
traverseEnrichedBlocks(
traverseEnrichedBlock(
block,
(block) => {
const extractedLinks = this.extractLinksFromBlock(block)
Expand Down Expand Up @@ -261,7 +223,7 @@ export class GdocBase implements OwidGdocBaseInterface {
const slugs = new Set<string>()
for (const enrichedBlockSource of this.enrichedBlockSources) {
for (const block of enrichedBlockSource) {
traverseEnrichedBlocks(block, (block) => {
traverseEnrichedBlock(block, (block) => {
if (block.type === "key-indicator") {
slugs.add(urlToSlug(block.datapageUrl))
}
Expand Down Expand Up @@ -295,7 +257,7 @@ export class GdocBase implements OwidGdocBaseInterface {
for (const enrichedBlockSource of this.enrichedBlockSources) {
for (const block of enrichedBlockSource) {
if (hasAllChartsBlock) break
traverseEnrichedBlocks(block, (block) => {
traverseEnrichedBlock(block, (block) => {
if (block.type === "all-charts") {
hasAllChartsBlock = true
}
Expand Down Expand Up @@ -438,7 +400,7 @@ export class GdocBase implements OwidGdocBaseInterface {
.with({ type: "key-insights" }, (block) => {
const links: DbInsertPostGdocLink[] = []

// insights content is traversed by traverseEnrichedBlocks
// insights content is traversed by traverseEnrichedBlock
block.insights.forEach((insight) => {
if (insight.url) {
const insightLink = createLinkFromUrl({
Expand Down Expand Up @@ -535,7 +497,7 @@ export class GdocBase implements OwidGdocBaseInterface {
.with(
{
// no urls directly on any of these blocks
// their children may contain urls, but they'll be addressed by traverseEnrichedBlocks
// their children may contain urls, but they'll be addressed by traverseEnrichedBlock
type: P.union(
"additional-charts",
"align",
Expand Down Expand Up @@ -793,7 +755,7 @@ export class GdocBase implements OwidGdocBaseInterface {
const contentErrors: OwidGdocErrorMessage[] = []
for (const enrichedBlockSource of this.enrichedBlockSources) {
enrichedBlockSource.forEach((block) =>
traverseEnrichedBlocks(block, (block) => {
traverseEnrichedBlock(block, (block) => {
if (block.type === "key-indicator" && block.datapageUrl) {
const slug = urlToSlug(block.datapageUrl)
const linkedChart = this.linkedCharts?.[slug]
Expand Down
4 changes: 2 additions & 2 deletions db/model/Gdoc/archieToEnriched.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import {
EnrichedBlockSimpleText,
lowercaseObjectKeys,
OwidEnrichedGdocBlock,
traverseEnrichedBlocks,
traverseEnrichedBlock,
ALL_CHARTS_ID,
KEY_INSIGHTS_ID,
ENDNOTES_ID,
Expand Down Expand Up @@ -137,7 +137,7 @@ export function generateToc(
const toc: TocHeadingWithTitleSupertitle[] = []

body.forEach((block) =>
traverseEnrichedBlocks(block, (child) => {
traverseEnrichedBlock(block, (child) => {
if (child.type === "heading") {
const { level, text, supertitle } = child
const titleString = spansToSimpleString(text)
Expand Down
92 changes: 92 additions & 0 deletions db/model/Gdoc/gdocUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import {
excludeNullish,
EnrichedBlockResearchAndWritingLink,
DATA_INSIGHTS_INDEX_PAGE_SIZE,
OwidEnrichedGdocBlock,
noop,
} from "@ourworldindata/utils"
import { match, P } from "ts-pattern"
import cheerio from "cheerio"
Expand Down Expand Up @@ -162,3 +164,93 @@ export function calculateDataInsightIndexPageCount(
): number {
return Math.ceil(publishedDataInsightCount / DATA_INSIGHTS_INDEX_PAGE_SIZE)
}

/**
 * Collects the filenames referenced directly by a single enriched block.
 *
 * Block types that carry filenames are handled explicitly:
 * - `image`: `filename` and `smallFilename`
 * - `prominent-link`: `thumbnail`
 * - `video`: `filename`
 * - `research-and-writing`: the `value.filename` of every link returned by
 *   `getAllLinksFromResearchAndWritingBlock`
 * - `key-insights`: the `filename` of every insight
 * - `homepage-intro`: the `filename` of every featured work entry
 *
 * All other block types are listed so that the match stays exhaustive, and
 * contribute nothing. This function does not itself recurse into the block.
 *
 * @param item - The enriched block to inspect.
 * @returns The distinct, truthy filenames found, in first-seen order.
 */
export function extractFilenamesFromBlock(
    item: OwidEnrichedGdocBlock
): string[] {
    const collected = new Set<string>()

    // Single entry point for recording a filename: missing or empty values
    // are ignored, and the Set takes care of de-duplication.
    const record = (candidate?: string | null): void => {
        if (candidate) collected.add(candidate)
    }

    match(item)
        .with({ type: "image" }, (image) => {
            record(image.filename)
            record(image.smallFilename)
        })
        .with({ type: "prominent-link" }, (prominentLink) => {
            record(prominentLink.thumbnail)
        })
        .with({ type: "video" }, (video) => {
            record(video.filename)
        })
        .with({ type: "research-and-writing" }, (researchAndWriting) => {
            const links =
                getAllLinksFromResearchAndWritingBlock(researchAndWriting)
            links.forEach((link: EnrichedBlockResearchAndWritingLink) => {
                record(link.value.filename)
            })
        })
        .with({ type: "key-insights" }, (keyInsights) => {
            keyInsights.insights.forEach((insight) => {
                record(insight.filename)
            })
        })
        .with({ type: "homepage-intro" }, (homepageIntro) => {
            homepageIntro.featuredWork.forEach((work) => {
                record(work.filename)
            })
        })
        .with(
            {
                // None of these block types carry a filename of their own
                type: P.union(
                    "additional-charts",
                    "align",
                    "all-charts",
                    "aside",
                    "blockquote",
                    "callout",
                    "chart-story",
                    "chart",
                    "entry-summary",
                    "expandable-paragraph",
                    "explorer-tiles",
                    "gray-section",
                    "heading",
                    "homepage-search",
                    "horizontal-rule",
                    "html",
                    "key-indicator-collection",
                    "key-indicator",
                    "latest-data-insights",
                    "list",
                    "missing-data",
                    "numbered-list",
                    "pill-row",
                    "pull-quote",
                    "recirc",
                    "scroller",
                    "sdg-grid",
                    "sdg-toc",
                    "side-by-side",
                    "simple-text",
                    "socials",
                    "sticky-left",
                    "sticky-right",
                    "table",
                    "text",
                    "topic-page-intro"
                ),
            },
            noop
        )
        .exhaustive()

    return Array.from(collected)
}
Loading