Skip to content

Commit

Permalink
Revert "Merge pull request #3496 from owid/image-store-caching"
Browse files Browse the repository at this point in the history
This reverts commit 041cdd2, reversing
changes made to 1976d05.
  • Loading branch information
marcelgerber committed Apr 18, 2024
1 parent 041cdd2 commit 9c6abaa
Show file tree
Hide file tree
Showing 13 changed files with 170 additions and 234 deletions.
5 changes: 2 additions & 3 deletions adminSiteServer/apiRouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ import {
getGdocBaseObjectById,
setLinksForGdoc,
setTagsForGdoc,
addImagesToContentGraph,
syncImagesAndAddToContentGraph,
updateGdocContentOnly,
upsertGdoc,
} from "../db/model/Gdoc/GdocFactory.js"
Expand Down Expand Up @@ -2276,7 +2276,6 @@ getRouteNonIdempotentWithRWTransaction(
| undefined

try {
// Beware: if contentSource=gdocs this will update images in the DB+S3 even if the gdoc is published
const gdoc = await getAndLoadGdocById(trx, id, contentSource)

if (!gdoc.published) {
Expand Down Expand Up @@ -2368,7 +2367,7 @@ putRouteWithRWTransaction(apiRouter, "/gdocs/:id", async (req, res, trx) => {
const nextGdoc = gdocFromJSON(req.body)
await nextGdoc.loadState(trx)

await addImagesToContentGraph(trx, nextGdoc)
await syncImagesAndAddToContentGraph(trx, nextGdoc)

await setLinksForGdoc(
trx,
Expand Down
59 changes: 21 additions & 38 deletions baker/SiteBaker.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fs from "fs-extra"
import path from "path"
import { glob } from "glob"
import { keyBy, without, uniq, mapValues, pick, chunk } from "lodash"
import { keyBy, without, uniq, mapValues, pick } from "lodash"
import ProgressBar from "progress"
import * as wpdb from "../db/wpdb.js"
import * as db from "../db/db.js"
Expand Down Expand Up @@ -100,10 +100,7 @@ import {
getVariableMetadata,
getVariableOfDatapageIfApplicable,
} from "../db/model/Variable.js"
import {
gdocFromJSON,
getAllMinimalGdocBaseObjects,
} from "../db/model/Gdoc/GdocFactory.js"
import { getAllMinimalGdocBaseObjects } from "../db/model/Gdoc/GdocFactory.js"
import { getBakePath } from "@ourworldindata/components"
import { GdocAuthor } from "../db/model/Gdoc/GdocAuthor.js"
import { DATA_INSIGHTS_ATOM_FEED_NAME } from "../site/gdocs/utils.js"
Expand Down Expand Up @@ -307,7 +304,6 @@ export class SiteBaker {
picks?: [string[], string[], string[], string[]]
): Promise<PrefetchedAttachments> {
if (!this._prefetchedAttachmentsCache) {
console.log("Prefetching attachments")
const publishedGdocs = await getAllMinimalGdocBaseObjects(knex)
const publishedGdocsDictionary = keyBy(publishedGdocs, "id")

Expand All @@ -332,31 +328,23 @@ export class SiteBaker {

// Includes redirects
const publishedChartsRaw = await mapSlugsToConfigs(knex)
const publishedCharts: LinkedChart[] = []

for (const publishedChartsRawChunk of chunk(
publishedChartsRaw,
20
)) {
await Promise.all(
publishedChartsRawChunk.map(async (chart) => {
const tab = chart.config.tab ?? GrapherTabOption.chart
const datapageIndicator =
await getVariableOfDatapageIfApplicable(
chart.config
)
publishedCharts.push({
originalSlug: chart.slug,
resolvedUrl: `${BAKED_GRAPHER_URL}/${chart.config.slug}`,
tab,
title: chart.config.title || "",
thumbnail: `${BAKED_GRAPHER_EXPORTS_BASE_URL}/${chart.config.slug}.svg`,
indicatorId: datapageIndicator?.id,
tags: [],
})
})
)
}
const publishedCharts: LinkedChart[] = await Promise.all(
publishedChartsRaw.map(async (chart) => {
const tab = chart.config.tab ?? GrapherTabOption.chart
const datapageIndicator =
await getVariableOfDatapageIfApplicable(chart.config)
return {
originalSlug: chart.slug,
resolvedUrl: `${BAKED_GRAPHER_URL}/${chart.config.slug}`,
tab,
queryString: "",
title: chart.config.title || "",
thumbnail: `${BAKED_GRAPHER_EXPORTS_BASE_URL}/${chart.config.slug}.svg`,
indicatorId: datapageIndicator?.id,
tags: [],
}
})
)
const publishedChartsBySlug = keyBy(publishedCharts, "originalSlug")

const publishedChartsWithIndicatorIds = publishedCharts.filter(
Expand Down Expand Up @@ -498,10 +486,7 @@ export class SiteBaker {
// TODO: this transaction is only RW because somewhere inside it we fetch images
async bakeGDocPosts(knex: db.KnexReadWriteTransaction, slugs?: string[]) {
if (!this.bakeSteps.has("gdocPosts")) return
// We don't need to load these as we prefetch all attachments
const publishedGdocs = await db
.getPublishedGdocPosts(knex)
.then((gdocs) => gdocs.map(gdocFromJSON))
const publishedGdocs = await GdocPost.getPublishedGdocPosts(knex)

const gdocsToBake =
slugs !== undefined
Expand Down Expand Up @@ -534,9 +519,7 @@ export class SiteBaker {
publishedGdoc.linkedIndicators = attachments.linkedIndicators

// this is a no-op if the gdoc doesn't have an all-chart block
if ("loadRelatedCharts" in publishedGdoc) {
await publishedGdoc.loadRelatedCharts(knex)
}
await publishedGdoc.loadRelatedCharts(knex)

await publishedGdoc.validate(knex)
if (
Expand Down
44 changes: 17 additions & 27 deletions baker/algolia/algoliaUtils.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import {
SearchIndexName,
} from "../../site/search/searchTypes.js"
import { getAnalyticsPageviewsByUrlObj } from "../../db/model/Pageview.js"
import { GdocPost } from "../../db/model/Gdoc/GdocPost.js"
import { ArticleBlocks } from "../../site/gdocs/components/ArticleBlocks.js"
import React from "react"
import {
Expand All @@ -33,8 +34,6 @@ import {
import { getIndexName } from "../../site/search/searchClient.js"
import { ObjectWithObjectID } from "@algolia/client-search"
import { SearchIndex } from "algoliasearch"
import { match, P } from "ts-pattern"
import { gdocFromJSON } from "../../db/model/Gdoc/GdocFactory.js"

interface TypeAndImportance {
type: PageType
Expand Down Expand Up @@ -147,27 +146,20 @@ function generateGdocRecords(
const getPostTypeAndImportance = (
gdoc: OwidGdocPostInterface
): TypeAndImportance => {
return match(gdoc.content.type)
.with(OwidGdocType.Article, () => ({
type: "article" as const,
importance: 0,
}))
.with(OwidGdocType.AboutPage, () => ({
type: "about" as const,
importance: 1,
}))
.with(
P.union(OwidGdocType.TopicPage, OwidGdocType.LinearTopicPage),
() => ({
type: "topic" as const,
importance: 3,
})
)
.with(P.union(OwidGdocType.Fragment, undefined), () => ({
type: "other" as const,
importance: 0,
}))
.exhaustive()
switch (gdoc.content.type) {
case OwidGdocType.TopicPage:
return { type: "topic", importance: 3 }
case OwidGdocType.LinearTopicPage:
return { type: "topic", importance: 3 }
case OwidGdocType.Fragment:
// this should not happen because we filter out fragments; but we want to have an exhaustive switch/case so we include it
return { type: "other", importance: 0 }
case OwidGdocType.AboutPage:
return { type: "about", importance: 0 }
case OwidGdocType.Article:
case undefined:
return { type: "article", importance: 0 }
}
}

const records: PageRecord[] = []
Expand Down Expand Up @@ -210,11 +202,9 @@ function generateGdocRecords(
// Generate records for countries, WP posts (not including posts that have been succeeded by Gdocs equivalents), and Gdocs
export const getPagesRecords = async (knex: db.KnexReadWriteTransaction) => {
const pageviews = await getAnalyticsPageviewsByUrlObj(knex)
const gdocs = await db
.getPublishedGdocPosts(knex)
.then((gdocs) => gdocs.map(gdocFromJSON) as OwidGdocPostInterface[])

const gdocs = await GdocPost.getPublishedGdocPosts(knex)
const publishedGdocsBySlug = keyBy(gdocs, "slug")
// TODO: the knex instance should be handed down as a parameter
const slugsWithPublishedGdocsSuccessors =
await db.getSlugsWithPublishedGdocsSuccessors(knex)
const postsApi = await getPostsFromSnapshots(knex, undefined, (post) => {
Expand Down
2 changes: 0 additions & 2 deletions baker/algolia/indexToAlgolia.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ const indexToAlgolia = async () => {
await index.replaceAllObjects(records)

await wpdb.singleton.end()

process.exit(0)
}

process.on("unhandledRejection", (e) => {
Expand Down
3 changes: 2 additions & 1 deletion baker/sitemap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import { countryProfileSpecs } from "../site/countryProfileProjects.js"
import { ExplorerAdminServer } from "../explorerAdminServer/ExplorerAdminServer.js"
import { EXPLORERS_ROUTE_FOLDER } from "../explorer/ExplorerConstants.js"
import { ExplorerProgram } from "../explorer/ExplorerProgram.js"
import { GdocPost } from "../db/model/Gdoc/GdocPost.js"
import { getPostsFromSnapshots } from "../db/model/Post.js"
import { calculateDataInsightIndexPageCount } from "../db/model/Gdoc/gdocUtils.js"

Expand Down Expand Up @@ -73,7 +74,7 @@ export const makeSitemap = async (
undefined,
(postrow) => !alreadyPublishedViaGdocsSlugsSet.has(postrow.slug)
)
const gdocPosts = await db.getPublishedGdocPosts(knex)
const gdocPosts = await GdocPost.getPublishedGdocPosts(knex)

const publishedDataInsights = await db.getPublishedDataInsights(knex)
const dataInsightFeedPageCount = calculateDataInsightIndexPageCount(
Expand Down
51 changes: 0 additions & 51 deletions db/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,8 @@ import { registerExitHandler } from "./cleanup.js"
import { keyBy } from "@ourworldindata/utils"
import {
DbChartTagJoin,
DbEnrichedPostGdoc,
DbRawPostGdoc,
ImageMetadata,
MinimalDataInsightInterface,
OwidGdocType,
parsePostsGdocsRow,
} from "@ourworldindata/types"

// Return the first match from a mysql query
Expand Down Expand Up @@ -314,50 +310,3 @@ export const getHomepageId = (
AND published = TRUE`
).then((result) => result?.id)
}

/**
 * Looks up rows of the `images` table by filename.
 *
 * @param knex - transaction the query is run against
 * @param filenames - filenames to match against `images.filename`
 * @returns the matching rows (id, googleId, filename, defaultAlt, updatedAt,
 * originalWidth, originalHeight) keyed by their `filename`; an empty object
 * when `filenames` is empty, in which case no query is issued
 */
export const getImageMetadataByFilenames = async (
    knex: KnexReadonlyTransaction,
    filenames: string[]
): Promise<Record<string, ImageMetadata & { id: number }>> => {
    type ImageRow = ImageMetadata & { id: number }

    // Nothing to look up: return early without touching the database
    if (!filenames.length) return {}

    // The query text is kept flush-left so the string sent to the database
    // stays exactly as it was
    const imageQuery = `-- sql
SELECT
id,
googleId,
filename,
defaultAlt,
updatedAt,
originalWidth,
originalHeight
FROM
images
WHERE filename IN (?)`

    // The whole filename array is passed as the single positional binding
    const imageRows = await knexRaw<ImageRow>(knex, imageQuery, [filenames])
    return keyBy(imageRows, "filename")
}

/**
 * Fetches the published gdoc posts from the `posts_gdocs` table.
 *
 * A row is selected when `published = 1`, its `publishedAt` is not later than
 * the database's `NOW()`, and the `type` field of its JSON `content` is one of
 * Article, LinearTopicPage, TopicPage or AboutPage. Results are ordered by
 * `publishedAt`, newest first.
 *
 * @param knex - transaction the query is run against
 * @returns the selected rows, each converted with `parsePostsGdocsRow`
 */
export const getPublishedGdocPosts = async (
    knex: KnexReadonlyTransaction
): Promise<DbEnrichedPostGdoc[]> => {
    // Gdoc types requested by this query, bound to the `:types` placeholder
    const postTypes = [
        OwidGdocType.Article,
        OwidGdocType.LinearTopicPage,
        OwidGdocType.TopicPage,
        OwidGdocType.AboutPage,
    ]

    const rawRows = await knexRaw<DbRawPostGdoc>(
        knex,
        `-- sql
SELECT *
FROM posts_gdocs
WHERE published = 1
AND content ->> '$.type' IN (:types)
AND publishedAt <= NOW()
ORDER BY publishedAt DESC`,
        { types: postTypes }
    )

    // Convert every raw DB row into its enriched form
    return rawRows.map(parsePostsGdocsRow)
}
8 changes: 5 additions & 3 deletions db/model/Gdoc/GdocAuthor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,16 @@ export class GdocAuthor extends GdocBase implements OwidGdocAuthorInterface {
return blocks
}

// TODO: this transaction is only RW because somewhere inside it we fetch images
_loadSubclassAttachments = (
knex: db.KnexReadonlyTransaction
knex: db.KnexReadWriteTransaction
): Promise<void> => {
return this.loadLatestWorkImages(knex)
}

// TODO: this transaction is only RW because somewhere inside it we fetch images
loadLatestWorkImages = async (
knex: db.KnexReadonlyTransaction
knex: db.KnexReadWriteTransaction
): Promise<void> => {
if (!this.content.title) return

Expand All @@ -75,7 +77,7 @@ export class GdocAuthor extends GdocBase implements OwidGdocAuthorInterface {
// Load the image metadata for the latest work images, including the
// default featured image which is used as a fallback in the entire
// research and writing block
return super.loadImageMetadataFromDB(knex, [
return super.loadImageMetadata(knex, [
...latestWorkImageFilenames,
DEFAULT_GDOC_FEATURED_IMAGE,
])
Expand Down
Loading

0 comments on commit 9c6abaa

Please sign in to comment.