Skip to content

Commit

Permalink
Optimize latest data insights data (#3822)
Browse files Browse the repository at this point in the history
  • Loading branch information
rakyi authored Jul 30, 2024
1 parent 6994890 commit d126305
Show file tree
Hide file tree
Showing 10 changed files with 95 additions and 64 deletions.
23 changes: 13 additions & 10 deletions baker/SiteBaker.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ import {
import {
gdocFromJSON,
getAllMinimalGdocBaseObjects,
getAndLoadPublishedDataInsights,
getLatestDataInsights,
} from "../db/model/Gdoc/GdocFactory.js"
import { getBakePath } from "@ourworldindata/components"
import { GdocAuthor, getMinimalAuthors } from "../db/model/Gdoc/GdocAuthor.js"
Expand Down Expand Up @@ -789,9 +789,10 @@ export class SiteBaker {
// TODO: this transaction is only RW because somewhere inside it we fetch images
private async bakeDataInsights(knex: db.KnexReadWriteTransaction) {
if (!this.bakeSteps.has("dataInsights")) return
const latestDataInsights = await getAndLoadPublishedDataInsights(knex, {
limit: 7,
})
const {
dataInsights: latestDataInsights,
imageMetadata: latestDataInsightsImageMetadata,
} = await getLatestDataInsights(knex)
const publishedDataInsights =
await GdocDataInsight.getPublishedDataInsights(knex)

Expand All @@ -803,13 +804,11 @@ export class SiteBaker {
dataInsight.linkedChartSlugs.grapher,
dataInsight.linkedChartSlugs.explorer,
])
// Not used just yet
// dataInsight.linkedAuthors = attachments.linkedAuthors
dataInsight.linkedDocuments = attachments.linkedDocuments
dataInsight.imageMetadata = Object.assign(
attachments.imageMetadata,
...latestDataInsights.map((insight) => insight.imageMetadata)
)
dataInsight.imageMetadata = {
...attachments.imageMetadata,
...latestDataInsightsImageMetadata,
}
dataInsight.linkedCharts = {
...attachments.linkedCharts.graphers,
...attachments.linkedCharts.explorers,
Expand Down Expand Up @@ -837,6 +836,10 @@ export class SiteBaker {
`Error baking gdoc post with id "${dataInsight.id}" and slug "${dataInsight.slug}": ${e}`
)
}
// We don't need the latest data insights nor their images in the
// feed later, when we render the list of all data insights.
dataInsight.latestDataInsights = []
dataInsight.imageMetadata = attachments.imageMetadata
}

const totalPageCount = calculateDataInsightIndexPageCount(
Expand Down
4 changes: 2 additions & 2 deletions db/model/Gdoc/GdocBase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ import {
} from "../Variable.js"
import { createLinkFromUrl } from "../Link.js"
import {
LatestDataInsight,
LinkedAuthor,
OwidGdoc,
OwidGdocContent,
OwidGdocType,
} from "@ourworldindata/types"
import { GdocDataInsight } from "./GdocDataInsight.js"

export class GdocBase implements OwidGdocBaseInterface {
id!: string
Expand All @@ -77,7 +77,7 @@ export class GdocBase implements OwidGdocBaseInterface {
linkedCharts: Record<string, LinkedChart> = {}
linkedIndicators: Record<number, LinkedIndicator> = {}
linkedDocuments: Record<string, OwidGdocMinimalPostInterface> = {}
latestDataInsights: GdocDataInsight[] = []
latestDataInsights: LatestDataInsight[] = []
_omittableFields: string[] = []

constructor(id?: string) {
Expand Down
37 changes: 10 additions & 27 deletions db/model/Gdoc/GdocDataInsight.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,33 @@ import {
OwidGdocMinimalPostInterface,
OwidGdocBaseInterface,
excludeNullish,
LatestDataInsight,
} from "@ourworldindata/utils"
import { GdocBase } from "./GdocBase.js"
import * as db from "../../../db/db.js"
import {
getAndLoadPublishedDataInsights,
getAndLoadPublishedDataInsightsPage,
getLatestDataInsights,
} from "./GdocFactory.js"

export class GdocDataInsight
extends GdocBase
implements OwidGdocDataInsightInterface
{
content!: OwidGdocDataInsightContent
private shouldLoadLatestDataInsights: boolean

constructor(id?: string, shouldLoadLatestDataInsights: boolean = false) {
constructor(id?: string) {
super(id)
this.shouldLoadLatestDataInsights = shouldLoadLatestDataInsights
}

static create(
obj: OwidGdocBaseInterface,
shouldLoadLatestDataInsights: boolean = false
): GdocDataInsight {
const gdoc = new GdocDataInsight(
undefined,
shouldLoadLatestDataInsights
)
static create(obj: OwidGdocBaseInterface): GdocDataInsight {
const gdoc = new GdocDataInsight(undefined)
Object.assign(gdoc, obj)
return gdoc
}

linkedDocuments: Record<string, OwidGdocMinimalPostInterface> = {}
latestDataInsights: GdocDataInsight[] = []
latestDataInsights: LatestDataInsight[] = []
// TODO: support query parameters in grapher urls so we can track country selections

protected typeSpecificUrls(): string[] {
Expand All @@ -62,20 +55,10 @@ export class GdocDataInsight
knex: db.KnexReadWriteTransaction
): Promise<void> => {
// TODO: refactor these classes to properly use knex - not going to start it now
if (this.shouldLoadLatestDataInsights) {
this.latestDataInsights = await getAndLoadPublishedDataInsights(
knex,
{
limit: 7,
}
)
this.imageMetadata = Object.assign(
this.imageMetadata,
...this.latestDataInsights.map(
(insight) => insight.imageMetadata
)
)
}
const { dataInsights, imageMetadata } =
await getLatestDataInsights(knex)
this.latestDataInsights = dataInsights
this.imageMetadata = Object.assign(this.imageMetadata, imageMetadata)
}

// TODO: this transaction is only RW because somewhere inside it we fetch images
Expand Down
56 changes: 50 additions & 6 deletions db/model/Gdoc/GdocFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ import {
DbPlainTag,
DbRawPostGdoc,
GdocsContentSource,
ImageMetadata,
LatestDataInsight,
OwidGdoc,
OwidGdocBaseInterface,
OwidGdocDataInsightContent,
OwidGdocIndexItem,
OwidGdocMinimalPostInterface,
OwidGdocPublicationContext,
Expand All @@ -21,8 +24,10 @@ import {
checkIsOwidGdocType,
extractGdocIndexItem,
formatDate,
parsePostGdocContent,
parsePostsGdocsRow,
serializePostsGdocsRow,
traverseEnrichedBlock,
} from "@ourworldindata/utils"

import { GdocBase } from "./GdocBase.js"
Expand All @@ -39,6 +44,7 @@ import {
} from "../../db.js"
import { enrichedBlocksToMarkdown } from "./enrichedToMarkdown.js"
import { GdocAuthor } from "./GdocAuthor.js"
import { extractFilenamesFromBlock } from "./gdocUtils.js"
import { fetchImagesFromDriveAndSyncToS3 } from "../Image.js"

export function gdocFromJSON(
Expand Down Expand Up @@ -273,7 +279,7 @@ export async function getAndLoadGdocBySlug(
`No published Google Doc with slug "${slug}" found in the database`
)
}
return loadGdocFromGdocBase(knex, base, undefined, true)
return loadGdocFromGdocBase(knex, base)
}

// TODO: this transaction is only RW because somewhere inside it we fetch images
Expand Down Expand Up @@ -309,8 +315,7 @@ export async function createOrLoadGdocById(
export async function loadGdocFromGdocBase(
knex: KnexReadWriteTransaction,
base: OwidGdocBaseInterface,
contentSource?: GdocsContentSource,
shouldLoadLatestDataInsights?: boolean
contentSource?: GdocsContentSource
): Promise<GdocPost | GdocDataInsight | GdocHomepage | GdocAuthor> {
const type = get(base, "content.type") as unknown
if (!type)
Expand All @@ -334,9 +339,7 @@ export async function loadGdocFromGdocBase(
),
() => GdocPost.create(base)
)
.with(OwidGdocType.DataInsight, () =>
GdocDataInsight.create(base, shouldLoadLatestDataInsights)
)
.with(OwidGdocType.DataInsight, () => GdocDataInsight.create(base))
.with(OwidGdocType.Homepage, () => GdocHomepage.create(base))
.with(OwidGdocType.Author, () => GdocAuthor.create(base))
.exhaustive()
Expand Down Expand Up @@ -404,6 +407,47 @@ export async function getAndLoadPublishedDataInsightsPage(
return await getAndLoadPublishedDataInsights(knex, options)
}

/**
 * Loads the most recently published data insights together with the
 * metadata of the images they reference.
 *
 * Only rows of type `OwidGdocType.DataInsight` that are published and whose
 * `publishedAt` is not in the future are considered; the newest seven are
 * returned, newest first. Just the `id`, `slug`, `publishedAt` and `content`
 * columns are selected, and `content` is parsed before being returned.
 *
 * @param knex - Transaction used for both the gdocs query and the image
 * metadata lookup.
 * @returns The data insights, plus the image metadata returned by
 * `getImageMetadataByFilenames` for every filename found in their bodies.
 */
export async function getLatestDataInsights(
    knex: KnexReadonlyTransaction
): Promise<{
    dataInsights: LatestDataInsight[]
    imageMetadata: Record<string, ImageMetadata>
}> {
    // The continuation lines stay flush-left so the query text is unchanged.
    const latestDataInsightsQuery = `SELECT id, slug, publishedAt, content
FROM posts_gdocs
WHERE type = :type
AND published = 1
AND publishedAt <= NOW()
ORDER BY publishedAt DESC
LIMIT 7`
    const rawRows = await knexRaw<DbRawPostGdoc>(
        knex,
        latestDataInsightsQuery,
        { type: OwidGdocType.DataInsight }
    )

    // Keep every selected column as-is, but swap the raw content for its
    // parsed form.
    const dataInsights = rawRows.map((rawRow) => ({
        ...rawRow,
        content: parsePostGdocContent(
            rawRow.content
        ) as OwidGdocDataInsightContent,
    }))

    // Walk every body block (including nested ones, via
    // traverseEnrichedBlock) and gather the distinct filenames they mention.
    const referencedFilenames = new Set<string>()
    const topLevelBlocks = dataInsights.flatMap(
        (insight) => insight.content.body
    )
    for (const topLevelBlock of topLevelBlocks) {
        traverseEnrichedBlock(topLevelBlock, (visitedBlock) => {
            for (const name of extractFilenamesFromBlock(visitedBlock)) {
                referencedFilenames.add(name)
            }
        })
    }

    const imageMetadata = await getImageMetadataByFilenames(knex, [
        ...referencedFilenames,
    ])
    return { dataInsights, imageMetadata }
}

// TODO: this transaction is only RW because somewhere inside it we fetch images
export async function getAndLoadPublishedGdocPosts(
knex: KnexReadWriteTransaction
Expand Down
13 changes: 5 additions & 8 deletions db/model/Gdoc/GdocHomepage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import {
OwidGdocHomepageMetadata,
} from "@ourworldindata/types"
import { getUniqueTopicCount } from "../../../site/SiteNavigation.js"
import { getAndLoadPublishedDataInsights } from "./GdocFactory.js"
import { getLatestDataInsights } from "./GdocFactory.js"

export class GdocHomepage
extends GdocBase
Expand Down Expand Up @@ -72,12 +72,9 @@ export class GdocHomepage
topicCount: getUniqueTopicCount(),
}

this.latestDataInsights = await getAndLoadPublishedDataInsights(knex, {
limit: 7,
})
this.imageMetadata = Object.assign(
this.imageMetadata,
...this.latestDataInsights.map((insight) => insight.imageMetadata)
)
const { dataInsights, imageMetadata } =
await getLatestDataInsights(knex)
this.latestDataInsights = dataInsights
this.imageMetadata = Object.assign(this.imageMetadata, imageMetadata)
}
}
10 changes: 9 additions & 1 deletion packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ export enum OwidGdocPublicationContext {
listed = "listed",
}

/**
 * Lightweight representation of a data insight as stored in a gdoc's
 * `latestDataInsights` list. It carries only the columns that
 * `getLatestDataInsights` selects (`id`, `slug`, `publishedAt`, `content`)
 * rather than a fully loaded data insight gdoc.
 */
export interface LatestDataInsight {
/** Identifier of the data insight's `posts_gdocs` row. */
id: string
/** Slug of the data insight. */
slug: string
/** Publication timestamp; typed as nullable. */
publishedAt: Date | null
/** Parsed content of the data insight. */
content: OwidGdocDataInsightContent
/**
 * Optional position of the insight. It is not set by
 * `getLatestDataInsights`; presumably assigned by the rendering code --
 * NOTE(review): confirm where this is populated.
 */
index?: number
}

export interface LinkedAuthor {
name: string
slug: string
Expand Down Expand Up @@ -136,7 +144,7 @@ export const DATA_INSIGHTS_INDEX_PAGE_SIZE = 20

export interface OwidGdocDataInsightInterface extends OwidGdocBaseInterface {
content: OwidGdocDataInsightContent
latestDataInsights?: OwidGdocDataInsightInterface[]
latestDataInsights?: LatestDataInsight[]
}

export type MinimalDataInsightInterface = Pick<
Expand Down
1 change: 1 addition & 0 deletions packages/@ourworldindata/types/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ export {
type DetailDictionary,
GdocsContentSource,
type OwidArticleBackportingStatistics,
type LatestDataInsight,
type LinkedAuthor,
type LinkedChart,
OwidGdocLinkType,
Expand Down
4 changes: 2 additions & 2 deletions site/gdocs/OwidGdoc.tsx
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import React, { createContext } from "react"
import ReactDOM from "react-dom"
import {
LatestDataInsight,
LinkedAuthor,
LinkedChart,
LinkedIndicator,
ImageMetadata,
RelatedChart,
OwidGdocType,
OwidGdoc as OwidGdocInterface,
OwidGdocDataInsightInterface,
OwidGdocMinimalPostInterface,
OwidGdocHomepageMetadata,
DbEnrichedLatestWork,
Expand All @@ -29,7 +29,7 @@ export const AttachmentsContext = createContext<{
linkedDocuments: Record<string, OwidGdocMinimalPostInterface>
imageMetadata: Record<string, ImageMetadata>
relatedCharts: RelatedChart[]
latestDataInsights?: OwidGdocDataInsightInterface[]
latestDataInsights?: LatestDataInsight[]
homepageMetadata?: OwidGdocHomepageMetadata
latestWorkLinks?: DbEnrichedLatestWork[]
}>({
Expand Down
6 changes: 1 addition & 5 deletions site/gdocs/components/LatestDataInsights.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,13 @@ import { Button } from "@ourworldindata/components"
import {
EnrichedBlockImage,
OwidEnrichedGdocBlock,
OwidGdocDataInsightInterface,
LatestDataInsight,
} from "@ourworldindata/utils"
import { dataInsightIndexToIdMap } from "../pages/DataInsight.js"
import Image from "./Image.js"
import { ArticleBlocks } from "./ArticleBlocks.js"
import DataInsightDateline from "./DataInsightDateline.js"

export interface LatestDataInsight extends OwidGdocDataInsightInterface {
index?: number
}

export default function LatestDataInsights({
className,
latestDataInsights,
Expand Down
5 changes: 2 additions & 3 deletions site/gdocs/pages/DataInsight.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import cx from "classnames"
import {
LatestDataInsight,
OwidGdocDataInsightInterface,
copyToClipboard,
MinimalTag,
Expand All @@ -15,9 +16,7 @@ import { FontAwesomeIcon } from "@fortawesome/react-fontawesome/index.js"
import { ArticleBlocks } from "../components/ArticleBlocks.js"
import LinkedAuthor from "../components/LinkedAuthor.js"
import DataInsightDateline from "../components/DataInsightDateline.js"
import LatestDataInsights, {
LatestDataInsight,
} from "../components/LatestDataInsights.js"
import LatestDataInsights from "../components/LatestDataInsights.js"
import { AttachmentsContext } from "../OwidGdoc.js"
import { BAKED_BASE_URL } from "../../../settings/clientSettings.js"
import DataInsightsNewsletterBanner from "../../DataInsightsNewsletterBanner.js"
Expand Down

0 comments on commit d126305

Please sign in to comment.