From efefec867efcaab58a12c9ae553130842cb9e8a9 Mon Sep 17 00:00:00 2001 From: Matthieu Bergel Date: Wed, 31 Jan 2024 15:56:40 +0000 Subject: [PATCH] feat(wp): add wp api snapshot to syncPostsToGrapher --- db/syncPostsToGrapher.ts | 76 +++++++++++-------- db/wpdb.ts | 34 ++++++--- .../types/src/dbTypes/Posts.ts | 13 +++- 3 files changed, 78 insertions(+), 45 deletions(-) diff --git a/db/syncPostsToGrapher.ts b/db/syncPostsToGrapher.ts index 8807f4e692c..b66a7a34f4a 100644 --- a/db/syncPostsToGrapher.ts +++ b/db/syncPostsToGrapher.ts @@ -15,6 +15,7 @@ import { import { postsTable, select } from "./model/Post.js" import { PostLink } from "./model/PostLink.js" import { renderTablePress } from "../site/Tablepress.js" +import pMap from "p-map" const zeroDateString = "0000-00-00 00:00:00" @@ -327,39 +328,48 @@ const syncPostsToGrapher = async (): Promise => { .filter((p) => !doesExistInWordpress[p.id]) .map((p) => p.id) - const toInsert = rows.map((post: any) => { - const content = post.post_content as string - const formattingOptions = extractFormattingOptions(content) - const authors: string[] = sortBy( - JSON.parse(post.authors), - (item: { author: string; order: number }) => item.order - ).map((author: { author: string; order: number }) => author.author) - - return { - id: post.ID, - title: post.post_title, - slug: post.post_name.replace(/__/g, "/"), - type: post.post_type, - status: post.post_status, - content: dereferenceTablePressFn( - dereferenceReusableBlocksFn(content) - ), - featured_image: post.featured_image || "", - published_at: - post.post_date_gmt === zeroDateString - ? null - : post.post_date_gmt, - updated_at_in_wordpress: - post.post_modified_gmt === zeroDateString - ? "1970-01-01 00:00:00" - : post.post_modified_gmt, - authors: authors, - excerpt: post.post_excerpt, - created_at_in_wordpress: - post.created_at === zeroDateString ? null : post.created_at, - formattingOptions: formattingOptions, - } - }) as DbEnrichedPost[] + const toInsert = (await pMap( + rows, + async (post: any) => { + console.log("Processing post", post.ID, post.post_title) + const content = post.post_content as string + const formattingOptions = extractFormattingOptions(content) + const authors: string[] = sortBy( + JSON.parse(post.authors), + (item: { author: string; order: number }) => item.order + ).map((author: { author: string; order: number }) => author.author) + + return { + id: post.ID, + title: post.post_title, + slug: post.post_name.replace(/__/g, "/"), + type: post.post_type, + status: post.post_status, + content: dereferenceTablePressFn( + dereferenceReusableBlocksFn(content) + ), + wpApiSnapshot: + post.post_type === "wp_block" + ? await wpdb.getBlockApi(post.ID) + : await wpdb.getPostApiBySlug(post.post_name), + featured_image: post.featured_image || "", + published_at: + post.post_date_gmt === zeroDateString + ? null + : post.post_date_gmt, + updated_at_in_wordpress: + post.post_modified_gmt === zeroDateString + ? "1970-01-01 00:00:00" + : post.post_modified_gmt, + authors: authors, + excerpt: post.post_excerpt, + created_at_in_wordpress: + post.created_at === zeroDateString ? null : post.created_at, + formattingOptions: formattingOptions, + } + }, + { concurrency: 20 } + )) as DbEnrichedPost[] const postLinks = await PostLink.find() const postLinksById = groupBy(postLinks, (link: PostLink) => link.sourceId) diff --git a/db/wpdb.ts b/db/wpdb.ts index fda7cd6406d..41ecf2483b4 100644 --- a/db/wpdb.ts +++ b/db/wpdb.ts @@ -446,9 +446,7 @@ export const getPostIdAndTypeBySlug = async ( return { id: rows[0].ID, type: rows[0].post_type } } -// We might want to cache this as the network of prominent links densifies and -// multiple requests to the same posts are happening. -export const getPostBySlug = async (slug: string): Promise => { +export const getPostApiBySlug = async (slug: string): Promise => { if (!isWordpressAPIEnabled) { throw new JsonError(`Need wordpress API to match slug ${slug}`, 404) } @@ -459,11 +457,19 @@ export const getPostBySlug = async (slug: string): Promise => { const { id, type } = postIdAndType - const postArr = await apiQuery( - `${WP_API_ENDPOINT}/${getEndpointSlugFromType(type)}/${id}` - ) + return apiQuery(`${WP_API_ENDPOINT}/${getEndpointSlugFromType(type)}/${id}`) +} + +// We might want to cache this as the network of prominent links densifies and +// multiple requests to the same posts are happening. +export const getPostBySlug = async (slug: string): Promise => { + if (!isWordpressAPIEnabled) { + throw new JsonError(`Need wordpress API to match slug ${slug}`, 404) + } - return getFullPost(postArr) + const postApi = await getPostApiBySlug(slug) + + return getFullPost(postApi) } // the /revisions endpoint does not send back all the metadata required for @@ -739,9 +745,7 @@ export const getRelatedArticles = async ( ) } -export const getBlockContent = async ( - id: number -): Promise => { +export const getBlockApi = async (id: number): Promise => { if (!isWordpressAPIEnabled) return undefined const query = ` @@ -751,7 +755,15 @@ export const getBlockContent = async ( } } ` - const post = await graphqlQuery(query, { id }) + return graphqlQuery(query, { id }) +} + +export const getBlockContent = async ( + id: number +): Promise => { + if (!isWordpressAPIEnabled) return undefined + + const post = await getBlockApi(id) return post.data?.wpBlock?.content ?? undefined } diff --git a/packages/@ourworldindata/types/src/dbTypes/Posts.ts b/packages/@ourworldindata/types/src/dbTypes/Posts.ts index 5eae3c6c3a4..51f3e436204 100644 --- a/packages/@ourworldindata/types/src/dbTypes/Posts.ts +++ b/packages/@ourworldindata/types/src/dbTypes/Posts.ts @@ -1,6 +1,7 @@ import { WP_PostType, FormattingOptions, + PostRestApi, } from "../wordpressTypes/WordpressTypes.js" import { OwidArticleBackportingStatistics, @@ -27,16 +28,22 @@ export interface DbInsertPost { formattingOptions?: string | null archieml?: string | null archieml_update_statistics?: string | null + wpApiSnapshot?: string | null } export type DbRawPost = Required export type DbEnrichedPost = Omit< DbRawPost, - "authors" | "formattingOptions" | "archieml" | "archieml_update_statistics" + | "authors" + | "formattingOptions" + | "archieml" + | "archieml_update_statistics" + | "wpApiSnapshot" > & { authors: string[] | null formattingOptions: FormattingOptions | null archieml: OwidGdocPostInterface | null archieml_update_statistics: OwidArticleBackportingStatistics | null + wpApiSnapshot: PostRestApi | null } export interface DbRawPostWithGdocPublishStatus extends DbRawPost { isGdocPublished: boolean @@ -69,6 +76,9 @@ export function parsePostRow(postRow: DbRawPost): DbEnrichedPost { archieml_update_statistics: postRow.archieml_update_statistics ? JSON.parse(postRow.archieml_update_statistics) : null, + wpApiSnapshot: postRow.wpApiSnapshot + ? JSON.parse(postRow.wpApiSnapshot) + : null, } } @@ -81,5 +91,6 @@ export function serializePostRow(postRow: DbEnrichedPost): DbRawPost { archieml_update_statistics: JSON.stringify( postRow.archieml_update_statistics ), + wpApiSnapshot: JSON.stringify(postRow.wpApiSnapshot), } }