diff --git a/.vscode/settings.json b/.vscode/settings.json index 05bdc24a80d..fcfae73aa0d 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -20,5 +20,8 @@ "javascript.preferences.importModuleSpecifierEnding": "js", "[sql]": { "editor.defaultFormatter": "inferrinizzard.prettier-sql-vscode" - } -} \ No newline at end of file + }, + "Prettier-SQL.keywordCase": "upper", + "Prettier-SQL.SQLFlavourOverride": "mysql", + "Prettier-SQL.expressionWidth": 80 +} diff --git a/adminSiteClient/gdocsDeploy.ts b/adminSiteClient/gdocsDeploy.ts index cfb4a34dc82..5d8c4a68d37 100644 --- a/adminSiteClient/gdocsDeploy.ts +++ b/adminSiteClient/gdocsDeploy.ts @@ -28,8 +28,8 @@ export const checkFullDeployFallback = ( * */ export const checkIsLightningUpdate = ( - prevGdoc: DbEnrichedPostGdoc, - nextGdoc: DbEnrichedPostGdoc, + prevGdoc: OwidGdoc, + nextGdoc: OwidGdoc, hasChanges: boolean ) => { if ( @@ -59,6 +59,7 @@ export const checkIsLightningUpdate = ( relatedCharts: true, revisionId: true, updatedAt: true, + markdown: true, createdAt: false, // weird case - can't be updated id: false, // weird case - can't be updated tags: false, // could require updating datapages, though it's currently not possible to have a difference between prevGdoc.tags and nextGdoc.tags @@ -67,7 +68,6 @@ export const checkIsLightningUpdate = ( published: false, // requires an update of the blog roll publishedAt: false, // could require an update of the blog roll slug: false, // requires updating any articles that link to it - markdown: true, } const postlightningPropContentConfigMap: Record< diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 31c3dbb706c..94a476ea1eb 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -124,6 +124,7 @@ import { } from "./routerHelpers.js" import { getPublishedLinksTo } from "../db/model/Link.js" import { + GdocLinkUpdateMode, createGdocAndInsertIntoDb, gdocFromJSON, getAllGdocIndexItemsOrderedByUpdatedAt, @@ -131,6 +132,7 @@ import { getDbEnrichedGdocFromOwidGdoc, getGdocBaseObjectById, loadGdocFromGdocBase, + setLinksForGdoc, setTagsForGdoc, updateGdocContentOnly, upsertGdoc, @@ -215,7 +217,12 @@ const getReferencesByChartId = async ( ) const postGdocsPromise = getGdocsPostReferencesByChartId(chartId, knex) const explorerSlugsPromise = db.queryMysql( - `select distinct explorerSlug from explorer_charts where chartId = ?`, + `SELECT DISTINCT + explorerSlug + FROM + explorer_charts + WHERE + chartId = ?`, [chartId] ) const [postsWordpress, postsGdocs, explorerSlugs] = await Promise.all([ @@ -554,7 +561,12 @@ getRouteWithROTransaction( const pageviewsByUrl = await db.knexRawFirst( trx, - "select * from analytics_pageviews where url = ?", + `-- sql + SELECT * + FROM + analytics_pageviews + WHERE + url = ?`, [`https://ourworldindata.org/grapher/${slug}`] ) @@ -1612,10 +1624,16 @@ apiRouter.patch("/variable-annotations", async (req) => { }) apiRouter.get("/variables.usages.json", async (req) => { - const query = `SELECT variableId, COUNT(DISTINCT chartId) AS usageCount -FROM chart_dimensions -GROUP BY variableId -ORDER BY usageCount DESC` + const query = `-- sql + SELECT + variableId, + COUNT(DISTINCT chartId) AS usageCount + FROM + chart_dimensions + GROUP BY + variableId + ORDER BY + usageCount DESC` const rows = await db.queryMysql(query) @@ -1642,7 +1660,7 @@ getRouteWithROTransaction( const charts = await db.knexRaw( trx, - ` + `-- sql SELECT ${oldChartFieldList} FROM charts JOIN users lastEditedByUser ON lastEditedByUser.id = charts.lastEditedByUserId @@ -1695,7 +1713,7 @@ getRouteWithROTransaction( async (req, res, trx) => { const datasets = await db.knexRaw>( trx, - ` + `-- sql WITH variable_counts AS ( SELECT v.datasetId, @@ -1732,7 +1750,7 @@ getRouteWithROTransaction( Pick >( trx, - ` + `-- sql SELECT dt.datasetId, t.id, t.name FROM dataset_tags dt JOIN tags t ON dt.tagId = t.id ` @@ -1758,7 +1776,7 @@ getRouteWithROTransaction( const dataset = await db.knexRawFirst>( trx, - ` + `-- sql SELECT d.id, d.namespace, d.name, @@ -1801,10 +1819,17 @@ getRouteWithROTransaction( > >( trx, - ` - SELECT v.id, v.name, v.description, v.display, v.catalogPath - FROM variables AS v - WHERE v.datasetId = ? + `-- sql + SELECT + v.id, + v.name, + v.description, + v.display, + v.catalogPath + FROM + VARIABLES AS v + WHERE + v.datasetId = ? `, [datasetId] ) @@ -1818,13 +1843,15 @@ getRouteWithROTransaction( // add all origins const origins: DbRawOrigin[] = await db.knexRaw( trx, - ` - select distinct - o.* - from origins_variables as ov - join origins as o on ov.originId = o.id - join variables as v on ov.variableId = v.id - where v.datasetId = ? + `-- sql + SELECT DISTINCT + o.* + FROM + origins_variables AS ov + JOIN origins AS o ON ov.originId = o.id + JOIN VARIABLES AS v ON ov.variableId = v.id + WHERE + v.datasetId = ? `, [datasetId] ) @@ -1859,7 +1886,7 @@ getRouteWithROTransaction( const charts = await db.knexRaw( trx, - ` + `-- sql SELECT ${oldChartFieldList} FROM charts JOIN chart_dimensions AS cd ON cd.chartId = charts.id @@ -2349,37 +2376,59 @@ deleteRouteWithRWTransaction( apiRouter.get("/posts.json", async (req) => { const raw_rows = await db.queryMysql( `-- sql - with posts_tags_aggregated as ( - select post_id, if(count(tags.id) = 0, json_array(), json_arrayagg(json_object("id", tags.id, "name", tags.name))) as tags - from post_tags - left join tags on tags.id = post_tags.tag_id - group by post_id - ), post_gdoc_slug_successors as ( - select posts.id, if (count(gdocSlugSuccessor.id) = 0, json_array(), json_arrayagg(json_object("id", gdocSlugSuccessor.id, "published", gdocSlugSuccessor.published ))) as gdocSlugSuccessors - from posts - left join posts_gdocs gdocSlugSuccessor on gdocSlugSuccessor.slug = posts.slug - group by posts.id - ) - select - posts.id as id, - posts.title as title, - posts.type as type, - posts.slug as slug, - status, - updated_at_in_wordpress, - posts.authors, - posts_tags_aggregated.tags as tags, - gdocSuccessorId, - gdocSuccessor.published as isGdocSuccessorPublished, - -- posts can either have explict successors via the gdocSuccessorId column - -- or implicit successors if a gdoc has been created that uses the same slug - -- as a Wp post (the gdoc one wins once it is published) - post_gdoc_slug_successors.gdocSlugSuccessors as gdocSlugSuccessors - from posts - left join post_gdoc_slug_successors on post_gdoc_slug_successors.id = posts.id - left join posts_gdocs gdocSuccessor on gdocSuccessor.id = posts.gdocSuccessorId - left join posts_tags_aggregated on posts_tags_aggregated.post_id = posts.id - order by updated_at_in_wordpress desc`, + WITH + posts_tags_aggregated AS ( + SELECT + post_id, + IF( + COUNT(tags.id) = 0, + JSON_ARRAY(), + JSON_ARRAYAGG(JSON_OBJECT("id", tags.id, "name", tags.name)) + ) AS tags + FROM + post_tags + LEFT JOIN tags ON tags.id = post_tags.tag_id + GROUP BY + post_id + ), + post_gdoc_slug_successors AS ( + SELECT + posts.id, + IF( + COUNT(gdocSlugSuccessor.id) = 0, + JSON_ARRAY(), + JSON_ARRAYAGG( + JSON_OBJECT("id", gdocSlugSuccessor.id, "published", gdocSlugSuccessor.published) + ) + ) AS gdocSlugSuccessors + FROM + posts + LEFT JOIN posts_gdocs gdocSlugSuccessor ON gdocSlugSuccessor.slug = posts.slug + GROUP BY + posts.id + ) + SELECT + posts.id AS id, + posts.title AS title, + posts.type AS TYPE, + posts.slug AS slug, + STATUS, + updated_at_in_wordpress, + posts.authors, + posts_tags_aggregated.tags AS tags, + gdocSuccessorId, + gdocSuccessor.published AS isGdocSuccessorPublished, + -- posts can either have explict successors via the gdocSuccessorId column + -- or implicit successors if a gdoc has been created that uses the same slug + -- as a Wp post (the gdoc one wins once it is published) + post_gdoc_slug_successors.gdocSlugSuccessors AS gdocSlugSuccessors + FROM + posts + LEFT JOIN post_gdoc_slug_successors ON post_gdoc_slug_successors.id = posts.id + LEFT JOIN posts_gdocs gdocSuccessor ON gdocSuccessor.id = posts.gdocSuccessorId + LEFT JOIN posts_tags_aggregated ON posts_tags_aggregated.post_id = posts.id + ORDER BY + updated_at_in_wordpress DESC`, [] ) const rows = raw_rows.map((row: any) => ({ @@ -2553,7 +2602,7 @@ apiRouter.put("/deploy", async (req, res) => { triggerStaticBuild(res.locals.user, "Manually triggered deploy") }) -getRouteWithROTransaction(apiRouter, "/gdocs", async (req, res, trx) => { +getRouteWithROTransaction(apiRouter, "/gdocs", (req, res, trx) => { return getAllGdocIndexItemsOrderedByUpdatedAt(trx) }) @@ -2570,12 +2619,7 @@ getRouteNonIdempotentWithRWTransaction( const gdoc = await getAndLoadGdocById(trx, id, contentSource) if (!gdoc.published) { - await updateGdocContentOnly( - trx, - id, - gdoc, - gdoc.enrichedBlockSources.flat() - ) + await updateGdocContentOnly(trx, id, gdoc) } res.set("Cache-Control", "no-store") @@ -2651,15 +2695,14 @@ putRouteWithRWTransaction(apiRouter, "/gdocs/:id", async (req, res, trx) => { } } - await trx.table(PostsGdocsLinksTableName).where({ sourceId: id }).delete() - - if ( - nextGdoc.published && - nextGdoc.links !== undefined && - nextGdoc.links.length > 0 - ) { - await trx.table(PostsGdocsLinksTableName).insert(nextGdoc.links) - } + await setLinksForGdoc( + trx, + nextGdoc.id, + nextGdoc.links, + nextGdoc.published + ? GdocLinkUpdateMode.DeleteAndInsert + : GdocLinkUpdateMode.DeleteOnly + ) //todo #gdocsvalidationserver: run validation before saving published //articles, in addition to the first pass performed in front-end code (see @@ -2681,8 +2724,8 @@ putRouteWithRWTransaction(apiRouter, "/gdocs/:id", async (req, res, trx) => { await upsertGdoc(trx, nextGdoc) const hasChanges = checkHasChanges(prevGdoc, nextGdoc) - const prevJson = getDbEnrichedGdocFromOwidGdoc(prevGdoc) - const nextJson = getDbEnrichedGdocFromOwidGdoc(nextGdoc) + const prevJson = prevGdoc.toJSON() + const nextJson = nextGdoc.toJSON() if (checkIsLightningUpdate(prevJson, nextJson, hasChanges)) { await enqueueLightningChange( res.locals.user, @@ -2727,12 +2770,13 @@ postRouteWithRWTransaction( const { gdocId } = req.params const { tagIds } = req.body - await trx.table(PostsGdocsXTagsTableName).where({ gdocId }).delete() - if (tagIds.length) { - await trx - .table(PostsGdocsXTagsTableName) - .insert(tagIds.map((tagId: number) => ({ gdocId, tagId }))) - } + await setTagsForGdoc( + trx, + gdocId, + tagIds.map((id: number) => { + id + }) + ) return { success: true } } diff --git a/adminSiteServer/mockSiteRouter.tsx b/adminSiteServer/mockSiteRouter.tsx index c8ba6140331..44ea488659d 100644 --- a/adminSiteServer/mockSiteRouter.tsx +++ b/adminSiteServer/mockSiteRouter.tsx @@ -205,7 +205,7 @@ mockSiteRouter.get("/thank-you", async (req, res) => ) mockSiteRouter.get("/data-insights/:pageNumberOrSlug?", async (req, res) => { - return await db.knexReadonlyTransaction(async (knex) => { + return db.knexReadonlyTransaction(async (knex) => { const totalPageCount = calculateDataInsightIndexPageCount( await db .getPublishedDataInsights(knex) diff --git a/baker/DatapageHelpers.ts b/baker/DatapageHelpers.ts index ba6213c42d9..c91534c2a1b 100644 --- a/baker/DatapageHelpers.ts +++ b/baker/DatapageHelpers.ts @@ -12,11 +12,11 @@ import { omitUndefinedValues, } from "@ourworldindata/utils" import { - getAndLoadGdocById, getGdocBaseObjectById, + loadGdocFromGdocBase, } from "../db/model/Gdoc/GdocFactory.js" import { OwidGoogleAuth } from "../db/OwidGoogleAuth.js" -import { GrapherInterface } from "@ourworldindata/types" +import { GrapherInterface, OwidGdocBaseInterface } from "@ourworldindata/types" import { KnexReadonlyTransaction } from "../db/db.js" export const getDatapageDataV2 = async ( @@ -82,7 +82,7 @@ export const getDatapageGdoc = async ( knex: KnexReadonlyTransaction, googleDocEditLinkOrId: string, isPreviewing: boolean -): Promise => { +): Promise => { // Get the google doc id from the datapage JSON file and return early if // none found const isPlainGoogleId = gdocIdRegex.exec(googleDocEditLinkOrId) @@ -101,18 +101,15 @@ export const getDatapageGdoc = async ( // gdoc found in the database, if any. This use case doesn't currently // support images (imageMetadata won't be set). - const datapageGdoc = - isPreviewing && OwidGoogleAuth.areGdocAuthKeysSet() - ? ((await getAndLoadGdocById( - knex, - googleDocId, - GdocsContentSource.Gdocs - )) as OwidGdocPostInterface) - : ((await getGdocBaseObjectById( - knex, - googleDocId, - true - )) as OwidGdocPostInterface) + let datapageGdoc = + (await getGdocBaseObjectById(knex, googleDocId, true)) ?? null + + if (datapageGdoc && isPreviewing && OwidGoogleAuth.areGdocAuthKeysSet()) + datapageGdoc = await loadGdocFromGdocBase( + knex, + datapageGdoc, + GdocsContentSource.Gdocs + ) return datapageGdoc } diff --git a/baker/GrapherBaker.tsx b/baker/GrapherBaker.tsx index 4049259dbf9..690ecdedb3f 100644 --- a/baker/GrapherBaker.tsx +++ b/baker/GrapherBaker.tsx @@ -157,7 +157,10 @@ export async function renderDataPageV2( const gdocIdToFragmentIdToBlock: Record = {} gdocs.forEach((gdoc) => { if (!gdoc) return - const faqs = parseFaqs(gdoc.content.faqs, gdoc.id) + const faqs = parseFaqs( + ("faqs" in gdoc.content && gdoc.content?.faqs) ?? [], + gdoc.id + ) gdocIdToFragmentIdToBlock[gdoc.id] = faqs.faqs }) diff --git a/baker/countryProfiles.tsx b/baker/countryProfiles.tsx index c0f1d2b5183..0a65489212e 100644 --- a/baker/countryProfiles.tsx +++ b/baker/countryProfiles.tsx @@ -95,7 +95,13 @@ export const denormalizeLatestCountryData = async ( const existingVariableIds = ( await db.knexRaw<{ variable_id: number }>( trx, - `select variable_id from country_latest_data where variable_id in (?)`, + `-- sql + SELECT + variable_id + FROM + country_latest_data + WHERE + variable_id IN (?)`, [variableIds] ) ).map((r) => r.variable_id) diff --git a/db/db.ts b/db/db.ts index cb28560df5e..116d67dfe18 100644 --- a/db/db.ts +++ b/db/db.ts @@ -186,8 +186,12 @@ export const getSlugsWithPublishedGdocsSuccessors = async ( return knexRaw( knex, `-- sql - select slug from posts_with_gdoc_publish_status - where isGdocPublished = TRUE` + SELECT + slug + FROM + posts_with_gdoc_publish_status + WHERE + isGdocPublished = TRUE` ).then((rows) => new Set(rows.map((row: any) => row.slug))) } diff --git a/db/model/Chart.ts b/db/model/Chart.ts index b1a6619c9eb..546db307ac4 100644 --- a/db/model/Chart.ts +++ b/db/model/Chart.ts @@ -1,17 +1,6 @@ -// import { -// Entity, -// PrimaryGeneratedColumn, -// Column, -// BaseEntity, -// ManyToOne, -// OneToMany, -// type Relation, -// } from "typeorm" import * as lodash from "lodash" import * as db from "../db.js" import { getDataForMultipleVariables } from "./Variable.js" -// import { User } from "./User.js" -// import { ChartRevision } from "./ChartRevision.js" import { JsonError, KeyChartLevel, @@ -42,25 +31,6 @@ export const PUBLIC_TAG_PARENT_IDS = [ 1505, 1508, 1512, 1510, 1834, 1835, ] -// @Entity("charts") -// export class Chart extends BaseEntity { -// @PrimaryGeneratedColumn() id!: number -// @Column({ type: "json" }) config!: GrapherInterface -// @Column() lastEditedAt!: Date -// @Column() lastEditedByUserId!: number -// @Column({ nullable: true }) publishedAt!: Date -// @Column({ nullable: true }) publishedByUserId!: number -// @Column() createdAt!: Date -// @Column() updatedAt!: Date -// @Column() isExplorable!: boolean - -// @ManyToOne(() => User, (user) => user.lastEditedCharts) -// lastEditedByUser!: Relation -// @ManyToOne(() => User, (user) => user.publishedCharts) -// publishedByUser!: Relation -// @OneToMany(() => ChartRevision, (rev) => rev.chart) -// logs!: Relation -// } // Only considers published charts, because only in that case the mapping slug -> id is unique export async function mapSlugsToIds( knex: db.KnexReadonlyTransaction @@ -101,7 +71,7 @@ export async function mapSlugsToConfigs( return db .knexRaw<{ slug: string; config: string; id: number }>( knex, - ` + `-- sql SELECT csr.slug AS slug, c.config AS config, c.id AS id FROM chart_slug_redirects csr JOIN charts c @@ -134,10 +104,14 @@ export async function getEnrichedChartBySlug( if (!chart) { chart = await db.knexRawFirst( knex, - `select c.* - from chart_slug_redirects csr - join charts c on csr.chart_id = c.id - where csr.slug = ?`, + `-- sql + SELECT + c.* + FROM + chart_slug_redirects csr + JOIN charts c ON csr.chart_id = c.id + WHERE + csr.slug = ?`, [slug] ) } @@ -239,7 +213,13 @@ export async function setChartTags( const parentIds = tags.length ? await db.knexRaw<{ parentId: number }>( knex, - "select parentId from tags where id in (?)", + `-- sql + SELECT + parentId + FROM + tags + WHERE + id IN (?)`, [tags.map((t) => t.id)] ) : [] diff --git a/db/model/Gdoc/GdocBase.ts b/db/model/Gdoc/GdocBase.ts index cb5c5405f65..c49f3aa85a4 100644 --- a/db/model/Gdoc/GdocBase.ts +++ b/db/model/Gdoc/GdocBase.ts @@ -27,6 +27,7 @@ import { DbInsertPostGdocLink, DbPlainTag, formatDate, + omit, } from "@ourworldindata/utils" import { BAKED_GRAPHER_URL } from "../../../settings/serverSettings.js" import { google } from "googleapis" @@ -52,7 +53,7 @@ import { getVariableOfDatapageIfApplicable, } from "../Variable.js" import { createLinkFromUrl } from "../Link.js" -import { OwidGdocContent, OwidGdocType } from "@ourworldindata/types" +import { OwidGdoc, OwidGdocContent, OwidGdocType } from "@ourworldindata/types" import { KnexReadonlyTransaction } from "../../db" export class GdocBase implements OwidGdocBaseInterface { @@ -101,11 +102,6 @@ export class GdocBase implements OwidGdocBaseInterface { protected typeSpecificUrls(): string[] { return [] } - // static create(obj: OwidGdocBaseInterface): GdocBase { - // const gdoc = new GdocBase() - // Object.assign(gdoc, obj) - // return gdoc - // } get enrichedBlockSources(): OwidEnrichedGdocBlock[][] { const enrichedBlockSources: OwidEnrichedGdocBlock[][] = excludeNullish([ @@ -672,7 +668,7 @@ export class GdocBase implements OwidGdocBaseInterface { async loadLinkedDocuments(knex: db.KnexReadonlyTransaction): Promise { const linkedDocuments: OwidGdocMinimalPostInterface[] = - await getMinimalGdocBaseObjectsByIds(knex, this.linkedDocumentIds) + await getMinimalGdocPostsByIds(knex, this.linkedDocumentIds) this.linkedDocuments = keyBy(linkedDocuments, "id") } @@ -831,10 +827,26 @@ export class GdocBase implements OwidGdocBaseInterface { await this._loadSubclassAttachments(knex) await this.validate(knex) } + + toJSON(): OwidGdoc { + // TODO: this function is currently only used to shrink the object a bit + // that is used for the isLightningDeploy check (but not, for example, to + // shrink the object we send over the wire at the /gdoc/:id endpoint). + // My hunch is that we'll want to clean up the class instance vs objects + // divergence a bit in the near future - until then this can stay as is. + return omit(this, [ + "_enrichSubclassContent", + "_filenameProperties", + "_getSubclassEnrichedBlocks", + "_omittableFields", + "_validateSubclass", + ...this._omittableFields, + ]) as any as OwidGdoc + } } // This function would naturally live in GdocFactory but that would create a circular dependency -export async function getMinimalGdocBaseObjectsByIds( +export async function getMinimalGdocPostsByIds( knex: KnexReadonlyTransaction, ids: string[] ): Promise { diff --git a/db/model/Gdoc/GdocFactory.ts b/db/model/Gdoc/GdocFactory.ts index eb558566366..3e0c6131775 100644 --- a/db/model/Gdoc/GdocFactory.ts +++ b/db/model/Gdoc/GdocFactory.ts @@ -3,6 +3,7 @@ import { match, P } from "ts-pattern" import { DATA_INSIGHTS_INDEX_PAGE_SIZE, DbEnrichedPostGdoc, + DbInsertPostGdocLink, DbPlainTag, DbRawPostGdoc, GdocsContentSource, @@ -13,6 +14,7 @@ import { OwidGdocMinimalPostInterface, OwidGdocPublicationContext, OwidGdocType, + PostsGdocsLinksTableName, PostsGdocsTableName, PostsGdocsXTagsTableName, checkIsOwidGdocType, @@ -114,11 +116,11 @@ export async function createGdocAndInsertIntoDb( export async function updateGdocContentOnly( knex: KnexReadonlyTransaction, id: string, - gdoc: OwidGdoc, - markdownContentSource: OwidEnrichedGdocBlock[] + gdoc: GdocPost | GdocDataInsight | GdocHomepage | GdocAuthor ): Promise { let markdown: string | null = gdoc.markdown try { + const markdownContentSource = gdoc.enrichedBlockSources.flat() markdown = enrichedBlocksToMarkdown(markdownContentSource, true) ?? null } catch (e) { console.error("Error when converting content to markdown", e) @@ -434,7 +436,7 @@ export async function loadPublishedGdocAuthors( } satisfies OwidGdocBaseInterface }) const gdocs = await Promise.all( - enrichedRows.map(async (row) => loadGdocFromGdocBase(knex, row)) + enrichedRows.map((row) => loadGdocFromGdocBase(knex, row)) ) return gdocs as GdocAuthor[] } @@ -482,10 +484,30 @@ export async function setTagsForGdoc( gdocId: string, tagIds: Pick[] ): Promise { - await knex.table(PostsGdocsXTagsTableName).where("gdocId", gdocId).delete() + await knex.table(PostsGdocsXTagsTableName).where({ gdocId }).delete() + if (tagIds.length) + await knex + .table(PostsGdocsXTagsTableName) + .insert(tagIds.map(({ id: tagId }) => ({ gdocId, tagId }))) +} + +export enum GdocLinkUpdateMode { + DeleteOnly = "DeleteOnly", + DeleteAndInsert = "DeleteAndInsert", +} + +export async function setLinksForGdoc( + knex: KnexReadWriteTransaction, + gdocId: string, + links: DbInsertPostGdocLink[], + updatedMode: GdocLinkUpdateMode +): Promise { await knex - .table(PostsGdocsXTagsTableName) - .insert(tagIds.map(({ id: tagId }) => ({ gdocId, tagId }))) + .table(PostsGdocsLinksTableName) + .where({ sourceId: gdocId }) + .delete() + if (updatedMode === GdocLinkUpdateMode.DeleteAndInsert && links.length) + await knex.table(PostsGdocsLinksTableName).insert(links) } export function getDbEnrichedGdocFromOwidGdoc( @@ -527,7 +549,6 @@ export async function upsertGdoc( } } -// TODO: export async function getAllGdocIndexItemsOrderedByUpdatedAt( knex: KnexReadonlyTransaction ): Promise { diff --git a/db/model/Gdoc/GdocFaq.ts b/db/model/Gdoc/GdocFaq.ts deleted file mode 100644 index 1f93df50674..00000000000 --- a/db/model/Gdoc/GdocFaq.ts +++ /dev/null @@ -1,64 +0,0 @@ -// import { FaqDictionary, ParseError } from "@ourworldindata/utils" - -// interface InterfaceGdocFaq { -// parsedFaqs: FaqDictionary -// parseErrors: ParseError[] -// faqs: unknown // unparsed -// } - -/** - * This is an unused mock prototype, I think *something* like this could work, - * but we'd need a different code path for registering FAQs through the server - * instead of using the same endpoint for both posts and faqs. - * For now, we'll just use the GdocPost class. - */ -// export class GdocFaq extends GdocBase { -// static table = "posts_gdocs" -// content!: InterfaceGdocFaq - -// constructor(id?: string) { -// super() -// if (id) { -// this.id = id -// } -// } - -// static create(obj: OwidGdocBaseInterface): GdocFaq { -// const gdoc = new GdocFaq() -// Object.assign(gdoc, obj) -// return gdoc -// } -// _omittableFields: string[] = ["content.parseErrors, content.faqs"] - -// _enrichSubclassContent = (content: Record): void => { -// if (content.faqs) { -// const faqResults = parseFaqs(content.faqs, this.id) -// content.parsedFaqs = faqResults.faqs -// content.parseErrors = faqResults.parseErrors -// } -// } - -// _validateSubclass = async (): Promise => { -// const errors: OwidGdocErrorMessage[] = [] - -// for (const parseError of this.content.parseErrors) { -// errors.push({ -// ...parseError, -// property: "faqs", -// type: OwidGdocErrorMessageType.Error, -// }) -// } - -// return errors -// } - -// _getSubclassEnrichedBlocks = (gdoc: this): OwidEnrichedGdocBlock[] => { -// const enrichedBlocks: OwidEnrichedGdocBlock[] = [] - -// for (const faq of Object.values(gdoc.content.parsedFaqs)) { -// enrichedBlocks.push(...faq.content) -// } - -// return enrichedBlocks -// } -// } diff --git a/db/model/Gdoc/GdocPost.ts b/db/model/Gdoc/GdocPost.ts index 788a06f5b49..6f8167b2fe5 100644 --- a/db/model/Gdoc/GdocPost.ts +++ b/db/model/Gdoc/GdocPost.ts @@ -224,7 +224,7 @@ export class GdocPost extends GdocBase implements OwidGdocPostInterface { details: {}, parseErrors: [ { - message: `Details on demand document with id "${GDOCS_DETAILS_ON_DEMAND_ID}" isn't registered and/or published. Please add it via ${ADMIN_BASE_URL}/admin/gdocs`, + message: `Details on demand document with id "${GDOCS_DETAILS_ON_DEMAND_ID}" isn't registered and/or published; or it does not contain a [.details] block. Please add it via ${ADMIN_BASE_URL}/admin/gdocs`, }, ], } @@ -250,13 +250,4 @@ export class GdocPost extends GdocBase implements OwidGdocPostInterface { // but also makes it less of a source of truth when considered in isolation. return getAndLoadPublishedGdocPosts(knex) } - - /** - * Excludes published listed Gdocs with a publication date in the future - */ - static async getListedGdocPosts( - knex: KnexReadonlyTransaction - ): Promise { - return getAndLoadListedGdocPosts(knex) - } } diff --git a/db/model/Image.ts b/db/model/Image.ts index 325fc935694..3671f4c1b23 100644 --- a/db/model/Image.ts +++ b/db/model/Image.ts @@ -17,6 +17,7 @@ import { parseImageRow, DbInsertImage, serializeImageRow, + ImagesTableName, } from "@ourworldindata/utils" import { OwidGoogleAuth } from "../OwidGoogleAuth.js" import { @@ -256,7 +257,7 @@ export async function getImageByFilename( filename: string ): Promise { const image = await knex - .table("images") + .table(ImagesTableName) .where({ filename }) .first() if (!image) return undefined diff --git a/db/model/Link.ts b/db/model/Link.ts index e91407ce1ac..077cf2d282a 100644 --- a/db/model/Link.ts +++ b/db/model/Link.ts @@ -17,11 +17,16 @@ export async function getPublishedLinksTo( const rows = await knexRaw( knex, `-- sql - select posts_gdocs_links.*, posts_gdocs.slug as sourceSlug - from posts_gdocs_links - join posts_gdocs on posts_gdocs_links.source = posts_gdocs.id - where target in (?) and linkType = ? - and published = true + SELECT + posts_gdocs_links.*, + posts_gdocs.slug AS sourceSlug + FROM + posts_gdocs_links + JOIN posts_gdocs ON posts_gdocs_links.source = posts_gdocs.id + WHERE + target IN (?) + AND linkType = ? + AND published = TRUE `, [ids, linkType] ) diff --git a/db/model/Post.ts b/db/model/Post.ts index bd21c3ff60e..7130448a71d 100644 --- a/db/model/Post.ts +++ b/db/model/Post.ts @@ -32,6 +32,7 @@ import { GdocPost } from "./Gdoc/GdocPost.js" import { SiteNavigationStatic } from "../../site/SiteNavigation.js" import { decodeHTML } from "entities" import { RelatedResearchQueryResult } from "../wpdb" +import { getAndLoadListedGdocPosts } from "./Gdoc/GdocFactory.js" export const postsTable = "posts" @@ -265,7 +266,7 @@ const selectHomepagePosts: FilterFnPostRestApi = (post) => export const getBlogIndex = memoize( async (knex: db.KnexReadonlyTransaction): Promise => { await db.getConnection() // side effect: ensure connection is established - const gdocPosts = await GdocPost.getListedGdocPosts(knex) + const gdocPosts = await getAndLoadListedGdocPosts(knex) const wpPosts = await Promise.all( await getPostsFromSnapshots( knex, @@ -457,58 +458,53 @@ export const getRelatedResearchAndWritingForVariable = async ( -- One important complication is that the slugs that are used in posts to -- embed charts can either be the current slugs or old slugs that are redirected -- now. - select - distinct - pl.target as linkTargetSlug, - pl.componentType as componentType, - coalesce(csr.slug, c.slug) as chartSlug, - p.title as title, - p.slug as postSlug, - coalesce(csr.chart_id, c.id) as chartId, - p.authors as authors, - p.featured_image as thumbnail, - coalesce(pv.views_365d, 0) as pageviews, - 'wordpress' as post_source, - (select coalesce(JSON_ARRAYAGG(t.name), JSON_ARRAY()) - from post_tags pt - join tags t on pt.tag_id = t.id - where pt.post_id = p.id - ) as tags - from + SELECT DISTINCT + pl.target AS linkTargetSlug, + pl.componentType AS componentType, + COALESCE(csr.slug, c.slug) AS chartSlug, + p.title AS title, + p.slug AS postSlug, + COALESCE(csr.chart_id, c.id) AS chartId, + p.authors AS authors, + p.featured_image AS thumbnail, + COALESCE(pv.views_365d, 0) AS pageviews, + 'wordpress' AS post_source, + ( + SELECT + COALESCE(JSON_ARRAYAGG(t.name), JSON_ARRAY()) + FROM + post_tags pt + JOIN tags t ON pt.tag_id = t.id + WHERE + pt.post_id = p.id + ) AS tags + FROM posts_links pl - join posts p on - pl.sourceId = p.id - left join charts c on - pl.target = c.slug - left join chart_slug_redirects csr on - pl.target = csr.slug - left join chart_dimensions cd on - cd.chartId = coalesce(csr.chart_id, c.id) - left join analytics_pageviews pv on - pv.url = concat('https://ourworldindata.org/', p.slug ) - left join posts_gdocs pg on - pg.id = p.gdocSuccessorId - left join posts_gdocs pgs on - pgs.slug = p.slug - left join post_tags pt on - pt.post_id = p.id - where + JOIN posts p ON pl.sourceId = p.id + LEFT JOIN charts c ON pl.target = c.slug + LEFT JOIN chart_slug_redirects csr ON pl.target = csr.slug + LEFT JOIN chart_dimensions cd ON cd.chartId = COALESCE(csr.chart_id, c.id) + LEFT JOIN analytics_pageviews pv ON pv.url = CONCAT('https://ourworldindata.org/', p.slug) + LEFT JOIN posts_gdocs pg ON pg.id = p.gdocSuccessorId + LEFT JOIN posts_gdocs pgs ON pgs.slug = p.slug + LEFT JOIN post_tags pt ON pt.post_id = p.id + WHERE -- we want only urls that point to grapher charts pl.linkType = 'grapher' -- componentType src is for those links that matched the anySrcregex (not anyHrefRegex or prominentLinkRegex) -- this means that only the links that are of the iframe kind will be kept - normal a href style links will -- be disregarded - and componentType = 'src' - and cd.variableId = ? - and cd.property in ('x', 'y') -- ignore cases where the indicator is size, color etc - and p.status = 'publish' -- only use published wp posts - and p.type != 'wp_block' - and coalesce(pg.published, 0) = 0 -- ignore posts if the wp post has a published gdoc successor. The - -- coalesce makes sure that if there is no gdoc successor then - -- the filter keeps the post - and coalesce(pgs.published, 0) = 0 -- ignore posts if there is a gdoc post with the same slug that is published - -- this case happens for example for topic pages that are newly created (successorId is null) - -- but that replace an old wordpress page + AND componentType = 'src' + AND cd.variableId = ? + AND cd.property IN ('x', 'y') -- ignore cases where the indicator is size, color etc + AND p.status = 'publish' -- only use published wp posts + AND p.type != 'wp_block' + AND COALESCE(pg.published, 0) = 0 -- ignore posts if the wp post has a published gdoc successor. The + -- coalesce makes sure that if there is no gdoc successor then + -- the filter keeps the post + AND COALESCE(pgs.published, 0) = 0 -- ignore posts if there is a gdoc post with the same slug that is published + -- this case happens for example for topic pages that are newly created (successorId is null) + -- but that replace an old wordpress page `, [variableId] @@ -517,44 +513,41 @@ export const getRelatedResearchAndWritingForVariable = async ( const gdocs_posts: RelatedResearchQueryResult[] = await db.knexRaw( knex, `-- sql - select - distinct - pl.target as linkTargetSlug, - pl.componentType as componentType, - coalesce(csr.slug, c.slug) as chartSlug, - p.content ->> '$.title' as title, - p.slug as postSlug, - coalesce(csr.chart_id, c.id) as chartId, - p.content ->> '$.authors' as authors, - p.content ->> '$."featured-image"' as thumbnail, - coalesce(pv.views_365d, 0) as pageviews, - 'gdocs' as post_source, - (select coalesce(JSON_ARRAYAGG(t.name), JSON_ARRAY()) - from posts_gdocs_x_tags pt - join tags t on pt.tagId = t.id - where pt.gdocId = p.id - ) as tags - from - posts_gdocs_links pl - join posts_gdocs p on - pl.sourceId = p.id - left join charts c on - pl.target = c.slug - left join chart_slug_redirects csr on - pl.target = csr.slug - join chart_dimensions cd on - cd.chartId = coalesce(csr.chart_id, c.id) - left join analytics_pageviews pv on - pv.url = concat('https://ourworldindata.org/', p.slug ) - left join posts_gdocs_x_tags pt on - pt.gdocId = p.id - where - pl.linkType = 'grapher' - and componentType = 'chart' -- this filters out links in tags and keeps only embedded charts - and cd.variableId = ? - and cd.property in ('x', 'y') -- ignore cases where the indicator is size, color etc - and p.published = 1 - and p.content ->> '$.type' != 'fragment'`, + SELECT DISTINCT + pl.target AS linkTargetSlug, + pl.componentType AS componentType, + COALESCE(csr.slug, c.slug) AS chartSlug, + p.content ->> '$.title' AS title, + p.slug AS postSlug, + COALESCE(csr.chart_id, c.id) AS chartId, + p.content ->> '$.authors' AS authors, + p.content ->> '$."featured-image"' AS thumbnail, + COALESCE(pv.views_365d, 0) AS pageviews, + 'gdocs' AS post_source, + ( + SELECT + COALESCE(JSON_ARRAYAGG(t.name), JSON_ARRAY()) + FROM + posts_gdocs_x_tags pt + JOIN tags t ON pt.tagId = t.id + WHERE + pt.gdocId = p.id + ) AS tags + FROM + posts_gdocs_links pl + JOIN posts_gdocs p ON pl.sourceId = p.id + LEFT JOIN charts c ON pl.target = c.slug + LEFT JOIN chart_slug_redirects csr ON pl.target = csr.slug + JOIN chart_dimensions cd ON cd.chartId = COALESCE(csr.chart_id, c.id) + LEFT JOIN analytics_pageviews pv ON pv.url = CONCAT('https://ourworldindata.org/', p.slug) + LEFT JOIN posts_gdocs_x_tags pt ON pt.gdocId = p.id + WHERE + pl.linkType = 'grapher' + AND componentType = 'chart' -- this filters out links in tags and keeps only embedded charts + AND cd.variableId = ? + AND cd.property IN ('x', 'y') -- ignore cases where the indicator is size, color etc + AND p.published = 1 + AND p.content ->> '$.type' != 'fragment'`, [variableId] ) @@ -589,7 +582,7 @@ export const getLatestWorkByAuthor = async ( ): Promise => { const rawLatestWorkLinks: DbRawLatestWork[] = await db.knexRaw( knex, - ` + `-- sql SELECT pg.id, pg.slug,