From ceee9f945c0084670ff92ceb1aa715e7b1d89364 Mon Sep 17 00:00:00 2001 From: Lars Yencken Date: Wed, 24 Jul 2024 16:14:25 +0100 Subject: [PATCH 1/4] Add author pages to sitemap --- baker/sitemap.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/baker/sitemap.ts b/baker/sitemap.ts index c02c799f866..dd4a28bab89 100644 --- a/baker/sitemap.ts +++ b/baker/sitemap.ts @@ -17,6 +17,7 @@ import { EXPLORERS_ROUTE_FOLDER } from "../explorer/ExplorerConstants.js" import { ExplorerProgram } from "../explorer/ExplorerProgram.js" import { getPostsFromSnapshots } from "../db/model/Post.js" import { calculateDataInsightIndexPageCount } from "../db/model/Gdoc/gdocUtils.js" +import { GdocAuthor } from "../db/model/Gdoc/GdocAuthor.js" interface SitemapUrl { loc: string @@ -74,6 +75,7 @@ export const makeSitemap = async ( (postrow) => !alreadyPublishedViaGdocsSlugsSet.has(postrow.slug) ) const gdocPosts = await db.getPublishedGdocPosts(knex) + const authorPages = await GdocAuthor.getPublishedAuthors(knex) const publishedDataInsights = await db.getPublishedDataInsights(knex) const dataInsightFeedPageCount = calculateDataInsightIndexPageCount( @@ -140,6 +142,12 @@ export const makeSitemap = async ( })) ) .concat(explorers.flatMap(explorerToSitemapUrl)) + .concat( + authorPages.map((a) => ({ + loc: urljoin(BAKED_BASE_URL, "team", a.slug), + lastmod: dayjs(a.updatedAt).format("YYYY-MM-DD"), + })) + ) const sitemap = ` From a4fb06e96738f0f1bfc8487575dcb15634f9175b Mon Sep 17 00:00:00 2001 From: Lars Yencken Date: Wed, 24 Jul 2024 18:57:17 +0000 Subject: [PATCH 2/4] :honeybee: Use getMinimalAuthors instead of getPublishedAuthors It doesn't need as many other assets, and fixes the `lastmod` in the sitemap so that it doesn't show the unix epoch time. 
--- baker/sitemap.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/baker/sitemap.ts b/baker/sitemap.ts index dd4a28bab89..8cd4e1b7fcd 100644 --- a/baker/sitemap.ts +++ b/baker/sitemap.ts @@ -17,7 +17,7 @@ import { EXPLORERS_ROUTE_FOLDER } from "../explorer/ExplorerConstants.js" import { ExplorerProgram } from "../explorer/ExplorerProgram.js" import { getPostsFromSnapshots } from "../db/model/Post.js" import { calculateDataInsightIndexPageCount } from "../db/model/Gdoc/gdocUtils.js" -import { GdocAuthor } from "../db/model/Gdoc/GdocAuthor.js" +import { GdocAuthor, getMinimalAuthors } from "../db/model/Gdoc/GdocAuthor.js" interface SitemapUrl { loc: string @@ -75,7 +75,7 @@ export const makeSitemap = async ( (postrow) => !alreadyPublishedViaGdocsSlugsSet.has(postrow.slug) ) const gdocPosts = await db.getPublishedGdocPosts(knex) - const authorPages = await GdocAuthor.getPublishedAuthors(knex) + const authorPages = await getMinimalAuthors(knex) const publishedDataInsights = await db.getPublishedDataInsights(knex) const dataInsightFeedPageCount = calculateDataInsightIndexPageCount( From 8a0522cdd67ab524bc496d469d052701d6deb4a1 Mon Sep 17 00:00:00 2001 From: Lars Yencken Date: Wed, 31 Jul 2024 15:35:34 +0000 Subject: [PATCH 3/4] :bug: Add `updatedAt` to getMinimalAuthors --- baker/sitemap.ts | 2 +- db/model/Gdoc/GdocAuthor.ts | 3 ++- db/model/Gdoc/GdocBase.ts | 3 ++- packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/baker/sitemap.ts b/baker/sitemap.ts index 8cd4e1b7fcd..8ddcd6e39c8 100644 --- a/baker/sitemap.ts +++ b/baker/sitemap.ts @@ -17,7 +17,7 @@ import { EXPLORERS_ROUTE_FOLDER } from "../explorer/ExplorerConstants.js" import { ExplorerProgram } from "../explorer/ExplorerProgram.js" import { getPostsFromSnapshots } from "../db/model/Post.js" import { calculateDataInsightIndexPageCount } from "../db/model/Gdoc/gdocUtils.js" -import { GdocAuthor, getMinimalAuthors } from 
"../db/model/Gdoc/GdocAuthor.js" +import { getMinimalAuthors } from "../db/model/Gdoc/GdocAuthor.js" interface SitemapUrl { loc: string diff --git a/db/model/Gdoc/GdocAuthor.ts b/db/model/Gdoc/GdocAuthor.ts index fabcde052a1..f49d149b349 100644 --- a/db/model/Gdoc/GdocAuthor.ts +++ b/db/model/Gdoc/GdocAuthor.ts @@ -170,7 +170,8 @@ export async function getMinimalAuthors( SELECT slug, content->>'$.title' as name, - content->>'$."featured-image"' as featuredImage + content->>'$."featured-image"' as featuredImage, + updatedAt FROM posts_gdocs WHERE type = 'author' AND published = 1` diff --git a/db/model/Gdoc/GdocBase.ts b/db/model/Gdoc/GdocBase.ts index a58d3c75b55..7170730bd48 100644 --- a/db/model/Gdoc/GdocBase.ts +++ b/db/model/Gdoc/GdocBase.ts @@ -911,7 +911,8 @@ export async function getMinimalAuthorsByNames( SELECT slug, content->>'$.title' AS name, - content->>'$."featured-image"' AS featuredImage + content->>'$."featured-image"' AS featuredImage, + updatedAt FROM posts_gdocs WHERE type = 'author' AND content->>'$.title' in (:names) diff --git a/packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts b/packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts index b416684bda9..7060f22d64b 100644 --- a/packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts +++ b/packages/@ourworldindata/types/src/gdocTypes/Gdoc.ts @@ -32,6 +32,7 @@ export interface LinkedAuthor { name: string slug: string featuredImage: string | null + updatedAt: Date } // A minimal object containing metadata needed for rendering prominent links etc in the client From f02d01f9e25942ff596334b902df674c01573a5c Mon Sep 17 00:00:00 2001 From: Lars Yencken Date: Wed, 31 Jul 2024 17:10:49 +0100 Subject: [PATCH 4/4] :bug: Work around a data issue with posts_gdocs.updatedAt --- db/model/Gdoc/GdocAuthor.ts | 3 ++- db/model/Gdoc/GdocBase.ts | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/db/model/Gdoc/GdocAuthor.ts b/db/model/Gdoc/GdocAuthor.ts index f49d149b349..8f7271c3c3f 100644 --- 
a/db/model/Gdoc/GdocAuthor.ts +++ b/db/model/Gdoc/GdocAuthor.ts @@ -171,7 +171,8 @@ export async function getMinimalAuthors( slug, content->>'$.title' as name, content->>'$."featured-image"' as featuredImage, - updatedAt + -- updatedAt is often set to the unix epoch instead of null + COALESCE(NULLIF(updatedAt, '1970-01-01'), createdAt) updatedAt FROM posts_gdocs WHERE type = 'author' AND published = 1` diff --git a/db/model/Gdoc/GdocBase.ts b/db/model/Gdoc/GdocBase.ts index 7170730bd48..b63ae07fcc7 100644 --- a/db/model/Gdoc/GdocBase.ts +++ b/db/model/Gdoc/GdocBase.ts @@ -912,7 +912,8 @@ export async function getMinimalAuthorsByNames( slug, content->>'$.title' AS name, content->>'$."featured-image"' AS featuredImage, - updatedAt + -- updatedAt is often set to the unix epoch instead of null + COALESCE(NULLIF(updatedAt, '1970-01-01'), createdAt) updatedAt FROM posts_gdocs WHERE type = 'author' AND content->>'$.title' in (:names)