Skip to content

Commit

Permalink
✨ use a uuid for cloudflareId, fix algolia indexing
Browse files Browse the repository at this point in the history
  • Loading branch information
ikesau committed Dec 10, 2024
1 parent 9ba6495 commit 24576e2
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 36 deletions.
60 changes: 50 additions & 10 deletions baker/algolia/utils/pages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ import {
PostRestApi,
DbPlainTag,
OwidGdocPostInterface,
getThumbnailPath,
ARCHVED_THUMBNAIL_FILENAME,
DEFAULT_GDOC_FEATURED_IMAGE,
DEFAULT_THUMBNAIL_FILENAME,
DbEnrichedImage,
} from "@ourworldindata/utils"
import { formatPost } from "../../formatWordpressPost.js"
import ReactDOMServer from "react-dom/server.js"
Expand All @@ -40,6 +40,11 @@ import { match, P } from "ts-pattern"
import { gdocFromJSON } from "../../../db/model/Gdoc/GdocFactory.js"
import { formatUrls } from "../../../site/formatting.js"
import { TypeAndImportance } from "./types.js"
import {
BAKED_BASE_URL,
CLOUDFLARE_IMAGES_URL,
} from "../../../settings/clientSettings.js"
import { logErrorAndMaybeSendToBugsnag } from "../../../serverUtils/errorLog.js"

const computePageScore = (record: Omit<PageRecord, "score">): number => {
const { importance, views_7d } = record
Expand Down Expand Up @@ -155,19 +160,38 @@ async function generateWordpressRecords(
return records
}

function getGdocThumbnailUrl(gdoc: OwidGdocPostInterface): string {
const getThumbnailUrl = (
gdoc: OwidGdocPostInterface,
cloudflareImages: Record<string, DbEnrichedImage>
): string => {
if (gdoc.content["deprecation-notice"]) {
return `/${ARCHVED_THUMBNAIL_FILENAME}`
return `${BAKED_BASE_URL}/${ARCHVED_THUMBNAIL_FILENAME}`
}

if (!gdoc.content["featured-image"]) {
return `${BAKED_BASE_URL}/${DEFAULT_GDOC_FEATURED_IMAGE}`
}
if (gdoc.content["featured-image"]) {
return getThumbnailPath(gdoc.content["featured-image"])

const thumbnailFilename = gdoc.content["featured-image"]
const cloudflareId = cloudflareImages[thumbnailFilename]?.cloudflareId

if (!cloudflareId) {
void logErrorAndMaybeSendToBugsnag(
new Error(
`Gdoc ${gdoc.id} has no cloudflare image with filename ${thumbnailFilename}`
)
)
// an empty thumbnailUrl is falsy, so the search page skips rendering an image for this hit
return ""
}
return `/images/published/${DEFAULT_GDOC_FEATURED_IMAGE}`

return `${CLOUDFLARE_IMAGES_URL}/${cloudflareId}/w=512`
}

function generateGdocRecords(
gdocs: OwidGdocPostInterface[],
pageviews: Record<string, RawPageview>
pageviews: Record<string, RawPageview>,
cloudflareImagesByFilename: Record<string, DbEnrichedImage>
): PageRecord[] {
const getPostTypeAndImportance = (
gdoc: OwidGdocPostInterface
Expand Down Expand Up @@ -211,7 +235,8 @@ function generateGdocRecords(
const chunks = generateChunksFromHtmlText(renderedPostContent)
const postTypeAndImportance = getPostTypeAndImportance(gdoc)
let i = 0
const thumbnailUrl = getGdocThumbnailUrl(gdoc)

const thumbnailUrl = getThumbnailUrl(gdoc, cloudflareImagesByFilename)

for (const chunk of chunks) {
const record = {
Expand Down Expand Up @@ -266,7 +291,15 @@ export const getPagesRecords = async (knex: db.KnexReadonlyTransaction) => {
pageviews,
knex
)
const gdocsRecords = generateGdocRecords(gdocs, pageviews)
const cloudflareImagesByFilename = await db
.getCloudflareImages(knex)
.then((images) => keyBy(images, "filename"))

const gdocsRecords = generateGdocRecords(
gdocs,
pageviews,
cloudflareImagesByFilename
)

return [...countryRecords, ...wordpressRecords, ...gdocsRecords]
}
Expand Down Expand Up @@ -317,6 +350,9 @@ export async function indexIndividualGdocPost(
}
const index = client.initIndex(getIndexName(SearchIndexName.Pages))
const pageviews = await getAnalyticsPageviewsByUrlObj(knex)
const cloudflareImagesByFilename = await db
.getCloudflareImages(knex)
.then((images) => keyBy(images, "filename"))
const existingPageviews = pageviews[`/${indexedSlug}`]
const pageviewsForGdoc = {
[gdoc.slug]: existingPageviews || {
Expand All @@ -327,7 +363,11 @@ export async function indexIndividualGdocPost(
url: gdoc.slug,
},
}
const records = generateGdocRecords([gdoc], pageviewsForGdoc)
const records = generateGdocRecords(
[gdoc],
pageviewsForGdoc,
cloudflareImagesByFilename
)

const existingRecordsForPost: ObjectWithObjectID[] =
await getExistingRecordsForSlug(index, indexedSlug)
Expand Down
2 changes: 1 addition & 1 deletion db/migration/1731360326761-CloudflareImages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export class CloudflareImages1731360326761 implements MigrationInterface {
public async up(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`-- sql
ALTER TABLE images
ADD COLUMN cloudflareId VARCHAR(255) NULL,
ADD COLUMN cloudflareId CHAR(36) NULL,
ADD CONSTRAINT images_cloudflareId_unique UNIQUE (cloudflareId),
ADD COLUMN hash VARCHAR(255) NULL,
MODIFY COLUMN googleId VARCHAR(255) NULL,
Expand Down
4 changes: 1 addition & 3 deletions devTools/cloudflareImagesSync/cloudflareImagesSync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ async function purgeRecords(trx: db.KnexReadWriteTransaction) {
console.log("Deleting image:", image.filename)
try {
await fetch(
`https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_IMAGES_ACCOUNT_ID}/images/v1/${encodeURIComponent(image.id)}`,
`https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_IMAGES_ACCOUNT_ID}/images/v1/${image.id}`,
{
method: "DELETE",
headers: {
Expand Down Expand Up @@ -295,7 +295,6 @@ async function uploadImageToCloudflareImages(

const formData = new FormData()
formData.append("url", imageUrl)
formData.append("id", encodeURIComponent(filename))
formData.append("metadata", metadata)
formData.append("requireSignedURLs", "false")

Expand Down Expand Up @@ -430,7 +429,6 @@ You need to set "CLOUDFLARE_IMAGES_ACCOUNT_ID" and "CLOUDFLARE_IMAGES_API_KEY" i

await db.knexReadWriteTransaction(async (trx) => {
// await purgeRecords(trx)

const directory = await getCloudflareImageDirectory()
const { isValid, invalidImages } = await validateDirectory(
trx,
Expand Down
5 changes: 3 additions & 2 deletions packages/@ourworldindata/types/src/gdocTypes/GdocConstants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ export const GDOCS_URL_PLACEHOLDER = `${GDOCS_BASE_URL}/document/d/****/edit`

export const gdocIdRegex = /^[0-9A-Za-z\-_]{44}$/

// This file is saved in Drive in the Unattributed Images folder
// Somewhat fragile, should be fixed as part of https://github.com/owid/owid-grapher/issues/2485
// Stored in this repo (public/default-featured-image.png) and served from the site root
export const DEFAULT_GDOC_FEATURED_IMAGE = "default-featured-image.png"

// Stored in this repo and served
export const DEFAULT_THUMBNAIL_FILENAME = "default-thumbnail.png"

// Stored in this repo and served
export const ARCHVED_THUMBNAIL_FILENAME = "archived-thumbnail.jpg"
11 changes: 1 addition & 10 deletions packages/@ourworldindata/utils/src/image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,6 @@ export function getFilenameAsPng(filename: ImageMetadata["filename"]): string {
return `${getFilenameWithoutExtension(filename)}.png`
}

export function getFilenameAsThumbnail(
filename: ImageMetadata["filename"]
): string {
return `${getFilenameWithoutExtension(filename)}_${LARGE_THUMBNAIL_WIDTH}.png`
}

export function getThumbnailPath(filename: string): string {
return `/images/published/${getFilenameAsThumbnail(filename)}`
}

export function getFilenameMIMEType(filename: string): string | undefined {
const fileExtension = getFilenameExtension(filename)
const MIMEType = {
Expand Down Expand Up @@ -133,6 +123,7 @@ export function getFeaturedImageFilename(gdoc: OwidGdoc): string | undefined {
const featuredImageSlug = match.content["featured-image"]
if (!featuredImageSlug) return undefined
// Social media platforms don't support SVGs for og:image, so for SVG featured images we use the fallback PNG that the baker generates
// TODO: remove this and add an error if an author tries to use an SVG as a featured image
return getFilenameExtension(featuredImageSlug) === "svg"
? getFilenameAsPng(featuredImageSlug)
: featuredImageSlug
Expand Down
2 changes: 0 additions & 2 deletions packages/@ourworldindata/utils/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,6 @@ export {
getSizes,
generateSrcSet,
getFilenameWithoutExtension,
getFilenameAsThumbnail,
getThumbnailPath,
getFilenameAsPng,
getFilenameExtension,
getFilenameMIMEType,
Expand Down
Binary file added public/default-featured-image.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 1 addition & 8 deletions site/search/SearchPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import {
groupBy,
uniqBy,
Region,
DEFAULT_GDOC_FEATURED_IMAGE,
} from "@ourworldindata/utils"
import {
InstantSearch,
Expand Down Expand Up @@ -66,12 +65,6 @@ import { ChartHit } from "./ChartHit.js"
const siteAnalytics = new SiteAnalytics()

function PagesHit({ hit }: { hit: IPageHit }) {
// a temporary fix for articles that have been indexed without the directory
const src =
hit.thumbnailUrl === `/${DEFAULT_GDOC_FEATURED_IMAGE}`
? `/images/published/${DEFAULT_GDOC_FEATURED_IMAGE}`
: hit.thumbnailUrl

return (
<a
href={`${BAKED_BASE_URL}/${hit.slug}`}
Expand All @@ -83,7 +76,7 @@ function PagesHit({ hit }: { hit: IPageHit }) {
{hit.thumbnailUrl && (
<div className="search-results__page-hit-img-container">
<img
src={src}
src={hit.thumbnailUrl}
role="presentation"
className="search-results__page-hit-img"
/>
Expand Down

0 comments on commit 24576e2

Please sign in to comment.