From 66590a58b049c7628932664e893432821fd766ee Mon Sep 17 00:00:00 2001 From: Ike Saunders Date: Tue, 16 Apr 2024 18:49:20 -0400 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20improve=20image=20sync=20error=20ha?= =?UTF-8?q?ndling,=20typing,=20and=20documentation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adminSiteServer/apiRouter.ts | 1 + db/db.ts | 6 +-- db/model/Gdoc/GdocFactory.ts | 3 +- db/model/Image.ts | 39 +++++++++++-------- .../types/src/gdocTypes/Image.ts | 23 ++++++----- 5 files changed, 40 insertions(+), 32 deletions(-) diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index fad1edb0c8c..b963fc502b2 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -2276,6 +2276,7 @@ getRouteNonIdempotentWithRWTransaction( | undefined try { + // Beware: if contentSource=gdocs this will update images in the DB+S3 even if the gdoc is published const gdoc = await getAndLoadGdocById(trx, id, contentSource) if (!gdoc.published) { diff --git a/db/db.ts b/db/db.ts index 84953774e61..4269454b3d1 100644 --- a/db/db.ts +++ b/db/db.ts @@ -319,9 +319,9 @@ export const getHomepageId = ( export const getImageMetadataByFilenames = async ( knex: KnexReadonlyTransaction, filenames: string[] -): Promise<Record<string, ImageMetadata>> => { +): Promise<Record<string, DbEnrichedImage>> => { if (filenames.length === 0) return {} - const rows = (await knexRaw( + const rows = await knexRaw<DbEnrichedImage>( knex, `-- sql SELECT @@ -336,7 +336,7 @@ export const getImageMetadataByFilenames = async ( images WHERE filename IN (?)`, [filenames] - )) as DbEnrichedImage[] + ) return keyBy(rows, "filename") } diff --git a/db/model/Gdoc/GdocFactory.ts b/db/model/Gdoc/GdocFactory.ts index 783d4db7f01..5bf79021708 100644 --- a/db/model/Gdoc/GdocFactory.ts +++ b/db/model/Gdoc/GdocFactory.ts @@ -601,8 +601,7 @@ export async function addImagesToContentGraph( if (filenames.length && gdoc.published) { const images = await getImageMetadataByFilenames(trx, filenames) const
gdocXImagesToInsert: DbInsertPostGdocXImage[] = [] - for (const filename in images) { - const image = images[filename] as DbEnrichedImage + for (const image of Object.values(images)) { gdocXImagesToInsert.push({ gdocId: gdoc.id, imageId: image.id, diff --git a/db/model/Image.ts b/db/model/Image.ts index 240f042196c..03cd745f83a 100644 --- a/db/model/Image.ts +++ b/db/model/Image.ts @@ -18,7 +18,6 @@ import { DbInsertImage, serializeImageRow, ImagesTableName, - excludeUndefined, } from "@ourworldindata/utils" import { OwidGoogleAuth } from "../OwidGoogleAuth.js" import { @@ -97,8 +96,8 @@ class ImageStore { filename: google.name, defaultAlt: google.description ?? "", updatedAt: new Date(google.modifiedTime).getTime(), - originalWidth: google.imageMediaMetadata?.width, - originalHeight: google.imageMediaMetadata?.height, + originalWidth: google.imageMediaMetadata?.width ?? null, + originalHeight: google.imageMediaMetadata?.height ?? null, })) const duplicateFilenames = findDuplicates( @@ -114,7 +113,14 @@ class ImageStore { console.log( `Fetched ${images.length} images' metadata from Google Drive` ) - return keyBy(images, "filename") + const imageMetadata = keyBy(images, "filename") + // Only applies when we're fetching specific images i.e. 
`filenames` is not empty + for (const filename of filenames) { + if (!imageMetadata[filename]) { + throw Error(`Image ${filename} not found in Google Drive`) + } + } + return imageMetadata } async syncImagesToS3( @@ -179,7 +185,7 @@ export class Image implements ImageMetadata { static async syncImage( knex: KnexReadWriteTransaction, metadata: ImageMetadata - ): Promise<Image | undefined> { + ): Promise<Image> { const fresh = new Image(metadata) const stored = await getImageByFilename(knex, metadata.filename) @@ -211,9 +217,8 @@ export class Image implements ImageMetadata { return fresh } } catch (e) { - console.error(`Error syncing ${fresh.filename}`, e) + throw new Error(`Error syncing image ${metadata.filename}: ${e}`) } - return } async fetchFromDriveAndUploadToS3(): Promise<void> { @@ -311,15 +316,15 @@ export async function fetchImagesFromDriveAndSyncToS3( ): Promise<Image[]> { if (!filenames.length) return [] - const images = await imageStore - .fetchImageMetadata(filenames) - .then((obj) => Object.values(obj)) - .then(excludeUndefined) + try { + const imageMetadata = await imageStore.fetchImageMetadata(filenames) - return Promise.all(images.map((i) => Image.syncImage(knex, i))) - .then(excludeUndefined) - .catch((e) => { - console.error(`Error syncing images to S3`, e) - return [] - }) + const metadataArray = Object.values(imageMetadata) as ImageMetadata[] + + return Promise.all( + metadataArray.map((metadata) => Image.syncImage(knex, metadata)) + ) + } catch (e) { + throw new Error(`Error fetching images from Drive: ${e}`) + } } diff --git a/packages/@ourworldindata/types/src/gdocTypes/Image.ts b/packages/@ourworldindata/types/src/gdocTypes/Image.ts index 4c56fc9e9fd..f9e162ac31c 100644 --- a/packages/@ourworldindata/types/src/gdocTypes/Image.ts +++ b/packages/@ourworldindata/types/src/gdocTypes/Image.ts @@ -1,3 +1,5 @@ +import { DbEnrichedImage } from "../dbTypes/Images.js" + // This is the JSON we get from Google's API before remapping the keys to be consistent with the rest of our interfaces
export interface GDriveImageMetadata { name: string // -> filename @@ -10,13 +12,14 @@ export interface GDriveImageMetadata { } } -export interface ImageMetadata { - googleId: string - filename: string - defaultAlt: string - // MySQL Date objects round to the nearest second, whereas Google includes milliseconds - // so we store as an epoch to avoid any conversion issues - updatedAt: number | null - originalWidth?: number | null - originalHeight?: number | null -} +// All the data we use in the client to render images +// everything except the ID, effectively +export type ImageMetadata = Pick< + DbEnrichedImage, + | "defaultAlt" + | "filename" + | "googleId" + | "originalHeight" + | "originalWidth" + | "updatedAt" +>