Skip to content

Commit

Permalink
✨ improve image sync error handling, typing, and documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
ikesau committed Apr 16, 2024
1 parent b0991a1 commit 66590a5
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 32 deletions.
1 change: 1 addition & 0 deletions adminSiteServer/apiRouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2276,6 +2276,7 @@ getRouteNonIdempotentWithRWTransaction(
| undefined

try {
// Beware: if contentSource=gdocs this will update images in the DB+S3 even if the gdoc is published
const gdoc = await getAndLoadGdocById(trx, id, contentSource)

if (!gdoc.published) {
Expand Down
6 changes: 3 additions & 3 deletions db/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -319,9 +319,9 @@ export const getHomepageId = (
export const getImageMetadataByFilenames = async (
knex: KnexReadonlyTransaction,
filenames: string[]
): Promise<Record<string, ImageMetadata>> => {
): Promise<Record<string, ImageMetadata & { id: number }>> => {
if (filenames.length === 0) return {}
const rows = (await knexRaw(
const rows = await knexRaw<ImageMetadata & { id: number }>(
knex,
`-- sql
SELECT
Expand All @@ -336,7 +336,7 @@ export const getImageMetadataByFilenames = async (
images
WHERE filename IN (?)`,
[filenames]
)) as DbEnrichedImage[]
)
return keyBy(rows, "filename")
}

Expand Down
3 changes: 1 addition & 2 deletions db/model/Gdoc/GdocFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -601,8 +601,7 @@ export async function addImagesToContentGraph(
if (filenames.length && gdoc.published) {
const images = await getImageMetadataByFilenames(trx, filenames)
const gdocXImagesToInsert: DbInsertPostGdocXImage[] = []
for (const filename in images) {
const image = images[filename] as DbEnrichedImage
for (const image of Object.values(images)) {
gdocXImagesToInsert.push({
gdocId: gdoc.id,
imageId: image.id,
Expand Down
39 changes: 22 additions & 17 deletions db/model/Image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import {
DbInsertImage,
serializeImageRow,
ImagesTableName,
excludeUndefined,
} from "@ourworldindata/utils"
import { OwidGoogleAuth } from "../OwidGoogleAuth.js"
import {
Expand Down Expand Up @@ -97,8 +96,8 @@ class ImageStore {
filename: google.name,
defaultAlt: google.description ?? "",
updatedAt: new Date(google.modifiedTime).getTime(),
originalWidth: google.imageMediaMetadata?.width,
originalHeight: google.imageMediaMetadata?.height,
originalWidth: google.imageMediaMetadata?.width ?? null,
originalHeight: google.imageMediaMetadata?.height ?? null,
}))

const duplicateFilenames = findDuplicates(
Expand All @@ -114,7 +113,14 @@ class ImageStore {
console.log(
`Fetched ${images.length} images' metadata from Google Drive`
)
return keyBy(images, "filename")
const imageMetadata = keyBy(images, "filename")
// Only applies when we're fetching specific images i.e. `filenames` is not empty
for (const filename of filenames) {
if (!imageMetadata[filename]) {
throw Error(`Image ${filename} not found in Google Drive`)
}
}
return imageMetadata
}

async syncImagesToS3(
Expand Down Expand Up @@ -179,7 +185,7 @@ export class Image implements ImageMetadata {
static async syncImage(
knex: KnexReadWriteTransaction,
metadata: ImageMetadata
): Promise<Image | undefined> {
): Promise<Image> {
const fresh = new Image(metadata)
const stored = await getImageByFilename(knex, metadata.filename)

Expand Down Expand Up @@ -211,9 +217,8 @@ export class Image implements ImageMetadata {
return fresh
}
} catch (e) {
console.error(`Error syncing ${fresh.filename}`, e)
throw new Error(`Error syncing image ${metadata.filename}: ${e}`)
}
return
}

async fetchFromDriveAndUploadToS3(): Promise<void> {
Expand Down Expand Up @@ -311,15 +316,15 @@ export async function fetchImagesFromDriveAndSyncToS3(
): Promise<Image[]> {
if (!filenames.length) return []

const images = await imageStore
.fetchImageMetadata(filenames)
.then((obj) => Object.values(obj))
.then(excludeUndefined)
try {
const imageMetadata = await imageStore.fetchImageMetadata(filenames)

return Promise.all(images.map((i) => Image.syncImage(knex, i)))
.then(excludeUndefined)
.catch((e) => {
console.error(`Error syncing images to S3`, e)
return []
})
const metadataArray = Object.values(imageMetadata) as ImageMetadata[]

return Promise.all(
metadataArray.map((metadata) => Image.syncImage(knex, metadata))
)
} catch (e) {
throw new Error(`Error fetching images from Drive: ${e}`)
}
}
23 changes: 13 additions & 10 deletions packages/@ourworldindata/types/src/gdocTypes/Image.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { DbEnrichedImage } from "../dbTypes/Images.js"

// This is the JSON we get from Google's API before remapping the keys to be consistent with the rest of our interfaces
export interface GDriveImageMetadata {
name: string // -> filename
Expand All @@ -10,13 +12,14 @@ export interface GDriveImageMetadata {
}
}

export interface ImageMetadata {
googleId: string
filename: string
defaultAlt: string
// MySQL Date objects round to the nearest second, whereas Google includes milliseconds
// so we store as an epoch to avoid any conversion issues
updatedAt: number | null
originalWidth?: number | null
originalHeight?: number | null
}
// All the data we use in the client to render images
// everything except the ID, effectively
export type ImageMetadata = Pick<
DbEnrichedImage,
| "defaultAlt"
| "filename"
| "googleId"
| "originalHeight"
| "originalWidth"
| "updatedAt"
>

0 comments on commit 66590a5

Please sign in to comment.