diff --git a/db/model/Gdoc/GdocBase.ts b/db/model/Gdoc/GdocBase.ts index f1921f17efe..e816d359799 100644 --- a/db/model/Gdoc/GdocBase.ts +++ b/db/model/Gdoc/GdocBase.ts @@ -590,13 +590,9 @@ export class GdocBase implements OwidGdocBaseInterface { } } - async loadLinkedCharts(knex: db.KnexReadonlyTransaction): Promise<{ - timeToLoadGrapherCharts: number - timeToLoadExplorerCharts: number - }> { + async loadLinkedCharts(knex: db.KnexReadonlyTransaction): Promise { const slugToIdMap = await mapSlugsToIds(knex) // TODO: rewrite this as a single query instead of N queries - const start = performance.now() const linkedGrapherCharts = await Promise.all( this.linkedChartSlugs.grapher.map(async (originalSlug) => { const chartId = slugToIdMap[originalSlug] @@ -620,7 +616,6 @@ export class GdocBase implements OwidGdocBaseInterface { return linkedChart }) ).then(excludeNullish) - const timeToLoadGrapherCharts = performance.now() - start const publishedExplorersBySlug = await db.getPublishedExplorersBySlug(knex) @@ -641,14 +636,11 @@ export class GdocBase implements OwidGdocBaseInterface { return linkedChart }) ) - const timeToLoadExplorerCharts = - performance.now() - start - timeToLoadGrapherCharts this.linkedCharts = keyBy( [...linkedGrapherCharts, ...linkedExplorerCharts], "originalSlug" ) - return { timeToLoadGrapherCharts, timeToLoadExplorerCharts } } async loadLinkedIndicators(): Promise { @@ -822,43 +814,12 @@ export class GdocBase implements OwidGdocBaseInterface { } async loadState(knex: db.KnexReadonlyTransaction): Promise { - const start = performance.now() await this.loadLinkedDocuments(knex) - const linkedDocumentTime = performance.now() - start await this.loadImageMetadataFromDB(knex) - const imageMetadataTime = performance.now() - start - linkedDocumentTime - const { timeToLoadGrapherCharts, timeToLoadExplorerCharts } = - await this.loadLinkedCharts(knex) - const linkedChartsTime = performance.now() - start - imageMetadataTime + await this.loadLinkedCharts(knex) await this.loadLinkedIndicators() // depends on linked charts - const linkedIndicatorsTime = - performance.now() - start - linkedChartsTime await this._loadSubclassAttachments(knex) - const subclassAttachmentsTime = - performance.now() - start - linkedIndicatorsTime await this.validate(knex) - const validationTime = - performance.now() - start - subclassAttachmentsTime - // append to log - await fs.promises.appendFile( - "gdoc-load-times.log", - JSON.stringify( - { - id: this.id, - linkedDocumentTime, - imageMetadataTime, - linkedChartsTime, - linkedIndicatorsTime, - subclassAttachmentsTime, - timeToLoadGrapherCharts, - timeToLoadExplorerCharts, - validationTime, - linkedGrapherCount: this.linkedChartSlugs.grapher.length, - }, - null, - 2 - ) - ) } toJSON(): OwidGdoc { diff --git a/db/model/Gdoc/GdocFactory.ts b/db/model/Gdoc/GdocFactory.ts index d573bd873cc..783d4db7f01 100644 --- a/db/model/Gdoc/GdocFactory.ts +++ b/db/model/Gdoc/GdocFactory.ts @@ -422,7 +422,6 @@ export async function getAndLoadPublishedGdocPosts( const gdocs = await Promise.all( enrichedRows.map(async (row) => loadGdocFromGdocBase(knex, row)) ) - console.log("after all loaded") return gdocs as GdocPost[] } diff --git a/db/model/Image.ts b/db/model/Image.ts index 322e170f839..240f042196c 100644 --- a/db/model/Image.ts +++ b/db/model/Image.ts @@ -36,94 +36,85 @@ class ImageStore { async fetchImageMetadata( filenames: string[] ): Promise> { - try { - console.log( - `Fetching image metadata from Google Drive ${ - filenames.length ? `for ${filenames.join(", ")}` : "" - }` - ) - const driveClient = google.drive({ - version: "v3", - auth: OwidGoogleAuth.getGoogleReadonlyAuth(), - }) - // e.g. `and (name="example.png" or name="image.svg")` - // https://developers.google.com/drive/api/guides/search-files#examples - const filenamesFilter = filenames.length - ? `and (${filenames - .map((filename) => `name='${filename}'`) - .join(" or ")})` - : "" - - const listParams: drive_v3.Params$Resource$Files$List = { - fields: "nextPageToken, files(id, name, description, modifiedTime, imageMediaMetadata, trashed)", - q: `'${GDOCS_CLIENT_EMAIL}' in readers and mimeType contains 'image/' ${filenamesFilter}`, - driveId: GDOCS_SHARED_DRIVE_ID, - corpora: "drive", - supportsAllDrives: true, - includeItemsFromAllDrives: true, - pageSize: 1000, - } - - let files: drive_v3.Schema$File[] = [] - let nextPageToken: drive_v3.Schema$FileList["nextPageToken"] = - undefined - let isInitialQuery = true - - while (nextPageToken || isInitialQuery) { - await driveClient.files - .list({ - ...listParams, - pageToken: nextPageToken, - }) - // chaining this so that reassigning nextPageToken doesn't trip up TypeScript - .then((res) => { - const nextFiles = res.data.files ?? [] - nextPageToken = res.data.nextPageToken - files = [...files, ...nextFiles] - }) - isInitialQuery = false - } + console.log( + `Fetching image metadata from Google Drive ${ + filenames.length ? `for ${filenames.join(", ")}` : "" + }` + ) + const driveClient = google.drive({ + version: "v3", + auth: OwidGoogleAuth.getGoogleReadonlyAuth(), + }) + // e.g. `and (name="example.png" or name="image.svg")` + // https://developers.google.com/drive/api/guides/search-files#examples + const filenamesFilter = filenames.length + ? `and (${filenames + .map((filename) => `name='${filename}'`) + .join(" or ")})` + : "" + + const listParams: drive_v3.Params$Resource$Files$List = { + fields: "nextPageToken, files(id, name, description, modifiedTime, imageMediaMetadata, trashed)", + q: `'${GDOCS_CLIENT_EMAIL}' in readers and mimeType contains 'image/' ${filenamesFilter}`, + driveId: GDOCS_SHARED_DRIVE_ID, + corpora: "drive", + supportsAllDrives: true, + includeItemsFromAllDrives: true, + pageSize: 1000, + } - function validateImage( - image: drive_v3.Schema$File - ): image is GDriveImageMetadata { - return Boolean( - image.id && - image.name && - image.modifiedTime && - !image.trashed - ) - } + let files: drive_v3.Schema$File[] = [] + let nextPageToken: drive_v3.Schema$FileList["nextPageToken"] = undefined + let isInitialQuery = true + + while (nextPageToken || isInitialQuery) { + await driveClient.files + .list({ + ...listParams, + pageToken: nextPageToken, + }) + // chaining this so that reassigning nextPageToken doesn't trip up TypeScript + .then((res) => { + const nextFiles = res.data.files ?? [] + nextPageToken = res.data.nextPageToken + files = [...files, ...nextFiles] + }) + isInitialQuery = false + } - const images: ImageMetadata[] = files - .filter(validateImage) - .map((google: GDriveImageMetadata) => ({ - googleId: google.id, - filename: google.name, - defaultAlt: google.description ?? "", - updatedAt: new Date(google.modifiedTime).getTime(), - originalWidth: google.imageMediaMetadata?.width, - originalHeight: google.imageMediaMetadata?.height, - })) - - const duplicateFilenames = findDuplicates( - images.map((image) => image.filename) + function validateImage( + image: drive_v3.Schema$File + ): image is GDriveImageMetadata { + return Boolean( + image.id && image.name && image.modifiedTime && !image.trashed ) + } - if (duplicateFilenames.length) { - throw new Error( - `Multiple images are named ${duplicateFilenames.join(", ")}` - ) - } + const images: ImageMetadata[] = files + .filter(validateImage) + .map((google: GDriveImageMetadata) => ({ + googleId: google.id, + filename: google.name, + defaultAlt: google.description ?? "", + updatedAt: new Date(google.modifiedTime).getTime(), + originalWidth: google.imageMediaMetadata?.width, + originalHeight: google.imageMediaMetadata?.height, + })) + + const duplicateFilenames = findDuplicates( + images.map((image) => image.filename) + ) - console.log( - `Fetched ${images.length} images' metadata from Google Drive` + if (duplicateFilenames.length) { + throw new Error( + `Multiple images are named ${duplicateFilenames.join(", ")}` ) - return keyBy(images, "filename") - } catch (error) { - console.error(`Error fetching image metadata`, error) - throw error } + + console.log( + `Fetched ${images.length} images' metadata from Google Drive` + ) + return keyBy(images, "filename") } async syncImagesToS3(