Skip to content

Commit

Permalink
🐝 remove some more unused benchmarking code
Browse files — browse the repository at this point in the history
  • Loading branch information
ikesau committed Apr 15, 2024
1 parent 0f72d97 commit 3cb6fbf
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 123 deletions.
43 changes: 2 additions & 41 deletions db/model/Gdoc/GdocBase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -590,13 +590,9 @@ export class GdocBase implements OwidGdocBaseInterface {
}
}

async loadLinkedCharts(knex: db.KnexReadonlyTransaction): Promise<{
timeToLoadGrapherCharts: number
timeToLoadExplorerCharts: number
}> {
async loadLinkedCharts(knex: db.KnexReadonlyTransaction): Promise<void> {
const slugToIdMap = await mapSlugsToIds(knex)
// TODO: rewrite this as a single query instead of N queries
const start = performance.now()
const linkedGrapherCharts = await Promise.all(
this.linkedChartSlugs.grapher.map(async (originalSlug) => {
const chartId = slugToIdMap[originalSlug]
Expand All @@ -620,7 +616,6 @@ export class GdocBase implements OwidGdocBaseInterface {
return linkedChart
})
).then(excludeNullish)
const timeToLoadGrapherCharts = performance.now() - start

const publishedExplorersBySlug =
await db.getPublishedExplorersBySlug(knex)
Expand All @@ -641,14 +636,11 @@ export class GdocBase implements OwidGdocBaseInterface {
return linkedChart
})
)
const timeToLoadExplorerCharts =
performance.now() - start - timeToLoadGrapherCharts

this.linkedCharts = keyBy(
[...linkedGrapherCharts, ...linkedExplorerCharts],
"originalSlug"
)
return { timeToLoadGrapherCharts, timeToLoadExplorerCharts }
}

async loadLinkedIndicators(): Promise<void> {
Expand Down Expand Up @@ -822,43 +814,12 @@ export class GdocBase implements OwidGdocBaseInterface {
}

async loadState(knex: db.KnexReadonlyTransaction): Promise<void> {
const start = performance.now()
await this.loadLinkedDocuments(knex)
const linkedDocumentTime = performance.now() - start
await this.loadImageMetadataFromDB(knex)
const imageMetadataTime = performance.now() - start - linkedDocumentTime
const { timeToLoadGrapherCharts, timeToLoadExplorerCharts } =
await this.loadLinkedCharts(knex)
const linkedChartsTime = performance.now() - start - imageMetadataTime
await this.loadLinkedCharts(knex)
await this.loadLinkedIndicators() // depends on linked charts
const linkedIndicatorsTime =
performance.now() - start - linkedChartsTime
await this._loadSubclassAttachments(knex)
const subclassAttachmentsTime =
performance.now() - start - linkedIndicatorsTime
await this.validate(knex)
const validationTime =
performance.now() - start - subclassAttachmentsTime
// append to log
await fs.promises.appendFile(
"gdoc-load-times.log",
JSON.stringify(
{
id: this.id,
linkedDocumentTime,
imageMetadataTime,
linkedChartsTime,
linkedIndicatorsTime,
subclassAttachmentsTime,
timeToLoadGrapherCharts,
timeToLoadExplorerCharts,
validationTime,
linkedGrapherCount: this.linkedChartSlugs.grapher.length,
},
null,
2
)
)
}

toJSON(): OwidGdoc {
Expand Down
1 change: 0 additions & 1 deletion db/model/Gdoc/GdocFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,6 @@ export async function getAndLoadPublishedGdocPosts(
const gdocs = await Promise.all(
enrichedRows.map(async (row) => loadGdocFromGdocBase(knex, row))
)
console.log("after all loaded")
return gdocs as GdocPost[]
}

Expand Down
153 changes: 72 additions & 81 deletions db/model/Image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,94 +36,85 @@ class ImageStore {
async fetchImageMetadata(
filenames: string[]
): Promise<Record<string, ImageMetadata | undefined>> {
try {
console.log(
`Fetching image metadata from Google Drive ${
filenames.length ? `for ${filenames.join(", ")}` : ""
}`
)
const driveClient = google.drive({
version: "v3",
auth: OwidGoogleAuth.getGoogleReadonlyAuth(),
})
// e.g. `and (name="example.png" or name="image.svg")`
// https://developers.google.com/drive/api/guides/search-files#examples
const filenamesFilter = filenames.length
? `and (${filenames
.map((filename) => `name='${filename}'`)
.join(" or ")})`
: ""

const listParams: drive_v3.Params$Resource$Files$List = {
fields: "nextPageToken, files(id, name, description, modifiedTime, imageMediaMetadata, trashed)",
q: `'${GDOCS_CLIENT_EMAIL}' in readers and mimeType contains 'image/' ${filenamesFilter}`,
driveId: GDOCS_SHARED_DRIVE_ID,
corpora: "drive",
supportsAllDrives: true,
includeItemsFromAllDrives: true,
pageSize: 1000,
}

let files: drive_v3.Schema$File[] = []
let nextPageToken: drive_v3.Schema$FileList["nextPageToken"] =
undefined
let isInitialQuery = true

while (nextPageToken || isInitialQuery) {
await driveClient.files
.list({
...listParams,
pageToken: nextPageToken,
})
// chaining this so that reassigning nextPageToken doesn't trip up TypeScript
.then((res) => {
const nextFiles = res.data.files ?? []
nextPageToken = res.data.nextPageToken
files = [...files, ...nextFiles]
})
isInitialQuery = false
}
console.log(
`Fetching image metadata from Google Drive ${
filenames.length ? `for ${filenames.join(", ")}` : ""
}`
)
const driveClient = google.drive({
version: "v3",
auth: OwidGoogleAuth.getGoogleReadonlyAuth(),
})
// e.g. `and (name="example.png" or name="image.svg")`
// https://developers.google.com/drive/api/guides/search-files#examples
const filenamesFilter = filenames.length
? `and (${filenames
.map((filename) => `name='${filename}'`)
.join(" or ")})`
: ""

const listParams: drive_v3.Params$Resource$Files$List = {
fields: "nextPageToken, files(id, name, description, modifiedTime, imageMediaMetadata, trashed)",
q: `'${GDOCS_CLIENT_EMAIL}' in readers and mimeType contains 'image/' ${filenamesFilter}`,
driveId: GDOCS_SHARED_DRIVE_ID,
corpora: "drive",
supportsAllDrives: true,
includeItemsFromAllDrives: true,
pageSize: 1000,
}

function validateImage(
image: drive_v3.Schema$File
): image is GDriveImageMetadata {
return Boolean(
image.id &&
image.name &&
image.modifiedTime &&
!image.trashed
)
}
let files: drive_v3.Schema$File[] = []
let nextPageToken: drive_v3.Schema$FileList["nextPageToken"] = undefined
let isInitialQuery = true

while (nextPageToken || isInitialQuery) {
await driveClient.files
.list({
...listParams,
pageToken: nextPageToken,
})
// chaining this so that reassigning nextPageToken doesn't trip up TypeScript
.then((res) => {
const nextFiles = res.data.files ?? []
nextPageToken = res.data.nextPageToken
files = [...files, ...nextFiles]
})
isInitialQuery = false
}

const images: ImageMetadata[] = files
.filter(validateImage)
.map((google: GDriveImageMetadata) => ({
googleId: google.id,
filename: google.name,
defaultAlt: google.description ?? "",
updatedAt: new Date(google.modifiedTime).getTime(),
originalWidth: google.imageMediaMetadata?.width,
originalHeight: google.imageMediaMetadata?.height,
}))

const duplicateFilenames = findDuplicates(
images.map((image) => image.filename)
function validateImage(
image: drive_v3.Schema$File
): image is GDriveImageMetadata {
return Boolean(
image.id && image.name && image.modifiedTime && !image.trashed
)
}

if (duplicateFilenames.length) {
throw new Error(
`Multiple images are named ${duplicateFilenames.join(", ")}`
)
}
const images: ImageMetadata[] = files
.filter(validateImage)
.map((google: GDriveImageMetadata) => ({
googleId: google.id,
filename: google.name,
defaultAlt: google.description ?? "",
updatedAt: new Date(google.modifiedTime).getTime(),
originalWidth: google.imageMediaMetadata?.width,
originalHeight: google.imageMediaMetadata?.height,
}))

const duplicateFilenames = findDuplicates(
images.map((image) => image.filename)
)

console.log(
`Fetched ${images.length} images' metadata from Google Drive`
if (duplicateFilenames.length) {
throw new Error(
`Multiple images are named ${duplicateFilenames.join(", ")}`
)
return keyBy(images, "filename")
} catch (error) {
console.error(`Error fetching image metadata`, error)
throw error
}

console.log(
`Fetched ${images.length} images' metadata from Google Drive`
)
return keyBy(images, "filename")
}

async syncImagesToS3(
Expand Down

0 comments on commit 3cb6fbf

Please sign in to comment.