diff --git a/package-lock.json b/package-lock.json
index 22a1bdde3..5df85bf27 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -29,6 +29,7 @@
         "jest-mock-extended": "^2.0.2-beta2",
         "js-yaml": "^3.13.1",
         "mongodb": "^5.1.0",
+        "p-limit": "^5.0.0",
         "simple-git": "^2.45.1",
         "tsscmp": "^1.0.6",
         "validator": "^10.11.0"
@@ -18277,14 +18278,25 @@
       }
     },
     "node_modules/p-limit": {
-      "version": "2.3.0",
-      "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
-      "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-5.0.0.tgz",
+      "integrity": "sha512-/Eaoq+QyLSiXQ4lyYV23f14mZRQcXnxfHrN0vCai+ak9G0pp9iEQukIIZq5NccEvwRB8PUnZT0KsOoDCINS1qQ==",
       "dependencies": {
-        "p-try": "^2.0.0"
+        "yocto-queue": "^1.0.0"
       },
       "engines": {
-        "node": ">=6"
+        "node": ">=18"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "node_modules/p-limit/node_modules/yocto-queue": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.0.0.tgz",
+      "integrity": "sha512-9bnSc/HEW2uRy67wc+T8UwauLuPJVn28jb+GtJY16iiKWyvmYJRXVT4UamsAEGQfPohgr2q4Tq0sQbQlxTfi1g==",
+      "engines": {
+        "node": ">=12.20"
       },
       "funding": {
         "url": "https://github.com/sponsors/sindresorhus"
      }
@@ -18301,6 +18313,20 @@
         "node": ">=8"
       }
     },
+    "node_modules/p-locate/node_modules/p-limit": {
+      "version": "2.3.0",
+      "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
+      "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
+      "dependencies": {
+        "p-try": "^2.0.0"
+      },
+      "engines": {
+        "node": ">=6"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
     "node_modules/p-map": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/p-map/-/p-map-4.0.0.tgz",
@@ -35968,11 +35994,18 @@
       "peer": true
     },
     "p-limit": {
-      "version": "2.3.0",
-      "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
-      "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-5.0.0.tgz",
+      "integrity": "sha512-/Eaoq+QyLSiXQ4lyYV23f14mZRQcXnxfHrN0vCai+ak9G0pp9iEQukIIZq5NccEvwRB8PUnZT0KsOoDCINS1qQ==",
       "requires": {
-        "p-try": "^2.0.0"
+        "yocto-queue": "^1.0.0"
+      },
+      "dependencies": {
+        "yocto-queue": {
+          "version": "1.0.0",
+          "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.0.0.tgz",
+          "integrity": "sha512-9bnSc/HEW2uRy67wc+T8UwauLuPJVn28jb+GtJY16iiKWyvmYJRXVT4UamsAEGQfPohgr2q4Tq0sQbQlxTfi1g=="
+        }
       }
     },
     "p-locate": {
@@ -35981,6 +36014,16 @@
       "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==",
       "requires": {
         "p-limit": "^2.2.0"
+      },
+      "dependencies": {
+        "p-limit": {
+          "version": "2.3.0",
+          "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
+          "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
+          "requires": {
+            "p-try": "^2.0.0"
+          }
+        }
       }
     },
     "p-map": {
diff --git a/package.json b/package.json
index d78e8845b..86278e81d 100644
--- a/package.json
+++ b/package.json
@@ -45,6 +45,7 @@
     "jest-mock-extended": "^2.0.2-beta2",
     "js-yaml": "^3.13.1",
     "mongodb": "^5.1.0",
+    "p-limit": "^5.0.0",
     "simple-git": "^2.45.1",
    "tsscmp": "^1.0.6",
     "validator": "^10.11.0"
diff --git a/src/job/jobHandler.ts b/src/job/jobHandler.ts
index a3f933193..432bdb4b7 100644
--- a/src/job/jobHandler.ts
+++ b/src/job/jobHandler.ts
@@ -1,6 +1,8 @@
-import { GetObjectCommand, ListObjectsV2Command, S3Client } from '@aws-sdk/client-s3';
+import { ListObjectsV2Command, S3Client } from '@aws-sdk/client-s3';
 import axios, { AxiosResponse } from 'axios';
 import path from 'path';
+import pLimit from 'p-limit';
+import fs from 'fs';
 import { Payload, Job, JobStatus } from '../entities/job';
 import { JobRepository } from '../repositories/jobRepository';
 import { RepoBranchesRepository } from '../repositories/repoBranchesRepository';
@@ -18,7 +20,6 @@ import { MONOREPO_NAME } from '../monorepo/utils/monorepo-constants';
 import { nextGenHtml, nextGenParse, oasPageBuild, persistenceModule, prepareBuild } from '../commands';
 import { downloadBuildDependencies, writeFileAsync } from '../commands/src/helpers/dependency-helpers';
 import { CliCommandResponse } from '../commands/src/helpers';
-require('fs');
 
 export abstract class JobHandler {
   private _currJob: Job;
@@ -524,13 +525,43 @@ export abstract class JobHandler {
     process.env.REGRESSION = regression;
   }
 
-  // TODO-4442: Need to figure out how to split between cache and S3 files
+  private async downloadByUrl(objKey: string, destPath: string) {
+    const s3Url = `https://docs-mongodb-org-stg.s3.us-east-2.amazonaws.com/${objKey}`;
+    const maxAttempts = 3;
+
+    // Retry in case of random network issues
+    for (let i = maxAttempts; i > 0; i--) {
+      try {
+        const res = await axios.get(s3Url, { timeout: 10000, responseType: 'stream' });
+        const dirName = path.dirname(destPath);
+        this._fileSystemServices.createDirIfNotExists(dirName);
+        const dest = fs.createWriteStream(destPath);
+        res.data.pipe(dest);
+        // Wait for the write stream to flush so we don't resolve on a partially written file
+        await new Promise((resolve, reject) => {
+          dest.on('finish', resolve);
+          dest.on('error', reject);
+        });
+        console.log(`${objKey} is okay!`);
+        return;
+      } catch (err) {
+        console.error(`Failed fetching ${objKey}, retrying`);
+        const delay = 1000;
+        await new Promise((resolve) => setTimeout(resolve, delay));
+      }
+    }
+    console.error(`Failed to fetch ${objKey} after ${maxAttempts} attempts`);
+  }
+
   private async downloadExistingArtifacts() {
-    await this._logger.save(this._currJob._id, 'Attempting to download existing artifacts');
+    const timerLabel = 'downloadExistingArtifacts - concurrency limit: 5';
+    console.time(timerLabel);
+
+    console.log('Attempting to download existing artifacts');
     const client = new S3Client({ region: 'us-east-2' });
-    const bucket = process.env.BUCKET;
+    const bucket = 'docs-mongodb-org-stg';
     if (!bucket) {
-      this._logger.error(this._currJob._id, `Missing bucket: ${bucket}`);
+      console.error(`Missing bucket: ${bucket}`);
       return;
     }
 
@@ -539,19 +570,20 @@ export abstract class JobHandler {
     // S3 object prefix should match the path prefix that Mut uploads to for the build
     // Probably want to make this an argument, but leave as a static variable for testing
     // We'll need to figure out how to handle differences between prod deploy vs. content staging build, if necessary
     const s3Prefix = 'docs/docsworker-xlarge/DOP-4442/';
     const listCommand = new ListObjectsV2Command({ Bucket: bucket, Prefix: s3Prefix });
-    const repoDir = this._config.get('repo_dir');
+    const repoDir = 'test-s3-fetching-repo';
     // Since the Makefiles move the path to Snooty a bit, we want to make sure we target the original, before the
     // frontend is built
-    const originalSnootyPath = `${repoDir}/../../snooty`;
-    await this._logger.save(this._currJob._id, `originalSnootyPath: ${originalSnootyPath}`);
+    const originalSnootyPath = `${repoDir}/../snooty`;
+    console.log(`originalSnootyPath: ${originalSnootyPath}`);
     const targetPublicDirectory = path.join(originalSnootyPath, '/public');
-    await this._logger.save(this._currJob._id, `targetPublicDirectory: ${targetPublicDirectory}`);
+    console.log(`targetPublicDirectory: ${targetPublicDirectory}`);
+    // Target cache directory should just be the root snooty dir since objects will have ".cache/" already included
+    const targetCacheDirectory = path.join(originalSnootyPath);
+    console.log(`targetCacheDirectory: ${targetCacheDirectory}`);
 
-    // For debugging purposes
-    let contents = '';
-    let n = 0;
+    // Collect S3 object keys first so the downloads can run afterwards with limited concurrency
+    const keysList: { objKey: string; destPath: string }[] = [];
 
-    // NOTE: This currently does not taking into account the .cache folder
     try {
@@ -561,47 +593,132 @@ export abstract class JobHandler {
       let isTruncated = true;
 
       this._fileSystemServices.createDirIfNotExists(targetPublicDirectory);
 
       // Grab contents, and then attempt to continue, in case there are more objects
       while (isTruncated) {
         const { Contents, IsTruncated, NextContinuationToken } = await client.send(listCommand);
         if (!Contents) {
-          this._logger.info(this._currJob._id, 'No contents');
+          console.log('No contents');
           break;
         }
+        console.log('Contents found');
+
         for (const obj of Contents) {
           const objKey = obj.Key;
           if (!objKey) {
             continue;
           }
 
-          const getCommand = new GetObjectCommand({ Bucket: bucket, Key: objKey });
-          const { Body: objBody } = await client.send(getCommand);
-
           // Save S3 objects to local file paths
           // Files in the local public directory should exclude path prefixes
           const localFileName = objKey.replace(s3Prefix, '');
-          const targetFilePath = path.join(targetPublicDirectory, localFileName);
-          this._logger.info(this._currJob._id, `targetFilePath: ${targetFilePath}`);
-          if (objBody) {
-            await writeFileAsync(targetFilePath, await objBody.transformToString());
+          const targetDir = objKey.includes('.cache') ? targetCacheDirectory : targetPublicDirectory;
+          const targetFilePath = path.join(targetDir, localFileName);
+
+          // Some objects are just empty directories, apparently
+          if (!objKey.endsWith('/')) {
+            keysList.push({ objKey, destPath: targetFilePath });
           }
         }
 
-        // For debugging
-        const contentsList = Contents.map((c) => {
-          n++;
-          return `${c.Key}\n`;
-        });
-        contents += contentsList;
-
         isTruncated = !!IsTruncated;
         listCommand.input.ContinuationToken = NextContinuationToken;
       }
     } catch (e) {
-      this._logger.error(this._currJob._id, e);
+      console.error(e);
     }
 
+    // Limit concurrency to avoid rate limits
+    const limit = pLimit(5);
+    const downloadPromises = keysList.map(({ objKey, destPath }) => {
+      return limit(() => this.downloadByUrl(objKey, destPath));
+    });
+
     // For debugging purposes
-    this._logger.info(this._currJob._id, contents);
+    // console.info(contents);
+    try {
+      await Promise.all(downloadPromises);
+    } catch (err) {
+      console.error(err);
+    }
+
+    console.timeEnd(timerLabel);
   }
 
+  // TODO-4442: Need to figure out how to split between cache and S3 files
+  // private async downloadExistingArtifacts() {
+  //   await this._logger.save(this._currJob._id, 'Attempting to download existing artifacts');
+  //   const client = new S3Client({ region: 'us-east-2' });
+  //   const bucket = process.env.BUCKET;
+  //   if (!bucket) {
+  //     this._logger.error(this._currJob._id, `Missing bucket: ${bucket}`);
+  //     return;
+  //   }
+
+  //   // S3 object prefix should match the path prefix that Mut uploads to for the build
+  //   // Probably want to make this an argument, but leave as a static variable for testing
+  //   // We'll need to figure out how to handle differences between prod deploy vs. content staging build, if necessary
+  //   const s3Prefix = 'docs/docsworker-xlarge/DOP-4442/';
+  //   const listCommand = new ListObjectsV2Command({ Bucket: bucket, Prefix: s3Prefix });
+  //   const repoDir = this._config.get('repo_dir');
+  //   // Since the Makefiles move the path to Snooty a bit, we want to make sure we target the original, before the
+  //   // frontend is built
+  //   const originalSnootyPath = `${repoDir}/../../snooty`;
+  //   await this._logger.save(this._currJob._id, `originalSnootyPath: ${originalSnootyPath}`);
+  //   const targetPublicDirectory = path.join(originalSnootyPath, '/public');
+  //   await this._logger.save(this._currJob._id, `targetPublicDirectory: ${targetPublicDirectory}`);
+
+  //   // For debugging purposes
+  //   let contents = '';
+  //   let n = 0;
+
+  //   // NOTE: This currently does not take into account the .cache folder
+  //   try {
+  //     let isTruncated = true;
+
+  //     this._fileSystemServices.createDirIfNotExists(targetPublicDirectory);
+
+  //     // Grab contents, and then attempt to continue, in case there are more objects
+  //     while (isTruncated) {
+  //       const { Contents, IsTruncated, NextContinuationToken } = await client.send(listCommand);
+  //       if (!Contents) {
+  //         this._logger.info(this._currJob._id, 'No contents');
+  //         break;
+  //       }
+
+  //       for (const obj of Contents) {
+  //         const objKey = obj.Key;
+  //         if (!objKey) {
+  //           continue;
+  //         }
+
+  //         const getCommand = new GetObjectCommand({ Bucket: bucket, Key: objKey });
+  //         const { Body: objBody } = await client.send(getCommand);
+
+  //         // Save S3 objects to local file paths
+  //         // Files in the local public directory should exclude path prefixes
+  //         const localFileName = objKey.replace(s3Prefix, '');
+  //         const targetFilePath = path.join(targetPublicDirectory, localFileName);
+  //         this._logger.info(this._currJob._id, `targetFilePath: ${targetFilePath}`);
+  //         if (objBody) {
+  //           await writeFileAsync(targetFilePath, await objBody.transformToString());
+  //         }
+  //       }
+
+  //       // For debugging
+  //       const contentsList = Contents.map((c) => {
+  //         n++;
+  //         return `${c.Key}\n`;
+  //       });
+  //       contents += contentsList;
+
+  //       isTruncated = !!IsTruncated;
+  //       listCommand.input.ContinuationToken = NextContinuationToken;
+  //     }
+  //   } catch (e) {
+  //     this._logger.error(this._currJob._id, e);
+  //   }
+
+  //   // For debugging purposes
+  //   this._logger.info(this._currJob._id, contents);
+  // }
+
   @throwIfJobInterupted()
   protected async buildWithMakefiles(): Promise {
     this.cleanup();
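
Note for reviewers: below is a minimal, self-contained sketch of the list-then-download pattern this diff adopts, runnable outside the worker. The bucket URL, output directory, and key list are placeholder assumptions rather than values from this PR; the handler itself derives the keys from ListObjectsV2 and the paths from the job, as shown above.

import fs from 'fs';
import path from 'path';
import axios from 'axios';
import pLimit from 'p-limit';

const BUCKET_URL = 'https://example-bucket.s3.us-east-2.amazonaws.com'; // placeholder bucket
const OUT_DIR = 'out'; // placeholder output directory

async function download(objKey: string): Promise<void> {
  const destPath = path.join(OUT_DIR, objKey);
  // Ensure the destination directory exists before opening a write stream
  fs.mkdirSync(path.dirname(destPath), { recursive: true });
  const res = await axios.get(`${BUCKET_URL}/${objKey}`, { timeout: 10000, responseType: 'stream' });
  const dest = fs.createWriteStream(destPath);
  res.data.pipe(dest);
  // Resolve only once the file has been fully written to disk
  await new Promise<void>((resolve, reject) => {
    dest.on('finish', resolve);
    dest.on('error', reject);
  });
}

async function main() {
  const keys = ['index.html', 'styles/main.css']; // placeholder keys; the worker gets these from ListObjectsV2
  const limit = pLimit(5); // at most 5 downloads in flight, mirroring the handler
  await Promise.all(keys.map((key) => limit(() => download(key))));
}

main().catch(console.error);

The design point the sketch illustrates: collecting keys first and wrapping each download in limit() keeps the fan-out bounded, so a large artifact set doesn't open hundreds of concurrent sockets against S3.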