Try GET requests instead of the S3 GetObject command
rayangler committed Apr 2, 2024
1 parent 0a9ccca commit fe2418a
Showing 3 changed files with 194 additions and 40 deletions.
61 changes: 52 additions & 9 deletions package-lock.json

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions package.json
@@ -45,6 +45,7 @@
"jest-mock-extended": "^2.0.2-beta2",
"js-yaml": "^3.13.1",
"mongodb": "^5.1.0",
"p-limit": "^5.0.0",
"simple-git": "^2.45.1",
"tsscmp": "^1.0.6",
"validator": "^10.11.0"
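For context, p-limit (the dependency added above) caps how many promises run at once: pLimit(n) returns a limiter, and each task wrapped in that limiter waits for a free slot before starting. A minimal sketch of the API as jobHandler.ts uses it below, with hypothetical URLs:

import pLimit from 'p-limit';
import axios from 'axios';

// At most 5 wrapped tasks run concurrently; the rest queue until a slot frees up.
const limit = pLimit(5);

// Hypothetical work items, purely for illustration.
const urls = ['https://example.com/a.json', 'https://example.com/b.json'];

const tasks = urls.map((url) => limit(() => axios.get(url)));
await Promise.all(tasks);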
172 changes: 141 additions & 31 deletions src/job/jobHandler.ts
@@ -1,6 +1,8 @@
import { GetObjectCommand, ListObjectsV2Command, S3Client } from '@aws-sdk/client-s3';
import { ListObjectsV2Command, S3Client } from '@aws-sdk/client-s3';
import axios, { AxiosResponse } from 'axios';
import path from 'path';
import pLimit from 'p-limit';
import fs from 'fs';
import { Payload, Job, JobStatus } from '../entities/job';
import { JobRepository } from '../repositories/jobRepository';
import { RepoBranchesRepository } from '../repositories/repoBranchesRepository';
@@ -18,7 +20,6 @@ import { MONOREPO_NAME } from '../monorepo/utils/monorepo-constants';
import { nextGenHtml, nextGenParse, oasPageBuild, persistenceModule, prepareBuild } from '../commands';
import { downloadBuildDependencies, writeFileAsync } from '../commands/src/helpers/dependency-helpers';
import { CliCommandResponse } from '../commands/src/helpers';
require('fs');

export abstract class JobHandler {
private _currJob: Job;
@@ -524,13 +525,36 @@ export abstract class JobHandler {
process.env.REGRESSION = regression;
}

// TODO-4442: Need to figure out how to split between cache and S3 files
private async downloadByUrl(objKey: string, destPath: string) {
const s3Url = `https://docs-mongodb-org-stg.s3.us-east-2.amazonaws.com/${objKey}`;
const maxAttempts = 3;

// Retry in case of random network issues
for (let i = maxAttempts; i > 0; i--) {
try {
const res = await axios.get(s3Url, { timeout: 10000, responseType: 'stream' });
const dirName = path.dirname(destPath);
this._fileSystemServices.createDirIfNotExists(dirName);
const dest = fs.createWriteStream(destPath);
res.data.pipe(dest);
// Wait for the write stream to finish flushing to disk before declaring success
await new Promise((resolve, reject) => {
dest.on('finish', resolve);
dest.on('error', reject);
});
console.log(`${objKey} is okay!`);
return;
} catch (err) {
console.error(`Failed fetching ${objKey}, retrying`);
const delay = 1000;
await new Promise((resolve) => setTimeout(resolve, delay));
}
}

console.error(`Failed to fetch ${objKey} after ${maxAttempts} attempts`);
}

private async downloadExistingArtifacts() {
await this._logger.save(this._currJob._id, 'Attempting to download existing artifacts');
const timerLabel = 'downloadExistingArtifacts - No limit';
console.time(timerLabel);

console.log('Attempting to download existing artifacts');
const client = new S3Client({ region: 'us-east-2' });
const bucket = process.env.BUCKET;
const bucket = 'docs-mongodb-org-stg';
if (!bucket) {
this._logger.error(this._currJob._id, `Missing bucket: ${bucket}`);
console.error(`Missing bucket: ${bucket}`);
return;
}

@@ -539,19 +563,20 @@
// We'll need to figure out how to handle differences between prod deploy vs. content staging build, if necessary
const s3Prefix = 'docs/docsworker-xlarge/DOP-4442/';
const listCommand = new ListObjectsV2Command({ Bucket: bucket, Prefix: s3Prefix });
const repoDir = this._config.get<string>('repo_dir');
const repoDir = 'test-s3-fetching-repo';
// Since the Makefiles move the path to Snooty a bit, we want to make sure we target the original, before the
// frontend is built
const originalSnootyPath = `${repoDir}/../../snooty`;
await this._logger.save(this._currJob._id, `originalSnootyPath: ${originalSnootyPath}`);
const originalSnootyPath = `${repoDir}/../snooty`;
console.log(`originalSnootyPath: ${originalSnootyPath}`);
const targetPublicDirectory = path.join(originalSnootyPath, '/public');
await this._logger.save(this._currJob._id, `targetPublicDirectory: ${targetPublicDirectory}`);
console.log(`targetPublicDirectory: ${targetPublicDirectory}`);
// Target cache directory should just be the root snooty dir since objects will have ".cache/" already included
const targetCacheDirectory = path.join(originalSnootyPath);
console.log(`targetCacheDirectory: ${targetCacheDirectory}`);

// For debugging purposes
let contents = '';
let n = 0;
const keysList: { objKey: string; destPath: string }[] = [];

// NOTE: This currently does not take into account the .cache folder
try {
let isTruncated = true;

@@ -561,47 +586,132 @@
while (isTruncated) {
const { Contents, IsTruncated, NextContinuationToken } = await client.send(listCommand);
if (!Contents) {
this._logger.info(this._currJob._id, 'No contents');
console.log('No contents');
break;
}

console.log('Contents found');

for (const obj of Contents) {
const objKey = obj.Key;
if (!objKey) {
continue;
}

const getCommand = new GetObjectCommand({ Bucket: bucket, Key: objKey });
const { Body: objBody } = await client.send(getCommand);

// Save S3 objects to local file paths
// Files in the local public directory should exclude path prefixes
const localFileName = objKey.replace(s3Prefix, '');
const targetFilePath = path.join(targetPublicDirectory, localFileName);
this._logger.info(this._currJob._id, `targetFilePath: ${targetFilePath}`);
if (objBody) {
await writeFileAsync(targetFilePath, await objBody.transformToString());
const targetDir = objKey.includes('.cache') ? targetCacheDirectory : targetPublicDirectory;
const targetFilePath = path.join(targetDir, localFileName);

// Some objects are just empty directories, apparently
if (!objKey.endsWith('/')) {
keysList.push({ objKey, destPath: targetFilePath });
}
}

// For debugging
const contentsList = Contents.map((c) => {
n++;
return `${c.Key}\n`;
});
contents += contentsList;

isTruncated = !!IsTruncated;
listCommand.input.ContinuationToken = NextContinuationToken;
}
} catch (e) {
this._logger.error(this._currJob._id, e);
console.error(e);
}

// Limit concurrency to avoid rate limits
const limit = pLimit(5);
const downloadPromises = keysList.map(({ objKey, destPath }) => {
return limit(() => this.downloadByUrl(objKey, destPath));
});

// For debugging purposes
this._logger.info(this._currJob._id, contents);
// console.info(contents);
try {
await Promise.all(downloadPromises);
} catch (err) {
console.error(err);
}

console.timeEnd(timerLabel);
}

// TODO-4442: Need to figure out how to split between cache and S3 files
// private async downloadExistingArtifacts() {
// await this._logger.save(this._currJob._id, 'Attempting to download existing artifacts');
// const client = new S3Client({ region: 'us-east-2' });
// const bucket = process.env.BUCKET;
// if (!bucket) {
// this._logger.error(this._currJob._id, `Missing bucket: ${bucket}`);
// return;
// }

// // S3 object prefix should match the path prefix that Mut uploads to for the build
// // Probably want to make this an argument, but leave as a static variable for testing
// // We'll need to figure out how to handle differences between prod deploy vs. content staging build, if necessary
// const s3Prefix = 'docs/docsworker-xlarge/DOP-4442/';
// const listCommand = new ListObjectsV2Command({ Bucket: bucket, Prefix: s3Prefix });
// const repoDir = this._config.get<string>('repo_dir');
// // Since the Makefiles move the path to Snooty a bit, we want to make sure we target the original, before the
// // frontend is built
// const originalSnootyPath = `${repoDir}/../../snooty`;
// await this._logger.save(this._currJob._id, `originalSnootyPath: ${originalSnootyPath}`);
// const targetPublicDirectory = path.join(originalSnootyPath, '/public');
// await this._logger.save(this._currJob._id, `targetPublicDirectory: ${targetPublicDirectory}`);

// // For debugging purposes
// let contents = '';
// let n = 0;

// // NOTE: This currently does not take into account the .cache folder
// try {
// let isTruncated = true;

// this._fileSystemServices.createDirIfNotExists(targetPublicDirectory);

// // Grab contents, and then attempt to continue, in case there are more objects
// while (isTruncated) {
// const { Contents, IsTruncated, NextContinuationToken } = await client.send(listCommand);
// if (!Contents) {
// this._logger.info(this._currJob._id, 'No contents');
// break;
// }

// for (const obj of Contents) {
// const objKey = obj.Key;
// if (!objKey) {
// continue;
// }

// const getCommand = new GetObjectCommand({ Bucket: bucket, Key: objKey });
// const { Body: objBody } = await client.send(getCommand);

// // Save S3 objects to local file paths
// // Files in the local public directory should exclude path prefixes
// const localFileName = objKey.replace(s3Prefix, '');
// const targetFilePath = path.join(targetPublicDirectory, localFileName);
// this._logger.info(this._currJob._id, `targetFilePath: ${targetFilePath}`);
// if (objBody) {
// await writeFileAsync(targetFilePath, await objBody.transformToString());
// }
// }

// // For debugging
// const contentsList = Contents.map((c) => {
// n++;
// return `${c.Key}\n`;
// });
// contents += contentsList;

// isTruncated = !!IsTruncated;
// listCommand.input.ContinuationToken = NextContinuationToken;
// }
// } catch (e) {
// this._logger.error(this._currJob._id, e);
// }

// // For debugging purposes
// this._logger.info(this._currJob._id, contents);
// }

@throwIfJobInterupted()
protected async buildWithMakefiles(): Promise<boolean> {
this.cleanup();
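A note on the new downloadByUrl: res.data.pipe(dest) returns before the file is fully written, so success should only be declared after the write stream finishes. Node's built-in stream/promises pipeline expresses that wait in one call; a minimal alternative sketch under the same axios streaming setup (streamToFile is a hypothetical helper, not part of this commit):

import fs from 'fs';
import axios from 'axios';
import { pipeline } from 'stream/promises';

// Resolves once the response body is fully flushed to destPath;
// rejects if either the download or the disk write errors out.
async function streamToFile(url: string, destPath: string): Promise<void> {
  const res = await axios.get(url, { timeout: 10000, responseType: 'stream' });
  await pipeline(res.data, fs.createWriteStream(destPath));
}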

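The listing loop in downloadExistingArtifacts follows the standard ListObjectsV2 pagination pattern: re-send the command with the returned NextContinuationToken until IsTruncated comes back false. The same pattern condensed into a standalone helper, with bucket and prefix left as caller-supplied placeholders:

import { ListObjectsV2Command, S3Client } from '@aws-sdk/client-s3';

// Walks every page of results and returns all object keys under the prefix.
async function listAllKeys(client: S3Client, bucket: string, prefix: string): Promise<string[]> {
  const command = new ListObjectsV2Command({ Bucket: bucket, Prefix: prefix });
  const keys: string[] = [];
  let isTruncated = true;

  while (isTruncated) {
    const { Contents, IsTruncated, NextContinuationToken } = await client.send(command);
    // Skip "directory" placeholder objects, whose keys end with a slash.
    for (const obj of Contents ?? []) {
      if (obj.Key && !obj.Key.endsWith('/')) keys.push(obj.Key);
    }
    isTruncated = !!IsTruncated;
    command.input.ContinuationToken = NextContinuationToken;
  }

  return keys;
}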