From 5bd8a71397095051cbb95f47ee730bea6d1374f2 Mon Sep 17 00:00:00 2001 From: Brandon Ly Date: Wed, 31 Jan 2024 17:13:08 -0600 Subject: [PATCH] [DOP-4171]: Create API Gateway endpoint and Lambda to handle snooty parser cache updates (#970) * [DOP-4171]: Add basic lambda handler * [DOP-4171]: Add basic lambda handler * [DOP-4171]: Add validation and code to run ecs task * [DOP-4171]: Add cache updater api stack * [DOP-4171]: Add necessary info to start task * [DOP-4171]: Add api key * [DOP-4171]: Add test workflow * [DOP-4171]: Add test workflow * [DOP-4171]: Add test workflow * [DOP-4171]: Add test for rebuild cache * [DOP-4171]: Add workspace * [DOP-4171]: Add checkout sep * [DOP-4171]: Exit out of task when done * [DOP-4171]: Add ability to pass snooty parser version to cdk * [DOP-4171]: Add webhook for github push * [DOP-4171]: Add webhook for github push * [DOP-4171]: Add error state if webhook is hit with invalid branch or org * [DOP-4171]: Warn instead of throw error on getSnootyParserVersion * [DOP-4171]: Revert deploy feature branch workflow * [DOP-4171]: Check if snooty.toml is changed * [DOP-4171]: Run prettier format * [DOP-4171]: Remove cache updater deploy step as it's already handled elsewhere * [DOP-4171]: Add new workflow for release * [DOP-4171]: Manually change parser version to test * [DOP-4171]: Remove test workflow * [DOP-4171]: Update README.md * [DOP-4171]: Format readme * [DOP-4171]: Respond to PR feedback * [DOP-4171]: Respond to PR feedback --- .../workflows/deploy-prd-enhanced-cache.yml | 26 +++ .github/workflows/update-feature-branch.yml | 37 +--- api/controllers/v2/cache.ts | 185 ++++++++++++++++++ cdk-infra/bin/cdk-infra.ts | 2 +- .../constructs/auto-builder-vpc-construct.ts | 4 +- .../cache-updater-api-construct.ts | 114 +++++++++++ .../cache-updater-worker-construct.ts | 14 +- .../lib/stacks/auto-builder-vpc-stack.ts | 4 +- cdk-infra/lib/stacks/cache-updater-stack.ts | 28 ++- cdk-infra/utils/env.ts | 15 ++ 
images/snooty-cache-updater.svg | 1 + src/cache-updater/Dockerfile.cacheUpdater | 3 - src/cache-updater/README.md | 12 +- src/cache-updater/index.ts | 22 ++- 14 files changed, 405 insertions(+), 62 deletions(-) create mode 100644 .github/workflows/deploy-prd-enhanced-cache.yml create mode 100644 api/controllers/v2/cache.ts create mode 100644 cdk-infra/lib/constructs/cache-updater/cache-updater-api-construct.ts create mode 100644 images/snooty-cache-updater.svg diff --git a/.github/workflows/deploy-prd-enhanced-cache.yml b/.github/workflows/deploy-prd-enhanced-cache.yml new file mode 100644 index 000000000..2990a4d32 --- /dev/null +++ b/.github/workflows/deploy-prd-enhanced-cache.yml @@ -0,0 +1,26 @@ +on: + release: + types: [released] +concurrency: + group: environment-prd-enhanced-cacheUpdate-${{ github.ref }} + cancel-in-progress: true +name: Deploy Production ECS Enhanced Webhooks +jobs: + deploy-prd: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v1 + with: + node-version: '18.x' + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-2 + - name: Rebuild Cache if New Snooty Parser Version + uses: mongodb/docs-worker-actions/rebuild-parse-cache@DOP-4294 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + WORKSPACE: ${{ github.workspace }} \ No newline at end of file diff --git a/.github/workflows/update-feature-branch.yml b/.github/workflows/update-feature-branch.yml index 2dd08b7b4..151a8faed 100644 --- a/.github/workflows/update-feature-branch.yml +++ b/.github/workflows/update-feature-branch.yml @@ -110,39 +110,4 @@ jobs: run: | cd cdk-infra/ npm run deploy:feature:stack -- -c env=stg -c customFeatureName=enhancedApp-stg-${{github.head_ref}} \ - auto-builder-stack-enhancedApp-stg-${{github.head_ref}}-worker - build-cache-updater: - needs: 
prep-build - runs-on: ubuntu-latest - steps: - - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - cache: - - 'src/cache-updater/**' - - 'cdk-infra/lib/constructs/cache-updater/**' - - uses: actions/checkout@v4 - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-east-2 - - uses: actions/setup-node@v4 - with: - node-version: '18.x' - - uses: actions/cache/restore@v3 - id: cache-restore - with: - path: | - node_modules - cdk-infra/node_modules - key: ${{ github.head_ref }} - - name: Update Cache Updater - if: steps.filter.outputs.cache == 'true' - run: | - cd cdk-infra/ - npm run deploy:feature:stack -- -c env=stg -c customFeatureName=enhancedApp-stg-${{github.head_ref}} \ - cache-updater + auto-builder-stack-enhancedApp-stg-${{github.head_ref}}-worker \ No newline at end of file diff --git a/api/controllers/v2/cache.ts b/api/controllers/v2/cache.ts new file mode 100644 index 000000000..4060b665b --- /dev/null +++ b/api/controllers/v2/cache.ts @@ -0,0 +1,185 @@ +import { APIGatewayEvent, APIGatewayProxyResult } from 'aws-lambda'; + +import { RepoInfo } from '../../../src/cache-updater/index'; +import { ECSClient, RunTaskCommand } from '@aws-sdk/client-ecs'; +import { validateJsonWebhook } from '../../handlers/github'; +import { PushEvent } from '@octokit/webhooks-types'; + +/** + * validates request + * @param body The result of calling `JSON.parse` on the `event.body`. + * @returns a boolean representing whether or not we have a valid rebuild request. + */ +function isRebuildRequest(body: unknown): body is RepoInfo[] { + // if body is falsy (e.g. 0, '', undefined, null, etc.), it's not valid here. + if (!body || typeof body !== 'object') return false; + + // if we get an array of sites to rebuild, check to make sure + // they are correctly formatted. 
+ try { + const repoInfos = body as RepoInfo[]; + + // Array.prototype.every returns true if every value returned from the callback is true, otherwise it'll return false. + return repoInfos.every(({ repoOwner, repoName }) => typeof repoOwner === 'string' && typeof repoName === 'string'); + } catch { + // if we get an error, the data is probably wrong, so we can return false here. + return false; + } +} + +async function runCacheRebuildJob(repos: RepoInfo[]) { + const { TASK_DEFINITION, CONTAINER_NAME, CLUSTER, SUBNETS } = process.env; + + if (!TASK_DEFINITION) throw new Error('ERROR! process.env.TASK_DEFINITION is not defined'); + if (!CONTAINER_NAME) throw new Error('ERROR! process.env.CONTAINER_NAME is not defined'); + if (!CLUSTER) throw new Error('ERROR! process.env.CLUSTER is not defined'); + if (!SUBNETS) throw new Error('ERROR! process.env.SUBNETS is not defined'); + + const client = new ECSClient({ + region: 'us-east-2', + }); + + const command = new RunTaskCommand({ + taskDefinition: TASK_DEFINITION, + cluster: CLUSTER, + launchType: 'FARGATE', + networkConfiguration: { + awsvpcConfiguration: { + subnets: JSON.parse(SUBNETS), + }, + }, + overrides: { + containerOverrides: [ + { + name: CONTAINER_NAME, + environment: [ + { + name: 'REPOS', + value: JSON.stringify(repos), + }, + ], + }, + ], + }, + }); + + await client.send(command); +} + +/** + * Handles requests from individual doc sites and when the docs-worker-pool repository has a release with an updated Snooty Parser version. + * In the latter case, we should receive an event to build all doc site caches. + * @param {APIGatewayEvent} event An event object that comes from either a webhook payload or from the custom GitHub Action for the docs-worker pool. + * + * In either scenario, the body should contain an array of RepoInfo objects. + * @returns {Promise<APIGatewayProxyResult>} + */ +export async function rebuildCacheHandler(event: APIGatewayEvent): Promise<APIGatewayProxyResult> { + if (!event.body) { + const errorMessage = 'Error! 
No body found in event payload.'; + console.error(errorMessage); + return { + statusCode: 400, + body: errorMessage, + }; + } + + const rebuildRequest = JSON.parse(event.body); + + if (!isRebuildRequest(rebuildRequest)) { + const errorMessage = 'Error! Invalid rebuild request.'; + console.error(errorMessage); + return { + statusCode: 400, + body: errorMessage, + }; + } + + try { + await runCacheRebuildJob(rebuildRequest); + return { + statusCode: 200, + body: 'Cache rebuild job successfully created', + }; + } catch (e) { + console.error(e); + return { + statusCode: 500, + body: 'Error occurred when starting cache rebuild job', + }; + } +} + +/** + * This is for the GitHub webhooks. The GitHub webhooks will be used by individual doc sites to rebuild the cache if + * the snooty.toml file is modified. + * @param {APIGatewayEvent} event GitHub webhook push event. Body should be a PushEvent type. + * @returns {Promise<APIGatewayProxyResult>} + */ +export async function rebuildCacheGithubWebhookHandler(event: APIGatewayEvent): Promise<APIGatewayProxyResult> { + if (!event.body) { + const errorMessage = 'Error! No body found in event payload.'; + console.error(errorMessage); + return { + statusCode: 400, + body: errorMessage, + }; + } + + let body: PushEvent; + try { + body = JSON.parse(event.body) as PushEvent; + } catch (e) { + console.log('ERROR! Could not parse event.body', e); + return { + statusCode: 400, + headers: { 'Content-Type': 'text/plain' }, + body: ' ERROR! Could not parse event.body', + }; + } + + const repoOwner = body.repository.owner.login; + const repoName = body.repository.name; + + // Checks the commits to see if there have been changes made to the snooty.toml file. 
+ const snootyTomlChanged = body.commits.some( + (commit) => + commit.added.some((fileName) => fileName === 'snooty.toml') || + commit.removed.some((fileName) => fileName === 'snooty.toml') || + commit.modified.some((fileName) => fileName === 'snooty.toml') + ); + + if (!snootyTomlChanged) { + return { statusCode: 202, body: 'snooty.toml has not changed, no need to rebuild cache' }; + } + + const ref = body.ref; + // For webhook requests, this should only run on the primary branch (push events report it as `refs/heads/<branch>`), and if the repository belongs to the 10gen or mongodb orgs. + if ((ref !== 'refs/heads/master' && ref !== 'refs/heads/main') || (repoOwner !== '10gen' && repoOwner !== 'mongodb')) { + return { + statusCode: 403, + body: 'Cache job not processed because the request is not for the primary branch and/or the repository does not belong to the 10gen or mongodb organizations', + }; + } + + const cacheUpdateBody = JSON.stringify([{ repoOwner, repoName }]); + const { GITHUB_SECRET } = process.env; + + if (!GITHUB_SECRET) { + console.error('GITHUB_SECRET is not defined'); + return { + statusCode: 500, + body: 'internal server error', + }; + } + + if (!validateJsonWebhook(event, GITHUB_SECRET)) { + const errMsg = "X-Hub-Signature incorrect. 
Github webhook token doesn't match"; + return { + statusCode: 401, + headers: { 'Content-Type': 'text/plain' }, + body: errMsg, + }; + } + return rebuildCacheHandler({ ...event, body: cacheUpdateBody }); +} diff --git a/cdk-infra/bin/cdk-infra.ts b/cdk-infra/bin/cdk-infra.ts index cc4f248d9..44e4eec80 100644 --- a/cdk-infra/bin/cdk-infra.ts +++ b/cdk-infra/bin/cdk-infra.ts @@ -46,7 +46,7 @@ async function main() { env, }); - new CacheUpdaterStack(app, 'cache-updater', { vpc }); + new CacheUpdaterStack(app, `${stackName}-cache`, { vpc, env, githubSecret: workerSecureStrings.GITHUB_SECRET }); } main(); diff --git a/cdk-infra/lib/constructs/auto-builder-vpc-construct.ts b/cdk-infra/lib/constructs/auto-builder-vpc-construct.ts index fdfe11ea8..b1b97cfdf 100644 --- a/cdk-infra/lib/constructs/auto-builder-vpc-construct.ts +++ b/cdk-infra/lib/constructs/auto-builder-vpc-construct.ts @@ -1,8 +1,8 @@ -import { Vpc, GatewayVpcEndpointAwsService, InterfaceVpcEndpointAwsService, IVpc } from 'aws-cdk-lib/aws-ec2'; +import { Vpc, GatewayVpcEndpointAwsService, InterfaceVpcEndpointAwsService } from 'aws-cdk-lib/aws-ec2'; import { Construct } from 'constructs'; export class AutoBuilderVpcConstruct extends Construct { - readonly vpc: IVpc; + readonly vpc: Vpc; constructor(scope: Construct, id: string) { super(scope, id); diff --git a/cdk-infra/lib/constructs/cache-updater/cache-updater-api-construct.ts b/cdk-infra/lib/constructs/cache-updater/cache-updater-api-construct.ts new file mode 100644 index 000000000..fc73bfef4 --- /dev/null +++ b/cdk-infra/lib/constructs/cache-updater/cache-updater-api-construct.ts @@ -0,0 +1,114 @@ +import { Duration } from 'aws-cdk-lib'; +import { + ApiKeySourceType, + Cors, + LambdaIntegration, + LambdaRestApi, + LogGroupLogDestination, +} from 'aws-cdk-lib/aws-apigateway'; +import { Vpc } from 'aws-cdk-lib/aws-ec2'; +import { TaskDefinition } from 'aws-cdk-lib/aws-ecs'; +import { Code, Function, Runtime } from 'aws-cdk-lib/aws-lambda'; +import { 
NodejsFunction } from 'aws-cdk-lib/aws-lambda-nodejs'; +import { LogGroup } from 'aws-cdk-lib/aws-logs'; +import { Construct } from 'constructs'; +import path from 'path'; + +interface CacheUpdaterApiConstructProps { + clusterName: string; + taskDefinition: TaskDefinition; + containerName: string; + vpc: Vpc; + githubSecret: string; +} + +const HANDLERS_PATH = path.join(__dirname, '/../../../../api/controllers/v2'); + +/** + * This stack creates the resources for the Snooty Parser cache updater. + */ +export class CacheUpdaterApiConstruct extends Construct { + constructor( + scope: Construct, + id: string, + { clusterName, taskDefinition, containerName, vpc, githubSecret }: CacheUpdaterApiConstructProps + ) { + super(scope, id); + + const cacheWebhookLambda = new NodejsFunction(this, 'cacheUpdaterWebhookLambda', { + entry: `${HANDLERS_PATH}/cache.ts`, + handler: 'rebuildCacheHandler', + runtime: Runtime.NODEJS_18_X, + timeout: Duration.minutes(2), + memorySize: 1024, + environment: { + CLUSTER: clusterName, + TASK_DEFINITION: taskDefinition.taskDefinitionArn, + CONTAINER_NAME: containerName, + SUBNETS: JSON.stringify(vpc.privateSubnets.map((subnet) => subnet.subnetId)), + }, + }); + + const cacheGithubWebhookLambda = new NodejsFunction(this, 'cacheUpdaterGithubWebhookLambda', { + entry: `${HANDLERS_PATH}/cache.ts`, + handler: 'rebuildCacheGithubWebhookHandler', + runtime: Runtime.NODEJS_18_X, + timeout: Duration.minutes(2), + memorySize: 1024, + environment: { + CLUSTER: clusterName, + TASK_DEFINITION: taskDefinition.taskDefinitionArn, + CONTAINER_NAME: containerName, + SUBNETS: JSON.stringify(vpc.privateSubnets.map((subnet) => subnet.subnetId)), + GITHUB_SECRET: githubSecret, + }, + }); + + taskDefinition.grantRun(cacheWebhookLambda); + taskDefinition.grantRun(cacheGithubWebhookLambda); + + // generic handler for the root endpoint + const rootEndpointLambda = new Function(this, 'RootEndpointLambda', { + code: Code.fromInline('exports.default = (event) => { 
console.log("hello, world!!"); }'), + runtime: Runtime.NODEJS_18_X, + handler: 'RootEndpointLambda', + }); + + const apiLogGroup = new LogGroup(this, 'cacheUpdaterLogGroup'); + + const restApi = new LambdaRestApi(this, 'cacheUpdaterRestApi', { + handler: rootEndpointLambda, + proxy: false, + apiKeySourceType: ApiKeySourceType.HEADER, + deployOptions: { + accessLogDestination: new LogGroupLogDestination(apiLogGroup), + }, + }); + + const webhook = restApi.root.addResource('webhook', { + defaultCorsPreflightOptions: { allowOrigins: Cors.ALL_ORIGINS }, + }); + + webhook.addMethod('POST', new LambdaIntegration(cacheWebhookLambda), { apiKeyRequired: true }); + + const usagePlan = restApi.addUsagePlan('cacheUpdaterUsagePlan', { + name: 'defaultPlan', + apiStages: [ + { + api: restApi, + stage: restApi.deploymentStage, + }, + ], + }); + + const apiKey = restApi.addApiKey('cacheUpdaterApiKey'); + + usagePlan.addApiKey(apiKey); + + const githubWebhook = webhook.addResource('github', { + defaultCorsPreflightOptions: { allowOrigins: Cors.ALL_ORIGINS }, + }); + + githubWebhook.addMethod('POST', new LambdaIntegration(cacheGithubWebhookLambda), { apiKeyRequired: false }); + } +} diff --git a/cdk-infra/lib/constructs/cache-updater/cache-updater-worker-construct.ts b/cdk-infra/lib/constructs/cache-updater/cache-updater-worker-construct.ts index d44e618ae..31052e3c2 100644 --- a/cdk-infra/lib/constructs/cache-updater/cache-updater-worker-construct.ts +++ b/cdk-infra/lib/constructs/cache-updater/cache-updater-worker-construct.ts @@ -1,10 +1,11 @@ import { IVpc } from 'aws-cdk-lib/aws-ec2'; -import { Cluster, ContainerImage, FargateTaskDefinition, LogDrivers } from 'aws-cdk-lib/aws-ecs'; +import { Cluster, ContainerImage, FargateTaskDefinition, LogDrivers, TaskDefinition } from 'aws-cdk-lib/aws-ecs'; import { Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; import { LogGroup } from 'aws-cdk-lib/aws-logs'; import { Bucket } from 'aws-cdk-lib/aws-s3'; import { Construct } from 
'constructs'; import path from 'path'; +import { getSnootyParserVersion } from '../../../utils/env'; const SNOOTY_CACHE_BUCKET_NAME = 'snooty-parse-cache'; @@ -13,7 +14,9 @@ interface CacheUpdaterWorkerConstructProps { } export class CacheUpdaterWorkerConstruct extends Construct { - clusterName: string; + readonly clusterName: string; + readonly taskDefinition: TaskDefinition; + readonly containerName: string; constructor(scope: Construct, id: string, { vpc }: CacheUpdaterWorkerConstructProps) { super(scope, id); @@ -36,12 +39,15 @@ export class CacheUpdaterWorkerConstruct extends Construct { taskRole, }); + const containerName = 'cacheUpdaterWorkerImage'; const taskDefLogGroup = new LogGroup(this, 'cacheUpdaterWorkerLogGroup'); + const snootyParserVersion = getSnootyParserVersion(); + taskDefinition.addContainer('cacheUpdaterWorkerImage', { image: ContainerImage.fromAsset(path.join(__dirname, '../../../../'), { file: 'src/cache-updater/Dockerfile.cacheUpdater', - buildArgs: { SNOOTY_PARSER_VERSION: '0.15.2' }, + buildArgs: { SNOOTY_PARSER_VERSION: snootyParserVersion }, exclude: ['tests/', 'node_modules/', 'cdk-infra/'], // adding this just in case it doesn't pick up our dockerignore }), environment: { @@ -54,5 +60,7 @@ export class CacheUpdaterWorkerConstruct extends Construct { }); this.clusterName = cluster.clusterName; + this.taskDefinition = taskDefinition; + this.containerName = containerName; } } diff --git a/cdk-infra/lib/stacks/auto-builder-vpc-stack.ts b/cdk-infra/lib/stacks/auto-builder-vpc-stack.ts index 0fe16c591..8cd4fab5c 100644 --- a/cdk-infra/lib/stacks/auto-builder-vpc-stack.ts +++ b/cdk-infra/lib/stacks/auto-builder-vpc-stack.ts @@ -1,10 +1,10 @@ import { Stack, StackProps } from 'aws-cdk-lib'; -import { IVpc } from 'aws-cdk-lib/aws-ec2'; +import { Vpc } from 'aws-cdk-lib/aws-ec2'; import { Construct } from 'constructs'; import { AutoBuilderVpcConstruct } from '../constructs/auto-builder-vpc-construct'; export class AutoBuilderVpcStack extends 
Stack { - readonly vpc: IVpc; + readonly vpc: Vpc; constructor(scope: Construct, id: string, props?: StackProps) { super(scope, id, props); diff --git a/cdk-infra/lib/stacks/cache-updater-stack.ts b/cdk-infra/lib/stacks/cache-updater-stack.ts index 0f7f4667e..a4467a0a7 100644 --- a/cdk-infra/lib/stacks/cache-updater-stack.ts +++ b/cdk-infra/lib/stacks/cache-updater-stack.ts @@ -1,15 +1,29 @@ -import { Stack } from 'aws-cdk-lib'; +import { Stack, StackProps } from 'aws-cdk-lib'; import { Construct } from 'constructs'; import { CacheUpdaterWorkerConstruct } from '../constructs/cache-updater/cache-updater-worker-construct'; -import { IVpc } from 'aws-cdk-lib/aws-ec2'; +import { Vpc } from 'aws-cdk-lib/aws-ec2'; +import { CacheUpdaterApiConstruct } from '../constructs/cache-updater/cache-updater-api-construct'; -interface CacheUpdaterStackProps { - vpc: IVpc; +interface CacheUpdaterStackProps extends StackProps { + vpc: Vpc; + githubSecret: string; } export class CacheUpdaterStack extends Stack { - constructor(scope: Construct, id: string, { vpc }: CacheUpdaterStackProps) { - super(scope, id); + constructor(scope: Construct, id: string, { vpc, githubSecret, ...props }: CacheUpdaterStackProps) { + super(scope, id, props); - new CacheUpdaterWorkerConstruct(this, 'cache-updater-resources', { vpc }); + const { clusterName, taskDefinition, containerName } = new CacheUpdaterWorkerConstruct( + this, + 'cache-updater-resources', + { vpc } + ); + + new CacheUpdaterApiConstruct(this, 'cache-updater-api', { + clusterName, + taskDefinition, + containerName, + vpc, + githubSecret, + }); } } diff --git a/cdk-infra/utils/env.ts b/cdk-infra/utils/env.ts index 27b065f8a..9979090fb 100644 --- a/cdk-infra/utils/env.ts +++ b/cdk-infra/utils/env.ts @@ -9,6 +9,7 @@ const autoBuilderContextVariables = [ 'customFeatureName', 'env', 'useCustomBuckets', + 'snootyParserVersion', ] as const; export type SnootyEnv = (typeof snootyEnvs)[number]; @@ -77,6 +78,20 @@ export function isEnhanced(): 
boolean { return !!contextVarsMap.get('enhanced'); } +/** + * This value is used by the rebuild-parse-cache custom GitHub Action + */ +export function getSnootyParserVersion(): string { + checkContextInit(); + const snootyParserVersion = contextVarsMap.get('snootyParserVersion'); + + if (!snootyParserVersion) { + console.warn('Error! The context variable snootyParserVersion is not defined.'); + return ''; + } + + return snootyParserVersion; +} export function getFeatureName(): string { checkContextInit(); diff --git a/images/snooty-cache-updater.svg b/images/snooty-cache-updater.svg new file mode 100644 index 000000000..014f3e568 --- /dev/null +++ b/images/snooty-cache-updater.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/src/cache-updater/Dockerfile.cacheUpdater b/src/cache-updater/Dockerfile.cacheUpdater index a22daed94..74907071d 100644 --- a/src/cache-updater/Dockerfile.cacheUpdater +++ b/src/cache-updater/Dockerfile.cacheUpdater @@ -30,12 +30,9 @@ RUN npm run build:esbuild:cacheUpdater FROM node:18-bullseye-slim WORKDIR /usr/app - ENV PATH="${PATH}:/opt/snooty" RUN apt-get -o Acquire::Check-Valid-Until=false update && apt-get -y install git - - COPY --from=python-builder /opt/ /opt/ COPY --from=builder /usr/app/dist/ . diff --git a/src/cache-updater/README.md b/src/cache-updater/README.md index 1bbd47027..b2b3f8752 100644 --- a/src/cache-updater/README.md +++ b/src/cache-updater/README.md @@ -10,8 +10,18 @@ A cache invalidation event occurs under two circumstances: ### Architecture +#### Worker + The cache update worker is an ephemeral ECS task that is spun up in response to a cache invalidation event. The doc sites that need to be rebuilt will be provided as an environment variable called `REPOS`. This is an array of objects that contain the `repoOwner` and `repoName` properties. The task is capable of processing multiple uploads concurrently. The code defined for it can be found in `src/cache-updater.ts`. 
The infrastructure defined for the worker is defined [here](../../cdk-infra/lib/constructs/cache-updater/cache-updater-worker-construct.ts). -(TODO): Update README when [DOP-4171 is complete](https://jira.mongodb.org/browse/DOP-4171). +#### API + +The cache updater has two endpoints to handle requests to rebuild caches. One handles individual doc site requests via a GitHub webhook push event, while the other handles requests from the [custom GitHub Action for releases](https://github.com/mongodb/docs-worker-actions/blob/2dfaefe5785bad73e2e9239e80318759868751d0/src/rebuild-parse-cache/index.ts#L30). The two separate endpoints exist for security reasons. We need to use the custom GitHub Action for the release process as it must first deploy the changes to the cache updater stack, and then send a request. As a result, we can't use a GitHub webhook push event for releases, since the event would be sent before the cache updater stack has been updated. + +The authentication process therefore differs between the two endpoints. GitHub webhook requests carry specific headers generated by GitHub (specifically, `X-Hub-Signature-256`) that are required for authentication. So, the only difference between the two endpoints is how they are authenticated. The endpoint for the custom GitHub Action uses an API key that API Gateway generates. This ensures that the URL cannot be used publicly. 
+ +#### Diagram outlining the cache update process + +![Snooty Parse Cache Diagram](../../images/snooty-cache-updater.svg) diff --git a/src/cache-updater/index.ts b/src/cache-updater/index.ts index 1b739e3f6..ce1d8ac9e 100644 --- a/src/cache-updater/index.ts +++ b/src/cache-updater/index.ts @@ -78,7 +78,7 @@ async function uploadCacheToS3(repoName: string, repoOwner: string) { } } -interface RepoInfo { +export interface RepoInfo { repoOwner: string; repoName: string; } @@ -123,10 +123,18 @@ function getRepos(): RepoInfo[] { } } -const repos = getRepos(); +async function main() { + const repos = getRepos(); -repos.forEach((repo) => - handler(repo).catch((error) => { - console.error('An error occurred!', error); - }) -); + await Promise.all( + repos.map((repo) => + handler(repo).catch((error) => { + console.error('An error occurred!', error); + }) + ) + ); + + process.exit(0); +} + +main();