From af25f15f4b9e11cd960762f2ca3fc8295063e121 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 5 Jan 2022 12:04:00 -0800 Subject: [PATCH 01/41] feat: Toil engine for CWL --- examples/demo-cwl-project/agc-project.yaml | 20 ++ .../workflows/hello/hello.cwl | 15 ++ packages/cdk/lib/constructs/batch.ts | 1 + .../cdk/lib/env/context-app-parameters.ts | 7 +- .../lib/roles/policies/toil-batch-policy.ts | 33 +++ packages/cdk/lib/roles/toil-engine-role.ts | 57 +++++ packages/cdk/lib/roles/toil-job-role.ts | 50 +++++ packages/cdk/lib/stacks/context-stack.ts | 48 ++++ .../stacks/engines/toil-engine-construct.ts | 104 +++++++++ .../internal/pkg/cli/account_activate_test.go | 7 + .../cli/internal/pkg/cli/context/manager.go | 10 +- .../pkg/cli/context/manager_deploy_test.go | 10 +- packages/cli/internal/pkg/cli/project_init.go | 3 +- .../cli/internal/pkg/cli/project_init_test.go | 7 +- .../internal/pkg/environment/environment.go | 18 +- packages/engines/toil/Dockerfile | 61 ++++++ packages/engines/toil/LICENSE | 201 +++++++++++++++++ packages/engines/toil/README.md | 48 ++++ packages/engines/toil/THIRD-PARTY | 207 ++++++++++++++++++ packages/engines/toil/buildspec.yml | 20 ++ packages/engines/toil/rabbitmq.repo | 61 ++++++ packages/engines/toil/toil.aws.sh | 28 +++ 22 files changed, 1002 insertions(+), 14 deletions(-) create mode 100644 examples/demo-cwl-project/agc-project.yaml create mode 100644 examples/demo-cwl-project/workflows/hello/hello.cwl create mode 100644 packages/cdk/lib/roles/policies/toil-batch-policy.ts create mode 100644 packages/cdk/lib/roles/toil-engine-role.ts create mode 100644 packages/cdk/lib/roles/toil-job-role.ts create mode 100644 packages/cdk/lib/stacks/engines/toil-engine-construct.ts create mode 100644 packages/engines/toil/Dockerfile create mode 100644 packages/engines/toil/LICENSE create mode 100644 packages/engines/toil/README.md create mode 100644 packages/engines/toil/THIRD-PARTY create mode 100644 packages/engines/toil/buildspec.yml create 
mode 100644 packages/engines/toil/rabbitmq.repo create mode 100644 packages/engines/toil/toil.aws.sh diff --git a/examples/demo-cwl-project/agc-project.yaml b/examples/demo-cwl-project/agc-project.yaml new file mode 100644 index 00000000..c5ba5fa7 --- /dev/null +++ b/examples/demo-cwl-project/agc-project.yaml @@ -0,0 +1,20 @@ +--- +name: Demo +schemaVersion: 1 +workflows: + hello: + type: + language: cwl + version: v1.2 + sourceURL: workflows/hello/hello.cwl +contexts: + myContext: + engines: + - type: cwl + engine: toil + + spotCtx: + requestSpotInstances: true + engines: + - type: cwl + engine: toil diff --git a/examples/demo-cwl-project/workflows/hello/hello.cwl b/examples/demo-cwl-project/workflows/hello/hello.cwl new file mode 100644 index 00000000..041f7714 --- /dev/null +++ b/examples/demo-cwl-project/workflows/hello/hello.cwl @@ -0,0 +1,15 @@ +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: echo +stdout: output.txt +inputs: + - id: message + type: string + default: "Hello world!" + inputBinding: + position: 1 +outputs: + - id: output + type: File + outputBinding: + glob: output.txt diff --git a/packages/cdk/lib/constructs/batch.ts b/packages/cdk/lib/constructs/batch.ts index 8988ffdd..7845a2e9 100644 --- a/packages/cdk/lib/constructs/batch.ts +++ b/packages/cdk/lib/constructs/batch.ts @@ -93,6 +93,7 @@ export interface BatchProps extends ComputeOptions { const defaultComputeType = ComputeResourceType.ON_DEMAND; export class Batch extends Construct { + // This is the role that the backing instances use, not the role that batch jobs run as. 
public readonly role: IRole; public readonly computeEnvironment: IComputeEnvironment; public readonly jobQueue: IJobQueue; diff --git a/packages/cdk/lib/env/context-app-parameters.ts b/packages/cdk/lib/env/context-app-parameters.ts index 4f550a81..0e6e585a 100644 --- a/packages/cdk/lib/env/context-app-parameters.ts +++ b/packages/cdk/lib/env/context-app-parameters.ts @@ -152,7 +152,11 @@ export class ContextAppParameters { return `${this.getContextBucketPath()}/${this.engineName}-execution`; } - public getEngineContainer(jobQueueArn: string): ServiceContainer { + /** + * This function defines the container that server-based engines (like Toil + * or Cromwell) will run their servers in. It is going to run on Fargate. + */ + public getEngineContainer(jobQueueArn: string, additionalEnvVars?: { [key: string]: string }): ServiceContainer { return { serviceName: this.engineName, imageConfig: { designation: this.engineDesignation }, @@ -164,6 +168,7 @@ export class ContextAppParameters { S3BUCKET: this.outputBucketName, ROOT_DIR: this.getEngineBucketPath(), JOB_QUEUE_ARN: jobQueueArn, + ...additionalEnvVars, }, }; } diff --git a/packages/cdk/lib/roles/policies/toil-batch-policy.ts b/packages/cdk/lib/roles/policies/toil-batch-policy.ts new file mode 100644 index 00000000..bdb467cd --- /dev/null +++ b/packages/cdk/lib/roles/policies/toil-batch-policy.ts @@ -0,0 +1,33 @@ +import { PolicyDocument, PolicyStatement, Effect } from "aws-cdk-lib/aws-iam"; + +export interface ToilBatchPolicyProps { + jobQueueArn: string; + // This is actually a pattern that matches all ARNs for potentially relevant + // definitions, since Toil makes its own definitions. 
+ toilJobArn: string; +} + +export class ToilBatchPolicy extends PolicyDocument { + constructor(props: ToilBatchPolicyProps) { + super({ + assignSids: true, + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + actions: ["batch:DescribeJobDefinitions", "batch:ListJobs", "batch:DescribeJobs", "batch:DescribeJobQueues", "batch:DescribeComputeEnvironments"], + resources: ["*"], + }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: ["batch:RegisterJobDefinition", "batch:DeregisterJobDefinition"], + resources: [props.toilJobArn], + }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: ["batch:SubmitJob"], + resources: [props.toilJobArn, props.jobQueueArn], + }), + ], + }); + } +} diff --git a/packages/cdk/lib/roles/toil-engine-role.ts b/packages/cdk/lib/roles/toil-engine-role.ts new file mode 100644 index 00000000..33f7bcd5 --- /dev/null +++ b/packages/cdk/lib/roles/toil-engine-role.ts @@ -0,0 +1,57 @@ +import { ToilBatchPolicy } from "./policies/toil-batch-policy"; +import { ToilJobRole, ToilJobRoleProps } from "./toil-job-role"; +import { Arn, Aws, Stack } from "aws-cdk-lib"; +import { Construct } from "constructs"; +import { PolicyDocument, PolicyStatement, Effect } from "aws-cdk-lib/aws-iam"; + +interface ToilEngineRoleProps extends ToilJobRoleProps { + // This is the queue to which we are authorizing jobs to be submitted by + // something with this role. + jobQueueArn: string; + // And this other role can be assigned by this role + jobRoleArn: string; +} + +// This role grants access to Toil job stores, but also the access needed to +// launch jobs on AWS Batch that themselves have a ToilJobRole role assigned. 
+export class ToilEngineRole extends ToilJobRole { + constructor(scope: Construct, id: string, props: ToilEngineRoleProps) { + const toilJobArn = Arn.format( + { + account: Aws.ACCOUNT_ID, + region: Aws.REGION, + partition: Aws.PARTITION, + resource: "job-definition/*", + service: "batch", + }, + Stack.of(scope) + ); + super(scope, id, props, { + ToilEngineBatchPolicy: new ToilBatchPolicy({ + ...props, + toilJobArn: toilJobArn, + }), + // TODO: Can we restrict this to allow passing the role only to jobs? + ToilIamPassJobRole: new PolicyDocument({ + assignSids: true, + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + actions: ["iam:PassRole"], + resources: [props.jobRoleArn], + }), + ], + }), + ToilEcsDescribeInstances: new PolicyDocument({ + assignSids: true, + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + actions: ["ecs:DescribeContainerInstances", "s3:ListAllMyBuckets"], + resources: ["*"], + }), + ], + }), + }); + } +} diff --git a/packages/cdk/lib/roles/toil-job-role.ts b/packages/cdk/lib/roles/toil-job-role.ts new file mode 100644 index 00000000..b500565e --- /dev/null +++ b/packages/cdk/lib/roles/toil-job-role.ts @@ -0,0 +1,50 @@ +import { PolicyOptions } from "../types/engine-options"; +import { BucketOperations } from "../common/BucketOperations"; +import { Construct } from "constructs"; +import { Role, ServicePrincipal, PolicyDocument, PolicyStatement, Effect } from "aws-cdk-lib/aws-iam"; + +export interface ToilJobRoleProps { + readOnlyBucketArns: string[]; + readWriteBucketArns: string[]; + policies: PolicyOptions; +} + +// This role grants access to everything a Toil job needs to talk to the AWS +// job store and/or additional user data in S3. 
+export class ToilJobRole extends Role { + constructor(scope: Construct, id: string, props: ToilJobRoleProps, additionalInlinePolicies?: { [key: string]: PolicyDocument }) { + super(scope, id, { + assumedBy: new ServicePrincipal("ecs-tasks.amazonaws.com"), + inlinePolicies: { + // TODO: Remove this when Toil no longer uses its own SimpleDB domains + ToilSimpleDBFullAccess: new PolicyDocument({ + assignSids: true, + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + actions: ["sdb:*"], + resources: ["*"], + }), + ], + }), + // TODO: Remove this when Toil is taught to use AGC buckets to store + // its workflow state and doesn't need to make and destroy its own. + ToilS3FullAccess: new PolicyDocument({ + assignSids: true, + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + actions: ["s3:*"], + resources: ["*"], + }), + ], + }), + ...additionalInlinePolicies, + }, + ...props.policies, + }); + + BucketOperations.grantBucketAccess(this, this, props.readOnlyBucketArns, true); + BucketOperations.grantBucketAccess(this, this, props.readWriteBucketArns); + } +} diff --git a/packages/cdk/lib/stacks/context-stack.ts b/packages/cdk/lib/stacks/context-stack.ts index 992be400..91ec0025 100644 --- a/packages/cdk/lib/stacks/context-stack.ts +++ b/packages/cdk/lib/stacks/context-stack.ts @@ -9,6 +9,7 @@ import { CromwellEngineConstruct } from "./engines/cromwell-engine-construct"; import { NextflowEngineConstruct } from "./engines/nextflow-engine-construct"; import { MiniwdlEngineConstruct } from "./engines/miniwdl-engine-construct"; import { SnakemakeEngineConstruct } from "./engines/snakemake-engine-construct"; +import { ToilEngineConstruct } from "./engines/toil-engine-construct"; export interface ContextStackProps extends StackProps { readonly contextParameters: ContextAppParameters; @@ -55,6 +56,9 @@ export class ContextStack extends Stack { } this.renderSnakemakeStack(props); break; + case "toil": + this.renderToilStack(props); + break; default: 
throw Error(`Engine '${engineName}' is not supported`); } @@ -64,6 +68,9 @@ export class ContextStack extends Stack { const batchProps = this.getCromwellBatchProps(props); const batchStack = this.renderBatchStack(batchProps); + // Cromwell submits workflow jobs to a single on-demand or spot queue. It + // has a server that runs elsewhere in a Fargate service, and also a WES + // adapter lambda. let jobQueue; if (props.contextParameters.requestSpotInstances) { jobQueue = batchStack.batchSpot.jobQueue; @@ -82,6 +89,9 @@ export class ContextStack extends Stack { const batchProps = this.getNextflowBatchProps(props); const batchStack = this.renderBatchStack(batchProps); + // Nextflow submits workflow head jobs to an on demand queue, and + // optionally workflow jobs to a spot queue. There is no server, just an + // adapter lambda. let jobQueue, headQueue; if (props.contextParameters.requestSpotInstances) { jobQueue = batchStack.batchSpot.jobQueue; @@ -99,18 +109,43 @@ export class ContextStack extends Stack { } private renderMiniwdlStack(props: ContextStackProps) { + // Miniwdl's engine construct takes care of setting up its own Batch + // queues. const commonEngineProps = this.getCommonEngineProps(props); new MiniwdlEngineConstruct(this, ENGINE_MINIWDL, { ...commonEngineProps, }).outputToParent(); } + private renderToilStack(props: ContextStackProps) { + const batchProps = this.getToilBatchProps(props); + const batchStack = this.renderBatchStack(batchProps); + + // Toil submits workflow jobs to a single on-demand or spot queue. It + // has a server that runs elsewhere in a Fargate service, and speaks WES + // itself. 
+ let jobQueue; + if (props.contextParameters.requestSpotInstances) { + jobQueue = batchStack.batchSpot.jobQueue; + } else { + jobQueue = batchStack.batchOnDemand.jobQueue; + } + + const commonEngineProps = this.getCommonEngineProps(props); + new ToilEngineConstruct(this, "toil", { + jobQueue, + ...commonEngineProps, + }).outputToParent(); + } + private getCromwellBatchProps(props: ContextStackProps) { const commonBatchProps = this.getCommonBatchProps(props); const { requestSpotInstances } = props.contextParameters; return { ...commonBatchProps, + // We only use one stack for the Cromwell jobs. The server lives in + // Fargate and doesn't run in either of these. createSpotBatch: requestSpotInstances, createOnDemandBatch: !requestSpotInstances, }; @@ -143,6 +178,19 @@ export class ContextStack extends Stack { }; } + private getToilBatchProps(props: ContextStackProps) { + const commonBatchProps = this.getCommonBatchProps(props); + const { requestSpotInstances } = props.contextParameters; + + return { + ...commonBatchProps, + // We only use one Batch from the stack for the Toil jobs. The server + // lives in Fargate and doesn't run in either of these. 
+ createSpotBatch: requestSpotInstances, + createOnDemandBatch: !requestSpotInstances, + }; + } + private renderBatchStack(props: BatchConstructProps) { return new BatchConstruct(this, "Batch", props); } diff --git a/packages/cdk/lib/stacks/engines/toil-engine-construct.ts b/packages/cdk/lib/stacks/engines/toil-engine-construct.ts new file mode 100644 index 00000000..80afe656 --- /dev/null +++ b/packages/cdk/lib/stacks/engines/toil-engine-construct.ts @@ -0,0 +1,104 @@ +import { Aws } from "aws-cdk-lib"; +import { IVpc } from "aws-cdk-lib/aws-ec2"; +import { FargateTaskDefinition, LogDriver } from "aws-cdk-lib/aws-ecs"; +import { ApiProxy, SecureService } from "../../constructs"; +import { IRole } from "aws-cdk-lib/aws-iam"; +import { createEcrImage, renderServiceWithTaskDefinition } from "../../util"; +import { Bucket } from "aws-cdk-lib/aws-s3"; +import { EngineOptions, ServiceContainer } from "../../types"; +import { LogGroup, ILogGroup } from "aws-cdk-lib/aws-logs"; +import { EngineOutputs, EngineConstruct } from "./engine-construct"; +import { ToilJobRole } from "../../roles/toil-job-role"; +import { ToilEngineRole } from "../../roles/toil-engine-role"; +import { IJobQueue } from "@aws-cdk/aws-batch-alpha"; +import { Construct } from "constructs"; + +export interface ToilEngineConstructProps extends EngineOptions { + /** + * AWS Batch JobQueue to use for running workflows. 
+ */ + readonly jobQueue: IJobQueue; +} + +export class ToilEngineConstruct extends EngineConstruct { + public readonly engine: SecureService; + public readonly adapterRole: IRole; + public readonly apiProxy: ApiProxy; + public readonly adapterLogGroup: ILogGroup; + public readonly engineLogGroup: ILogGroup; + public readonly engineRole: IRole; + public readonly jobRole: IRole; + + constructor(scope: Construct, id: string, props: ToilEngineConstructProps) { + super(scope, id); + const params = props.contextParameters; + this.engineLogGroup = new LogGroup(this, "EngineLogGroup"); + const artifactBucket = Bucket.fromBucketName(this, "ArtifactBucket", params.artifactBucketName); + const outputBucket = Bucket.fromBucketName(this, "OutputBucket", params.outputBucketName); + + // Make a role for the jobs to run as + this.jobRole = new ToilJobRole(this, "ToilJobRole", { + readOnlyBucketArns: (params.readBucketArns ?? []).concat(artifactBucket.bucketArn), + readWriteBucketArns: (params.readWriteBucketArns ?? []).concat(outputBucket.bucketArn), + policies: props.policyOptions, + }); + + // Make a role for the server to run as + this.engineRole = new ToilEngineRole(this, "ToilEngineRole", { + jobQueueArn: props.jobQueue.jobQueueArn, + jobRoleArn: this.jobRole.roleArn, + readOnlyBucketArns: (params.readBucketArns ?? []).concat(artifactBucket.bucketArn), + readWriteBucketArns: (params.readWriteBucketArns ?? []).concat(outputBucket.bucketArn), + policies: props.policyOptions, + }); + + // Make the container and pass it the ARN of the role to use for individual jobs. + const engineContainer = params.getEngineContainer(props.jobQueue.jobQueueArn, { + TOIL_AWS_BATCH_JOB_ROLE_ARN: this.jobRole.roleArn, + }); + + // TODO: Move log group creation into service construct and make it a property + this.engine = this.getEngineServiceDefinition(props.vpc, engineContainer, this.engineLogGroup); + // This is unused because we have no adapter, but a log group is required. 
+ this.adapterLogGroup = new LogGroup(this, "AdapterLogGroup"); + + // We don't use an adapter, so put the access-controlling proxy right in + // front of the engine load balancer. + this.apiProxy = new ApiProxy(this, { + apiName: `${params.projectName}${params.contextName}${engineContainer.serviceName}ApiProxy`, + loadBalancer: this.engine.loadBalancer, + allowedAccountIds: [Aws.ACCOUNT_ID], + }); + } + + protected getOutputs(): EngineOutputs { + return { + accessLogGroup: this.apiProxy.accessLogGroup, + adapterLogGroup: this.adapterLogGroup, + engineLogGroup: this.engineLogGroup, + wesUrl: this.apiProxy.restApi.url, + }; + } + + private getEngineServiceDefinition(vpc: IVpc, serviceContainer: ServiceContainer, logGroup: ILogGroup) { + const id = "Engine"; + const definition = new FargateTaskDefinition(this, "EngineTaskDef", { + taskRole: this.engineRole, + cpu: serviceContainer.cpu, + memoryLimitMiB: serviceContainer.memoryLimitMiB, + }); + + definition.addContainer(serviceContainer.serviceName, { + cpu: serviceContainer.cpu, + memoryLimitMiB: serviceContainer.memoryLimitMiB, + environment: serviceContainer.environment, + containerName: serviceContainer.serviceName, + image: createEcrImage(this, serviceContainer.imageConfig.designation), + logging: LogDriver.awsLogs({ logGroup, streamPrefix: id }), + portMappings: serviceContainer.containerPort ? 
[{ containerPort: serviceContainer.containerPort }] : [], + }); + + const engine = renderServiceWithTaskDefinition(this, id, serviceContainer, definition, vpc); + return engine; + } +} diff --git a/packages/cli/internal/pkg/cli/account_activate_test.go b/packages/cli/internal/pkg/cli/account_activate_test.go index 846507a0..18580925 100644 --- a/packages/cli/internal/pkg/cli/account_activate_test.go +++ b/packages/cli/internal/pkg/cli/account_activate_test.go @@ -23,6 +23,7 @@ const ( testCromwellRepository = "test-cromwell-repo" testNextflowRepository = "test-nextflow-repo" testMiniwdlRepository = "test-miniwdl-repo" + testToilRepository = "test-toil-repo" testCoreStackName = "Agc-Core" ) @@ -52,6 +53,12 @@ var ( RepositoryName: testMiniwdlRepository, ImageTag: testImageTag, }, + "TOIL": { + RegistryId: testAccountId, + Region: testAccountRegion, + RepositoryName: testToilRepository, + ImageTag: testImageTag, + }, } ) diff --git a/packages/cli/internal/pkg/cli/context/manager.go b/packages/cli/internal/pkg/cli/context/manager.go index 1d9b903c..b3133b5c 100644 --- a/packages/cli/internal/pkg/cli/context/manager.go +++ b/packages/cli/internal/pkg/cli/context/manager.go @@ -87,8 +87,16 @@ var showExecution = cdk.ShowExecution var silentExecution = cdk.SilentExecution func (m *Manager) getEnvironmentVars() []string { + // Different engines will need different environment variables to define + // their Docker images. + engine := m.contextEnv.EngineName + // Each engine has its own section in imageRefs, and for now we assume they + // all care about a WES adapter image. 
+ var relevantImageKeys []string + relevantImageKeys = append(relevantImageKeys, strings.ToUpper(engine)) + relevantImageKeys = append(relevantImageKeys, environment.WesImageKey) var environmentVars []string - for imageName := range m.imageRefs { + for _, imageName := range relevantImageKeys { environmentVars = append(environmentVars, fmt.Sprintf("ECR_%s_ACCOUNT_ID=%s", imageName, m.imageRefs[imageName].RegistryId), fmt.Sprintf("ECR_%s_REGION=%s", imageName, m.region), diff --git a/packages/cli/internal/pkg/cli/context/manager_deploy_test.go b/packages/cli/internal/pkg/cli/context/manager_deploy_test.go index e9330092..3dae0f11 100644 --- a/packages/cli/internal/pkg/cli/context/manager_deploy_test.go +++ b/packages/cli/internal/pkg/cli/context/manager_deploy_test.go @@ -46,7 +46,7 @@ func TestManager_Deploy(t *testing.T) { mockClients.ssmMock.EXPECT().GetCustomTags().Return(testTags) mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["NEXTFLOW"]).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(43), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(27), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName3}), []cdk.ProgressStream{mockClients.progressStream1}).Return([]cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName3}}) return mockClients @@ -69,7 +69,7 @@ func TestManager_Deploy(t *testing.T) { mockClients.ssmMock.EXPECT().GetCustomTags().Return("") 
mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["NEXTFLOW"]).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(43), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(27), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName3}), []cdk.ProgressStream{mockClients.progressStream1}).Return([]cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName3}}) return mockClients @@ -94,8 +94,8 @@ func TestManager_Deploy(t *testing.T) { mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["CROMWELL"]).Times(2).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) clearContext2 := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(43), testContextName1).After(clearContext).Return(mockClients.progressStream1, nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(43), testContextName2).After(clearContext2).Return(mockClients.progressStream2, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(27), testContextName1).After(clearContext).Return(mockClients.progressStream1, nil) + 
mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(27), testContextName2).After(clearContext2).Return(mockClients.progressStream2, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar expectedCdkResult := []cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName1}, {Outputs: []string{"some other message"}, ExecutionName: testContextName2}} mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName1, testContextName2}), []cdk.ProgressStream{mockClients.progressStream1, mockClients.progressStream2}).Return(expectedCdkResult) @@ -235,7 +235,7 @@ func TestManager_Deploy(t *testing.T) { mockClients.ssmMock.EXPECT().GetCommonParameter("installed-artifacts/s3-root-url").Return(testArtifactBucket, nil) mockClients.ssmMock.EXPECT().GetCustomTags().Return(testTags) mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(43), testContextName1).Return(nil, fmt.Errorf("some context error")) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(27), testContextName1).Return(nil, fmt.Errorf("some context error")) mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["CROMWELL"]).Return(nil) return mockClients }, diff --git a/packages/cli/internal/pkg/cli/project_init.go b/packages/cli/internal/pkg/cli/project_init.go index e96b502e..698615f9 100644 --- a/packages/cli/internal/pkg/cli/project_init.go +++ b/packages/cli/internal/pkg/cli/project_init.go @@ -27,6 +27,7 @@ var ( "nextflow": "nextflow", "wdl": "cromwell", "snakemake": "snakemake", + "cwl": "toil", } supportedWorkflowTypes []string ) @@ -125,7 +126,7 @@ func (o *initProjectOpts) validateProject() error { func BuildProjectInitCommand() 
*cobra.Command { vars := initProjectVars{} cmd := &cobra.Command{ - Use: "init project_name --workflow-type {wdl|nextflow|snakemake}", + Use: "init project_name --workflow-type {cwl|wdl|nextflow|snakemake}", Short: "Initialize current directory with a new empty AGC project for a particular workflow type.", Long: `Initialize current directory with a new empty AGC project for a particular workflow type. Project specification file 'agc-project.yaml' will be created in the current directory.`, diff --git a/packages/cli/internal/pkg/cli/project_init_test.go b/packages/cli/internal/pkg/cli/project_init_test.go index 9d93805c..c86817bb 100644 --- a/packages/cli/internal/pkg/cli/project_init_test.go +++ b/packages/cli/internal/pkg/cli/project_init_test.go @@ -43,7 +43,7 @@ func TestProjectInit_Validate(t *testing.T) { workflowType: "nextflow", }, "invalid workflow type": { - expectedErr: "invalid workflow type supplied: 'aBadEngineName'. Supported workflow types are: [nextflow snakemake wdl]", + expectedErr: "invalid workflow type supplied: 'aBadEngineName'. 
Supported workflow types are: [cwl nextflow snakemake wdl]", workflowType: "aBadEngineName", projectName: testProjectName, }, @@ -91,6 +91,11 @@ func TestProjectInit_Execute(t *testing.T) { engineName: "nextflow", expectedEngine: []spec.Engine{{Type: "nextflow", Engine: "nextflow"}}, }, + "toil engine generation": { + projectName: testProjectName, + engineName: "cwl", + expectedEngine: []spec.Engine{{Type: "cwl", Engine: "toil"}}, + }, } for name, tc := range testCases { diff --git a/packages/cli/internal/pkg/environment/environment.go b/packages/cli/internal/pkg/environment/environment.go index 245f1148..0d82b909 100644 --- a/packages/cli/internal/pkg/environment/environment.go +++ b/packages/cli/internal/pkg/environment/environment.go @@ -15,44 +15,52 @@ const DefaultNextflowTag = "21.04.3" const DefaultWesTag = "0.1.0" const DefaultMiniwdlTag = "v0.1.11" const DefaultSnakemakeTag = "internal-fork" +const DefaultToilTag = "v6.0.0" const WesImageKey = "WES" const CromwellImageKey = "CROMWELL" const NextflowImageKey = "NEXTFLOW" const MiniwdlImageKey = "MINIWDL" const SnakemakeImageKey = "SNAKEMAKE" +const ToilImageKey = "TOIL" var CommonImages = map[string]ecr.ImageReference{ WesImageKey: { RegistryId: LookUpEnvOrDefault("ECR_WES_ACCOUNT_ID", DefaultEcrRegistry), Region: LookUpEnvOrDefault("ECR_WES_REGION", DefaultEcrRegion), - RepositoryName: "aws/wes-release", + RepositoryName: LookUpEnvOrDefault("ECR_WES_REPOSITORY", "aws/wes-release"), ImageTag: LookUpEnvOrDefault("ECR_WES_TAG", DefaultWesTag), }, CromwellImageKey: { RegistryId: LookUpEnvOrDefault("ECR_CROMWELL_ACCOUNT_ID", DefaultEcrRegistry), Region: LookUpEnvOrDefault("ECR_CROMWELL_REGION", DefaultEcrRegion), - RepositoryName: "aws/cromwell-mirror", + RepositoryName: LookUpEnvOrDefault("ECR_CROMWELL_REPOSITORY", "aws/cromwell-mirror"), ImageTag: LookUpEnvOrDefault("ECR_CROMWELL_TAG", DefaultCromwellTag), }, NextflowImageKey: { RegistryId: LookUpEnvOrDefault("ECR_NEXTFLOW_ACCOUNT_ID", DefaultEcrRegistry), 
Region: LookUpEnvOrDefault("ECR_NEXTFLOW_REGION", DefaultEcrRegion), - RepositoryName: "aws/nextflow-mirror", + RepositoryName: LookUpEnvOrDefault("ECR_NEXTFLOW_REPOSITORY", "aws/nextflow-mirror"), ImageTag: LookUpEnvOrDefault("ECR_NEXTFLOW_TAG", DefaultNextflowTag), }, MiniwdlImageKey: { RegistryId: LookUpEnvOrDefault("ECR_MINIWDL_ACCOUNT_ID", DefaultEcrRegistry), Region: LookUpEnvOrDefault("ECR_MINIWDL_REGION", DefaultEcrRegion), - RepositoryName: "aws/miniwdl-mirror", + RepositoryName: LookUpEnvOrDefault("ECR_MINIWDL_REPOSITORY", "aws/miniwdl-mirror"), ImageTag: LookUpEnvOrDefault("ECR_MINIWDL_TAG", DefaultMiniwdlTag), }, SnakemakeImageKey: { RegistryId: LookUpEnvOrDefault("ECR_SNAKEMAKE_ACCOUNT_ID", DefaultEcrRegistry), Region: LookUpEnvOrDefault("ECR_SNAKEMAKE_REGION", DefaultEcrRegion), - RepositoryName: "aws/snakemake-mirror", + RepositoryName: LookUpEnvOrDefault("ECR_SNAKEMAKE_REPOSITORY", "aws/snakemake-mirror"), ImageTag: LookUpEnvOrDefault("ECR_SNAKEMAKE_TAG", DefaultSnakemakeTag), }, + ToilImageKey: { + RegistryId: LookUpEnvOrDefault("ECR_TOIL_ACCOUNT_ID", DefaultEcrRegistry), + Region: LookUpEnvOrDefault("ECR_TOIL_REGION", DefaultEcrRegion), + RepositoryName: LookUpEnvOrDefault("ECR_TOIL_REPOSITORY", "aws/toil-mirror"), + ImageTag: LookUpEnvOrDefault("ECR_TOIL_TAG", DefaultToilTag), + }, } func LookUpEnvOrDefault(envVariableName string, defaultValue string) string { diff --git a/packages/engines/toil/Dockerfile b/packages/engines/toil/Dockerfile new file mode 100644 index 00000000..610bb7c0 --- /dev/null +++ b/packages/engines/toil/Dockerfile @@ -0,0 +1,61 @@ +FROM public.ecr.aws/amazonlinux/amazonlinux:2 AS final + +# COPY THIRD-PARTY /opt/ +COPY LICENSE /opt/ + +RUN yum update -y \ + && yum install -y \ + curl \ + hostname \ + "java-11-amazon-corretto-headless(x86-64)" \ + unzip \ + jq \ + && yum clean -y all \ + && rm -rf /var/cache/yum + +# install awscli v2 +RUN curl -s "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o 
"/tmp/awscliv2.zip" \ + && unzip -q /tmp/awscliv2.zip -d /tmp \ + && /tmp/aws/install -b /usr/bin \ + && rm -rf /tmp/aws* + +##### MODIFY ####### +## In this area install your new engine into the container as well as any requirements for that engine. +## Dockerfile documentation is found here: https://docs.docker.com/engine/reference/builder/ + +# Add rabbitmq repository +ADD rabbitmq.repo /etc/yum.repos.d/rabbitmq.repo + +# Sadly pre-importing keys doesn't seem to save any time when we use yum later, so don't do it. + +# Install deps +RUN curl -fsSL https://rpm.nodesource.com/setup_16.x | bash - \ + && yum update -y \ + && yum install -y \ + python3 \ + rabbitmq-server \ + nodejs \ + git \ + && yum clean -y all \ + && rm -rf /var/cache/yum + +# Install concurrently, for running all our servers in one session +RUN npm install -g concurrently@7.0.0 + +# Install Toil +COPY THIRD-PARTY /opt/ + +ARG TOIL_VERSION="e9a82098629046f672aaee4c5f14f46bc67be4ce" +RUN python3 -m pip install git+https://github.com/DataBiosphere/toil.git@${TOIL_VERSION}#egg=toil[aws,cwl,server] + +# copy the entrypoint script to the image +COPY toil.aws.sh /opt/bin/toil.aws.sh +RUN chmod +x /opt/bin/toil.aws.sh + +EXPOSE 8000 + +#### END MODIFY ###### + +WORKDIR /opt/work +ENTRYPOINT ["/opt/bin/toil.aws.sh"] + diff --git a/packages/engines/toil/LICENSE b/packages/engines/toil/LICENSE new file mode 100644 index 00000000..f5d32e98 --- /dev/null +++ b/packages/engines/toil/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/packages/engines/toil/README.md b/packages/engines/toil/README.md new file mode 100644 index 00000000..cd084548 --- /dev/null +++ b/packages/engines/toil/README.md @@ -0,0 +1,48 @@ +## Toil AWS Mirror + +A Toil mono-container WES server for use with Amazon AGC. + +### Building the Container Manually + +Go to this directory and run: + +```bash +docker build . -f Dockerfile -t adamnovak/toil-agc +``` + +### Running for Testing + +Having built the container, run: + +```bash +docker run -ti --rm -p "127.0.0.1:8000:8000" adamnovak/toil-agc +``` + +This will start the containerized server and make it available on port 8000 on the loopback interface. 
You can inspect the port mapping with: + +```bash +docker port "$(docker ps | grep adamnovak/toil-agc | rev | cut -f1 -d' ' | rev)" +``` + +Then you can talk to it with e.g.: + +```bash +curl -vvv "http://localhost:8000/ga4gh/wes/v1/service-info" +``` + +For debugging, you can get inside the container with: + +```bash +docker exec -ti "$(docker ps | grep adamnovak/toil-agc | rev | cut -f1 -d' ' | rev)" /bin/bash +``` + +### Deploying + +To push this to an Amazon ECR repo, where AGC can get at it, you can do something like: + +```bash +aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 318423852362.dkr.ecr.us-west-2.amazonaws.com +docker build -t adamnovak/toil-agc . +docker tag adamnovak/toil-agc:latest 318423852362.dkr.ecr.us-west-2.amazonaws.com/adamnovak/toil-agc:latest +docker push 318423852362.dkr.ecr.us-west-2.amazonaws.com/adamnovak/toil-agc:latest +``` diff --git a/packages/engines/toil/THIRD-PARTY b/packages/engines/toil/THIRD-PARTY new file mode 100644 index 00000000..8b1fd594 --- /dev/null +++ b/packages/engines/toil/THIRD-PARTY @@ -0,0 +1,207 @@ +** toil; version 5.5.0 -- https://github.com/DataBiosphere/toil + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + + Copyright 2011 UCSC Computational Genomics Lab + Original Contributors: Benedict Paten, Hannes Schmidt, John Vivian, + Christopher Ketchum, Joel Armstrong and co-authors (benedictpaten@gmail.com) + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/packages/engines/toil/buildspec.yml b/packages/engines/toil/buildspec.yml new file mode 100644 index 00000000..815f591a --- /dev/null +++ b/packages/engines/toil/buildspec.yml @@ -0,0 +1,20 @@ +version: 0.2 + +env: + shell: bash + variables: + TOIL_IMAGE_NAME: "toil" +phases: + pre_build: + commands: + - TOIL_IMAGE_URI=${TOIL_IMAGE_NAME}:latest + build: + commands: + - docker build -t ${TOIL_IMAGE_URI} ./ + post_build: + commands: + - docker save -o toil_image.tar ${TOIL_IMAGE_URI} + +artifacts: + files: + - toil_image.tar diff --git a/packages/engines/toil/rabbitmq.repo b/packages/engines/toil/rabbitmq.repo new file mode 100644 index 00000000..3d919acc --- /dev/null +++ b/packages/engines/toil/rabbitmq.repo @@ -0,0 +1,61 @@ +# In /etc/yum.repos.d/rabbitmq.repo +# From https://www.rabbitmq.com/install-rpm.html#package-cloud +# Modified to request RHEL 7 packages. 
+ +## +## Zero dependency Erlang +## + +[rabbitmq_erlang] +name=rabbitmq_erlang +baseurl=https://packagecloud.io/rabbitmq/erlang/el/7/$basearch +repo_gpgcheck=1 +gpgcheck=1 +enabled=1 +# PackageCloud's repository key and RabbitMQ package signing key +gpgkey=https://packagecloud.io/rabbitmq/erlang/gpgkey + https://github.com/rabbitmq/signing-keys/releases/download/2.0/rabbitmq-release-signing-key.asc +sslverify=1 +sslcacert=/etc/pki/tls/certs/ca-bundle.crt +metadata_expire=300 + +[rabbitmq_erlang-source] +name=rabbitmq_erlang-source +baseurl=https://packagecloud.io/rabbitmq/erlang/el/7/SRPMS +repo_gpgcheck=1 +gpgcheck=0 +enabled=1 +# PackageCloud's repository key and RabbitMQ package signing key +gpgkey=https://packagecloud.io/rabbitmq/erlang/gpgkey + https://github.com/rabbitmq/signing-keys/releases/download/2.0/rabbitmq-release-signing-key.asc +sslverify=1 +sslcacert=/etc/pki/tls/certs/ca-bundle.crt +metadata_expire=300 + +## +## RabbitMQ server +## + +[rabbitmq_server] +name=rabbitmq_server +baseurl=https://packagecloud.io/rabbitmq/rabbitmq-server/el/7/$basearch +repo_gpgcheck=1 +gpgcheck=0 +enabled=1 +# PackageCloud's repository key and RabbitMQ package signing key +gpgkey=https://packagecloud.io/rabbitmq/rabbitmq-server/gpgkey + https://github.com/rabbitmq/signing-keys/releases/download/2.0/rabbitmq-release-signing-key.asc +sslverify=1 +sslcacert=/etc/pki/tls/certs/ca-bundle.crt +metadata_expire=300 + +[rabbitmq_server-source] +name=rabbitmq_server-source +baseurl=https://packagecloud.io/rabbitmq/rabbitmq-server/el/7/SRPMS +repo_gpgcheck=1 +gpgcheck=0 +enabled=1 +gpgkey=https://packagecloud.io/rabbitmq/rabbitmq-server/gpgkey +sslverify=1 +sslcacert=/etc/pki/tls/certs/ca-bundle.crt +metadata_expire=300 diff --git a/packages/engines/toil/toil.aws.sh b/packages/engines/toil/toil.aws.sh new file mode 100644 index 00000000..a256a360 --- /dev/null +++ b/packages/engines/toil/toil.aws.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# Toil is a WES server and so it gets this 
custom entrypoint script + +DEFAULT_AWS_CLI_PATH=/opt/aws-cli/bin/aws +AWS_CLI_PATH=${JOB_AWS_CLI_PATH:-$DEFAULT_AWS_CLI_PATH} + +echo "=== ENVIRONMENT ===" +printenv + +echo "=== START SERVER ===" + +# We expect some AGC info in the environment: JOB_QUEUE_ARN +# These come from packages/cdk/lib/env/context-app-parameters.ts +# If we need more we'll need to add them in the Toil engine construct, or maybe stop passing getEngineContainer() down as a parameter. +# We assume whatever role the batch jobs get when they go in the queue is the right role for them. +AWS_REGION=$(echo ${JOB_QUEUE_ARN} | cut -f4 -d':') +set -x + +export TOIL_WES_BROKER_URL="amqp://guest:guest@localhost:5672//" +export TOIL_WES_JOB_STORE_TYPE="aws" + +concurrently -n rabbitmq,celery,toil \ + "rabbitmq-server" \ + "celery --broker=${TOIL_WES_BROKER_URL} -A toil.server.celery_app worker --loglevel=INFO" \ + "toil server --debug --host=0.0.0.0 --port=8000 --opt=--batchSystem=aws_batch '--opt=--awsBatchQueue=${JOB_QUEUE_ARN}' '--opt=--awsBatchRegion=${AWS_REGION}'" + + From a5bfbb55e8e83ccd001a7567f99bdb392641b9da Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 8 Mar 2022 12:53:39 -0800 Subject: [PATCH 02/41] Expect to have exactly 30 arguments --- .../internal/pkg/cli/context/manager_deploy_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/cli/internal/pkg/cli/context/manager_deploy_test.go b/packages/cli/internal/pkg/cli/context/manager_deploy_test.go index 3dae0f11..cd086e2b 100644 --- a/packages/cli/internal/pkg/cli/context/manager_deploy_test.go +++ b/packages/cli/internal/pkg/cli/context/manager_deploy_test.go @@ -46,7 +46,7 @@ func TestManager_Deploy(t *testing.T) { mockClients.ssmMock.EXPECT().GetCustomTags().Return(testTags) mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["NEXTFLOW"]).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, 
".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(27), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(30), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName3}), []cdk.ProgressStream{mockClients.progressStream1}).Return([]cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName3}}) return mockClients @@ -69,7 +69,7 @@ func TestManager_Deploy(t *testing.T) { mockClients.ssmMock.EXPECT().GetCustomTags().Return("") mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["NEXTFLOW"]).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(27), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(30), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName3}), []cdk.ProgressStream{mockClients.progressStream1}).Return([]cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName3}}) return mockClients @@ -94,8 +94,8 @@ func TestManager_Deploy(t *testing.T) { 
mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["CROMWELL"]).Times(2).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) clearContext2 := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(27), testContextName1).After(clearContext).Return(mockClients.progressStream1, nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(27), testContextName2).After(clearContext2).Return(mockClients.progressStream2, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(30), testContextName1).After(clearContext).Return(mockClients.progressStream1, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(30), testContextName2).After(clearContext2).Return(mockClients.progressStream2, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar expectedCdkResult := []cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName1}, {Outputs: []string{"some other message"}, ExecutionName: testContextName2}} mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName1, testContextName2}), []cdk.ProgressStream{mockClients.progressStream1, mockClients.progressStream2}).Return(expectedCdkResult) @@ -235,7 +235,7 @@ func TestManager_Deploy(t *testing.T) { mockClients.ssmMock.EXPECT().GetCommonParameter("installed-artifacts/s3-root-url").Return(testArtifactBucket, nil) mockClients.ssmMock.EXPECT().GetCustomTags().Return(testTags) mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - 
mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(27), testContextName1).Return(nil, fmt.Errorf("some context error")) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(30), testContextName1).Return(nil, fmt.Errorf("some context error")) mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["CROMWELL"]).Return(nil) return mockClients }, From ebf697149703be9ffcc797e5615d6e3ae12b3643 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 17 Feb 2022 13:59:49 -0800 Subject: [PATCH 03/41] Add a bigger CWL workflow --- examples/demo-cwl-project/agc-project.yaml | 5 +++ .../workflows/nontrivial/MANIFEST.json | 7 ++++ .../workflows/nontrivial/compress-file.cwl | 18 +++++++++ .../nontrivial/concatenate-files.cwl | 14 +++++++ .../workflows/nontrivial/inputs.json | 1 + .../workflows/nontrivial/make-data.js | 11 ++++++ .../workflows/nontrivial/nontrivial.cwl | 39 +++++++++++++++++++ .../workflows/nontrivial/options.json | 1 + .../workflows/nontrivial/run-script.cwl | 21 ++++++++++ 9 files changed, 117 insertions(+) create mode 100644 examples/demo-cwl-project/workflows/nontrivial/MANIFEST.json create mode 100644 examples/demo-cwl-project/workflows/nontrivial/compress-file.cwl create mode 100644 examples/demo-cwl-project/workflows/nontrivial/concatenate-files.cwl create mode 100644 examples/demo-cwl-project/workflows/nontrivial/inputs.json create mode 100644 examples/demo-cwl-project/workflows/nontrivial/make-data.js create mode 100644 examples/demo-cwl-project/workflows/nontrivial/nontrivial.cwl create mode 100644 examples/demo-cwl-project/workflows/nontrivial/options.json create mode 100644 examples/demo-cwl-project/workflows/nontrivial/run-script.cwl diff --git a/examples/demo-cwl-project/agc-project.yaml b/examples/demo-cwl-project/agc-project.yaml index c5ba5fa7..cd6becb8 100644 --- a/examples/demo-cwl-project/agc-project.yaml +++ 
b/examples/demo-cwl-project/agc-project.yaml @@ -7,6 +7,11 @@ workflows: language: cwl version: v1.2 sourceURL: workflows/hello/hello.cwl + nontrivial: + type: + language: cwl + version: v1.2 + sourceURL: workflows/nontrivial contexts: myContext: engines: diff --git a/examples/demo-cwl-project/workflows/nontrivial/MANIFEST.json b/examples/demo-cwl-project/workflows/nontrivial/MANIFEST.json new file mode 100644 index 00000000..dbf09c46 --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/MANIFEST.json @@ -0,0 +1,7 @@ +{ + "mainWorkflowURL": "nontrivial.cwl", + "inputFileURLs": [ + "inputs.json" + ], + "optionsFileURL": "options.json" +} diff --git a/examples/demo-cwl-project/workflows/nontrivial/compress-file.cwl b/examples/demo-cwl-project/workflows/nontrivial/compress-file.cwl new file mode 100644 index 00000000..d3217f83 --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/compress-file.cwl @@ -0,0 +1,18 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: ["-9", "-p", "8", "-c"] +hints: + DockerRequirement: + dockerPull: bytesco/pigz + ResourceRequirement: + coresMin: 8 +inputs: + input_file: + type: File + inputBinding: + position: 1 +outputs: + compressed_file: + type: stdout +stdout: compressed.gz diff --git a/examples/demo-cwl-project/workflows/nontrivial/concatenate-files.cwl b/examples/demo-cwl-project/workflows/nontrivial/concatenate-files.cwl new file mode 100644 index 00000000..310f8639 --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/concatenate-files.cwl @@ -0,0 +1,14 @@ +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: cat +inputs: + files: + type: File[] + inputBinding: + position: 1 + +outputs: + concatenated_file: + type: stdout + +stdout: concatenated.txt diff --git a/examples/demo-cwl-project/workflows/nontrivial/inputs.json b/examples/demo-cwl-project/workflows/nontrivial/inputs.json new file mode 100644 index 00000000..c1fce304 --- /dev/null +++ 
b/examples/demo-cwl-project/workflows/nontrivial/inputs.json @@ -0,0 +1 @@ +{"script_file": {"class": "File", "path": "make-data.js"}, "script_arguments": ["A", "B", "C"]} diff --git a/examples/demo-cwl-project/workflows/nontrivial/make-data.js b/examples/demo-cwl-project/workflows/nontrivial/make-data.js new file mode 100644 index 00000000..c0d1ef69 --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/make-data.js @@ -0,0 +1,11 @@ +if (process.argv.length > 2) { + let arg = process.argv[2] + console.log("Argument is: " + arg); + + for (let i = 1000; i >= 0; i--) { + console.log(i + " bottles of " + arg + " on the wall..."); + } + +} else { + console.log("Didn't get an argument"); +} diff --git a/examples/demo-cwl-project/workflows/nontrivial/nontrivial.cwl b/examples/demo-cwl-project/workflows/nontrivial/nontrivial.cwl new file mode 100644 index 00000000..c3651afe --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/nontrivial.cwl @@ -0,0 +1,39 @@ +cwlVersion: v1.2 +class: Workflow + +requirements: + SubworkflowFeatureRequirement: {} + ScatterFeatureRequirement: {} + +inputs: + - id: script_file + type: File + - id: script_arguments + type: string[] + +steps: + scripts: + run: run-script.cwl + scatter: arg + in: + src: script_file + arg: script_arguments + out: [script_output] + + concat: + run: concatenate-files.cwl + in: + files: scripts/script_output + out: + [concatenated_file] + + compress: + run: compress-file.cwl + in: + input_file: concat/concatenated_file + out: [compressed_file] + +outputs: + - id: output + type: File + outputSource: compress/compressed_file diff --git a/examples/demo-cwl-project/workflows/nontrivial/options.json b/examples/demo-cwl-project/workflows/nontrivial/options.json new file mode 100644 index 00000000..e07ad5a6 --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/options.json @@ -0,0 +1 @@ +{"--destBucket": "s3://agc-318423852362-us-west-2/adamnovak-scratch/out", "--singularity": null} 
diff --git a/examples/demo-cwl-project/workflows/nontrivial/run-script.cwl b/examples/demo-cwl-project/workflows/nontrivial/run-script.cwl new file mode 100644 index 00000000..0f0eed8f --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/run-script.cwl @@ -0,0 +1,21 @@ +#!/usr/bin/env cwl-runner +# Modified from the CWL docs +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: node +hints: + DockerRequirement: + dockerPull: node:slim +inputs: + src: + type: File + inputBinding: + position: 1 + arg: + type: string + inputBinding: + position: 2 +outputs: + script_output: + type: stdout +stdout: output.txt From ba475348dab296df336c10b62794ec5d5a31668a Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 18 Feb 2022 09:02:32 -0800 Subject: [PATCH 04/41] Make workflow harder --- .../workflows/nontrivial/inputs.json | 8 +++++++- .../workflows/nontrivial/make-data.js | 18 ++++++++++++++---- .../workflows/nontrivial/nontrivial.cwl | 9 ++++++++- .../workflows/nontrivial/run-script.cwl | 3 +++ .../workflows/nontrivial/sort-file.cwl | 17 +++++++++++++++++ 5 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl diff --git a/examples/demo-cwl-project/workflows/nontrivial/inputs.json b/examples/demo-cwl-project/workflows/nontrivial/inputs.json index c1fce304..0e5c2c71 100644 --- a/examples/demo-cwl-project/workflows/nontrivial/inputs.json +++ b/examples/demo-cwl-project/workflows/nontrivial/inputs.json @@ -1 +1,7 @@ -{"script_file": {"class": "File", "path": "make-data.js"}, "script_arguments": ["A", "B", "C"]} +{ + "script_file": {"class": "File", "path": "make-data.js"}, + "script_arguments": [ + "A", "B", "C", "D", "E", "F", "G", + "cats", "pigs", "dogs", "space weasles", "snacks" + ] +} diff --git a/examples/demo-cwl-project/workflows/nontrivial/make-data.js b/examples/demo-cwl-project/workflows/nontrivial/make-data.js index c0d1ef69..7ba45fb2 100644 --- 
a/examples/demo-cwl-project/workflows/nontrivial/make-data.js +++ b/examples/demo-cwl-project/workflows/nontrivial/make-data.js @@ -1,11 +1,21 @@ +const crypto = require('crypto') if (process.argv.length > 2) { let arg = process.argv[2] - console.log("Argument is: " + arg); + console.log("Argument is: " + arg) - for (let i = 1000; i >= 0; i--) { - console.log(i + " bottles of " + arg + " on the wall..."); + let hashes = [arg] + + for (let i = 10000; i >= 0; i--) { + console.log(i + " bottles of " + arg + " on the wall...") + hasher = crypto.createHash('sha512') + for (let h of hashes) { + hasher.update(h) + } + hashes.push(hasher.digest('hex')) } + console.log("After meditating on the nature of " + arg + ", it turns out to be " + hashes[hashes.length - 1]) + } else { - console.log("Didn't get an argument"); + console.log("Didn't get an argument") } diff --git a/examples/demo-cwl-project/workflows/nontrivial/nontrivial.cwl b/examples/demo-cwl-project/workflows/nontrivial/nontrivial.cwl index c3651afe..f59f801b 100644 --- a/examples/demo-cwl-project/workflows/nontrivial/nontrivial.cwl +++ b/examples/demo-cwl-project/workflows/nontrivial/nontrivial.cwl @@ -19,11 +19,18 @@ steps: src: script_file arg: script_arguments out: [script_output] + + sort: + run: sort-file.cwl + scatter: input_file + in: + input_file: scripts/script_output + out: [sorted_file] concat: run: concatenate-files.cwl in: - files: scripts/script_output + files: sort/sorted_file out: [concatenated_file] diff --git a/examples/demo-cwl-project/workflows/nontrivial/run-script.cwl b/examples/demo-cwl-project/workflows/nontrivial/run-script.cwl index 0f0eed8f..02e0aa9f 100644 --- a/examples/demo-cwl-project/workflows/nontrivial/run-script.cwl +++ b/examples/demo-cwl-project/workflows/nontrivial/run-script.cwl @@ -6,6 +6,9 @@ baseCommand: node hints: DockerRequirement: dockerPull: node:slim + ResourceRequirement: + coresMax: 1 + ramMin: 2000 inputs: src: type: File diff --git 
a/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl b/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl new file mode 100644 index 00000000..c3e049da --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl @@ -0,0 +1,17 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: ["sort"] +hints: + ResourceRequirement: + coresMax: 1 + outdirMin: $(inputs.input_file.size) +inputs: + input_file: + type: File + inputBinding: + position: 1 +outputs: + sorted_file: + type: stdout +stdout: sorted.txt From cab340df7189515398c7e130bdcf290700f9534d Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 18 Feb 2022 13:59:21 -0800 Subject: [PATCH 05/41] Use a Toil that can generate dest buckets and mount Docker --- .../demo-cwl-project/workflows/nontrivial/options.json | 2 +- packages/engines/toil/Dockerfile | 2 +- packages/engines/toil/toil.aws.sh | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/demo-cwl-project/workflows/nontrivial/options.json b/examples/demo-cwl-project/workflows/nontrivial/options.json index e07ad5a6..0967ef42 100644 --- a/examples/demo-cwl-project/workflows/nontrivial/options.json +++ b/examples/demo-cwl-project/workflows/nontrivial/options.json @@ -1 +1 @@ -{"--destBucket": "s3://agc-318423852362-us-west-2/adamnovak-scratch/out", "--singularity": null} +{} diff --git a/packages/engines/toil/Dockerfile b/packages/engines/toil/Dockerfile index 610bb7c0..bdd809ad 100644 --- a/packages/engines/toil/Dockerfile +++ b/packages/engines/toil/Dockerfile @@ -45,7 +45,7 @@ RUN npm install -g concurrently@7.0.0 # Install Toil COPY THIRD-PARTY /opt/ -ARG TOIL_VERSION="e9a82098629046f672aaee4c5f14f46bc67be4ce" +ARG TOIL_VERSION="8ae548c95b6ae66ec62e946667ed58825dcb55a9" RUN python3 -m pip install git+https://github.com/DataBiosphere/toil.git@${TOIL_VERSION}#egg=toil[aws,cwl,server] # copy the entrypoint script to the image diff --git 
a/packages/engines/toil/toil.aws.sh b/packages/engines/toil/toil.aws.sh index a256a360..755cff66 100644 --- a/packages/engines/toil/toil.aws.sh +++ b/packages/engines/toil/toil.aws.sh @@ -10,10 +10,10 @@ printenv echo "=== START SERVER ===" -# We expect some AGC info in the environment: JOB_QUEUE_ARN +# We expect some AGC info in the environment: JOB_QUEUE_ARN and ROOT_DIR # These come from packages/cdk/lib/env/context-app-parameters.ts -# If we need more we'll need to add them in the Toil engine construct, or maybe stop passing getEngineContainer() down as a parameter. -# We assume whatever role the batch jobs get when they go in the queue is the right role for them. +# And also TOIL_AWS_BATCH_JOB_ROLE_ARN must be set in Toil's environment. +# This comes from packages/cdk/lib/stacks/engines/toil-engine-construct.ts AWS_REGION=$(echo ${JOB_QUEUE_ARN} | cut -f4 -d':') set -x @@ -23,6 +23,6 @@ export TOIL_WES_JOB_STORE_TYPE="aws" concurrently -n rabbitmq,celery,toil \ "rabbitmq-server" \ "celery --broker=${TOIL_WES_BROKER_URL} -A toil.server.celery_app worker --loglevel=INFO" \ - "toil server --debug --host=0.0.0.0 --port=8000 --opt=--batchSystem=aws_batch '--opt=--awsBatchQueue=${JOB_QUEUE_ARN}' '--opt=--awsBatchRegion=${AWS_REGION}'" + "toil server --debug --host=0.0.0.0 --port=8000 --dest_bucket_base=${ROOT_DIR} --opt=--batchSystem=aws_batch '--opt=--awsBatchQueue=${JOB_QUEUE_ARN}' '--opt=--awsBatchRegion=${AWS_REGION}'" From a9df643eea95c3003a2402156e338dc44f108598 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 15 Mar 2022 14:22:54 -0700 Subject: [PATCH 06/41] Fix undefined type in mock --- packages/cli/internal/pkg/mocks/manager/mock_interfaces.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cli/internal/pkg/mocks/manager/mock_interfaces.go b/packages/cli/internal/pkg/mocks/manager/mock_interfaces.go index cb5dc0f6..b6290c82 100644 --- a/packages/cli/internal/pkg/mocks/manager/mock_interfaces.go +++ 
b/packages/cli/internal/pkg/mocks/manager/mock_interfaces.go @@ -51,7 +51,7 @@ func (mr *MockWorkflowManagerMockRecorder) GetRunLog(runId interface{}) *gomock. } // GetRunLogData mocks base method. -func (m *MockWorkflowManager) GetRunLogData(runId, dataUrl string) (*io.ReadCloser, error) { +func (m *MockWorkflowManager) GetRunLogData(runId string, dataUrl string) (*io.ReadCloser, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "GetRunLogData", runId, dataUrl) ret0, _ := ret[0].(*io.ReadCloser) @@ -60,7 +60,7 @@ func (m *MockWorkflowManager) GetRunLogData(runId, dataUrl string) (*io.ReadClos } // GetRunLogData indicates an expected call of GetRunLogData. -func (mr *MockWorkflowManagerMockRecorder) GetRunLogData(runId, dataUrl interface{}) *gomock.Call { +func (mr *MockWorkflowManagerMockRecorder) GetRunLogData(runId string, dataUrl interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetRunLogData", reflect.TypeOf((*MockWorkflowManager)(nil).GetRunLogData), runId, dataUrl) } From c0f4fbba4689472cfe80ef7310404eabd6031334 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 15 Mar 2022 14:23:33 -0700 Subject: [PATCH 07/41] Fix size requirement in example workflow --- examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl b/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl index c3e049da..40a1c5d3 100644 --- a/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl +++ b/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl @@ -2,10 +2,12 @@ cwlVersion: v1.2 class: CommandLineTool baseCommand: ["sort"] +requirements: + InlineJavascriptRequirement: {} hints: ResourceRequirement: coresMax: 1 - outdirMin: $(inputs.input_file.size) + outdirMin: $(parseInt(Math.ceil(inputs.input_file.size/(2**20)))) inputs: input_file: type: File From 
674bfa4ba673867a9ca0673ca923fdd81d542e33 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 15 Mar 2022 14:33:21 -0700 Subject: [PATCH 08/41] Use current Toil with its AGC integration merged --- packages/engines/toil/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/engines/toil/Dockerfile b/packages/engines/toil/Dockerfile index bdd809ad..dd3c72d0 100644 --- a/packages/engines/toil/Dockerfile +++ b/packages/engines/toil/Dockerfile @@ -45,7 +45,7 @@ RUN npm install -g concurrently@7.0.0 # Install Toil COPY THIRD-PARTY /opt/ -ARG TOIL_VERSION="8ae548c95b6ae66ec62e946667ed58825dcb55a9" +ARG TOIL_VERSION="26006cc7b41bc6ac2e83955746308507399bf94b" RUN python3 -m pip install git+https://github.com/DataBiosphere/toil.git@${TOIL_VERSION}#egg=toil[aws,cwl,server] # copy the entrypoint script to the image From 9cc8e12fcde4c0a0f32e37fc7a076116e572d344 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 16 Mar 2022 11:45:16 -0700 Subject: [PATCH 09/41] Use older JS exponentiation --- examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl | 2 +- packages/engines/toil/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl b/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl index 40a1c5d3..b80c91a0 100644 --- a/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl +++ b/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl @@ -7,7 +7,7 @@ requirements: hints: ResourceRequirement: coresMax: 1 - outdirMin: $(parseInt(Math.ceil(inputs.input_file.size/(2**20)))) + outdirMin: $(parseInt(Math.ceil(inputs.input_file.size / Math.pow(2, 20)))) inputs: input_file: type: File diff --git a/packages/engines/toil/Dockerfile b/packages/engines/toil/Dockerfile index dd3c72d0..7708e9cd 100644 --- a/packages/engines/toil/Dockerfile +++ b/packages/engines/toil/Dockerfile @@ -45,7 +45,7 @@ RUN npm install -g concurrently@7.0.0 # Install Toil 
COPY THIRD-PARTY /opt/ -ARG TOIL_VERSION="26006cc7b41bc6ac2e83955746308507399bf94b" +ARG TOIL_VERSION="9ac4159f00d1150269ae041138d2db3fd4844794" RUN python3 -m pip install git+https://github.com/DataBiosphere/toil.git@${TOIL_VERSION}#egg=toil[aws,cwl,server] # copy the entrypoint script to the image From 7b71d7e6e3580f16797f966d8c0bf3c28c128dd5 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 23 Mar 2022 11:39:29 -0700 Subject: [PATCH 10/41] Add a workflow that will run a lot of little jobs --- .../workflows/manyjobs/MANIFEST.json | 7 +++ .../workflows/manyjobs/compress-file.cwl | 18 +++++++ .../workflows/manyjobs/concatenate-files.cwl | 14 +++++ .../workflows/manyjobs/inputs.json | 4 ++ .../workflows/manyjobs/inputs.tiny.json | 4 ++ .../workflows/manyjobs/make-a-little-data.js | 21 ++++++++ .../workflows/manyjobs/make-array.cwl | 13 +++++ .../workflows/manyjobs/manyjobs.cwl | 52 +++++++++++++++++++ .../workflows/manyjobs/options.json | 1 + .../workflows/manyjobs/run-script.cwl | 24 +++++++++ .../workflows/manyjobs/sort-file.cwl | 19 +++++++ 11 files changed, 177 insertions(+) create mode 100644 examples/demo-cwl-project/workflows/manyjobs/MANIFEST.json create mode 100644 examples/demo-cwl-project/workflows/manyjobs/compress-file.cwl create mode 100644 examples/demo-cwl-project/workflows/manyjobs/concatenate-files.cwl create mode 100644 examples/demo-cwl-project/workflows/manyjobs/inputs.json create mode 100644 examples/demo-cwl-project/workflows/manyjobs/inputs.tiny.json create mode 100644 examples/demo-cwl-project/workflows/manyjobs/make-a-little-data.js create mode 100644 examples/demo-cwl-project/workflows/manyjobs/make-array.cwl create mode 100644 examples/demo-cwl-project/workflows/manyjobs/manyjobs.cwl create mode 100644 examples/demo-cwl-project/workflows/manyjobs/options.json create mode 100644 examples/demo-cwl-project/workflows/manyjobs/run-script.cwl create mode 100644 examples/demo-cwl-project/workflows/manyjobs/sort-file.cwl diff --git 
a/examples/demo-cwl-project/workflows/manyjobs/MANIFEST.json b/examples/demo-cwl-project/workflows/manyjobs/MANIFEST.json new file mode 100644 index 00000000..74d8a721 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/MANIFEST.json @@ -0,0 +1,7 @@ +{ + "mainWorkflowURL": "manyjobs.cwl", + "inputFileURLs": [ + "inputs.json" + ], + "optionsFileURL": "options.json" +} diff --git a/examples/demo-cwl-project/workflows/manyjobs/compress-file.cwl b/examples/demo-cwl-project/workflows/manyjobs/compress-file.cwl new file mode 100644 index 00000000..d3217f83 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/compress-file.cwl @@ -0,0 +1,18 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: ["-9", "-p", "8", "-c"] +hints: + DockerRequirement: + dockerPull: bytesco/pigz + ResourceRequirement: + coresMin: 8 +inputs: + input_file: + type: File + inputBinding: + position: 1 +outputs: + compressed_file: + type: stdout +stdout: compressed.gz diff --git a/examples/demo-cwl-project/workflows/manyjobs/concatenate-files.cwl b/examples/demo-cwl-project/workflows/manyjobs/concatenate-files.cwl new file mode 100644 index 00000000..310f8639 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/concatenate-files.cwl @@ -0,0 +1,14 @@ +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: cat +inputs: + files: + type: File[] + inputBinding: + position: 1 + +outputs: + concatenated_file: + type: stdout + +stdout: concatenated.txt diff --git a/examples/demo-cwl-project/workflows/manyjobs/inputs.json b/examples/demo-cwl-project/workflows/manyjobs/inputs.json new file mode 100644 index 00000000..398b01ee --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/inputs.json @@ -0,0 +1,4 @@ +{ + "script_file": {"class": "File", "path": "make-a-little-data.js"}, + "script_argument_count": 1000 +} diff --git a/examples/demo-cwl-project/workflows/manyjobs/inputs.tiny.json 
b/examples/demo-cwl-project/workflows/manyjobs/inputs.tiny.json new file mode 100644 index 00000000..1305cfef --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/inputs.tiny.json @@ -0,0 +1,4 @@ +{ + "script_file": {"class": "File", "path": "make-a-little-data.js"}, + "script_argument_count": 2 +} diff --git a/examples/demo-cwl-project/workflows/manyjobs/make-a-little-data.js b/examples/demo-cwl-project/workflows/manyjobs/make-a-little-data.js new file mode 100644 index 00000000..e1cb80be --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/make-a-little-data.js @@ -0,0 +1,21 @@ +const crypto = require('crypto') +if (process.argv.length > 2) { + let arg = process.argv[2] + console.log("Argument is: " + arg) + + let hashes = [arg] + + for (let i = 10; i >= 0; i--) { + console.log(i + " bottles of " + arg + " on the wall...") + hasher = crypto.createHash('sha512') + for (let h of hashes) { + hasher.update(h) + } + hashes.push(hasher.digest('hex')) + } + + console.log("After meditating on the nature of " + arg + ", it turns out to be " + hashes[hashes.length - 1]) + +} else { + console.log("Didn't get an argument") +} diff --git a/examples/demo-cwl-project/workflows/manyjobs/make-array.cwl b/examples/demo-cwl-project/workflows/manyjobs/make-array.cwl new file mode 100644 index 00000000..14b68781 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/make-array.cwl @@ -0,0 +1,13 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.2 +class: ExpressionTool +requirements: + InlineJavascriptRequirement: {} +inputs: + size: + type: int +outputs: + array: + type: string[] +expression: "$({array: function(){var arr = []; for (var i = 0; i < inputs.size; i++) {arr.push('' + i)}; return arr;}()})" + diff --git a/examples/demo-cwl-project/workflows/manyjobs/manyjobs.cwl b/examples/demo-cwl-project/workflows/manyjobs/manyjobs.cwl new file mode 100644 index 00000000..1772c7b8 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/manyjobs.cwl 
@@ -0,0 +1,52 @@ +cwlVersion: v1.2 +class: Workflow + +requirements: + SubworkflowFeatureRequirement: {} + ScatterFeatureRequirement: {} + +inputs: + - id: script_file + type: File + - id: script_argument_count + type: int + +steps: + make_array: + run: make-array.cwl + in: + size: script_argument_count + out: [array] + + scripts: + run: run-script.cwl + scatter: arg + in: + src: script_file + arg: make_array/array + out: [script_output] + + sort: + run: sort-file.cwl + scatter: input_file + in: + input_file: scripts/script_output + out: [sorted_file] + + concat: + run: concatenate-files.cwl + in: + files: sort/sorted_file + out: + [concatenated_file] + + compress: + run: compress-file.cwl + in: + input_file: concat/concatenated_file + out: [compressed_file] + +outputs: + - id: output + type: File + outputSource: compress/compressed_file diff --git a/examples/demo-cwl-project/workflows/manyjobs/options.json b/examples/demo-cwl-project/workflows/manyjobs/options.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/options.json @@ -0,0 +1 @@ +{} diff --git a/examples/demo-cwl-project/workflows/manyjobs/run-script.cwl b/examples/demo-cwl-project/workflows/manyjobs/run-script.cwl new file mode 100644 index 00000000..02e0aa9f --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/run-script.cwl @@ -0,0 +1,24 @@ +#!/usr/bin/env cwl-runner +# Modified from the CWL docs +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: node +hints: + DockerRequirement: + dockerPull: node:slim + ResourceRequirement: + coresMax: 1 + ramMin: 2000 +inputs: + src: + type: File + inputBinding: + position: 1 + arg: + type: string + inputBinding: + position: 2 +outputs: + script_output: + type: stdout +stdout: output.txt diff --git a/examples/demo-cwl-project/workflows/manyjobs/sort-file.cwl b/examples/demo-cwl-project/workflows/manyjobs/sort-file.cwl new file mode 100644 index 00000000..b80c91a0 --- /dev/null +++ 
b/examples/demo-cwl-project/workflows/manyjobs/sort-file.cwl @@ -0,0 +1,19 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: ["sort"] +requirements: + InlineJavascriptRequirement: {} +hints: + ResourceRequirement: + coresMax: 1 + outdirMin: $(parseInt(Math.ceil(inputs.input_file.size / Math.pow(2, 20)))) +inputs: + input_file: + type: File + inputBinding: + position: 1 +outputs: + sorted_file: + type: stdout +stdout: sorted.txt From cbd8cec07a351f263e717efdee27dde59519a5ee Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 23 Mar 2022 11:44:22 -0700 Subject: [PATCH 11/41] Attach workflow to project --- examples/demo-cwl-project/agc-project.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/demo-cwl-project/agc-project.yaml b/examples/demo-cwl-project/agc-project.yaml index cd6becb8..cd8e0bcf 100644 --- a/examples/demo-cwl-project/agc-project.yaml +++ b/examples/demo-cwl-project/agc-project.yaml @@ -12,6 +12,11 @@ workflows: language: cwl version: v1.2 sourceURL: workflows/nontrivial + manyjobs: + type: + language: cwl + version: v1.2 + sourceURL: workflows/manyjobs contexts: myContext: engines: From b017df9d4cc50336944c729513afa37229f1cd19 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 23 Mar 2022 14:01:50 -0700 Subject: [PATCH 12/41] Turn off Toil caching It can't work right until https://github.com/DataBiosphere/toil/issues/4050 is fixed. 
--- packages/engines/toil/toil.aws.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/engines/toil/toil.aws.sh b/packages/engines/toil/toil.aws.sh index 755cff66..8d8ce67b 100644 --- a/packages/engines/toil/toil.aws.sh +++ b/packages/engines/toil/toil.aws.sh @@ -23,6 +23,6 @@ export TOIL_WES_JOB_STORE_TYPE="aws" concurrently -n rabbitmq,celery,toil \ "rabbitmq-server" \ "celery --broker=${TOIL_WES_BROKER_URL} -A toil.server.celery_app worker --loglevel=INFO" \ - "toil server --debug --host=0.0.0.0 --port=8000 --dest_bucket_base=${ROOT_DIR} --opt=--batchSystem=aws_batch '--opt=--awsBatchQueue=${JOB_QUEUE_ARN}' '--opt=--awsBatchRegion=${AWS_REGION}'" + "toil server --debug --host=0.0.0.0 --port=8000 --dest_bucket_base=${ROOT_DIR} --opt=--batchSystem=aws_batch '--opt=--awsBatchQueue=${JOB_QUEUE_ARN}' '--opt=--awsBatchRegion=${AWS_REGION}' --opt=--disableCaching" From 28158991cc45ebb0e2aa2a93670a61dbd5d77ff2 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 24 Mar 2022 08:49:01 -0700 Subject: [PATCH 13/41] Rename CWL demo to not conflict with WDL demo --- examples/demo-cwl-project/agc-project.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/demo-cwl-project/agc-project.yaml b/examples/demo-cwl-project/agc-project.yaml index cd8e0bcf..14416366 100644 --- a/examples/demo-cwl-project/agc-project.yaml +++ b/examples/demo-cwl-project/agc-project.yaml @@ -1,5 +1,5 @@ --- -name: Demo +name: CWLDemo schemaVersion: 1 workflows: hello: From c996774ea7eacb3c825b630f9c49c938da466641 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 24 Mar 2022 12:23:59 -0700 Subject: [PATCH 14/41] Use Toil that will point to S3 URLs in output --- packages/engines/toil/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/engines/toil/Dockerfile b/packages/engines/toil/Dockerfile index 7708e9cd..07d4f92c 100644 --- a/packages/engines/toil/Dockerfile +++ b/packages/engines/toil/Dockerfile @@ -45,7 +45,7 
@@ RUN npm install -g concurrently@7.0.0 # Install Toil COPY THIRD-PARTY /opt/ -ARG TOIL_VERSION="9ac4159f00d1150269ae041138d2db3fd4844794" +ARG TOIL_VERSION="62cf1054e5af2c2c483396e651cd0e7be85330fe" RUN python3 -m pip install git+https://github.com/DataBiosphere/toil.git@${TOIL_VERSION}#egg=toil[aws,cwl,server] # copy the entrypoint script to the image From 9f2f47cf70a17823cf477f6f16de8d71d209beed Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 08:28:01 -0700 Subject: [PATCH 15/41] Stop changing mock files --- packages/cli/internal/pkg/mocks/manager/mock_interfaces.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cli/internal/pkg/mocks/manager/mock_interfaces.go b/packages/cli/internal/pkg/mocks/manager/mock_interfaces.go index b6290c82..cb5dc0f6 100644 --- a/packages/cli/internal/pkg/mocks/manager/mock_interfaces.go +++ b/packages/cli/internal/pkg/mocks/manager/mock_interfaces.go @@ -51,7 +51,7 @@ func (mr *MockWorkflowManagerMockRecorder) GetRunLog(runId interface{}) *gomock. } // GetRunLogData mocks base method. -func (m *MockWorkflowManager) GetRunLogData(runId string, dataUrl string) (*io.ReadCloser, error) { +func (m *MockWorkflowManager) GetRunLogData(runId, dataUrl string) (*io.ReadCloser, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "GetRunLogData", runId, dataUrl) ret0, _ := ret[0].(*io.ReadCloser) @@ -60,7 +60,7 @@ func (m *MockWorkflowManager) GetRunLogData(runId string, dataUrl string) (*io.R } // GetRunLogData indicates an expected call of GetRunLogData. 
-func (mr *MockWorkflowManagerMockRecorder) GetRunLogData(runId string, dataUrl interface{}) *gomock.Call { +func (mr *MockWorkflowManagerMockRecorder) GetRunLogData(runId, dataUrl interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetRunLogData", reflect.TypeOf((*MockWorkflowManager)(nil).GetRunLogData), runId, dataUrl) } From b5ad0e6beb6e693e0ab251de106d186358ca964b Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 08:34:16 -0700 Subject: [PATCH 16/41] Rename toilJobArn to toilJobDefinitionArnPattern --- packages/cdk/lib/roles/policies/toil-batch-policy.ts | 8 +++----- packages/cdk/lib/roles/toil-engine-role.ts | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/packages/cdk/lib/roles/policies/toil-batch-policy.ts b/packages/cdk/lib/roles/policies/toil-batch-policy.ts index bdb467cd..9e360db7 100644 --- a/packages/cdk/lib/roles/policies/toil-batch-policy.ts +++ b/packages/cdk/lib/roles/policies/toil-batch-policy.ts @@ -2,9 +2,7 @@ import { PolicyDocument, PolicyStatement, Effect } from "aws-cdk-lib/aws-iam"; export interface ToilBatchPolicyProps { jobQueueArn: string; - // This is actually a pattern that matches all ARNs for potentially relevant - // definitions, since Toil makes its own definitions. 
- toilJobArn: string; + toilJobDefinitionArnPattern: string; } export class ToilBatchPolicy extends PolicyDocument { @@ -20,12 +18,12 @@ export class ToilBatchPolicy extends PolicyDocument { new PolicyStatement({ effect: Effect.ALLOW, actions: ["batch:RegisterJobDefinition", "batch:DeregisterJobDefinition"], - resources: [props.toilJobArn], + resources: [props.toilJobDefinitionArnPattern], }), new PolicyStatement({ effect: Effect.ALLOW, actions: ["batch:SubmitJob"], - resources: [props.toilJobArn, props.jobQueueArn], + resources: [props.toilJobDefinitionArnPattern, props.jobQueueArn], }), ], }); diff --git a/packages/cdk/lib/roles/toil-engine-role.ts b/packages/cdk/lib/roles/toil-engine-role.ts index 33f7bcd5..f45fdfdf 100644 --- a/packages/cdk/lib/roles/toil-engine-role.ts +++ b/packages/cdk/lib/roles/toil-engine-role.ts @@ -16,7 +16,7 @@ interface ToilEngineRoleProps extends ToilJobRoleProps { // launch jobs on AWS Batch that themselves have a ToilJobRole role assigned. export class ToilEngineRole extends ToilJobRole { constructor(scope: Construct, id: string, props: ToilEngineRoleProps) { - const toilJobArn = Arn.format( + const toilJobDefinitionArnPattern = Arn.format( { account: Aws.ACCOUNT_ID, region: Aws.REGION, @@ -29,7 +29,7 @@ export class ToilEngineRole extends ToilJobRole { super(scope, id, props, { ToilEngineBatchPolicy: new ToilBatchPolicy({ ...props, - toilJobArn: toilJobArn, + toilJobDefinitionArnPattern: toilJobDefinitionArnPattern, }), // TODO: Can we restrict this to allow passing the role only to jobs? 
ToilIamPassJobRole: new PolicyDocument({ From 1a573f5feeeaaf0b93d63b53a1204e33e72b5546 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 08:57:34 -0700 Subject: [PATCH 17/41] Avoid adding permissions on * for Toil batch access --- .../lib/roles/policies/toil-batch-policy.ts | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/packages/cdk/lib/roles/policies/toil-batch-policy.ts b/packages/cdk/lib/roles/policies/toil-batch-policy.ts index 9e360db7..86570b23 100644 --- a/packages/cdk/lib/roles/policies/toil-batch-policy.ts +++ b/packages/cdk/lib/roles/policies/toil-batch-policy.ts @@ -1,31 +1,28 @@ import { PolicyDocument, PolicyStatement, Effect } from "aws-cdk-lib/aws-iam"; +import { CromwellBatchPolicy } from "./cromwell-batch-policy"; export interface ToilBatchPolicyProps { jobQueueArn: string; - toilJobDefinitionArnPattern: string; + toilJobArnPattern: string; } -export class ToilBatchPolicy extends PolicyDocument { +export class ToilBatchPolicy extends CromwellBatchPolicy { constructor(props: ToilBatchPolicyProps) { + // To avoid adding more policies allowing access to "*", we are based on + // the Cromwell policy set. When the permissions for that get locked + // down to the minimum required to use Batch, we will inherit those + // improvements. 
super({ - assignSids: true, - statements: [ - new PolicyStatement({ - effect: Effect.ALLOW, - actions: ["batch:DescribeJobDefinitions", "batch:ListJobs", "batch:DescribeJobs", "batch:DescribeJobQueues", "batch:DescribeComputeEnvironments"], - resources: ["*"], - }), - new PolicyStatement({ - effect: Effect.ALLOW, - actions: ["batch:RegisterJobDefinition", "batch:DeregisterJobDefinition"], - resources: [props.toilJobDefinitionArnPattern], - }), - new PolicyStatement({ - effect: Effect.ALLOW, - actions: ["batch:SubmitJob"], - resources: [props.toilJobDefinitionArnPattern, props.jobQueueArn], - }), - ], + jobQueueArn: props.jobQueueArn, + cromwellJobArn: props.toilJobArnPattern + }); + + // The only additional thing we need is to be able to deregister job + // definitions, which Cromwell doesn't do. + this.addStatements(new PolicyStatement({ + effect: Effect.ALLOW, + actions: ["batch:DeregisterJobDefinition"], + resources: [props.toilJobArnPattern], }); } } From c8f2634aaf41f5f2d086e64924c4f88ac3fa2158 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 09:01:51 -0700 Subject: [PATCH 18/41] Avoid granting Toil extra permissions that maybe only Cromwell needs --- packages/cdk/lib/roles/toil-engine-role.ts | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/packages/cdk/lib/roles/toil-engine-role.ts b/packages/cdk/lib/roles/toil-engine-role.ts index f45fdfdf..cceb113d 100644 --- a/packages/cdk/lib/roles/toil-engine-role.ts +++ b/packages/cdk/lib/roles/toil-engine-role.ts @@ -16,7 +16,7 @@ interface ToilEngineRoleProps extends ToilJobRoleProps { // launch jobs on AWS Batch that themselves have a ToilJobRole role assigned. 
export class ToilEngineRole extends ToilJobRole { constructor(scope: Construct, id: string, props: ToilEngineRoleProps) { - const toilJobDefinitionArnPattern = Arn.format( + const toilJobArnPattern = Arn.format( { account: Aws.ACCOUNT_ID, region: Aws.REGION, @@ -29,7 +29,7 @@ export class ToilEngineRole extends ToilJobRole { super(scope, id, props, { ToilEngineBatchPolicy: new ToilBatchPolicy({ ...props, - toilJobDefinitionArnPattern: toilJobDefinitionArnPattern, + toilJobArnPattern: toilJobArnPattern, }), // TODO: Can we restrict this to allow passing the role only to jobs? ToilIamPassJobRole: new PolicyDocument({ @@ -42,16 +42,6 @@ export class ToilEngineRole extends ToilJobRole { }), ], }), - ToilEcsDescribeInstances: new PolicyDocument({ - assignSids: true, - statements: [ - new PolicyStatement({ - effect: Effect.ALLOW, - actions: ["ecs:DescribeContainerInstances", "s3:ListAllMyBuckets"], - resources: ["*"], - }), - ], - }), }); } } From 4c71b9b1bcd4cde309a3e6b8c3c1b25cf4d85731 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 09:02:52 -0700 Subject: [PATCH 19/41] Drop TODO comment --- packages/cdk/lib/roles/toil-engine-role.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/cdk/lib/roles/toil-engine-role.ts b/packages/cdk/lib/roles/toil-engine-role.ts index cceb113d..61b05a88 100644 --- a/packages/cdk/lib/roles/toil-engine-role.ts +++ b/packages/cdk/lib/roles/toil-engine-role.ts @@ -31,7 +31,6 @@ export class ToilEngineRole extends ToilJobRole { ...props, toilJobArnPattern: toilJobArnPattern, }), - // TODO: Can we restrict this to allow passing the role only to jobs? 
ToilIamPassJobRole: new PolicyDocument({ assignSids: true, statements: [ From a8a78f469aa569596988226fbb2c42a13be1d8d1 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 09:22:16 -0700 Subject: [PATCH 20/41] Restrict Toil S3 and SimpleDB policies to job store data --- packages/cdk/lib/roles/toil-job-role.ts | 46 +++++++++++++++++++++---- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/packages/cdk/lib/roles/toil-job-role.ts b/packages/cdk/lib/roles/toil-job-role.ts index b500565e..d8228ea3 100644 --- a/packages/cdk/lib/roles/toil-job-role.ts +++ b/packages/cdk/lib/roles/toil-job-role.ts @@ -1,6 +1,7 @@ import { PolicyOptions } from "../types/engine-options"; import { BucketOperations } from "../common/BucketOperations"; import { Construct } from "constructs"; +import { Arn, Aws, Stack } from "aws-cdk-lib"; import { Role, ServicePrincipal, PolicyDocument, PolicyStatement, Effect } from "aws-cdk-lib/aws-iam"; export interface ToilJobRoleProps { @@ -13,29 +14,60 @@ export interface ToilJobRoleProps { // job store and/or additional user data in S3. export class ToilJobRole extends Role { constructor(scope: Construct, id: string, props: ToilJobRoleProps, additionalInlinePolicies?: { [key: string]: PolicyDocument }) { + + // Toil workflows make S3 buckets and SimpleDB domains for scratch. + // In SimpleDB we use the "toil-registry" domain, and also domains for job + // stores generated by toil.jobStores.util.generate_locator, which also + // start with "toil-". + // We will no longer need this when + // https://github.com/DataBiosphere/toil/issues/964 is fixed and Toil stops + // using SimpleDB. + const jobStoreSimpleDbArnPattern = Arn.format( + { + account: Aws.ACCOUNT_ID, + region: Aws.REGION, + partition: Aws.PARTITION, + resource: "domain/toil-*", + service: "sdb", + }, + scope as Stack + ); + // In S3 we use buckets for job stores generated by + // toil.jobStores.util.generate_locator, which start with "toil-". 
+ // We will no longer need this when + // https://github.com/DataBiosphere/toil/issues/3983 is fixed and Toil + // becomes able to just use the one AGC-provided bucket. + const jobStoreS3ArnPattern = Arn.format( + { + account: Aws.ACCOUNT_ID, + region: Aws.REGION, + partition: Aws.PARTITION, + resource: "toil-*", + service: "s3", + }, + scope as Stack + ); + super(scope, id, { assumedBy: new ServicePrincipal("ecs-tasks.amazonaws.com"), inlinePolicies: { - // TODO: Remove this when Toil no longer uses its own SimpleDB domains - ToilSimpleDBFullAccess: new PolicyDocument({ + ToilSimpleDbJobStoreAccess: new PolicyDocument({ assignSids: true, statements: [ new PolicyStatement({ effect: Effect.ALLOW, actions: ["sdb:*"], - resources: ["*"], + resources: [jobStoreSimpleDbArnPattern], }), ], }), - // TODO: Remove this when Toil is taught to use AGC buckets to store - // its workflow state and doesn't need to make and destroy its own. - ToilS3FullAccess: new PolicyDocument({ + ToilS3JobStoreAccess: new PolicyDocument({ assignSids: true, statements: [ new PolicyStatement({ effect: Effect.ALLOW, actions: ["s3:*"], - resources: ["*"], + resources: [jobStoreS3ArnPattern], }), ], }), From 74ee3f31279dd04a63f527137a2b66704acd96ba Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 09:24:23 -0700 Subject: [PATCH 21/41] Revise comment per review --- packages/cdk/lib/stacks/context-stack.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/cdk/lib/stacks/context-stack.ts b/packages/cdk/lib/stacks/context-stack.ts index 91ec0025..1ccc7bed 100644 --- a/packages/cdk/lib/stacks/context-stack.ts +++ b/packages/cdk/lib/stacks/context-stack.ts @@ -184,8 +184,9 @@ export class ContextStack extends Stack { return { ...commonBatchProps, - // We only use one Batch from the stack for the Toil jobs. The server - // lives in Fargate and doesn't run in either of these. 
+ // We only use one Batch compute environment and queue from the stack for + // the Toil jobs. The server lives in Fargate and doesn't run in either + // of these. createSpotBatch: requestSpotInstances, createOnDemandBatch: !requestSpotInstances, }; From 9d99a7ff5cfbf2f418dcb088cd35a06fee6393e4 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 09:25:51 -0700 Subject: [PATCH 22/41] Clarify what the JobQueue is for --- packages/cdk/lib/stacks/engines/toil-engine-construct.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cdk/lib/stacks/engines/toil-engine-construct.ts b/packages/cdk/lib/stacks/engines/toil-engine-construct.ts index 80afe656..68691fc8 100644 --- a/packages/cdk/lib/stacks/engines/toil-engine-construct.ts +++ b/packages/cdk/lib/stacks/engines/toil-engine-construct.ts @@ -15,7 +15,7 @@ import { Construct } from "constructs"; export interface ToilEngineConstructProps extends EngineOptions { /** - * AWS Batch JobQueue to use for running workflows. + * AWS Batch JobQueue to use for running workflow tasks. 
*/ readonly jobQueue: IJobQueue; } From 83193cbf5a70dfcf46bf46f53e63e8714ce905a0 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 09:36:06 -0700 Subject: [PATCH 23/41] Make adapter log group optional --- packages/cdk/lib/stacks/engines/engine-construct.ts | 5 +++-- packages/cdk/lib/stacks/engines/toil-engine-construct.ts | 6 ------ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/packages/cdk/lib/stacks/engines/engine-construct.ts b/packages/cdk/lib/stacks/engines/engine-construct.ts index 48115e37..5989cc18 100644 --- a/packages/cdk/lib/stacks/engines/engine-construct.ts +++ b/packages/cdk/lib/stacks/engines/engine-construct.ts @@ -11,7 +11,7 @@ import { getCommonParameter } from "../../util"; export interface EngineOutputs { accessLogGroup: ILogGroup; - adapterLogGroup: ILogGroup; + adapterLogGroup?: ILogGroup; engineLogGroup: ILogGroup; wesUrl: string; } @@ -24,7 +24,8 @@ export abstract class EngineConstruct extends Construct { public outputToParent(): void { const outputs = this.getOutputs(); new CfnOutput(Stack.of(this), "AccessLogGroupName", { value: outputs.accessLogGroup.logGroupName }); - new CfnOutput(Stack.of(this), "AdapterLogGroupName", { value: outputs.adapterLogGroup.logGroupName }); + // We don't always have a WES log group, but the AGC CLI always expects us to have an AdapterLogGroupName output + new CfnOutput(Stack.of(this), "AdapterLogGroupName", { value: outputs.adapterLogGroup ? 
outputs.adapterLogGroup.logGroupName : "" }); new CfnOutput(Stack.of(this), "EngineLogGroupName", { value: outputs.engineLogGroup.logGroupName }); new CfnOutput(Stack.of(this), "WesUrl", { value: outputs.wesUrl }); } diff --git a/packages/cdk/lib/stacks/engines/toil-engine-construct.ts b/packages/cdk/lib/stacks/engines/toil-engine-construct.ts index 68691fc8..5eed84d6 100644 --- a/packages/cdk/lib/stacks/engines/toil-engine-construct.ts +++ b/packages/cdk/lib/stacks/engines/toil-engine-construct.ts @@ -22,9 +22,7 @@ export interface ToilEngineConstructProps extends EngineOptions { export class ToilEngineConstruct extends EngineConstruct { public readonly engine: SecureService; - public readonly adapterRole: IRole; public readonly apiProxy: ApiProxy; - public readonly adapterLogGroup: ILogGroup; public readonly engineLogGroup: ILogGroup; public readonly engineRole: IRole; public readonly jobRole: IRole; @@ -57,10 +55,7 @@ export class ToilEngineConstruct extends EngineConstruct { TOIL_AWS_BATCH_JOB_ROLE_ARN: this.jobRole.roleArn, }); - // TODO: Move log group creation into service construct and make it a property this.engine = this.getEngineServiceDefinition(props.vpc, engineContainer, this.engineLogGroup); - // This is unused because we have no adapter, but a log group is required. - this.adapterLogGroup = new LogGroup(this, "AdapterLogGroup"); // We don't use an adapter, so put the access-controlling proxy right in // front of the engine load balancer. 
@@ -74,7 +69,6 @@ export class ToilEngineConstruct extends EngineConstruct { protected getOutputs(): EngineOutputs { return { accessLogGroup: this.apiProxy.accessLogGroup, - adapterLogGroup: this.adapterLogGroup, engineLogGroup: this.engineLogGroup, wesUrl: this.apiProxy.restApi.url, }; From 01ea17730207af073995294be2fdba00153b5219 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 12:50:39 -0700 Subject: [PATCH 24/41] Provide WES images only when needed --- packages/cli/internal/pkg/cli/context/manager.go | 16 +++++++++++++--- .../cli/internal/pkg/environment/environment.go | 10 ++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/packages/cli/internal/pkg/cli/context/manager.go b/packages/cli/internal/pkg/cli/context/manager.go index b3133b5c..2bf2cea7 100644 --- a/packages/cli/internal/pkg/cli/context/manager.go +++ b/packages/cli/internal/pkg/cli/context/manager.go @@ -90,13 +90,23 @@ func (m *Manager) getEnvironmentVars() []string { // Different engines will need different environment variables to define // their Docker images. engine := m.contextEnv.EngineName - // Each engine has its own section in imageRefs, and for now we assume they - // all care about a WES adapter image. var relevantImageKeys []string relevantImageKeys = append(relevantImageKeys, strings.ToUpper(engine)) - relevantImageKeys = append(relevantImageKeys, environment.WesImageKey) + // Do one level of dependency resolution, without deduplication. + // If the dependency structure becomes more complex we will have to upgrade + // this algorithm. 
+ var dependencyImageKeys []string + for imageKey := range relevantImageKeys { + for dependencies := range environment.ImageDependencies[imageKey] { + // Collect the dependencies of all the relevant images + dependencyImageKeys = append(dependencyImageKeys, dependencies) + } + } + // And add them to the relevant images + relevantImageKeys = append(relevantImageKeys, dependencyImageKeys) var environmentVars []string for _, imageName := range relevantImageKeys { + // Each engine or other component has its own section in imageRefs environmentVars = append(environmentVars, fmt.Sprintf("ECR_%s_ACCOUNT_ID=%s", imageName, m.imageRefs[imageName].RegistryId), fmt.Sprintf("ECR_%s_REGION=%s", imageName, m.region), diff --git a/packages/cli/internal/pkg/environment/environment.go b/packages/cli/internal/pkg/environment/environment.go index 0d82b909..63de087b 100644 --- a/packages/cli/internal/pkg/environment/environment.go +++ b/packages/cli/internal/pkg/environment/environment.go @@ -63,6 +63,16 @@ var CommonImages = map[string]ecr.ImageReference{ }, } +// Some workflow engines require other images +var ImageDependencies = map[string][]string{ + WesImageKey: [], + CromwellImageKey: [WesImageKey], + NextflowImageKey: [WesImageKey], + MiniwdlImageKey: [WesImageKey], + SnakemakeImageKey: [WesImageKey], + ToilImageKey: [] +} + func LookUpEnvOrDefault(envVariableName string, defaultValue string) string { if value, ok := os.LookupEnv(envVariableName); ok { return value From a064c4b8c162ee1ee934bfc6e96b5b61504c5928 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 13:07:52 -0700 Subject: [PATCH 25/41] Use constants to track expected arg counts --- .../internal/pkg/cli/context/common_test.go | 6 +++ .../pkg/cli/context/manager_deploy_test.go | 43 ++++++++++++++++--- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/packages/cli/internal/pkg/cli/context/common_test.go b/packages/cli/internal/pkg/cli/context/common_test.go index 2c23b47e..e3d155c0 100644 --- 
a/packages/cli/internal/pkg/cli/context/common_test.go +++ b/packages/cli/internal/pkg/cli/context/common_test.go @@ -14,6 +14,7 @@ const ( testContextName1 = "testContextName1" testContextName2 = "testContextName2" testContextName3 = "testContextName3" + testContextName4 = "testContextName4" testUnknownContextName = "unknown-context-name" testS3Location1 = "s3://test-s3-location-1" testS3Location2 = "s3://test-s3-location-2" @@ -48,6 +49,11 @@ var ( {Type: "nextflow", Engine: "nextflow"}, }, }, + testContextName4: { + Engines: []spec.Engine{ + {Type: "cwl", Engine: "toil"}, + }, + }, }, } ) diff --git a/packages/cli/internal/pkg/cli/context/manager_deploy_test.go b/packages/cli/internal/pkg/cli/context/manager_deploy_test.go index cd086e2b..c1181e39 100644 --- a/packages/cli/internal/pkg/cli/context/manager_deploy_test.go +++ b/packages/cli/internal/pkg/cli/context/manager_deploy_test.go @@ -14,6 +14,16 @@ import ( "github.com/stretchr/testify/assert" ) +const ( + // We check a lot of generated CDK commands to make sure they have the + // right number of command line arguments. How many should there be to + // start? + testCdkBaseArgumentCount = 26 + // And how many do we expect if the WES adapter images are also to be + // passed? 
+ testCdkAdaptedArgumentCount = testCdkBaseArgumentCount + 4 +) + func TestManager_Deploy(t *testing.T) { origVerbose := logging.Verbose origDisplayProgressBar := displayProgressBar @@ -46,7 +56,7 @@ func TestManager_Deploy(t *testing.T) { mockClients.ssmMock.EXPECT().GetCustomTags().Return(testTags) mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["NEXTFLOW"]).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(30), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkAdaptedArgumentCount), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName3}), []cdk.ProgressStream{mockClients.progressStream1}).Return([]cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName3}}) return mockClients @@ -69,12 +79,35 @@ func TestManager_Deploy(t *testing.T) { mockClients.ssmMock.EXPECT().GetCustomTags().Return("") mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["NEXTFLOW"]).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(30), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkAdaptedArgumentCount), 
testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName3}), []cdk.ProgressStream{mockClients.progressStream1}).Return([]cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName3}}) return mockClients }, }, + "deploy success (no adapter)": { + contextList: []string{testContextName4}, + expectedProgressResultList: []ProgressResult{ + {Outputs: []string{"some message"}, Context: testContextName4}, + }, + setupMocks: func(t *testing.T) mockClients { + mockClients := createMocks(t) + defer close(mockClients.progressStream1) + defer close(mockClients.progressStream2) + mockClients.configMock.EXPECT().GetUserEmailAddress().Return(testUserEmail, nil) + mockClients.configMock.EXPECT().GetUserId().Return(testUserId, nil) + mockClients.projMock.EXPECT().Read().Return(testValidProjectSpec, nil) + mockClients.ssmMock.EXPECT().GetOutputBucket().Return(testOutputBucket, nil) + mockClients.ssmMock.EXPECT().GetCommonParameter("installed-artifacts/s3-root-url").Return(testArtifactBucket, nil) + mockClients.ssmMock.EXPECT().GetCustomTags().Return("") + mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["TOIL"]).Return(nil) + clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkBaseArgumentCount), testCdkBaseArgumentCount).After(clearContext).Return(mockClients.progressStream1, nil) + displayProgressBar = mockClients.cdkMock.DisplayProgressBar + mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName4}), []cdk.ProgressStream{mockClients.progressStream1}).Return([]cdk.Result{{Outputs: 
[]string{"some message"}, ExecutionName: testContextName4}}) + return mockClients + }, + }, "multiple deploy success": { contextList: []string{testContextName1, testContextName2}, expectedProgressResultList: []ProgressResult{ @@ -94,8 +127,8 @@ func TestManager_Deploy(t *testing.T) { mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["CROMWELL"]).Times(2).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) clearContext2 := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(30), testContextName1).After(clearContext).Return(mockClients.progressStream1, nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(30), testContextName2).After(clearContext2).Return(mockClients.progressStream2, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkAdaptedArgumentCount), testContextName1).After(clearContext).Return(mockClients.progressStream1, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkAdaptedArgumentCount), testContextName2).After(clearContext2).Return(mockClients.progressStream2, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar expectedCdkResult := []cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName1}, {Outputs: []string{"some other message"}, ExecutionName: testContextName2}} mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName1, testContextName2}), []cdk.ProgressStream{mockClients.progressStream1, mockClients.progressStream2}).Return(expectedCdkResult) @@ -235,7 +268,7 @@ func TestManager_Deploy(t 
*testing.T) { mockClients.ssmMock.EXPECT().GetCommonParameter("installed-artifacts/s3-root-url").Return(testArtifactBucket, nil) mockClients.ssmMock.EXPECT().GetCustomTags().Return(testTags) mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(30), testContextName1).Return(nil, fmt.Errorf("some context error")) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkAdaptedArgumentCount), testContextName1).Return(nil, fmt.Errorf("some context error")) mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["CROMWELL"]).Return(nil) return mockClients }, From f08b1ba43e5ecb4630b2ed66a393a861dcfe1c0d Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 13:09:27 -0700 Subject: [PATCH 26/41] Alphabetize workflow types in help --- packages/cli/internal/pkg/cli/project_init.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cli/internal/pkg/cli/project_init.go b/packages/cli/internal/pkg/cli/project_init.go index 698615f9..1646fbce 100644 --- a/packages/cli/internal/pkg/cli/project_init.go +++ b/packages/cli/internal/pkg/cli/project_init.go @@ -126,7 +126,7 @@ func (o *initProjectOpts) validateProject() error { func BuildProjectInitCommand() *cobra.Command { vars := initProjectVars{} cmd := &cobra.Command{ - Use: "init project_name --workflow-type {cwl|wdl|nextflow|snakemake}", + Use: "init project_name --workflow-type {cwl|nextflow|snakemake|wdl}", Short: "Initialize current directory with a new empty AGC project for a particular workflow type.", Long: `Initialize current directory with a new empty AGC project for a particular workflow type. 
Project specification file 'agc-project.yaml' will be created in the current directory.`, From 1aeb097b5a32f64f354b903df57f17fe507e9faf Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 13:10:16 -0700 Subject: [PATCH 27/41] Fix spelling --- packages/engines/toil/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/engines/toil/Dockerfile b/packages/engines/toil/Dockerfile index 07d4f92c..6cbaf1d7 100644 --- a/packages/engines/toil/Dockerfile +++ b/packages/engines/toil/Dockerfile @@ -26,7 +26,7 @@ RUN curl -s "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "/tmp/ # Add rabbitmq repository ADD rabbitmq.repo /etc/yum.repos.d/rabbitmq.repo -# Sadly pre-importing keys doesn't seem to save any time whan we use yum later, so don't so it. +# Sadly pre-importing keys doesn't seem to save any time when we use yum later, so don't do it. # Install deps RUN curl -fsSL https://rpm.nodesource.com/setup_16.x | bash - \ From 6bd9b434d1bc597f2c08f2214feb7119f38ff0b1 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 13:17:21 -0700 Subject: [PATCH 28/41] Update Toil readme to fix spelling and remove details --- packages/engines/toil/README.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/packages/engines/toil/README.md b/packages/engines/toil/README.md index cd084548..5295a231 100644 --- a/packages/engines/toil/README.md +++ b/packages/engines/toil/README.md @@ -1,6 +1,6 @@ ## Toil AWS Mirror -A Toil mono-container WES server for use with Amazon AGC. +A Toil mono-container WES server for use with Amazon Genomics CLI.
### Building the Container Manually @@ -41,8 +41,11 @@ docker exec -ti "$(docker ps | grep adamnovak/toil-agc | rev | cut -f1 -d' ' | r To push this to an Amazon ECR repo, where AGC can get at it, you can do something like: ```bash -aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 318423852362.dkr.ecr.us-west-2.amazonaws.com -docker build -t adamnovak/toil-agc . -docker tag adamnovak/toil-agc:latest 318423852362.dkr.ecr.us-west-2.amazonaws.com/adamnovak/toil-agc:latest -docker push 318423852362.dkr.ecr.us-west-2.amazonaws.com/adamnovak/toil-agc:latest +AWS_REGION= # For example, us-west-2 +AWS_ACCOUNT= # For example, 123456789012 +ECR_REPO= # For example, yourname/toil-agc. Needs to be created in the ECR console. +aws ecr get-login-password --region ${AWS_REGION} | docker login --username AWS --password-stdin ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com +docker build -t ${ECR_REPO} . +docker tag ${ECR_REPO}:latest ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:latest +docker push ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:latest ``` From f64eb0efd489dd3224cfac8a91b61f4072dd15a2 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 13:39:56 -0700 Subject: [PATCH 29/41] Fix formatting and run tests to completion --- .../lib/roles/policies/toil-batch-policy.ts | 16 +++++++----- packages/cdk/lib/roles/toil-job-role.ts | 1 - .../internal/pkg/cli/account_activate_test.go | 2 +- .../internal/pkg/cli/context/common_test.go | 4 +-- .../cli/internal/pkg/cli/context/manager.go | 26 +++++++++---------- .../pkg/cli/context/manager_deploy_test.go | 4 +-- .../pkg/cli/context/manager_list_test.go | 9 +++++++ .../internal/pkg/environment/environment.go | 14 +++++----- 8 files changed, 43 insertions(+), 33 deletions(-) diff --git a/packages/cdk/lib/roles/policies/toil-batch-policy.ts b/packages/cdk/lib/roles/policies/toil-batch-policy.ts index 86570b23..3a641f02 100644 ---
a/packages/cdk/lib/roles/policies/toil-batch-policy.ts +++ b/packages/cdk/lib/roles/policies/toil-batch-policy.ts @@ -1,4 +1,4 @@ -import { PolicyDocument, PolicyStatement, Effect } from "aws-cdk-lib/aws-iam"; +import { PolicyStatement, Effect } from "aws-cdk-lib/aws-iam"; import { CromwellBatchPolicy } from "./cromwell-batch-policy"; export interface ToilBatchPolicyProps { @@ -14,15 +14,17 @@ export class ToilBatchPolicy extends CromwellBatchPolicy { // improvements. super({ jobQueueArn: props.jobQueueArn, - cromwellJobArn: props.toilJobArnPattern + cromwellJobArn: props.toilJobArnPattern, }); // The only additional thing we need is to be able to deregister job // definitions, which Cromwell doesn't do. - this.addStatements(new PolicyStatement({ - effect: Effect.ALLOW, - actions: ["batch:DeregisterJobDefinition"], - resources: [props.toilJobArnPattern], - }); + this.addStatements( + new PolicyStatement({ + effect: Effect.ALLOW, + actions: ["batch:DeregisterJobDefinition"], + resources: [props.toilJobArnPattern], + }) + ); } } diff --git a/packages/cdk/lib/roles/toil-job-role.ts b/packages/cdk/lib/roles/toil-job-role.ts index d8228ea3..287fa393 100644 --- a/packages/cdk/lib/roles/toil-job-role.ts +++ b/packages/cdk/lib/roles/toil-job-role.ts @@ -14,7 +14,6 @@ export interface ToilJobRoleProps { // job store and/or additional user data in S3. export class ToilJobRole extends Role { constructor(scope: Construct, id: string, props: ToilJobRoleProps, additionalInlinePolicies?: { [key: string]: PolicyDocument }) { - // Toil workflows make S3 buckets and SimpleDB domains for scratch. 
// In SimpleDB we use the "toil-registry" domain, and also domains for job // stores generated by toil.jobStores.util.generate_locator, which also diff --git a/packages/cli/internal/pkg/cli/account_activate_test.go b/packages/cli/internal/pkg/cli/account_activate_test.go index 18580925..bca2389a 100644 --- a/packages/cli/internal/pkg/cli/account_activate_test.go +++ b/packages/cli/internal/pkg/cli/account_activate_test.go @@ -23,7 +23,7 @@ const ( testCromwellRepository = "test-cromwell-repo" testNextflowRepository = "test-nextflow-repo" testMiniwdlRepository = "test-miniwdl-repo" - testToilRepository = "test-toil-repo" + testToilRepository = "test-toil-repo" testCoreStackName = "Agc-Core" ) diff --git a/packages/cli/internal/pkg/cli/context/common_test.go b/packages/cli/internal/pkg/cli/context/common_test.go index e3d155c0..7dff1309 100644 --- a/packages/cli/internal/pkg/cli/context/common_test.go +++ b/packages/cli/internal/pkg/cli/context/common_test.go @@ -14,7 +14,7 @@ const ( testContextName1 = "testContextName1" testContextName2 = "testContextName2" testContextName3 = "testContextName3" - testContextName4 = "testContextName4" + testContextName4 = "testContextName4" testUnknownContextName = "unknown-context-name" testS3Location1 = "s3://test-s3-location-1" testS3Location2 = "s3://test-s3-location-2" @@ -49,7 +49,7 @@ var ( {Type: "nextflow", Engine: "nextflow"}, }, }, - testContextName4: { + testContextName4: { Engines: []spec.Engine{ {Type: "cwl", Engine: "toil"}, }, diff --git a/packages/cli/internal/pkg/cli/context/manager.go b/packages/cli/internal/pkg/cli/context/manager.go index 2bf2cea7..e0f410fb 100644 --- a/packages/cli/internal/pkg/cli/context/manager.go +++ b/packages/cli/internal/pkg/cli/context/manager.go @@ -92,21 +92,21 @@ func (m *Manager) getEnvironmentVars() []string { engine := m.contextEnv.EngineName var relevantImageKeys []string relevantImageKeys = append(relevantImageKeys, strings.ToUpper(engine)) - // Do one level of dependency 
resolution, without deduplication. - // If the dependency structure becomes more complex we will have to upgrade - // this algorithm. - var dependencyImageKeys []string - for imageKey := range relevantImageKeys { - for dependencies := range environment.ImageDependencies[imageKey] { - // Collect the dependencies of all the relevant images - dependencyImageKeys = append(dependencyImageKeys, dependencies) - } - } - // And add them to the relevant images - relevantImageKeys = append(relevantImageKeys, dependencyImageKeys) + // Do one level of dependency resolution, without deduplication. + // If the dependency structure becomes more complex we will have to upgrade + // this algorithm. + var dependencyImageKeys []string + for _, imageKey := range relevantImageKeys { + for _, dependencies := range environment.ImageDependencies[imageKey] { + // Collect the dependencies of all the relevant images + dependencyImageKeys = append(dependencyImageKeys, dependencies) + } + } + // And add them to the relevant images + relevantImageKeys = append(relevantImageKeys, dependencyImageKeys...) var environmentVars []string for _, imageName := range relevantImageKeys { - // Each engine or other component has its own section in imageRefs + // Each engine or other component has its own section in imageRefs environmentVars = append(environmentVars, fmt.Sprintf("ECR_%s_ACCOUNT_ID=%s", imageName, m.imageRefs[imageName].RegistryId), fmt.Sprintf("ECR_%s_REGION=%s", imageName, m.region), diff --git a/packages/cli/internal/pkg/cli/context/manager_deploy_test.go b/packages/cli/internal/pkg/cli/context/manager_deploy_test.go index c1181e39..83016b4a 100644 --- a/packages/cli/internal/pkg/cli/context/manager_deploy_test.go +++ b/packages/cli/internal/pkg/cli/context/manager_deploy_test.go @@ -18,7 +18,7 @@ const ( // We check a lot of generated CDK commands to make sure they have the // right number of command line arguments. How many should there be to // start? 
- testCdkBaseArgumentCount = 26 + testCdkBaseArgumentCount = 27 // And how many do we expect if the WES adapter images are also to be // passed? testCdkAdaptedArgumentCount = testCdkBaseArgumentCount + 4 @@ -102,7 +102,7 @@ func TestManager_Deploy(t *testing.T) { mockClients.ssmMock.EXPECT().GetCustomTags().Return("") mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["TOIL"]).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkBaseArgumentCount), testCdkBaseArgumentCount).After(clearContext).Return(mockClients.progressStream1, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkBaseArgumentCount), testContextName4).After(clearContext).Return(mockClients.progressStream1, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName4}), []cdk.ProgressStream{mockClients.progressStream1}).Return([]cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName4}}) return mockClients diff --git a/packages/cli/internal/pkg/cli/context/manager_list_test.go b/packages/cli/internal/pkg/cli/context/manager_list_test.go index fc6005c2..9e8ec923 100644 --- a/packages/cli/internal/pkg/cli/context/manager_list_test.go +++ b/packages/cli/internal/pkg/cli/context/manager_list_test.go @@ -43,6 +43,15 @@ func TestManager_List(t *testing.T) { }, }, }, + testContextName4: { + Name: testContextName4, + Engines: []spec.Engine{ + { + Type: "cwl", + Engine: "toil", + }, + }, + }, }, setupMocks: func(t *testing.T) mockClients { mockClients := createMocks(t) diff --git a/packages/cli/internal/pkg/environment/environment.go 
b/packages/cli/internal/pkg/environment/environment.go index 63de087b..20b05061 100644 --- a/packages/cli/internal/pkg/environment/environment.go +++ b/packages/cli/internal/pkg/environment/environment.go @@ -64,13 +64,13 @@ var CommonImages = map[string]ecr.ImageReference{ } // Some workflow engines require other images -var ImageDependencies = map[string][]string{ - WesImageKey: [], - CromwellImageKey: [WesImageKey], - NextflowImageKey: [WesImageKey], - MiniwdlImageKey: [WesImageKey], - SnakemakeImageKey: [WesImageKey], - ToilImageKey: [] +var ImageDependencies = map[string]([]string){ + WesImageKey: {}, + CromwellImageKey: {WesImageKey}, + NextflowImageKey: {WesImageKey}, + MiniwdlImageKey: {WesImageKey}, + SnakemakeImageKey: {WesImageKey}, + ToilImageKey: {}, } func LookUpEnvOrDefault(envVariableName string, defaultValue string) string { From 4145c5b134f63b05ca84ba72a6e2aab72bf28d0e Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 13:50:19 -0700 Subject: [PATCH 30/41] Define a default filesystem for the Toil engine --- packages/cdk/lib/env/context-app-parameters.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/cdk/lib/env/context-app-parameters.ts b/packages/cdk/lib/env/context-app-parameters.ts index 0e6e585a..0ce1447d 100644 --- a/packages/cdk/lib/env/context-app-parameters.ts +++ b/packages/cdk/lib/env/context-app-parameters.ts @@ -204,6 +204,9 @@ export class ContextAppParameters { case "snakemake": defFilesystem = "EFS"; break; + case "toil": + defFilesystem = "S3"; + break; default: throw Error(`Engine '${this.engineName}' is not supported`); } From 595913cf478c7d1ad0d971b5ca9117d749e236f5 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 14:03:58 -0700 Subject: [PATCH 31/41] Remove disallowed pieces from S3 ARNs --- packages/cdk/lib/roles/toil-job-role.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/cdk/lib/roles/toil-job-role.ts b/packages/cdk/lib/roles/toil-job-role.ts index 
287fa393..b4613047 100644 --- a/packages/cdk/lib/roles/toil-job-role.ts +++ b/packages/cdk/lib/roles/toil-job-role.ts @@ -38,8 +38,7 @@ export class ToilJobRole extends Role { // becomes able to just use the one AGC-provided bucket. const jobStoreS3ArnPattern = Arn.format( { - account: Aws.ACCOUNT_ID, - region: Aws.REGION, + // Note that regions and account IDs aren't allowed in S3 ARNs partition: Aws.PARTITION, resource: "toil-*", service: "s3", From 01947147c7eda4b657173f74b69eab22d72d0012 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 14:08:06 -0700 Subject: [PATCH 32/41] Pass empty components to appease ARN builder --- packages/cdk/lib/roles/toil-job-role.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/cdk/lib/roles/toil-job-role.ts b/packages/cdk/lib/roles/toil-job-role.ts index b4613047..ae77cf89 100644 --- a/packages/cdk/lib/roles/toil-job-role.ts +++ b/packages/cdk/lib/roles/toil-job-role.ts @@ -38,7 +38,11 @@ export class ToilJobRole extends Role { // becomes able to just use the one AGC-provided bucket. const jobStoreS3ArnPattern = Arn.format( { - // Note that regions and account IDs aren't allowed in S3 ARNs + // Note that regions and account IDs aren't allowed in S3 ARNs. + // But the formatter requires them to be passed, and wants an empty + // string if we don't actually want them filled in. 
+ account: "", + region: "", partition: Aws.PARTITION, resource: "toil-*", service: "s3", From 94f90632fbf883f97fdfa1232509d6ea706678c9 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 30 Mar 2022 15:19:08 -0700 Subject: [PATCH 33/41] Fix CI error from linter having opinions about loops --- packages/cli/internal/pkg/cli/context/manager.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/cli/internal/pkg/cli/context/manager.go b/packages/cli/internal/pkg/cli/context/manager.go index e0f410fb..2583365e 100644 --- a/packages/cli/internal/pkg/cli/context/manager.go +++ b/packages/cli/internal/pkg/cli/context/manager.go @@ -97,10 +97,7 @@ func (m *Manager) getEnvironmentVars() []string { // this algorithm. var dependencyImageKeys []string for _, imageKey := range relevantImageKeys { - for _, dependencies := range environment.ImageDependencies[imageKey] { - // Collect the dependencies of all the relevant images - dependencyImageKeys = append(dependencyImageKeys, dependencies) - } + dependencyImageKeys = append(dependencyImageKeys, environment.ImageDependencies[imageKey]...) } // And add them to the relevant images relevantImageKeys = append(relevantImageKeys, dependencyImageKeys...) 
From e80e18584bd484bde222c3fd52cb4abfbab9aeaf Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 26 Apr 2022 13:19:44 -0700 Subject: [PATCH 34/41] Add Toil engine documentation --- site/content/en/docs/Concepts/engines.md | 1 + site/content/en/docs/Workflow engines/toil.md | 63 +++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 site/content/en/docs/Workflow engines/toil.md diff --git a/site/content/en/docs/Concepts/engines.md b/site/content/en/docs/Concepts/engines.md index 1726722c..388c154f 100644 --- a/site/content/en/docs/Concepts/engines.md +++ b/site/content/en/docs/Concepts/engines.md @@ -21,6 +21,7 @@ Currently, Amazon Genomics CLI's officially supported engines can be used to run | [Nextflow](https://www.nextflow.io) | [Nextflow DSL](https://www.nextflow.io/docs/latest/script.html) | Standard and DSL 2 | Head Process | | [miniwdl](https://miniwdl.readthedocs.io/en/latest/) | [WDL](https://openwdl.org) | [documented here](https://miniwdl.readthedocs.io/en/latest/runner_reference.html?highlight=errata#wdl-interoperability) | Head Process | | [Snakemake](https://snakemake.readthedocs.io/en/stable/) | [Snakemake](https://snakemake.readthedocs.io/en/stable/snakefiles/writing_snakefiles.html) | All versions | Head Process | +| [Toil](http://toil.ucsc-cgl.org/) | [CWL](https://www.commonwl.org/) | All versions up to 1.2 | Server | Overtime we plan to add additional engine and language support and provide the ability for third party developers to develop engine plugins. 
diff --git a/site/content/en/docs/Workflow engines/toil.md b/site/content/en/docs/Workflow engines/toil.md new file mode 100644 index 00000000..5fcbcd32 --- /dev/null +++ b/site/content/en/docs/Workflow engines/toil.md @@ -0,0 +1,63 @@ +--- +title: "Toil" +date: 2022-04-26T15:34:00-04:00 +draft: false +weight: 20 +description: > + Details on the Toil engine deployed by Amazon Genomics CLI +--- + +## Description + +[Toil](http://toil.ucsc-cgl.org/) is a workflow engine developed by the +[Computational Genomics Lab](https://cglgenomics.ucsc.edu/) at the +[UC Santa Cruz Genomics Institute](https://genomics.ucsc.edu/). In Amazon Genomics +CLI, Toil can be deployed in a +[context]( {{< relref "../Concepts/contexts" >}} ) as an +[engine]( {{< relref "../Concepts/engines">}} ) to run workflows based on the +[CWL](https://www.commonwl.org/) specification. + +Toil is an open source project distributed by UC Santa Cruz under the [Apache 2 +license](https://github.com/DataBiosphere/toil/blob/master/LICENSE) and +available on +[GitHub](https://github.com/DataBiosphere/toil). + +## Architecture + +There are two components of a Toil engine as deployed in an Amazon Genomics +CLI context: + +### Engine Service + +The Toil engine is run in "server mode" as a container service in ECS. The +engine can run multiple workflows asynchronously. Workflow tasks are run in an +elastic [compute environment]( #compute-environment ) and monitored by Toil. +Amazon Genomics CLI communicates with the Toil engine via a GA4GH +[WES](https://github.com/ga4gh/workflow-execution-service-schemas) REST service +which the server offers, available via API Gateway. + +### Compute Environment + +Workflow tasks are submitted by Toil to an AWS Batch queue and run in +Toil-provided containers using an AWS Compute Environment. 
Tasks which use the +[CWL `DockerRequirement`](https://www.commonwl.org/user_guide/07-containers/index.html) +will additionally be run under +[Singularity](https://github.com/sylabs/singularity#readme). AWS Batch +coordinates the elastic provisioning of EC2 instances (container hosts) based +on the available work in the queue. Batch will place containers on container +hosts as space allows. + +#### Disk Expansion + +Container hosts in the Batch compute environment use EBS volumes as local +scratch space. As an EBS volume approaches a capacity threshold, new EBS +volumes will be attached and merged into the file system. These volumes are +destroyed when AWS Batch terminates the container host. CWL disk space +requirements are ignored by Toil when running against AWS Batch. + +This setup means that workflows that succeed on AGC may fail on other CWL +runners (because they do not request enough disk space) and workflows that +succeed on other CWL runners may fail on AGC (because they allocate disk space +faster than the expansion process can react). + + From f586d88b10da1f23327039f94fb03ffa2e680cca Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 27 Apr 2022 07:07:05 -0700 Subject: [PATCH 35/41] Correct inaccuracies about container setup --- site/content/en/docs/Workflow engines/toil.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/site/content/en/docs/Workflow engines/toil.md b/site/content/en/docs/Workflow engines/toil.md index 5fcbcd32..1bf1b9e9 100644 --- a/site/content/en/docs/Workflow engines/toil.md +++ b/site/content/en/docs/Workflow engines/toil.md @@ -41,11 +41,10 @@ which the server offers, available via API Gateway. Workflow tasks are submitted by Toil to an AWS Batch queue and run in Toil-provided containers using an AWS Compute Environment. 
Tasks which use the [CWL `DockerRequirement`](https://www.commonwl.org/user_guide/07-containers/index.html) -will additionally be run under -[Singularity](https://github.com/sylabs/singularity#readme). AWS Batch -coordinates the elastic provisioning of EC2 instances (container hosts) based -on the available work in the queue. Batch will place containers on container -hosts as space allows. +will additionally be run in sibling containers on the host Docker daemon. AWS +Batch coordinates the elastic provisioning of EC2 instances (container hosts) +based on the available work in the queue. Batch will place containers on +container hosts as space allows. #### Disk Expansion From 5687a893037884b76662103baa5af4ec0f234ba8 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 27 Apr 2022 13:59:52 -0700 Subject: [PATCH 36/41] Set a plausible-looking set of IAM permissions for the Toil job store --- packages/cdk/lib/roles/toil-engine-role.ts | 6 ++- packages/cdk/lib/roles/toil-job-role.ts | 45 +++++++++++++++++++++- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/packages/cdk/lib/roles/toil-engine-role.ts b/packages/cdk/lib/roles/toil-engine-role.ts index 61b05a88..e8068075 100644 --- a/packages/cdk/lib/roles/toil-engine-role.ts +++ b/packages/cdk/lib/roles/toil-engine-role.ts @@ -21,16 +21,20 @@ export class ToilEngineRole extends ToilJobRole { account: Aws.ACCOUNT_ID, region: Aws.REGION, partition: Aws.PARTITION, - resource: "job-definition/*", + // Toil makes all its job definition names start with "toil-" + resource: "job-definition/toil-*", service: "batch", }, scope as Stack ); super(scope, id, props, { + // In addition to what jobs do, we need to be able to manipulate AWS + // Batch. ToilEngineBatchPolicy: new ToilBatchPolicy({ ...props, toilJobArnPattern: toilJobArnPattern, }), + // And we need to be able to pass the job role to AWS Batch jobs. 
ToilIamPassJobRole: new PolicyDocument({ assignSids: true, statements: [ diff --git a/packages/cdk/lib/roles/toil-job-role.ts b/packages/cdk/lib/roles/toil-job-role.ts index ae77cf89..eb97cbe4 100644 --- a/packages/cdk/lib/roles/toil-job-role.ts +++ b/packages/cdk/lib/roles/toil-job-role.ts @@ -58,7 +58,20 @@ export class ToilJobRole extends Role { statements: [ new PolicyStatement({ effect: Effect.ALLOW, - actions: ["sdb:*"], + // These are the SimpleDB IAM actions associated with the + // SimpleDB operations that the Toil AWS job store calls. They + // are most likely all used, but leaving each out has not been + // tested. + actions: [ + "sdb:CreateDomain", + "sdb:DeleteDomain", + "sdb:GetAttributes", + "sdb:PutAttributes", + "sdb:BatchPutAttributes", + "sdb:DeleteAttributes", + "sdb:BatchDeleteAttributes", + "sdb:Select" + ], resources: [jobStoreSimpleDbArnPattern], }), ], @@ -68,7 +81,35 @@ export class ToilJobRole extends Role { statements: [ new PolicyStatement({ effect: Effect.ALLOW, - actions: ["s3:*"], + // These are the IAM actions which seem relevant to the Boto3 + // client and resource operations that the Toil AWS job store + // does on its job store bucket. + // It is possible some are not actually used, especially some of + // the get/list operations which may or may not actually be + // required to construct the associated Boto3 Resource objects. + // Leaving each out has not been tested. 
+ actions: [ + "s3:CreateBucket", + "s3:DeleteBucket", + "s3:GetBucketTagging", + "s3:PutBucketTagging", + "s3:GetBucketVersioning", + "s3:PutBucketVersioning", + "s3:HeadBucket", + "s3:GetObject", + "s3:GetObjectVersion", + "s3:PutObject", + "s3:ListBucket", + "s3:ListBucketVersions", + "s3:ListObjects", + "s3:DeleteObject", + "s3:DeleteObjectVersion", + "s3:GetObjectAcl", + "s3:PutObjectAcl", + "s3:ListBucketMultipartUploads", + "s3:ListMultipartUploadParts", + "s3:AbortMultipartUpload" + ], resources: [jobStoreS3ArnPattern], }), ], From a8d26fe9f115196beb04d7157a2f71ffd25e3df6 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 27 Apr 2022 14:27:50 -0700 Subject: [PATCH 37/41] Add HeadObject which we might need and a trailing comma --- packages/cdk/lib/roles/toil-job-role.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/cdk/lib/roles/toil-job-role.ts b/packages/cdk/lib/roles/toil-job-role.ts index eb97cbe4..6ab1b3cd 100644 --- a/packages/cdk/lib/roles/toil-job-role.ts +++ b/packages/cdk/lib/roles/toil-job-role.ts @@ -70,7 +70,7 @@ export class ToilJobRole extends Role { "sdb:BatchPutAttributes", "sdb:DeleteAttributes", "sdb:BatchDeleteAttributes", - "sdb:Select" + "sdb:Select", ], resources: [jobStoreSimpleDbArnPattern], }), @@ -96,6 +96,7 @@ export class ToilJobRole extends Role { "s3:GetBucketVersioning", "s3:PutBucketVersioning", "s3:HeadBucket", + "s3:HeadObject", "s3:GetObject", "s3:GetObjectVersion", "s3:PutObject", @@ -108,7 +109,7 @@ export class ToilJobRole extends Role { "s3:PutObjectAcl", "s3:ListBucketMultipartUploads", "s3:ListMultipartUploadParts", - "s3:AbortMultipartUpload" + "s3:AbortMultipartUpload", ], resources: [jobStoreS3ArnPattern], }), From d1a64a3d61d7e17bd66e26168bd0f05eceedd0aa Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 28 Apr 2022 11:13:51 -0400 Subject: [PATCH 38/41] Remove space in comment which causes build to fail --- packages/cdk/lib/roles/toil-engine-role.ts | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/packages/cdk/lib/roles/toil-engine-role.ts b/packages/cdk/lib/roles/toil-engine-role.ts index e8068075..d788b1bf 100644 --- a/packages/cdk/lib/roles/toil-engine-role.ts +++ b/packages/cdk/lib/roles/toil-engine-role.ts @@ -29,7 +29,7 @@ export class ToilEngineRole extends ToilJobRole { ); super(scope, id, props, { // In addition to what jobs do, we need to be able to manipulate AWS - // Batch. + // Batch. ToilEngineBatchPolicy: new ToilBatchPolicy({ ...props, toilJobArnPattern: toilJobArnPattern, From be5e01becd1b0eead506271fcef5c71b41ac6125 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 3 May 2022 08:38:31 -0700 Subject: [PATCH 39/41] Add s3:GetBucketLocation permission for Toil --- packages/cdk/lib/roles/toil-job-role.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/cdk/lib/roles/toil-job-role.ts b/packages/cdk/lib/roles/toil-job-role.ts index 6ab1b3cd..01a363a4 100644 --- a/packages/cdk/lib/roles/toil-job-role.ts +++ b/packages/cdk/lib/roles/toil-job-role.ts @@ -96,6 +96,7 @@ export class ToilJobRole extends Role { "s3:GetBucketVersioning", "s3:PutBucketVersioning", "s3:HeadBucket", + "s3:GetBucketLocation", "s3:HeadObject", "s3:GetObject", "s3:GetObjectVersion", From d979cacb14a6beb160f9ce6bff1fc4d66fc9bc1e Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 3 May 2022 08:45:47 -0700 Subject: [PATCH 40/41] Delete IAM actions that do not exist according to the console --- packages/cdk/lib/roles/toil-job-role.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/cdk/lib/roles/toil-job-role.ts b/packages/cdk/lib/roles/toil-job-role.ts index 01a363a4..542ae259 100644 --- a/packages/cdk/lib/roles/toil-job-role.ts +++ b/packages/cdk/lib/roles/toil-job-role.ts @@ -95,15 +95,12 @@ export class ToilJobRole extends Role { "s3:PutBucketTagging", "s3:GetBucketVersioning", "s3:PutBucketVersioning", - "s3:HeadBucket", "s3:GetBucketLocation", - "s3:HeadObject", "s3:GetObject", "s3:GetObjectVersion", 
"s3:PutObject", "s3:ListBucket", "s3:ListBucketVersions", - "s3:ListObjects", "s3:DeleteObject", "s3:DeleteObjectVersion", "s3:GetObjectAcl", From c017154ee13261c3f4b93954dcad6a9ff7a41b37 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 3 May 2022 08:55:26 -0700 Subject: [PATCH 41/41] docs: Incorporate README fixes from 26dc5add --- packages/engines/toil/README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/packages/engines/toil/README.md b/packages/engines/toil/README.md index 5295a231..cf102dfa 100644 --- a/packages/engines/toil/README.md +++ b/packages/engines/toil/README.md @@ -7,7 +7,7 @@ A Toil mono-container WES server for use with Amazon Genomics CLI. Go to this directory and run: ```bash -docker build . -f Dockerfile -t adamnovak/toil-agc +docker build . -f Dockerfile -t toil-agc ``` ### Running for Testing @@ -15,13 +15,13 @@ docker build . -f Dockerfile -t adamnovak/toil-agc Having built the container, run: ```bash -docker run -ti --rm -p "127.0.0.1:8000:8000" adamnovak/toil-agc +docker run --name toil-agc-test -ti --rm -p "127.0.0.1:8000:8000" toil-agc ``` This will start the containerized server and make it available on port 8000 on the loopback interface. You can inspect the port mapping with: ```bash -docker port "$(docker ps | grep adamnovak/toil-agc | rev | cut -f1 -d' ' | rev)" +docker port toil-agc-test ``` Then you can talk to it with e.g.: @@ -33,7 +33,7 @@ curl -vvv "http://localhost:8000/ga4gh/wes/v1/service-info" For debugging, you can get inside the container with: ```bash -docker exec -ti "$(docker ps | grep adamnovak/toil-agc | rev | cut -f1 -d' ' | rev)" /bin/bash +docker exec -ti toil-agc-test /bin/bash ``` ### Deploying @@ -46,6 +46,7 @@ AWS_ACCOUNT= # For example, 123456789012 ECR_REPO= # For example, yourname/toil-agc. Needs to be created in the ECR console. 
aws ecr get-login-password --region ${AWS_REGION} | docker login --username AWS --password-stdin ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com docker build -t ${ECR_REPO} . -docker tag adamnovak/toil-agc:latest ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:latest +docker tag ${ECR_REPO}:latest ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:latest docker push ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:latest ``` +