diff --git a/examples/demo-cwl-project/agc-project.yaml b/examples/demo-cwl-project/agc-project.yaml new file mode 100644 index 00000000..14416366 --- /dev/null +++ b/examples/demo-cwl-project/agc-project.yaml @@ -0,0 +1,30 @@ +--- +name: CWLDemo +schemaVersion: 1 +workflows: + hello: + type: + language: cwl + version: v1.2 + sourceURL: workflows/hello/hello.cwl + nontrivial: + type: + language: cwl + version: v1.2 + sourceURL: workflows/nontrivial + manyjobs: + type: + language: cwl + version: v1.2 + sourceURL: workflows/manyjobs +contexts: + myContext: + engines: + - type: cwl + engine: toil + + spotCtx: + requestSpotInstances: true + engines: + - type: cwl + engine: toil diff --git a/examples/demo-cwl-project/workflows/hello/hello.cwl b/examples/demo-cwl-project/workflows/hello/hello.cwl new file mode 100644 index 00000000..041f7714 --- /dev/null +++ b/examples/demo-cwl-project/workflows/hello/hello.cwl @@ -0,0 +1,15 @@ +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: echo +stdout: output.txt +inputs: + - id: message + type: string + default: "Hello world!" + inputBinding: + position: 1 +outputs: + - id: output + type: File + outputBinding: + glob: output.txt diff --git a/examples/demo-cwl-project/workflows/manyjobs/MANIFEST.json b/examples/demo-cwl-project/workflows/manyjobs/MANIFEST.json new file mode 100644 index 00000000..74d8a721 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/MANIFEST.json @@ -0,0 +1,7 @@ +{ + "mainWorkflowURL": "manyjobs.cwl", + "inputFileURLs": [ + "inputs.json" + ], + "optionsFileURL": "options.json" +} diff --git a/examples/demo-cwl-project/workflows/manyjobs/compress-file.cwl b/examples/demo-cwl-project/workflows/manyjobs/compress-file.cwl new file mode 100644 index 00000000..d3217f83 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/compress-file.cwl @@ -0,0 +1,18 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: ["-9", "-p", "8", "-c"] +hints: + DockerRequirement: + dockerPull: bytesco/pigz + ResourceRequirement: + coresMin: 8 +inputs: + input_file: + type: File + inputBinding: + position: 1 +outputs: + compressed_file: + type: stdout +stdout: compressed.gz diff --git a/examples/demo-cwl-project/workflows/manyjobs/concatenate-files.cwl b/examples/demo-cwl-project/workflows/manyjobs/concatenate-files.cwl new file mode 100644 index 00000000..310f8639 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/concatenate-files.cwl @@ -0,0 +1,14 @@ +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: cat +inputs: + files: + type: File[] + inputBinding: + position: 1 + +outputs: + concatenated_file: + type: stdout + +stdout: concatenated.txt diff --git a/examples/demo-cwl-project/workflows/manyjobs/inputs.json b/examples/demo-cwl-project/workflows/manyjobs/inputs.json new file mode 100644 index 00000000..398b01ee --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/inputs.json @@ -0,0 +1,4 @@ +{ + "script_file": {"class": "File", "path": "make-a-little-data.js"}, + "script_argument_count": 1000 +} diff --git a/examples/demo-cwl-project/workflows/manyjobs/inputs.tiny.json b/examples/demo-cwl-project/workflows/manyjobs/inputs.tiny.json new file mode 100644 index 00000000..1305cfef --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/inputs.tiny.json @@ -0,0 +1,4 @@ +{ + "script_file": {"class": "File", "path": "make-a-little-data.js"}, + "script_argument_count": 2 +} diff --git a/examples/demo-cwl-project/workflows/manyjobs/make-a-little-data.js 
b/examples/demo-cwl-project/workflows/manyjobs/make-a-little-data.js new file mode 100644 index 00000000..e1cb80be --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/make-a-little-data.js @@ -0,0 +1,21 @@ +const crypto = require('crypto') +if (process.argv.length > 2) { + let arg = process.argv[2] + console.log("Argument is: " + arg) + + let hashes = [arg] + + for (let i = 10; i >= 0; i--) { + console.log(i + " bottles of " + arg + " on the wall...") + hasher = crypto.createHash('sha512') + for (let h of hashes) { + hasher.update(h) + } + hashes.push(hasher.digest('hex')) + } + + console.log("After meditating on the nature of " + arg + ", it turns out to be " + hashes[hashes.length - 1]) + +} else { + console.log("Didn't get an argument") +} diff --git a/examples/demo-cwl-project/workflows/manyjobs/make-array.cwl b/examples/demo-cwl-project/workflows/manyjobs/make-array.cwl new file mode 100644 index 00000000..14b68781 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/make-array.cwl @@ -0,0 +1,13 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.2 +class: ExpressionTool +requirements: + InlineJavascriptRequirement: {} +inputs: + size: + type: int +outputs: + array: + type: string[] +expression: "$({array: function(){var arr = []; for (var i = 0; i < inputs.size; i++) {arr.push('' + i)}; return arr;}()})" + diff --git a/examples/demo-cwl-project/workflows/manyjobs/manyjobs.cwl b/examples/demo-cwl-project/workflows/manyjobs/manyjobs.cwl new file mode 100644 index 00000000..1772c7b8 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/manyjobs.cwl @@ -0,0 +1,52 @@ +cwlVersion: v1.2 +class: Workflow + +requirements: + SubworkflowFeatureRequirement: {} + ScatterFeatureRequirement: {} + +inputs: + - id: script_file + type: File + - id: script_argument_count + type: int + +steps: + make_array: + run: make-array.cwl + in: + size: script_argument_count + out: [array] + + scripts: + run: run-script.cwl + scatter: arg + in: + src: script_file + arg: make_array/array + out: [script_output] + + sort: + run: sort-file.cwl + scatter: input_file + in: + input_file: scripts/script_output + out: [sorted_file] + + concat: + run: concatenate-files.cwl + in: + files: sort/sorted_file + out: + [concatenated_file] + + compress: + run: compress-file.cwl + in: + input_file: concat/concatenated_file + out: [compressed_file] + +outputs: + - id: output + type: File + outputSource: compress/compressed_file diff --git a/examples/demo-cwl-project/workflows/manyjobs/options.json b/examples/demo-cwl-project/workflows/manyjobs/options.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/options.json @@ -0,0 +1 @@ +{} diff --git a/examples/demo-cwl-project/workflows/manyjobs/run-script.cwl b/examples/demo-cwl-project/workflows/manyjobs/run-script.cwl new file mode 100644 index 00000000..02e0aa9f --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/run-script.cwl @@ -0,0 +1,24 @@ +#!/usr/bin/env cwl-runner +# Modified from the CWL docs +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: node +hints: + DockerRequirement: + dockerPull: node:slim + ResourceRequirement: + coresMax: 1 + ramMin: 2000 +inputs: + src: + type: File + inputBinding: + position: 1 + arg: + type: string + inputBinding: + position: 2 +outputs: + script_output: + type: stdout +stdout: output.txt diff --git a/examples/demo-cwl-project/workflows/manyjobs/sort-file.cwl b/examples/demo-cwl-project/workflows/manyjobs/sort-file.cwl new file mode 
100644 index 00000000..b80c91a0 --- /dev/null +++ b/examples/demo-cwl-project/workflows/manyjobs/sort-file.cwl @@ -0,0 +1,19 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: ["sort"] +requirements: + InlineJavascriptRequirement: {} +hints: + ResourceRequirement: + coresMax: 1 + outdirMin: $(parseInt(Math.ceil(inputs.input_file.size / Math.pow(2, 20)))) +inputs: + input_file: + type: File + inputBinding: + position: 1 +outputs: + sorted_file: + type: stdout +stdout: sorted.txt diff --git a/examples/demo-cwl-project/workflows/nontrivial/MANIFEST.json b/examples/demo-cwl-project/workflows/nontrivial/MANIFEST.json new file mode 100644 index 00000000..dbf09c46 --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/MANIFEST.json @@ -0,0 +1,7 @@ +{ + "mainWorkflowURL": "nontrivial.cwl", + "inputFileURLs": [ + "inputs.json" + ], + "optionsFileURL": "options.json" +} diff --git a/examples/demo-cwl-project/workflows/nontrivial/compress-file.cwl b/examples/demo-cwl-project/workflows/nontrivial/compress-file.cwl new file mode 100644 index 00000000..d3217f83 --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/compress-file.cwl @@ -0,0 +1,18 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: ["-9", "-p", "8", "-c"] +hints: + DockerRequirement: + dockerPull: bytesco/pigz + ResourceRequirement: + coresMin: 8 +inputs: + input_file: + type: File + inputBinding: + position: 1 +outputs: + compressed_file: + type: stdout +stdout: compressed.gz diff --git a/examples/demo-cwl-project/workflows/nontrivial/concatenate-files.cwl b/examples/demo-cwl-project/workflows/nontrivial/concatenate-files.cwl new file mode 100644 index 00000000..310f8639 --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/concatenate-files.cwl @@ -0,0 +1,14 @@ +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: cat +inputs: + files: + type: File[] + inputBinding: + position: 1 + +outputs: + concatenated_file: + type: stdout + +stdout: concatenated.txt diff --git a/examples/demo-cwl-project/workflows/nontrivial/inputs.json b/examples/demo-cwl-project/workflows/nontrivial/inputs.json new file mode 100644 index 00000000..0e5c2c71 --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/inputs.json @@ -0,0 +1,7 @@ +{ + "script_file": {"class": "File", "path": "make-data.js"}, + "script_arguments": [ + "A", "B", "C", "D", "E", "F", "G", + "cats", "pigs", "dogs", "space weasles", "snacks" + ] +} diff --git a/examples/demo-cwl-project/workflows/nontrivial/make-data.js b/examples/demo-cwl-project/workflows/nontrivial/make-data.js new file mode 100644 index 00000000..7ba45fb2 --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/make-data.js @@ -0,0 +1,21 @@ +const crypto = require('crypto') +if (process.argv.length > 2) { + let arg = process.argv[2] + console.log("Argument is: " + arg) + + let hashes = [arg] + + for (let i = 10000; i >= 0; i--) { + console.log(i + " bottles of " + arg + " on the wall...") + hasher = crypto.createHash('sha512') + for (let h of hashes) { + hasher.update(h) + } + hashes.push(hasher.digest('hex')) + } + + console.log("After meditating on the nature of " + arg + ", it turns out to be " + hashes[hashes.length - 1]) + +} else { + console.log("Didn't get an argument") +} diff --git a/examples/demo-cwl-project/workflows/nontrivial/nontrivial.cwl b/examples/demo-cwl-project/workflows/nontrivial/nontrivial.cwl new file mode 100644 index 00000000..f59f801b --- /dev/null +++ 
b/examples/demo-cwl-project/workflows/nontrivial/nontrivial.cwl @@ -0,0 +1,46 @@ +cwlVersion: v1.2 +class: Workflow + +requirements: + SubworkflowFeatureRequirement: {} + ScatterFeatureRequirement: {} + +inputs: + - id: script_file + type: File + - id: script_arguments + type: string[] + +steps: + scripts: + run: run-script.cwl + scatter: arg + in: + src: script_file + arg: script_arguments + out: [script_output] + + sort: + run: sort-file.cwl + scatter: input_file + in: + input_file: scripts/script_output + out: [sorted_file] + + concat: + run: concatenate-files.cwl + in: + files: sort/sorted_file + out: + [concatenated_file] + + compress: + run: compress-file.cwl + in: + input_file: concat/concatenated_file + out: [compressed_file] + +outputs: + - id: output + type: File + outputSource: compress/compressed_file diff --git a/examples/demo-cwl-project/workflows/nontrivial/options.json b/examples/demo-cwl-project/workflows/nontrivial/options.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/options.json @@ -0,0 +1 @@ +{} diff --git a/examples/demo-cwl-project/workflows/nontrivial/run-script.cwl b/examples/demo-cwl-project/workflows/nontrivial/run-script.cwl new file mode 100644 index 00000000..02e0aa9f --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/run-script.cwl @@ -0,0 +1,24 @@ +#!/usr/bin/env cwl-runner +# Modified from the CWL docs +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: node +hints: + DockerRequirement: + dockerPull: node:slim + ResourceRequirement: + coresMax: 1 + ramMin: 2000 +inputs: + src: + type: File + inputBinding: + position: 1 + arg: + type: string + inputBinding: + position: 2 +outputs: + script_output: + type: stdout +stdout: output.txt diff --git a/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl b/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl new file mode 100644 index 00000000..b80c91a0 --- /dev/null +++ b/examples/demo-cwl-project/workflows/nontrivial/sort-file.cwl @@ -0,0 +1,19 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.2 +class: CommandLineTool +baseCommand: ["sort"] +requirements: + InlineJavascriptRequirement: {} +hints: + ResourceRequirement: + coresMax: 1 + outdirMin: $(parseInt(Math.ceil(inputs.input_file.size / Math.pow(2, 20)))) +inputs: + input_file: + type: File + inputBinding: + position: 1 +outputs: + sorted_file: + type: stdout +stdout: sorted.txt diff --git a/packages/cdk/lib/constructs/batch.ts b/packages/cdk/lib/constructs/batch.ts index 8988ffdd..7845a2e9 100644 --- a/packages/cdk/lib/constructs/batch.ts +++ b/packages/cdk/lib/constructs/batch.ts @@ -93,6 +93,7 @@ export interface BatchProps extends ComputeOptions { const defaultComputeType = ComputeResourceType.ON_DEMAND; export class Batch extends Construct { + // This is the role that the backing instances use, not the role that batch jobs run as. 
public readonly role: IRole; public readonly computeEnvironment: IComputeEnvironment; public readonly jobQueue: IJobQueue; diff --git a/packages/cdk/lib/env/context-app-parameters.ts b/packages/cdk/lib/env/context-app-parameters.ts index 4f550a81..0ce1447d 100644 --- a/packages/cdk/lib/env/context-app-parameters.ts +++ b/packages/cdk/lib/env/context-app-parameters.ts @@ -152,7 +152,11 @@ export class ContextAppParameters { return `${this.getContextBucketPath()}/${this.engineName}-execution`; } - public getEngineContainer(jobQueueArn: string): ServiceContainer { + /** + * This function defines the container that server-based engines (like Toil + * or Cromwell) will run their servers in. It is going to run on Fargate. + */ + public getEngineContainer(jobQueueArn: string, additionalEnvVars?: { [key: string]: string }): ServiceContainer { return { serviceName: this.engineName, imageConfig: { designation: this.engineDesignation }, @@ -164,6 +168,7 @@ export class ContextAppParameters { S3BUCKET: this.outputBucketName, ROOT_DIR: this.getEngineBucketPath(), JOB_QUEUE_ARN: jobQueueArn, + ...additionalEnvVars, }, }; } @@ -199,6 +204,9 @@ export class ContextAppParameters { case "snakemake": defFilesystem = "EFS"; break; + case "toil": + defFilesystem = "S3"; + break; default: throw Error(`Engine '${this.engineName}' is not supported`); } diff --git a/packages/cdk/lib/roles/policies/toil-batch-policy.ts b/packages/cdk/lib/roles/policies/toil-batch-policy.ts new file mode 100644 index 00000000..3a641f02 --- /dev/null +++ b/packages/cdk/lib/roles/policies/toil-batch-policy.ts @@ -0,0 +1,30 @@ +import { PolicyStatement, Effect } from "aws-cdk-lib/aws-iam"; +import { CromwellBatchPolicy } from "./cromwell-batch-policy"; + +export interface ToilBatchPolicyProps { + jobQueueArn: string; + toilJobArnPattern: string; +} + +export class ToilBatchPolicy extends CromwellBatchPolicy { + constructor(props: ToilBatchPolicyProps) { + // To avoid adding more policies allowing access to "*", we are based on + // the Cromwell policy set. When the permissions for that get locked + // down to the minimum required to use Batch, we will inherit those + // improvements. + super({ + jobQueueArn: props.jobQueueArn, + cromwellJobArn: props.toilJobArnPattern, + }); + + // The only additional thing we need is to be able to deregister job + // definitions, which Cromwell doesn't do. + this.addStatements( + new PolicyStatement({ + effect: Effect.ALLOW, + actions: ["batch:DeregisterJobDefinition"], + resources: [props.toilJobArnPattern], + }) + ); + } +} diff --git a/packages/cdk/lib/roles/toil-engine-role.ts b/packages/cdk/lib/roles/toil-engine-role.ts new file mode 100644 index 00000000..d788b1bf --- /dev/null +++ b/packages/cdk/lib/roles/toil-engine-role.ts @@ -0,0 +1,50 @@ +import { ToilBatchPolicy } from "./policies/toil-batch-policy"; +import { ToilJobRole, ToilJobRoleProps } from "./toil-job-role"; +import { Arn, Aws, Stack } from "aws-cdk-lib"; +import { Construct } from "constructs"; +import { PolicyDocument, PolicyStatement, Effect } from "aws-cdk-lib/aws-iam"; + +interface ToilEngineRoleProps extends ToilJobRoleProps { + // This is the queue to which we are authorizing jobs to be submitted by + // something with this role. + jobQueueArn: string; + // And this other role can be assigned by this role + jobRoleArn: string; +} + +// This role grants access to Toil job stores, but also the access needed to +// launch jobs on AWS Batch that themselves have a ToilJobRole role assigned. 
+export class ToilEngineRole extends ToilJobRole { + constructor(scope: Construct, id: string, props: ToilEngineRoleProps) { + const toilJobArnPattern = Arn.format( + { + account: Aws.ACCOUNT_ID, + region: Aws.REGION, + partition: Aws.PARTITION, + // Toil makes all its job definition names start with "toil-" + resource: "job-definition/toil-*", + service: "batch", + }, + scope as Stack + ); + super(scope, id, props, { + // In addition to what jobs do, we need to be able to manipulate AWS + // Batch. + ToilEngineBatchPolicy: new ToilBatchPolicy({ + ...props, + toilJobArnPattern: toilJobArnPattern, + }), + // And we need to be able to pass the job role to AWS Batch jobs. + ToilIamPassJobRole: new PolicyDocument({ + assignSids: true, + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + actions: ["iam:PassRole"], + resources: [props.jobRoleArn], + }), + ], + }), + }); + } +} diff --git a/packages/cdk/lib/roles/toil-job-role.ts b/packages/cdk/lib/roles/toil-job-role.ts new file mode 100644 index 00000000..542ae259 --- /dev/null +++ b/packages/cdk/lib/roles/toil-job-role.ts @@ -0,0 +1,124 @@ +import { PolicyOptions } from "../types/engine-options"; +import { BucketOperations } from "../common/BucketOperations"; +import { Construct } from "constructs"; +import { Arn, Aws, Stack } from "aws-cdk-lib"; +import { Role, ServicePrincipal, PolicyDocument, PolicyStatement, Effect } from "aws-cdk-lib/aws-iam"; + +export interface ToilJobRoleProps { + readOnlyBucketArns: string[]; + readWriteBucketArns: string[]; + policies: PolicyOptions; +} + +// This role grants access to everything a Toil job needs to talk to the AWS +// job store and/or additional user data in S3. +export class ToilJobRole extends Role { + constructor(scope: Construct, id: string, props: ToilJobRoleProps, additionalInlinePolicies?: { [key: string]: PolicyDocument }) { + // Toil workflows make S3 buckets and SimpleDB domains for scratch. + // In SimpleDB we use the "toil-registry" domain, and also domains for job + // stores generated by toil.jobStores.util.generate_locator, which also + // start with "toil-". + // We will no longer need this when + // https://github.com/DataBiosphere/toil/issues/964 is fixed and Toil stops + // using SimpleDB. + const jobStoreSimpleDbArnPattern = Arn.format( + { + account: Aws.ACCOUNT_ID, + region: Aws.REGION, + partition: Aws.PARTITION, + resource: "domain/toil-*", + service: "sdb", + }, + scope as Stack + ); + // In S3 we use buckets for job stores generated by + // toil.jobStores.util.generate_locator, which start with "toil-". + // We will no longer need this when + // https://github.com/DataBiosphere/toil/issues/3983 is fixed and Toil + // becomes able to just use the one AGC-provided bucket. + const jobStoreS3ArnPattern = Arn.format( + { + // Note that regions and account IDs aren't allowed in S3 ARNs. + // But the formatter requires them to be passed, and wants an empty + // string if we don't actually want them filled in. + account: "", + region: "", + partition: Aws.PARTITION, + resource: "toil-*", + service: "s3", + }, + scope as Stack + ); + + super(scope, id, { + assumedBy: new ServicePrincipal("ecs-tasks.amazonaws.com"), + inlinePolicies: { + ToilSimpleDbJobStoreAccess: new PolicyDocument({ + assignSids: true, + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + // These are the SimpleDB IAM actions associated with the + // SimpleDB operations that the Toil AWS job store calls. They + // are most likely all used, but leaving each out has not been + // tested. 
+ actions: [ + "sdb:CreateDomain", + "sdb:DeleteDomain", + "sdb:GetAttributes", + "sdb:PutAttributes", + "sdb:BatchPutAttributes", + "sdb:DeleteAttributes", + "sdb:BatchDeleteAttributes", + "sdb:Select", + ], + resources: [jobStoreSimpleDbArnPattern], + }), + ], + }), + ToilS3JobStoreAccess: new PolicyDocument({ + assignSids: true, + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + // These are the IAM actions which seem relevant to the Boto3 + // client and resource operations that the Toil AWS job store + // does on its job store bucket. + // It is possible some are not actually used, especially some of + // the get/list operations which may or may not actually be + // required to construct the associated Boto3 Resource objects. + // Leaving each out has not been tested. + actions: [ + "s3:CreateBucket", + "s3:DeleteBucket", + "s3:GetBucketTagging", + "s3:PutBucketTagging", + "s3:GetBucketVersioning", + "s3:PutBucketVersioning", + "s3:GetBucketLocation", + "s3:GetObject", + "s3:GetObjectVersion", + "s3:PutObject", + "s3:ListBucket", + "s3:ListBucketVersions", + "s3:DeleteObject", + "s3:DeleteObjectVersion", + "s3:GetObjectAcl", + "s3:PutObjectAcl", + "s3:ListBucketMultipartUploads", + "s3:ListMultipartUploadParts", + "s3:AbortMultipartUpload", + ], + resources: [jobStoreS3ArnPattern], + }), + ], + }), + ...additionalInlinePolicies, + }, + ...props.policies, + }); + + BucketOperations.grantBucketAccess(this, this, props.readOnlyBucketArns, true); + BucketOperations.grantBucketAccess(this, this, props.readWriteBucketArns); + } +} diff --git a/packages/cdk/lib/stacks/context-stack.ts b/packages/cdk/lib/stacks/context-stack.ts index 992be400..1ccc7bed 100644 --- a/packages/cdk/lib/stacks/context-stack.ts +++ b/packages/cdk/lib/stacks/context-stack.ts @@ -9,6 +9,7 @@ import { CromwellEngineConstruct } from "./engines/cromwell-engine-construct"; import { NextflowEngineConstruct } from "./engines/nextflow-engine-construct"; import { MiniwdlEngineConstruct } from "./engines/miniwdl-engine-construct"; import { SnakemakeEngineConstruct } from "./engines/snakemake-engine-construct"; +import { ToilEngineConstruct } from "./engines/toil-engine-construct"; export interface ContextStackProps extends StackProps { readonly contextParameters: ContextAppParameters; @@ -55,6 +56,9 @@ export class ContextStack extends Stack { } this.renderSnakemakeStack(props); break; + case "toil": + this.renderToilStack(props); + break; default: throw Error(`Engine '${engineName}' is not supported`); } @@ -64,6 +68,9 @@ export class ContextStack extends Stack { const batchProps = this.getCromwellBatchProps(props); const batchStack = this.renderBatchStack(batchProps); + // Cromwell submits workflow jobs to a single on-demand or spot queue. It + // has a server that runs elsewhere in a Fargate service, and also a WES + // adapter lambda. let jobQueue; if (props.contextParameters.requestSpotInstances) { jobQueue = batchStack.batchSpot.jobQueue; @@ -82,6 +89,9 @@ export class ContextStack extends Stack { const batchProps = this.getNextflowBatchProps(props); const batchStack = this.renderBatchStack(batchProps); + // Nextflow submits workflow head jobs to an on demand queue, and + // optionally workflow jobs to a spot queue. There is no server, just an + // adapter lambda. 
let jobQueue, headQueue; if (props.contextParameters.requestSpotInstances) { jobQueue = batchStack.batchSpot.jobQueue; @@ -99,18 +109,43 @@ export class ContextStack extends Stack { } private renderMiniwdlStack(props: ContextStackProps) { + // Miniwdl's engine construct takes care of setting up its own Batch + // queues. const commonEngineProps = this.getCommonEngineProps(props); new MiniwdlEngineConstruct(this, ENGINE_MINIWDL, { ...commonEngineProps, }).outputToParent(); } + private renderToilStack(props: ContextStackProps) { + const batchProps = this.getToilBatchProps(props); + const batchStack = this.renderBatchStack(batchProps); + + // Toil submits workflow jobs to a single on-demand or spot queue. It + // has a server that runs elsewhere in a Fargate service, and speaks WES + // itself. + let jobQueue; + if (props.contextParameters.requestSpotInstances) { + jobQueue = batchStack.batchSpot.jobQueue; + } else { + jobQueue = batchStack.batchOnDemand.jobQueue; + } + + const commonEngineProps = this.getCommonEngineProps(props); + new ToilEngineConstruct(this, "toil", { + jobQueue, + ...commonEngineProps, + }).outputToParent(); + } + private getCromwellBatchProps(props: ContextStackProps) { const commonBatchProps = this.getCommonBatchProps(props); const { requestSpotInstances } = props.contextParameters; return { ...commonBatchProps, + // We only use one stack for the Cromwell jobs. The server lives in + // Fargate and doesn't run in either of these. createSpotBatch: requestSpotInstances, createOnDemandBatch: !requestSpotInstances, }; @@ -143,6 +178,20 @@ export class ContextStack extends Stack { }; } + private getToilBatchProps(props: ContextStackProps) { + const commonBatchProps = this.getCommonBatchProps(props); + const { requestSpotInstances } = props.contextParameters; + + return { + ...commonBatchProps, + // We only use one Batch compute environment and queue from the stack for + // the Toil jobs. The server lives in Fargate and doesn't run in either + // of these. + createSpotBatch: requestSpotInstances, + createOnDemandBatch: !requestSpotInstances, + }; + } + private renderBatchStack(props: BatchConstructProps) { return new BatchConstruct(this, "Batch", props); } diff --git a/packages/cdk/lib/stacks/engines/engine-construct.ts b/packages/cdk/lib/stacks/engines/engine-construct.ts index 48115e37..5989cc18 100644 --- a/packages/cdk/lib/stacks/engines/engine-construct.ts +++ b/packages/cdk/lib/stacks/engines/engine-construct.ts @@ -11,7 +11,7 @@ import { getCommonParameter } from "../../util"; export interface EngineOutputs { accessLogGroup: ILogGroup; - adapterLogGroup: ILogGroup; + adapterLogGroup?: ILogGroup; engineLogGroup: ILogGroup; wesUrl: string; } @@ -24,7 +24,8 @@ export abstract class EngineConstruct extends Construct { public outputToParent(): void { const outputs = this.getOutputs(); new CfnOutput(Stack.of(this), "AccessLogGroupName", { value: outputs.accessLogGroup.logGroupName }); - new CfnOutput(Stack.of(this), "AdapterLogGroupName", { value: outputs.adapterLogGroup.logGroupName }); + // We don't always have a WES log group, but the AGC CLI always expects us to have an AdapterLogGroupName output + new CfnOutput(Stack.of(this), "AdapterLogGroupName", { value: outputs.adapterLogGroup ? 
outputs.adapterLogGroup.logGroupName : "" }); new CfnOutput(Stack.of(this), "EngineLogGroupName", { value: outputs.engineLogGroup.logGroupName }); new CfnOutput(Stack.of(this), "WesUrl", { value: outputs.wesUrl }); } diff --git a/packages/cdk/lib/stacks/engines/toil-engine-construct.ts b/packages/cdk/lib/stacks/engines/toil-engine-construct.ts new file mode 100644 index 00000000..5eed84d6 --- /dev/null +++ b/packages/cdk/lib/stacks/engines/toil-engine-construct.ts @@ -0,0 +1,98 @@ +import { Aws } from "aws-cdk-lib"; +import { IVpc } from "aws-cdk-lib/aws-ec2"; +import { FargateTaskDefinition, LogDriver } from "aws-cdk-lib/aws-ecs"; +import { ApiProxy, SecureService } from "../../constructs"; +import { IRole } from "aws-cdk-lib/aws-iam"; +import { createEcrImage, renderServiceWithTaskDefinition } from "../../util"; +import { Bucket } from "aws-cdk-lib/aws-s3"; +import { EngineOptions, ServiceContainer } from "../../types"; +import { LogGroup, ILogGroup } from "aws-cdk-lib/aws-logs"; +import { EngineOutputs, EngineConstruct } from "./engine-construct"; +import { ToilJobRole } from "../../roles/toil-job-role"; +import { ToilEngineRole } from "../../roles/toil-engine-role"; +import { IJobQueue } from "@aws-cdk/aws-batch-alpha"; +import { Construct } from "constructs"; + +export interface ToilEngineConstructProps extends EngineOptions { + /** + * AWS Batch JobQueue to use for running workflow tasks. + */ + readonly jobQueue: IJobQueue; +} + +export class ToilEngineConstruct extends EngineConstruct { + public readonly engine: SecureService; + public readonly apiProxy: ApiProxy; + public readonly engineLogGroup: ILogGroup; + public readonly engineRole: IRole; + public readonly jobRole: IRole; + + constructor(scope: Construct, id: string, props: ToilEngineConstructProps) { + super(scope, id); + const params = props.contextParameters; + this.engineLogGroup = new LogGroup(this, "EngineLogGroup"); + const artifactBucket = Bucket.fromBucketName(this, "ArtifactBucket", params.artifactBucketName); + const outputBucket = Bucket.fromBucketName(this, "OutputBucket", params.outputBucketName); + + // Make a role for the jobs to run as + this.jobRole = new ToilJobRole(this, "ToilJobRole", { + readOnlyBucketArns: (params.readBucketArns ?? []).concat(artifactBucket.bucketArn), + readWriteBucketArns: (params.readWriteBucketArns ?? []).concat(outputBucket.bucketArn), + policies: props.policyOptions, + }); + + // Make a role for the server to run as + this.engineRole = new ToilEngineRole(this, "ToilEngineRole", { + jobQueueArn: props.jobQueue.jobQueueArn, + jobRoleArn: this.jobRole.roleArn, + readOnlyBucketArns: (params.readBucketArns ?? []).concat(artifactBucket.bucketArn), + readWriteBucketArns: (params.readWriteBucketArns ?? []).concat(outputBucket.bucketArn), + policies: props.policyOptions, + }); + + // Make the container and pass it the ARN of the role to use for individual jobs. + const engineContainer = params.getEngineContainer(props.jobQueue.jobQueueArn, { + TOIL_AWS_BATCH_JOB_ROLE_ARN: this.jobRole.roleArn, + }); + + this.engine = this.getEngineServiceDefinition(props.vpc, engineContainer, this.engineLogGroup); + + // We don't use an adapter, so put the access-controlling proxy right in + // front of the engine load balancer. 
+ this.apiProxy = new ApiProxy(this, { + apiName: `${params.projectName}${params.contextName}${engineContainer.serviceName}ApiProxy`, + loadBalancer: this.engine.loadBalancer, + allowedAccountIds: [Aws.ACCOUNT_ID], + }); + } + + protected getOutputs(): EngineOutputs { + return { + accessLogGroup: this.apiProxy.accessLogGroup, + engineLogGroup: this.engineLogGroup, + wesUrl: this.apiProxy.restApi.url, + }; + } + + private getEngineServiceDefinition(vpc: IVpc, serviceContainer: ServiceContainer, logGroup: ILogGroup) { + const id = "Engine"; + const definition = new FargateTaskDefinition(this, "EngineTaskDef", { + taskRole: this.engineRole, + cpu: serviceContainer.cpu, + memoryLimitMiB: serviceContainer.memoryLimitMiB, + }); + + definition.addContainer(serviceContainer.serviceName, { + cpu: serviceContainer.cpu, + memoryLimitMiB: serviceContainer.memoryLimitMiB, + environment: serviceContainer.environment, + containerName: serviceContainer.serviceName, + image: createEcrImage(this, serviceContainer.imageConfig.designation), + logging: LogDriver.awsLogs({ logGroup, streamPrefix: id }), + portMappings: serviceContainer.containerPort ? [{ containerPort: serviceContainer.containerPort }] : [], + }); + + const engine = renderServiceWithTaskDefinition(this, id, serviceContainer, definition, vpc); + return engine; + } +} diff --git a/packages/cli/internal/pkg/cli/account_activate_test.go b/packages/cli/internal/pkg/cli/account_activate_test.go index 846507a0..bca2389a 100644 --- a/packages/cli/internal/pkg/cli/account_activate_test.go +++ b/packages/cli/internal/pkg/cli/account_activate_test.go @@ -23,6 +23,7 @@ const ( testCromwellRepository = "test-cromwell-repo" testNextflowRepository = "test-nextflow-repo" testMiniwdlRepository = "test-miniwdl-repo" + testToilRepository = "test-toil-repo" testCoreStackName = "Agc-Core" ) @@ -52,6 +53,12 @@ var ( RepositoryName: testMiniwdlRepository, ImageTag: testImageTag, }, + "TOIL": { + RegistryId: testAccountId, + Region: testAccountRegion, + RepositoryName: testToilRepository, + ImageTag: testImageTag, + }, } ) diff --git a/packages/cli/internal/pkg/cli/context/common_test.go b/packages/cli/internal/pkg/cli/context/common_test.go index 2c23b47e..7dff1309 100644 --- a/packages/cli/internal/pkg/cli/context/common_test.go +++ b/packages/cli/internal/pkg/cli/context/common_test.go @@ -14,6 +14,7 @@ const ( testContextName1 = "testContextName1" testContextName2 = "testContextName2" testContextName3 = "testContextName3" + testContextName4 = "testContextName4" testUnknownContextName = "unknown-context-name" testS3Location1 = "s3://test-s3-location-1" testS3Location2 = "s3://test-s3-location-2" @@ -48,6 +49,11 @@ var ( {Type: "nextflow", Engine: "nextflow"}, }, }, + testContextName4: { + Engines: []spec.Engine{ + {Type: "cwl", Engine: "toil"}, + }, + }, }, } ) diff --git a/packages/cli/internal/pkg/cli/context/manager.go b/packages/cli/internal/pkg/cli/context/manager.go index 1d9b903c..2583365e 100644 --- a/packages/cli/internal/pkg/cli/context/manager.go +++ b/packages/cli/internal/pkg/cli/context/manager.go @@ -87,8 +87,23 @@ var showExecution = cdk.ShowExecution var silentExecution = cdk.SilentExecution func (m *Manager) getEnvironmentVars() []string { + // Different engines will need different environment variables to define + // their Docker images. + engine := m.contextEnv.EngineName + var relevantImageKeys []string + relevantImageKeys = append(relevantImageKeys, strings.ToUpper(engine)) + // Do one level of dependency resolution, without deduplication. 
+ // If the dependency structure becomes more complex we will have to upgrade + // this algorithm. + var dependencyImageKeys []string + for _, imageKey := range relevantImageKeys { + dependencyImageKeys = append(dependencyImageKeys, environment.ImageDependencies[imageKey]...) + } + // And add them to the relevant images + relevantImageKeys = append(relevantImageKeys, dependencyImageKeys...) var environmentVars []string - for imageName := range m.imageRefs { + for _, imageName := range relevantImageKeys { + // Each engine or other component has its own section in imageRefs environmentVars = append(environmentVars, fmt.Sprintf("ECR_%s_ACCOUNT_ID=%s", imageName, m.imageRefs[imageName].RegistryId), fmt.Sprintf("ECR_%s_REGION=%s", imageName, m.region), diff --git a/packages/cli/internal/pkg/cli/context/manager_deploy_test.go b/packages/cli/internal/pkg/cli/context/manager_deploy_test.go index e9330092..83016b4a 100644 --- a/packages/cli/internal/pkg/cli/context/manager_deploy_test.go +++ b/packages/cli/internal/pkg/cli/context/manager_deploy_test.go @@ -14,6 +14,16 @@ import ( "github.com/stretchr/testify/assert" ) +const ( + // We check a lot of generated CDK commands to make sure they have the + // right number of command line arguments. How many should there be to + // start? + testCdkBaseArgumentCount = 27 + // And how many do we expect if the WES adapter images are also to be + // passed? + testCdkAdaptedArgumentCount = testCdkBaseArgumentCount + 4 +) + func TestManager_Deploy(t *testing.T) { origVerbose := logging.Verbose origDisplayProgressBar := displayProgressBar @@ -46,7 +56,7 @@ func TestManager_Deploy(t *testing.T) { mockClients.ssmMock.EXPECT().GetCustomTags().Return(testTags) mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["NEXTFLOW"]).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(43), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkAdaptedArgumentCount), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName3}), []cdk.ProgressStream{mockClients.progressStream1}).Return([]cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName3}}) return mockClients @@ -69,12 +79,35 @@ func TestManager_Deploy(t *testing.T) { mockClients.ssmMock.EXPECT().GetCustomTags().Return("") mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["NEXTFLOW"]).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(43), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkAdaptedArgumentCount), testContextName3).After(clearContext).Return(mockClients.progressStream1, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar 
mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName3}), []cdk.ProgressStream{mockClients.progressStream1}).Return([]cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName3}}) return mockClients }, }, + "deploy success (no adapter)": { + contextList: []string{testContextName4}, + expectedProgressResultList: []ProgressResult{ + {Outputs: []string{"some message"}, Context: testContextName4}, + }, + setupMocks: func(t *testing.T) mockClients { + mockClients := createMocks(t) + defer close(mockClients.progressStream1) + defer close(mockClients.progressStream2) + mockClients.configMock.EXPECT().GetUserEmailAddress().Return(testUserEmail, nil) + mockClients.configMock.EXPECT().GetUserId().Return(testUserId, nil) + mockClients.projMock.EXPECT().Read().Return(testValidProjectSpec, nil) + mockClients.ssmMock.EXPECT().GetOutputBucket().Return(testOutputBucket, nil) + mockClients.ssmMock.EXPECT().GetCommonParameter("installed-artifacts/s3-root-url").Return(testArtifactBucket, nil) + mockClients.ssmMock.EXPECT().GetCustomTags().Return("") + mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["TOIL"]).Return(nil) + clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkBaseArgumentCount), testContextName4).After(clearContext).Return(mockClients.progressStream1, nil) + displayProgressBar = mockClients.cdkMock.DisplayProgressBar + mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName4}), []cdk.ProgressStream{mockClients.progressStream1}).Return([]cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName4}}) + return mockClients + }, + }, "multiple deploy success": { contextList: []string{testContextName1, testContextName2}, expectedProgressResultList: []ProgressResult{ @@ -94,8 +127,8 @@ func TestManager_Deploy(t *testing.T) { mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["CROMWELL"]).Times(2).Return(nil) clearContext := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) clearContext2 := mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(43), testContextName1).After(clearContext).Return(mockClients.progressStream1, nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(43), testContextName2).After(clearContext2).Return(mockClients.progressStream2, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkAdaptedArgumentCount), testContextName1).After(clearContext).Return(mockClients.progressStream1, nil) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkAdaptedArgumentCount), testContextName2).After(clearContext2).Return(mockClients.progressStream2, nil) displayProgressBar = mockClients.cdkMock.DisplayProgressBar expectedCdkResult := []cdk.Result{{Outputs: []string{"some message"}, ExecutionName: testContextName1}, {Outputs: []string{"some other message"}, ExecutionName: testContextName2}} 
mockClients.cdkMock.EXPECT().DisplayProgressBar(fmt.Sprintf("Deploying resources for context(s) %s", []string{testContextName1, testContextName2}), []cdk.ProgressStream{mockClients.progressStream1, mockClients.progressStream2}).Return(expectedCdkResult) @@ -235,7 +268,7 @@ func TestManager_Deploy(t *testing.T) { mockClients.ssmMock.EXPECT().GetCommonParameter("installed-artifacts/s3-root-url").Return(testArtifactBucket, nil) mockClients.ssmMock.EXPECT().GetCustomTags().Return(testTags) mockClients.cdkMock.EXPECT().ClearContext(filepath.Join(testHomeDir, ".agc/cdk/apps/context")).Return(nil) - mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(43), testContextName1).Return(nil, fmt.Errorf("some context error")) + mockClients.cdkMock.EXPECT().DeployApp(filepath.Join(testHomeDir, ".agc/cdk/apps/context"), gomock.Len(testCdkAdaptedArgumentCount), testContextName1).Return(nil, fmt.Errorf("some context error")) mockClients.ecrClientMock.EXPECT().VerifyImageExists(environment.CommonImages["CROMWELL"]).Return(nil) return mockClients }, diff --git a/packages/cli/internal/pkg/cli/context/manager_list_test.go b/packages/cli/internal/pkg/cli/context/manager_list_test.go index fc6005c2..9e8ec923 100644 --- a/packages/cli/internal/pkg/cli/context/manager_list_test.go +++ b/packages/cli/internal/pkg/cli/context/manager_list_test.go @@ -43,6 +43,15 @@ func TestManager_List(t *testing.T) { }, }, }, + testContextName4: { + Name: testContextName4, + Engines: []spec.Engine{ + { + Type: "cwl", + Engine: "toil", + }, + }, + }, }, setupMocks: func(t *testing.T) mockClients { mockClients := createMocks(t) diff --git a/packages/cli/internal/pkg/cli/project_init.go b/packages/cli/internal/pkg/cli/project_init.go index e96b502e..1646fbce 100644 --- a/packages/cli/internal/pkg/cli/project_init.go +++ b/packages/cli/internal/pkg/cli/project_init.go @@ -27,6 +27,7 @@ var ( "nextflow": "nextflow", "wdl": "cromwell", "snakemake": "snakemake", + "cwl": "toil", } supportedWorkflowTypes []string ) @@ -125,7 +126,7 @@ func (o *initProjectOpts) validateProject() error { func BuildProjectInitCommand() *cobra.Command { vars := initProjectVars{} cmd := &cobra.Command{ - Use: "init project_name --workflow-type {wdl|nextflow|snakemake}", + Use: "init project_name --workflow-type {cwl|nextflow|snakemake|wdl}", Short: "Initialize current directory with a new empty AGC project for a particular workflow type.", Long: `Initialize current directory with a new empty AGC project for a particular workflow type. Project specification file 'agc-project.yaml' will be created in the current directory.`, diff --git a/packages/cli/internal/pkg/cli/project_init_test.go b/packages/cli/internal/pkg/cli/project_init_test.go index 9d93805c..c86817bb 100644 --- a/packages/cli/internal/pkg/cli/project_init_test.go +++ b/packages/cli/internal/pkg/cli/project_init_test.go @@ -43,7 +43,7 @@ func TestProjectInit_Validate(t *testing.T) { workflowType: "nextflow", }, "invalid workflow type": { - expectedErr: "invalid workflow type supplied: 'aBadEngineName'. Supported workflow types are: [nextflow snakemake wdl]", + expectedErr: "invalid workflow type supplied: 'aBadEngineName'. 
Supported workflow types are: [cwl nextflow snakemake wdl]", workflowType: "aBadEngineName", projectName: testProjectName, }, @@ -91,6 +91,11 @@ func TestProjectInit_Execute(t *testing.T) { engineName: "nextflow", expectedEngine: []spec.Engine{{Type: "nextflow", Engine: "nextflow"}}, }, + "toil engine generation": { + projectName: testProjectName, + engineName: "cwl", + expectedEngine: []spec.Engine{{Type: "cwl", Engine: "toil"}}, + }, } for name, tc := range testCases { diff --git a/packages/cli/internal/pkg/environment/environment.go b/packages/cli/internal/pkg/environment/environment.go index 245f1148..20b05061 100644 --- a/packages/cli/internal/pkg/environment/environment.go +++ b/packages/cli/internal/pkg/environment/environment.go @@ -15,44 +15,62 @@ const DefaultNextflowTag = "21.04.3" const DefaultWesTag = "0.1.0" const DefaultMiniwdlTag = "v0.1.11" const DefaultSnakemakeTag = "internal-fork" +const DefaultToilTag = "v6.0.0" const WesImageKey = "WES" const CromwellImageKey = "CROMWELL" const NextflowImageKey = "NEXTFLOW" const MiniwdlImageKey = "MINIWDL" const SnakemakeImageKey = "SNAKEMAKE" +const ToilImageKey = "TOIL" var CommonImages = map[string]ecr.ImageReference{ WesImageKey: { RegistryId: LookUpEnvOrDefault("ECR_WES_ACCOUNT_ID", DefaultEcrRegistry), Region: LookUpEnvOrDefault("ECR_WES_REGION", DefaultEcrRegion), - RepositoryName: "aws/wes-release", + RepositoryName: LookUpEnvOrDefault("ECR_WES_REPOSITORY", "aws/wes-release"), ImageTag: LookUpEnvOrDefault("ECR_WES_TAG", DefaultWesTag), }, CromwellImageKey: { RegistryId: LookUpEnvOrDefault("ECR_CROMWELL_ACCOUNT_ID", DefaultEcrRegistry), Region: LookUpEnvOrDefault("ECR_CROMWELL_REGION", DefaultEcrRegion), - RepositoryName: "aws/cromwell-mirror", + RepositoryName: LookUpEnvOrDefault("ECR_CROMWELL_REPOSITORY", "aws/cromwell-mirror"), ImageTag: LookUpEnvOrDefault("ECR_CROMWELL_TAG", DefaultCromwellTag), }, NextflowImageKey: { RegistryId: LookUpEnvOrDefault("ECR_NEXTFLOW_ACCOUNT_ID", DefaultEcrRegistry), Region: LookUpEnvOrDefault("ECR_NEXTFLOW_REGION", DefaultEcrRegion), - RepositoryName: "aws/nextflow-mirror", + RepositoryName: LookUpEnvOrDefault("ECR_NEXTFLOW_REPOSITORY", "aws/nextflow-mirror"), ImageTag: LookUpEnvOrDefault("ECR_NEXTFLOW_TAG", DefaultNextflowTag), }, MiniwdlImageKey: { RegistryId: LookUpEnvOrDefault("ECR_MINIWDL_ACCOUNT_ID", DefaultEcrRegistry), Region: LookUpEnvOrDefault("ECR_MINIWDL_REGION", DefaultEcrRegion), - RepositoryName: "aws/miniwdl-mirror", + RepositoryName: LookUpEnvOrDefault("ECR_MINIWDL_REPOSITORY", "aws/miniwdl-mirror"), ImageTag: LookUpEnvOrDefault("ECR_MINIWDL_TAG", DefaultMiniwdlTag), }, SnakemakeImageKey: { RegistryId: LookUpEnvOrDefault("ECR_SNAKEMAKE_ACCOUNT_ID", DefaultEcrRegistry), Region: LookUpEnvOrDefault("ECR_SNAKEMAKE_REGION", DefaultEcrRegion), - RepositoryName: "aws/snakemake-mirror", + RepositoryName: LookUpEnvOrDefault("ECR_SNAKEMAKE_REPOSITORY", "aws/snakemake-mirror"), ImageTag: LookUpEnvOrDefault("ECR_SNAKEMAKE_TAG", DefaultSnakemakeTag), }, + ToilImageKey: { + RegistryId: LookUpEnvOrDefault("ECR_TOIL_ACCOUNT_ID", DefaultEcrRegistry), + Region: LookUpEnvOrDefault("ECR_TOIL_REGION", DefaultEcrRegion), + RepositoryName: LookUpEnvOrDefault("ECR_TOIL_REPOSITORY", "aws/toil-mirror"), + ImageTag: LookUpEnvOrDefault("ECR_TOIL_TAG", DefaultToilTag), + }, +} + +// Some workflow engines require other images +var ImageDependencies = map[string]([]string){ + WesImageKey: {}, + CromwellImageKey: {WesImageKey}, + NextflowImageKey: {WesImageKey}, + MiniwdlImageKey: {WesImageKey}, +
SnakemakeImageKey: {WesImageKey}, + ToilImageKey: {}, } func LookUpEnvOrDefault(envVariableName string, defaultValue string) string { diff --git a/packages/engines/toil/Dockerfile b/packages/engines/toil/Dockerfile new file mode 100644 index 00000000..6cbaf1d7 --- /dev/null +++ b/packages/engines/toil/Dockerfile @@ -0,0 +1,61 @@ +FROM public.ecr.aws/amazonlinux/amazonlinux:2 AS final + +# COPY THIRD-PARTY /opt/ +COPY LICENSE /opt/ + +RUN yum update -y \ + && yum install -y \ + curl \ + hostname \ + "java-11-amazon-corretto-headless(x86-64)" \ + unzip \ + jq \ + && yum clean -y all \ + && rm -rf /var/cache/yum + +# install awscli v2 +RUN curl -s "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "/tmp/awscliv2.zip" \ + && unzip -q /tmp/awscliv2.zip -d /tmp \ + && /tmp/aws/install -b /usr/bin \ + && rm -rf /tmp/aws* + +##### MODIFY ####### +## In this area install your new engine into the container as well as any requirements for that engine. +## Dockerfile documentation is found here: https://docs.docker.com/engine/reference/builder/ + +# Add rabbitmq repository +ADD rabbitmq.repo /etc/yum.repos.d/rabbitmq.repo + +# Sadly pre-importing keys doesn't seem to save any time when we use yum later, so don't do it. + +# Install deps +RUN curl -fsSL https://rpm.nodesource.com/setup_16.x | bash - \ + && yum update -y \ + && yum install -y \ + python3 \ + rabbitmq-server \ + nodejs \ + git \ + && yum clean -y all \ + && rm -rf /var/cache/yum + +# Install concurrently, for running all our servers in one session +RUN npm install -g concurrently@7.0.0 + +# Install Toil +COPY THIRD-PARTY /opt/ + +ARG TOIL_VERSION="62cf1054e5af2c2c483396e651cd0e7be85330fe" +RUN python3 -m pip install git+https://github.com/DataBiosphere/toil.git@${TOIL_VERSION}#egg=toil[aws,cwl,server] + +# copy the entrypoint script to the image +COPY toil.aws.sh /opt/bin/toil.aws.sh +RUN chmod +x /opt/bin/toil.aws.sh + +EXPOSE 8000 + +#### END MODIFY ###### + +WORKDIR /opt/work +ENTRYPOINT ["/opt/bin/toil.aws.sh"] + diff --git a/packages/engines/toil/LICENSE b/packages/engines/toil/LICENSE new file mode 100644 index 00000000..f5d32e98 --- /dev/null +++ b/packages/engines/toil/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files.
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/packages/engines/toil/README.md b/packages/engines/toil/README.md new file mode 100644 index 00000000..cf102dfa --- /dev/null +++ b/packages/engines/toil/README.md @@ -0,0 +1,52 @@ +## Toil AWS Mirror + +A Toil mono-container WES server for use with Amazon Genomics CLI. + +### Building the Container Manually + +Go to this directory and run: + +```bash +docker build . -f Dockerfile -t toil-agc +``` + +### Running for Testing + +Having built the container, run: + +```bash +docker run --name toil-agc-test -ti --rm -p "127.0.0.1:8000:8000" toil-agc +``` + +This will start the containerized server and make it available on port 8000 on the loopback interface. 
You can inspect the port mapping with: + +```bash +docker port toil-agc-test +``` + +Then you can talk to it with e.g.: + +```bash +curl -vvv "http://localhost:8000/ga4gh/wes/v1/service-info" +``` + +For debugging, you can get inside the container with: + +```bash +docker exec -ti toil-agc-test /bin/bash +``` + +### Deploying + +To push this to an Amazon ECR repo, where AGC can get at it, you can do something like: + +```bash +AWS_REGION= # For example, us-west-2 +AWS_ACCOUNT= # For example, 123456789012 +ECR_REPO= # For example, yourname/toil-agc. Needs to be created in the ECR console. +aws ecr get-login-password --region ${AWS_REGION} | docker login --username AWS --password-stdin ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com +docker build -t ${ECR_REPO} . +docker tag ${ECR_REPO}:latest ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:latest +docker push ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}:latest +``` + diff --git a/packages/engines/toil/THIRD-PARTY b/packages/engines/toil/THIRD-PARTY new file mode 100644 index 00000000..8b1fd594 --- /dev/null +++ b/packages/engines/toil/THIRD-PARTY @@ -0,0 +1,207 @@ +** toil; version 5.5.0 -- https://github.com/DataBiosphere/toil + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + + Copyright 2011 UCSC Computational Genomics Lab + Original Contributors: Benedict Paten, Hannes Schmidt, John Vivian, + Christopher Ketchum, Joel Armstrong and co-authors (benedictpaten@gmail.com) + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ diff --git a/packages/engines/toil/buildspec.yml b/packages/engines/toil/buildspec.yml new file mode 100644 index 00000000..815f591a --- /dev/null +++ b/packages/engines/toil/buildspec.yml @@ -0,0 +1,20 @@ +version: 0.2 + +env: + shell: bash + variables: + TOIL_IMAGE_NAME: "toil" +phases: + pre_build: + commands: + - TOIL_IMAGE_URI=${TOIL_IMAGE_NAME}:latest + build: + commands: + - docker build -t ${TOIL_IMAGE_URI} ./ + post_build: + commands: + - docker save -o toil_image.tar ${TOIL_IMAGE_URI} + +artifacts: + files: + - toil_image.tar diff --git a/packages/engines/toil/rabbitmq.repo b/packages/engines/toil/rabbitmq.repo new file mode 100644 index 00000000..3d919acc --- /dev/null +++ b/packages/engines/toil/rabbitmq.repo @@ -0,0 +1,61 @@ +# In /etc/yum.repos.d/rabbitmq.repo +# From https://www.rabbitmq.com/install-rpm.html#package-cloud +# Modified to request RHEL 7 packages. + +## +## Zero dependency Erlang +## + +[rabbitmq_erlang] +name=rabbitmq_erlang +baseurl=https://packagecloud.io/rabbitmq/erlang/el/7/$basearch +repo_gpgcheck=1 +gpgcheck=1 +enabled=1 +# PackageCloud's repository key and RabbitMQ package signing key +gpgkey=https://packagecloud.io/rabbitmq/erlang/gpgkey + https://github.com/rabbitmq/signing-keys/releases/download/2.0/rabbitmq-release-signing-key.asc +sslverify=1 +sslcacert=/etc/pki/tls/certs/ca-bundle.crt +metadata_expire=300 + +[rabbitmq_erlang-source] +name=rabbitmq_erlang-source +baseurl=https://packagecloud.io/rabbitmq/erlang/el/7/SRPMS +repo_gpgcheck=1 +gpgcheck=0 +enabled=1 +# PackageCloud's repository key and RabbitMQ package signing key +gpgkey=https://packagecloud.io/rabbitmq/erlang/gpgkey + https://github.com/rabbitmq/signing-keys/releases/download/2.0/rabbitmq-release-signing-key.asc +sslverify=1 +sslcacert=/etc/pki/tls/certs/ca-bundle.crt +metadata_expire=300 + +## +## RabbitMQ server +## + +[rabbitmq_server] +name=rabbitmq_server +baseurl=https://packagecloud.io/rabbitmq/rabbitmq-server/el/7/$basearch +repo_gpgcheck=1 +gpgcheck=0 +enabled=1 +# PackageCloud's repository key and RabbitMQ package signing key +gpgkey=https://packagecloud.io/rabbitmq/rabbitmq-server/gpgkey + https://github.com/rabbitmq/signing-keys/releases/download/2.0/rabbitmq-release-signing-key.asc +sslverify=1 +sslcacert=/etc/pki/tls/certs/ca-bundle.crt +metadata_expire=300 + +[rabbitmq_server-source] +name=rabbitmq_server-source +baseurl=https://packagecloud.io/rabbitmq/rabbitmq-server/el/7/SRPMS +repo_gpgcheck=1 +gpgcheck=0 +enabled=1 +gpgkey=https://packagecloud.io/rabbitmq/rabbitmq-server/gpgkey +sslverify=1 +sslcacert=/etc/pki/tls/certs/ca-bundle.crt +metadata_expire=300 diff --git a/packages/engines/toil/toil.aws.sh b/packages/engines/toil/toil.aws.sh new file mode 100644 index 00000000..8d8ce67b --- /dev/null +++ b/packages/engines/toil/toil.aws.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# Toil is a WES server and so it gets this custom entrypoint script + +DEFAULT_AWS_CLI_PATH=/opt/aws-cli/bin/aws +AWS_CLI_PATH=${JOB_AWS_CLI_PATH:-$DEFAULT_AWS_CLI_PATH} + +echo "=== ENVIRONMENT ===" +printenv + +echo "=== START SERVER ===" + +# We expect some AGC info in the environment: JOB_QUEUE_ARN and ROOT_DIR +# These come from packages/cdk/lib/env/context-app-parameters.ts +# And also TOIL_AWS_BATCH_JOB_ROLE_ARN must be set in Toil's environment. 
+# This comes from packages/cdk/lib/stacks/engines/toil-engine-construct.ts +AWS_REGION=$(echo ${JOB_QUEUE_ARN} | cut -f4 -d':') +set -x + +export TOIL_WES_BROKER_URL="amqp://guest:guest@localhost:5672//" +export TOIL_WES_JOB_STORE_TYPE="aws" + +concurrently -n rabbitmq,celery,toil \ + "rabbitmq-server" \ + "celery --broker=${TOIL_WES_BROKER_URL} -A toil.server.celery_app worker --loglevel=INFO" \ + "toil server --debug --host=0.0.0.0 --port=8000 --dest_bucket_base=${ROOT_DIR} --opt=--batchSystem=aws_batch '--opt=--awsBatchQueue=${JOB_QUEUE_ARN}' '--opt=--awsBatchRegion=${AWS_REGION}' --opt=--disableCaching" + + diff --git a/site/content/en/docs/Concepts/engines.md b/site/content/en/docs/Concepts/engines.md index 1726722c..388c154f 100644 --- a/site/content/en/docs/Concepts/engines.md +++ b/site/content/en/docs/Concepts/engines.md @@ -21,6 +21,7 @@ Currently, Amazon Genomics CLI's officially supported engines can be used to run | [Nextflow](https://www.nextflow.io) | [Nextflow DSL](https://www.nextflow.io/docs/latest/script.html) | Standard and DSL 2 | Head Process | | [miniwdl](https://miniwdl.readthedocs.io/en/latest/) | [WDL](https://openwdl.org) | [documented here](https://miniwdl.readthedocs.io/en/latest/runner_reference.html?highlight=errata#wdl-interoperability) | Head Process | | [Snakemake](https://snakemake.readthedocs.io/en/stable/) | [Snakemake](https://snakemake.readthedocs.io/en/stable/snakefiles/writing_snakefiles.html) | All versions | Head Process | +| [Toil](http://toil.ucsc-cgl.org/) | [CWL](https://www.commonwl.org/) | All versions up to 1.2 | Server | Overtime we plan to add additional engine and language support and provide the ability for third party developers to develop engine plugins. diff --git a/site/content/en/docs/Workflow engines/toil.md b/site/content/en/docs/Workflow engines/toil.md new file mode 100644 index 00000000..1bf1b9e9 --- /dev/null +++ b/site/content/en/docs/Workflow engines/toil.md @@ -0,0 +1,62 @@ +--- +title: "Toil" +date: 2022-04-26T15:34:00-04:00 +draft: false +weight: 20 +description: > + Details on the Toil engine deployed by Amazon Genomics CLI +--- + +## Description + +[Toil](http://toil.ucsc-cgl.org/) is a workflow engine developed by the +[Computational Genomics Lab](https://cglgenomics.ucsc.edu/) at the +[UC Santa Cruz Genomics Institute](https://genomics.ucsc.edu/). In Amazon Genomics +CLI, Toil is an engine that can be deployed in a +[context]( {{< relref "../Concepts/contexts" >}} ) as an +[engine]( {{< relref "../Concepts/engines">}} ) to run workflows based on the +[CWL](https://www.commonwl.org/) specification. + +Toil is an open source project distributed by UC Santa Cruz under the [Apache 2 +license](https://github.com/DataBiosphere/toil/blob/master/LICENSE) and +available on +[GitHub](https://github.com/DataBiosphere/toil). + +## Architecture + +There are two components of a Toil engine as deployed in an Amazon Genomics +CLI context: + +### Engine Service + +The Toil engine is run in "server mode" as a container service in ECS. The +engine can run multiple workflows asynchronously. Workflow tasks are run in an +elastic [compute environment]( #compute-environment ) and monitored by Toil. +Amazon Genomics CLI communicates with the Toil engine via a GA4GH +[WES](https://github.com/ga4gh/workflow-execution-service-schemas) REST service +which the server offers, available via API Gateway. 
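For reference, here is a minimal sketch of querying that WES endpoint directly. The `WES_API_URL` value below is a placeholder for whatever API Gateway endpoint the deployed context reports (the exact URL and stage name will differ, and depending on how the gateway is configured the request may also need to be signed with AWS credentials); the `/ga4gh/wes/v1` paths themselves are the standard GA4GH WES routes the Toil server exposes.

```bash
# Illustrative only: WES_API_URL stands in for the API Gateway endpoint
# of the deployed context; the paths below are standard GA4GH WES routes.
WES_API_URL="https://<api-id>.execute-api.<region>.amazonaws.com/prod"

# Describe the engine (supported workflow types, versions, system state).
curl -s "${WES_API_URL}/ga4gh/wes/v1/service-info"

# List the workflow runs the engine currently knows about.
curl -s "${WES_API_URL}/ga4gh/wes/v1/runs"
```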
+ +### Compute Environment + +Workflow tasks are submitted by Toil to an AWS Batch queue and run in +Toil-provided containers using an AWS Compute Environment. Tasks which use the +[CWL `DockerRequirement`](https://www.commonwl.org/user_guide/07-containers/index.html) +will additionally be run in sibling containers on the host Docker daemon. AWS +Batch coordinates the elastic provisioning of EC2 instances (container hosts) +based on the available work in the queue. Batch will place containers on +container hosts as space allows. + +#### Disk Expansion + +Container hosts in the Batch compute environment use EBS volumes as local +scratch space. As an EBS volume approaches a capacity threshold, new EBS +volumes will be attached and merged into the file system. These volumes are +destroyed when AWS Batch terminates the container host. CWL disk space +requirements are ignored by Toil when running against AWS Batch. + +This setup means that workflows that succeed on AGC may fail on other CWL +runners (because they do not request enough disk space) and workflows that +succeed on other CWL runners may fail on AGC (because they allocate disk space +faster than the expansion process can react). + +
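As an illustrative sketch (not one of the shipped example workflows), the fragment below shows where a CWL tool would normally declare its disk needs. Per the behavior described above, Toil does not act on `outdirMin`/`tmpdirMin` when dispatching to AWS Batch under Amazon Genomics CLI, but portable workflows should still declare them so other CWL runners can provision enough scratch space.

```yaml
# Hypothetical tool fragment for illustration only. The ResourceRequirement
# disk fields are honored by most CWL runners but, as noted above, ignored
# by Toil when it runs tasks on AWS Batch.
cwlVersion: v1.2
class: CommandLineTool
baseCommand: [wc, -c]
requirements:
  ResourceRequirement:
    coresMin: 1
    ramMin: 1024      # MiB
    outdirMin: 10240  # MiB of output scratch requested on other runners
    tmpdirMin: 10240  # MiB of temporary scratch requested on other runners
inputs:
  input_file:
    type: File
    inputBinding:
      position: 1
outputs:
  byte_count:
    type: stdout
stdout: byte_count.txt
```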