diff --git a/README.md b/README.md index 89a557e..001cf56 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ -# DISCLAIMER +

-TO THE GREATEST EXTENT PERMITTED BY APPLICABLE LAW, THIS WEBSITE AND ITS CONTENT, INCLUDING ALL SOFTWARE, SOFTWARE CODE, SITE-RELATED SERVICES, AND DATA, ARE PROVIDED "AS IS," WITH ALL FAULTS, WITH NO REPRESENTATIONS OR WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, SATISFACTORY QUALITY, NON-INFRINGEMENT OR FITNESS FOR A PARTICULAR PURPOSE. ALL WARRANTIES ARE REJECTED AND DISCLAIMED. YOU ASSUME TOTAL RESPONSIBILITY AND RISK FOR YOUR USE OF THE FOREGOING. PACBIO IS NOT OBLIGATED TO PROVIDE ANY SUPPORT FOR ANY OF THE FOREGOING, AND ANY SUPPORT PACBIO DOES PROVIDE IS SIMILARLY PROVIDED WITHOUT REPRESENTATION OR WARRANTY OF ANY KIND. NO ORAL OR WRITTEN INFORMATION OR ADVICE SHALL CREATE A REPRESENTATION OR WARRANTY OF ANY KIND. ANY REFERENCES TO SPECIFIC PRODUCTS OR SERVICES ON THE WEBSITES DO NOT CONSTITUTE OR IMPLY A RECOMMENDATION OR ENDORSEMENT BY PACBIO. - -# wdl-humanassembly +

PacBio Human Assembly pipeline

Workflow for running de novo assembly using human PacBio whole genome sequencing (WGS) data. Written using [Workflow Description Language (WDL)](https://openwdl.org/). @@ -15,11 +13,19 @@ Workflow for running de novo assembly using human PacBio whole genome sequencing The assembly workflow performs _de novo_ assembly on samples and trios. -![De novo assembly workflow diagram](workflows/main.graphviz.svg "De novo assembly workflow diagram") +![De novo assembly workflow diagram](images/main.graphviz.svg "De novo assembly workflow diagram") ## Setup -Some tasks and workflows are pulled in from other repositories. Ensure you have initialized submodules following cloning by running `git submodule update --init --recursive`. +Clone a tagged version of the git repository for reproducibility. Use the `--branch` flag to pull the desired version, and the `--recursive` flag to pull code from any submodules. + +``` +git clone \ + --depth 1 --branch v1.0.0 \ + --recursive \ + https://github.com/PacificBiosciences/HiFi-human-assembly-WDL.git +``` + ## Resource requirements @@ -47,10 +53,12 @@ For backend-specific configuration, see the relevant documentation: - [GCP](backends/gcp) - [HPC](backends/hpc) -## Configuring a workflow engine +## Configuring a workflow engine and container runtime An execution engine is required to run workflows. Two popular engines for running WDL-based workflows are [`miniwdl`](https://miniwdl.readthedocs.io/en/latest/getting_started.html) and [`Cromwell`](https://cromwell.readthedocs.io/en/stable/tutorials/FiveMinuteIntro/). +Because workflow dependencies are containerized, a container runtime is required. This workflow has been tested with [Docker](https://docs.docker.com/get-docker/) and [Singularity](https://docs.sylabs.io/guides/3.10/user-guide/) container runtimes. + See [backend-specific documentation](backends) for details on setting up an engine. 
| Engine | Azure | AWS | GCP | HPC | @@ -115,7 +123,7 @@ A cohort can include one or more samples. Samples need not be related. | Type | Name | Description | Notes | | :- | :- | :- | :- | -| String | cohort_id | A unique name for the cohort; used to name outputs | | +| String | cohort_id | A unique name for the cohort; used to name outputs. Alphanumeric characters, underscore (`_`), and dash (`-`) are allowed. | | | Array[[Sample](#sample)] | samples | The set of samples for the cohort. At least one sample must be defined. | | | Boolean | run_de_novo_assembly_trio | Run trio binned _de novo_ assembly. | Cohort must contain at least one valid trio (child and both parents present in the cohort) | @@ -125,10 +133,10 @@ Sample information for each sample in the workflow run. | Type | Name | Description | Notes | | :- | :- | :- | :- | -| String | sample_id | A unique name for the sample; used to name outputs | | +| String | sample_id | A unique name for the sample; used to name outputs. Alphanumeric characters, underscore (`_`), and dash (`-`) are allowed. | | | Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)] | movie_bams | The set of unaligned movie BAMs associated with this sample | | -| String? | father_id | Paternal `sample_id` | | -| String? | mother_id | Maternal `sample_id` | | +| String? | father_id | Paternal `sample_id`. Alphanumeric characters, underscore (`_`), and dash (`-`) are allowed. | | +| String? | mother_id | Maternal `sample_id`. Alphanumeric characters, underscore (`_`), and dash (`-`) are allowed. 
| | | Boolean | run_de_novo_assembly | If true, run single-sample _de novo_ assembly for this sample | \[true, false\] | ## [ReferenceData](workflows/humanwgs_structs.wdl) @@ -197,6 +205,12 @@ The Docker image used by a particular step of the workflow can be identified by | hifiasm | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/hifiasm) | | htslib | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/htslib) | | paftools | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/align_hifiasm) | -| parse-cohort | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/parse-cohort) | +| pyyaml | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/f72e862bca2f209b9909e6043ef0197975762f27/docker/pyyaml) | | samtools | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/samtools) | | yak | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/yak) | + +--- + +## DISCLAIMER + +TO THE GREATEST EXTENT PERMITTED BY APPLICABLE LAW, THIS WEBSITE AND ITS CONTENT, INCLUDING ALL SOFTWARE, SOFTWARE CODE, SITE-RELATED SERVICES, AND DATA, ARE PROVIDED "AS IS," WITH ALL FAULTS, WITH NO REPRESENTATIONS OR WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, SATISFACTORY QUALITY, NON-INFRINGEMENT OR FITNESS FOR A PARTICULAR PURPOSE. ALL WARRANTIES ARE REJECTED AND DISCLAIMED. YOU ASSUME TOTAL RESPONSIBILITY AND RISK FOR YOUR USE OF THE FOREGOING. 
PACBIO IS NOT OBLIGATED TO PROVIDE ANY SUPPORT FOR ANY OF THE FOREGOING, AND ANY SUPPORT PACBIO DOES PROVIDE IS SIMILARLY PROVIDED WITHOUT REPRESENTATION OR WARRANTY OF ANY KIND. NO ORAL OR WRITTEN INFORMATION OR ADVICE SHALL CREATE A REPRESENTATION OR WARRANTY OF ANY KIND. ANY REFERENCES TO SPECIFIC PRODUCTS OR SERVICES ON THE WEBSITES DO NOT CONSTITUTE OR IMPLY A RECOMMENDATION OR ENDORSEMENT BY PACBIO. \ No newline at end of file diff --git a/backends/example/single.json b/backends/example/single.json new file mode 100644 index 0000000..734d169 --- /dev/null +++ b/backends/example/single.json @@ -0,0 +1,17 @@ +{ + "de_novo_assembly.cohort": { + "cohort_id": "HG002", + "samples": [ + { + "movie_bams": [ + "/path/to/input1.bam", + "/path/to/input2.bam", + "/path/to/input3.bam" + ], + "run_de_novo_assembly": true, + "sample_id": "HG002" + } + ], + "run_de_novo_assembly_trio": false + } +} diff --git a/backends/example/trio.json b/backends/example/trio.json new file mode 100644 index 0000000..b09f10e --- /dev/null +++ b/backends/example/trio.json @@ -0,0 +1,34 @@ +{ + "de_novo_assembly.cohort": { + "cohort_id": "HG002", + "samples": [ + { + "father_id": "HG003", + "mother_id": "HG004", + "movie_bams": [ + "/path/to/sampleA_1.bam", + "/path/to/sampleA_2.bam" + ], + "run_de_novo_assembly": false, + "sample_id": "HG002" + }, + { + "movie_bams": [ + "/path/to/sampleB_1.bam", + "/path/to/sampleB_2.bam" + ], + "run_de_novo_assembly": true, + "sample_id": "HG003" + }, + { + "movie_bams": [ + "/path/to/sampleC_1.bam", + "/path/to/sampleC_2.bam" + ], + "run_de_novo_assembly": true, + "sample_id": "HG004" + } + ], + "run_de_novo_assembly_trio": true + } +} diff --git a/images/logo_wdl_workflows.svg b/images/logo_wdl_workflows.svg new file mode 100644 index 0000000..4f065f0 --- /dev/null +++ b/images/logo_wdl_workflows.svg @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/workflows/main.graphviz.svg b/images/main.graphviz.svg similarity index 100% rename from workflows/main.graphviz.svg rename to images/main.graphviz.svg diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 4315946..01e1183 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -79,7 +79,7 @@ "tasks": { "parse_families": { "key": "parse_families", - "digest": "rprxafsnidgno35awynatngwbnuw6suo", + "digest": "rbuiru23pdiayrbc4zmrqcjyqay4c2aa", "tests": [ { "inputs": { diff --git a/workflows/de_novo_assembly_trio/de_novo_assembly_trio.wdl b/workflows/de_novo_assembly_trio/de_novo_assembly_trio.wdl index b11e5a0..230ad9a 100644 --- a/workflows/de_novo_assembly_trio/de_novo_assembly_trio.wdl +++ b/workflows/de_novo_assembly_trio/de_novo_assembly_trio.wdl @@ -191,7 +191,7 @@ task parse_families { } runtime { - docker: "~{runtime_attributes.container_registry}/parse-cohort@sha256:e6a8ac24ada706644e62878178790a0006db9a6abec7a312232052bb0666fe8f" + docker: "~{runtime_attributes.container_registry}/pyyaml@sha256:af6f0689a7412b1edf76bd4bf6434e7fa6a86192eebf19573e8618880d9c1dbb" cpu: 2 memory: "4 GB" disk: "20 GB"