Deliver the per-sample Paraphase VCFs as one tarball instead of an array of files.

Summary of the changes carried by this patch:
  * README.md: paraphase_vcfs is documented as Array[File] (a tarball per sample);
    the paraphase and pbmm2 Dockerfile links point at new wdl-dockerfiles commits.
  * wdl-ci.config.json: new pbmm2_align digest; the paraphase test uses the HG002
    test BAM and HG002 expected outputs with a literal sample id.
  * workflows/main.wdl, sample_analysis.wdl: paraphase_vcfs becomes Array[File] / File.
  * task pbmm2_align: new container digest.
  * task paraphase: 8 threads, 16 GB memory, new container digest, and the VCF
    directory is packed into ~{out_directory}.tar.gz in the task working directory.

NOTE(review): the VCF directory itself is archived (after mkdir -p) rather than the
glob ~{sample_id}_vcfs/*.vcf, so a sample for which paraphase writes no VCFs still
yields the declared tarball instead of failing in tar. This assumes the directory
holds only VCFs; confirm against paraphase output.

NOTE(review): this patch was rebuilt from a whitespace-collapsed paste. Indentation
of context lines is reconstructed; apply with `git apply --ignore-whitespace` if
context does not match. The `index` hash for sample_analysis.wdl predates the tar
change described above.

diff --git a/README.md b/README.md
index 457449bb..67b68d25 100644
--- a/README.md
+++ b/README.md
@@ -214,7 +214,7 @@ These files will be output for each sample defined in the cohort.
 | Array[Array[File]] | cpg_pileup_bigwigs | 5mCpG site methylation probability pileups generated by pb-CpG-tools | |
 | Array[File] | paraphase_output | Output generated by [Paraphase](https://github.com/PacificBiosciences/paraphase) | |
 | Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)] | paraphase_realigned_bam | Realigned BAM for selected medically relevant genes in segmental duplications (with index), generated by Paraphase | |
-| Array[Array[File]] | paraphase_vcfs | Phased Variant calls for selected medically relevant genes in segmental duplications, generated by Paraphase | |
+| Array[File] | paraphase_vcfs | Tarball of phased variant calls for selected medically relevant genes in segmental duplications, generated by Paraphase | |
 | Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)] | hificnv_vcfs | VCF output containing copy number variant calls for the sample from [HiFiCNV](https://github.com/PacificBiosciences/HiFiCNV) | |
 | Array[File] | hificnv_copynum_bedgraphs | Copy number values calculated for each region | |
 | Array[File] | hificnv_depth_bws | Bigwig file containing the depth measurements from HiFiCNV | |
@@ -262,9 +262,9 @@ The Docker image used by a particular step of the workflow can be identified by
 | hiphase | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/d26db6204409dfeff56e169cdba0cc14bc272f15/docker/hiphase) |
 | htslib | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/htslib) |
 | mosdepth | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/mosdepth) |
-| paraphase | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/paraphase) |
+| paraphase | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/4f510e5f434cc138577853f56558b90e059fd770/docker/paraphase) |
 | pb-cpg-tools | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/7481837d3b0f539adf4f64209a65cf28eebf3dba/docker/pb-cpg-tools) |
-| pbmm2 | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/pbmm2) |
+| pbmm2 | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/94bbc94044ed8ee5dace9ebdc92756884535be93/docker/pbmm2) |
 | pbsv | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/f9e33a757e6d8cb15696ac930a2efd0fd7a885d8/docker/pbsv) |
 | pyyaml | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/f72e862bca2f209b9909e6043ef0197975762f27/docker/pyyaml) |
 | samtools | | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/samtools) |
diff --git a/wdl-ci.config.json b/wdl-ci.config.json
index 414f0248..b23ff1db 100644
--- a/wdl-ci.config.json
+++ b/wdl-ci.config.json
@@ -25,7 +25,7 @@
       "tasks": {
         "pbmm2_align": {
           "key": "pbmm2_align",
-          "digest": "3r4icze5zkps7m6xoruzvnfzk2fp4gqd",
+          "digest": "lo45v7z6rbh2uvh7nvnjf66udnoxfz2m",
           "tests": [
             {
               "inputs": {
@@ -249,24 +249,24 @@
           "tests": [
             {
               "inputs": {
-                "sample_id": "${sample_id}",
-                "bam": "/coac74908838b5dd7/inputs/small_dataset/paraphase/HG005.GRCh38.paraphase.test.bam",
-                "bam_index": "/coac74908838b5dd7/inputs/small_dataset/paraphase/HG005.GRCh38.paraphase.test.bam.bai",
-                "out_directory": "${sample_id}.paraphase",
+                "sample_id": "HG002",
+                "bam": "/coac74908838b5dd7/inputs/small_dataset/paraphase/HG002.GRCh38.haplotagged.paraphase_region_v3.bam",
+                "bam_index": "/coac74908838b5dd7/inputs/small_dataset/paraphase/HG002.GRCh38.haplotagged.paraphase_region_v3.bam.bai",
+                "out_directory": "HG002.paraphase",
                 "reference": "${datasets_file_path}/GRCh38/human_GRCh38_no_alt_analysis_set.fasta",
                 "reference_index": "${datasets_file_path}/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai",
                 "runtime_attributes": "${default_runtime_attributes}"
               },
               "output_tests": {
                 "output_json": {
-                  "value": "${resources_file_path}/paraphase/${sample_id}.json",
+                  "value": "${resources_file_path}/paraphase/HG002.json",
                   "test_tasks": [
                     "compare_file_basename",
                     "check_json"
                   ]
                 },
                 "realigned_bam": {
-                  "value": "${resources_file_path}/paraphase/${sample_id}_realigned_tagged.bam",
+                  "value": "${resources_file_path}/paraphase/HG002_realigned_tagged.bam",
                   "test_tasks": [
                     "compare_file_basename",
                     "samtools_quickcheck"
diff --git a/workflows/main.wdl b/workflows/main.wdl
index 0ee719aa..d334f526 100644
--- a/workflows/main.wdl
+++ b/workflows/main.wdl
@@ -130,7 +130,7 @@ workflow humanwgs {
 		# per sample paraphase outputs
 		Array[File] paraphase_output_jsons = sample_analysis.paraphase_output_json
 		Array[IndexData] paraphase_realigned_bams = sample_analysis.paraphase_realigned_bam
-		Array[Array[File]] paraphase_vcfs = sample_analysis.paraphase_vcfs
+		Array[File] paraphase_vcfs = sample_analysis.paraphase_vcfs
 
 		# per sample hificnv outputs
 		Array[IndexData] hificnv_vcfs = sample_analysis.hificnv_vcf
diff --git a/workflows/sample_analysis/sample_analysis.wdl b/workflows/sample_analysis/sample_analysis.wdl
index 1f4f1a8d..d5cd18d3 100644
--- a/workflows/sample_analysis/sample_analysis.wdl
+++ b/workflows/sample_analysis/sample_analysis.wdl
@@ -231,7 +231,7 @@ workflow sample_analysis {
 		# per sample paraphase outputs
 		File paraphase_output_json = paraphase.output_json
 		IndexData paraphase_realigned_bam = {"data": paraphase.realigned_bam, "data_index": paraphase.realigned_bam_index}
-		Array[File] paraphase_vcfs = paraphase.paraphase_vcfs
+		File paraphase_vcfs = paraphase.paraphase_vcfs
 
 		# per sample hificnv outputs
 		IndexData hificnv_vcf = {"data": hificnv.cnv_vcf, "data_index": hificnv.cnv_vcf_index}
@@ -317,7 +317,7 @@ task pbmm2_align {
 	}
 
 	runtime {
-		docker: "~{runtime_attributes.container_registry}/pbmm2@sha256:1013aa0fd5fb42c607d78bfe3ec3d19e7781ad3aa337bf84d144c61ed7d51fa1"
+		docker: "~{runtime_attributes.container_registry}/pbmm2@sha256:d92495f9a81d5d7edc8b394e4fa5f942027a37f9143ac4ab6cfeda1b7b320d2e"
 		cpu: threads
 		memory: mem_gb + " GB"
 		disk: disk_size + " GB"
@@ -624,8 +624,8 @@ task paraphase {
 		RuntimeAttributes runtime_attributes
 	}
 
-	Int threads = 4
-	Int mem_gb = 4
+	Int threads = 8
+	Int mem_gb = 16
 	Int disk_size = ceil(size(bam, "GB") + 20)
 
 	command <<<
@@ -638,17 +638,21 @@ task paraphase {
 			--bam ~{bam} \
 			--reference ~{reference} \
 			--out ~{out_directory}
+
+		# Archive the VCF directory itself: globbing *.vcf makes tar fail when paraphase writes no VCFs
+		mkdir -p ~{out_directory}/~{sample_id}_vcfs
+		tar -zcvf ~{out_directory}.tar.gz -C ~{out_directory} ~{sample_id}_vcfs
 	>>>
 
 	output {
 		File output_json = "~{out_directory}/~{sample_id}.json"
 		File realigned_bam = "~{out_directory}/~{sample_id}_realigned_tagged.bam"
 		File realigned_bam_index = "~{out_directory}/~{sample_id}_realigned_tagged.bam.bai"
-		Array[File] paraphase_vcfs = glob("~{out_directory}/~{sample_id}_vcfs/*.vcf")
+		File paraphase_vcfs = "~{out_directory}.tar.gz"
 	}
 
 	runtime {
-		docker: "~{runtime_attributes.container_registry}/paraphase@sha256:186dec5f6dabedf8c90fe381cd8f934d31fe74310175efee9ca4f603deac954d"
+		docker: "~{runtime_attributes.container_registry}/paraphase@sha256:b9852d1a43485b13c563aaddcb32bacc7f0c9088c2ca007051b9888e9fe5617d"
 		cpu: threads
 		memory: mem_gb + " GB"
 		disk: disk_size + " GB"