diff --git a/README.md b/README.md
index 457449bb..67b68d25 100644
--- a/README.md
+++ b/README.md
@@ -214,7 +214,7 @@ These files will be output for each sample defined in the cohort.
| Array[Array[File]] | cpg_pileup_bigwigs | 5mCpG site methylation probability pileups generated by pb-CpG-tools | |
| Array[File] | paraphase_output | Output generated by [Paraphase](https://github.com/PacificBiosciences/paraphase) | |
| Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)] | paraphase_realigned_bam | Realigned BAM for selected medically relevant genes in segmental duplications (with index), generated by Paraphase | |
-| Array[Array[File]] | paraphase_vcfs | Phased Variant calls for selected medically relevant genes in segmental duplications, generated by Paraphase | |
+| Array[File] | paraphase_vcfs | Gzipped tarball (`.tar.gz`) of phased variant call VCFs for selected medically relevant genes in segmental duplications, generated by Paraphase | |
| Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)] | hificnv_vcfs | VCF output containing copy number variant calls for the sample from [HiFiCNV](https://github.com/PacificBiosciences/HiFiCNV) | |
| Array[File] | hificnv_copynum_bedgraphs | Copy number values calculated for each region | |
| Array[File] | hificnv_depth_bws | Bigwig file containing the depth measurements from HiFiCNV | |
@@ -262,9 +262,9 @@ The Docker image used by a particular step of the workflow can be identified by
| hiphase |
- [HiPhase 1.0.0](https://github.com/PacificBiosciences/HiPhase/releases/tag/v1.0.0)
- [samtools 1.18](https://github.com/samtools/samtools/releases/tag/1.18)
- [bcftools 1.18](https://github.com/samtools/bcftools/releases/tag/1.18)
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/d26db6204409dfeff56e169cdba0cc14bc272f15/docker/hiphase) |
| htslib | - [htslib 1.14](https://github.com/samtools/htslib/releases/tag/1.14)
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/htslib) |
| mosdepth | - [mosdepth 0.2.9](https://github.com/brentp/mosdepth/releases/tag/v0.2.9)
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/mosdepth) |
-| paraphase | - [minimap2 2.17](https://github.com/lh3/minimap2/releases/tag/v2.17)
- [samtools 1.14](https://github.com/samtools/samtools/releases/tag/1.14)
- [paraphase 2.2.3](https://github.com/PacificBiosciences/paraphase/releases/tag/v2.2.3)
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/paraphase) |
+| paraphase | - [minimap2 2.26](https://github.com/lh3/minimap2/releases/tag/v2.26)
- [samtools 1.18](https://github.com/samtools/samtools/releases/tag/1.18)
- [paraphase 3.0.0](https://github.com/PacificBiosciences/paraphase)
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/4f510e5f434cc138577853f56558b90e059fd770/docker/paraphase) |
| pb-cpg-tools | - [pb-CpG-tools v2.3.2](https://github.com/PacificBiosciences/pb-CpG-tools/releases/tag/v2.3.2)
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/7481837d3b0f539adf4f64209a65cf28eebf3dba/docker/pb-cpg-tools) |
-| pbmm2 | - [pbmm2 1.10.0](https://github.com/PacificBiosciences/pbmm2/releases/tag/v1.10.0)
- [datamash 1.1.0](https://ftp.gnu.org/gnu/datamash/)
- [pysam 0.16.0.1](https://github.com/pysam-developers/pysam/releases/tag/v0.16.0.1)
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/pbmm2) |
+| pbmm2 | - [pbmm2 1.13.1](https://github.com/PacificBiosciences/pbmm2/releases/tag/v1.13.1)
- [datamash 1.1.0](https://ftp.gnu.org/gnu/datamash/)
- [pysam 0.16.0.1](https://github.com/pysam-developers/pysam/releases/tag/v0.16.0.1)
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/94bbc94044ed8ee5dace9ebdc92756884535be93/docker/pbmm2) |
| pbsv | - [pbsv 2.9.0](https://github.com/PacificBiosciences/pbsv/releases/tag/v2.9.0)
- [htslib 1.14](https://github.com/samtools/htslib/releases/tag/1.14)
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/f9e33a757e6d8cb15696ac930a2efd0fd7a885d8/docker/pbsv) |
| pyyaml | - [pyyaml 5.3.1](https://github.com/yaml/pyyaml/releases/tag/5.3.1)
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/f72e862bca2f209b9909e6043ef0197975762f27/docker/pyyaml) |
| samtools | - [samtools 1.14](https://github.com/samtools/samtools/releases/tag/1.14)
| [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/samtools) |
diff --git a/wdl-ci.config.json b/wdl-ci.config.json
index 414f0248..b23ff1db 100644
--- a/wdl-ci.config.json
+++ b/wdl-ci.config.json
@@ -25,7 +25,7 @@
"tasks": {
"pbmm2_align": {
"key": "pbmm2_align",
- "digest": "3r4icze5zkps7m6xoruzvnfzk2fp4gqd",
+ "digest": "lo45v7z6rbh2uvh7nvnjf66udnoxfz2m",
"tests": [
{
"inputs": {
@@ -249,24 +249,24 @@
"tests": [
{
"inputs": {
- "sample_id": "${sample_id}",
- "bam": "/coac74908838b5dd7/inputs/small_dataset/paraphase/HG005.GRCh38.paraphase.test.bam",
- "bam_index": "/coac74908838b5dd7/inputs/small_dataset/paraphase/HG005.GRCh38.paraphase.test.bam.bai",
- "out_directory": "${sample_id}.paraphase",
+ "sample_id": "HG002",
+ "bam": "/coac74908838b5dd7/inputs/small_dataset/paraphase/HG002.GRCh38.haplotagged.paraphase_region_v3.bam",
+ "bam_index": "/coac74908838b5dd7/inputs/small_dataset/paraphase/HG002.GRCh38.haplotagged.paraphase_region_v3.bam.bai",
+ "out_directory": "HG002.paraphase",
"reference": "${datasets_file_path}/GRCh38/human_GRCh38_no_alt_analysis_set.fasta",
"reference_index": "${datasets_file_path}/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai",
"runtime_attributes": "${default_runtime_attributes}"
},
"output_tests": {
"output_json": {
- "value": "${resources_file_path}/paraphase/${sample_id}.json",
+ "value": "${resources_file_path}/paraphase/HG002.json",
"test_tasks": [
"compare_file_basename",
"check_json"
]
},
"realigned_bam": {
- "value": "${resources_file_path}/paraphase/${sample_id}_realigned_tagged.bam",
+ "value": "${resources_file_path}/paraphase/HG002_realigned_tagged.bam",
"test_tasks": [
"compare_file_basename",
"samtools_quickcheck"
diff --git a/workflows/main.wdl b/workflows/main.wdl
index 0ee719aa..d334f526 100644
--- a/workflows/main.wdl
+++ b/workflows/main.wdl
@@ -130,7 +130,7 @@ workflow humanwgs {
# per sample paraphase outputs
Array[File] paraphase_output_jsons = sample_analysis.paraphase_output_json
Array[IndexData] paraphase_realigned_bams = sample_analysis.paraphase_realigned_bam
- Array[Array[File]] paraphase_vcfs = sample_analysis.paraphase_vcfs
+ Array[File] paraphase_vcfs = sample_analysis.paraphase_vcfs
# per sample hificnv outputs
Array[IndexData] hificnv_vcfs = sample_analysis.hificnv_vcf
diff --git a/workflows/sample_analysis/sample_analysis.wdl b/workflows/sample_analysis/sample_analysis.wdl
index 1f4f1a8d..d5cd18d3 100644
--- a/workflows/sample_analysis/sample_analysis.wdl
+++ b/workflows/sample_analysis/sample_analysis.wdl
@@ -231,7 +231,7 @@ workflow sample_analysis {
# per sample paraphase outputs
File paraphase_output_json = paraphase.output_json
IndexData paraphase_realigned_bam = {"data": paraphase.realigned_bam, "data_index": paraphase.realigned_bam_index}
- Array[File] paraphase_vcfs = paraphase.paraphase_vcfs
+ File paraphase_vcfs = paraphase.paraphase_vcfs
# per sample hificnv outputs
IndexData hificnv_vcf = {"data": hificnv.cnv_vcf, "data_index": hificnv.cnv_vcf_index}
@@ -317,7 +317,7 @@ task pbmm2_align {
}
runtime {
- docker: "~{runtime_attributes.container_registry}/pbmm2@sha256:1013aa0fd5fb42c607d78bfe3ec3d19e7781ad3aa337bf84d144c61ed7d51fa1"
+ docker: "~{runtime_attributes.container_registry}/pbmm2@sha256:d92495f9a81d5d7edc8b394e4fa5f942027a37f9143ac4ab6cfeda1b7b320d2e"
cpu: threads
memory: mem_gb + " GB"
disk: disk_size + " GB"
@@ -624,8 +624,8 @@ task paraphase {
RuntimeAttributes runtime_attributes
}
- Int threads = 4
- Int mem_gb = 4
+ Int threads = 8
+ Int mem_gb = 16
Int disk_size = ceil(size(bam, "GB") + 20)
command <<<
@@ -638,17 +638,21 @@ task paraphase {
--bam ~{bam} \
--reference ~{reference} \
--out ~{out_directory}
+
+ cd ~{out_directory} \
+ && tar zcvf ~{out_directory}.tar.gz ~{sample_id}_vcfs/*.vcf \
+ && mv ~{out_directory}.tar.gz ../
>>>
output {
File output_json = "~{out_directory}/~{sample_id}.json"
File realigned_bam = "~{out_directory}/~{sample_id}_realigned_tagged.bam"
File realigned_bam_index = "~{out_directory}/~{sample_id}_realigned_tagged.bam.bai"
- Array[File] paraphase_vcfs = glob("~{out_directory}/~{sample_id}_vcfs/*.vcf")
+ File paraphase_vcfs = "~{out_directory}.tar.gz"
}
runtime {
- docker: "~{runtime_attributes.container_registry}/paraphase@sha256:186dec5f6dabedf8c90fe381cd8f934d31fe74310175efee9ca4f603deac954d"
+ docker: "~{runtime_attributes.container_registry}/paraphase@sha256:b9852d1a43485b13c563aaddcb32bacc7f0c9088c2ca007051b9888e9fe5617d"
cpu: threads
memory: mem_gb + " GB"
disk: disk_size + " GB"