Skip to content

Commit

Permalink
Update to Paraphase 3.0
Browse files Browse the repository at this point in the history
- this update increases the number of segdups haplotyped by Paraphase in
  GRCh38 from 9 to over 160
- paraphase task resources have been increased (threads 4 -> 8, memory
  4 GB -> 16 GB) to keep up with the increased number of sites
- because the number of VCFs output is variable, a tarball of the VCFs
  is provided as an output target rather than an array of VCFs
  • Loading branch information
williamrowell committed Dec 12, 2023
1 parent fd1b806 commit 982de43
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 14 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ These files will be output for each sample defined in the cohort.
| Array[Array[File]] | cpg_pileup_bigwigs | 5mCpG site methylation probability pileups generated by pb-CpG-tools | |
| Array[File] | paraphase_output | Output generated by [Paraphase](https://github.com/PacificBiosciences/paraphase) | |
| Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)] | paraphase_realigned_bam | Realigned BAM for selected medically relevant genes in segmental duplications (with index), generated by Paraphase | |
| Array[Array[File]] | paraphase_vcfs | Phased Variant calls for selected medically relevant genes in segmental duplications, generated by Paraphase | |
| Array[File] | paraphase_vcfs | Tarball of phased variant calls for selected medically relevant genes in segmental duplications, generated by Paraphase | |
| Array[[IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)] | hificnv_vcfs | VCF output containing copy number variant calls for the sample from [HiFiCNV](https://github.com/PacificBiosciences/HiFiCNV) | |
| Array[File] | hificnv_copynum_bedgraphs | Copy number values calculated for each region | |
| Array[File] | hificnv_depth_bws | Bigwig file containing the depth measurements from HiFiCNV | |
Expand Down Expand Up @@ -262,7 +262,7 @@ The Docker image used by a particular step of the workflow can be identified by
| hiphase | <ul><li>[HiPhase 1.0.0](https://github.com/PacificBiosciences/HiPhase/releases/tag/v1.0.0)</li><li>[samtools 1.18](https://github.com/samtools/samtools/releases/tag/1.18)</li><li>[bcftools 1.18](https://github.com/samtools/bcftools/releases/tag/1.18)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/d26db6204409dfeff56e169cdba0cc14bc272f15/docker/hiphase) |
| htslib | <ul><li>[htslib 1.14](https://github.com/samtools/htslib/releases/tag/1.14)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/htslib) |
| mosdepth | <ul><li>[mosdepth 0.2.9](https://github.com/brentp/mosdepth/releases/tag/v0.2.9)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/mosdepth) |
| paraphase | <ul><li>[minimap2 2.17](https://github.com/lh3/minimap2/releases/tag/v2.17)</li><li>[samtools 1.14](https://github.com/samtools/samtools/releases/tag/1.14)</li><li>[paraphase 2.2.3](https://github.com/PacificBiosciences/paraphase/releases/tag/v2.2.3)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/3560fcc5a84e044067cea9c9a7669cfc2659178e/docker/paraphase) |
| paraphase | <ul><li>[minimap2 2.26](https://github.com/lh3/minimap2/releases/tag/v2.26)</li><li>[samtools 1.18](https://github.com/samtools/samtools/releases/tag/1.18)</li><li>[paraphase 3.0.0](https://github.com/PacificBiosciences/paraphase/releases/tag/v3.0.0)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/4f510e5f434cc138577853f56558b90e059fd770/docker/paraphase) |
| pb-cpg-tools | <ul><li>[pb-CpG-tools v2.3.2](https://github.com/PacificBiosciences/pb-CpG-tools/releases/tag/v2.3.2)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/7481837d3b0f539adf4f64209a65cf28eebf3dba/docker/pb-cpg-tools) |
| pbmm2 | <ul><li>[pbmm2 1.13.1](https://github.com/PacificBiosciences/pbmm2/releases/tag/v1.13.1)</li><li>[datamash 1.1.0](https://ftp.gnu.org/gnu/datamash/)</li><li>[pysam 0.16.0.1](https://github.com/pysam-developers/pysam/releases/tag/v0.16.0.1)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/94bbc94044ed8ee5dace9ebdc92756884535be93/docker/pbmm2) |
| pbsv | <ul><li>[pbsv 2.9.0](https://github.com/PacificBiosciences/pbsv/releases/tag/v2.9.0)</li><li>[htslib 1.14](https://github.com/samtools/htslib/releases/tag/1.14)</li></ul> | [Dockerfile](https://github.com/PacificBiosciences/wdl-dockerfiles/tree/f9e33a757e6d8cb15696ac930a2efd0fd7a885d8/docker/pbsv) |
Expand Down
12 changes: 6 additions & 6 deletions wdl-ci.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -249,24 +249,24 @@
"tests": [
{
"inputs": {
"sample_id": "${sample_id}",
"bam": "/coac74908838b5dd7/inputs/small_dataset/paraphase/HG005.GRCh38.paraphase.test.bam",
"bam_index": "/coac74908838b5dd7/inputs/small_dataset/paraphase/HG005.GRCh38.paraphase.test.bam.bai",
"out_directory": "${sample_id}.paraphase",
"sample_id": "HG002",
"bam": "/coac74908838b5dd7/inputs/small_dataset/paraphase/HG002.GRCh38.haplotagged.paraphase_region_v3.bam",
"bam_index": "/coac74908838b5dd7/inputs/small_dataset/paraphase/HG002.GRCh38.haplotagged.paraphase_region_v3.bam.bai",
"out_directory": "HG002.paraphase",
"reference": "${datasets_file_path}/GRCh38/human_GRCh38_no_alt_analysis_set.fasta",
"reference_index": "${datasets_file_path}/GRCh38/human_GRCh38_no_alt_analysis_set.fasta.fai",
"runtime_attributes": "${default_runtime_attributes}"
},
"output_tests": {
"output_json": {
"value": "${resources_file_path}/paraphase/${sample_id}.json",
"value": "${resources_file_path}/paraphase/HG002.json",
"test_tasks": [
"compare_file_basename",
"check_json"
]
},
"realigned_bam": {
"value": "${resources_file_path}/paraphase/${sample_id}_realigned_tagged.bam",
"value": "${resources_file_path}/paraphase/HG002_realigned_tagged.bam",
"test_tasks": [
"compare_file_basename",
"samtools_quickcheck"
Expand Down
2 changes: 1 addition & 1 deletion workflows/main.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ workflow humanwgs {
# per sample paraphase outputs
Array[File] paraphase_output_jsons = sample_analysis.paraphase_output_json
Array[IndexData] paraphase_realigned_bams = sample_analysis.paraphase_realigned_bam
Array[Array[File]] paraphase_vcfs = sample_analysis.paraphase_vcfs
Array[File] paraphase_vcfs = sample_analysis.paraphase_vcfs

# per sample hificnv outputs
Array[IndexData] hificnv_vcfs = sample_analysis.hificnv_vcf
Expand Down
14 changes: 9 additions & 5 deletions workflows/sample_analysis/sample_analysis.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ workflow sample_analysis {
# per sample paraphase outputs
File paraphase_output_json = paraphase.output_json
IndexData paraphase_realigned_bam = {"data": paraphase.realigned_bam, "data_index": paraphase.realigned_bam_index}
Array[File] paraphase_vcfs = paraphase.paraphase_vcfs
File paraphase_vcfs = paraphase.paraphase_vcfs

# per sample hificnv outputs
IndexData hificnv_vcf = {"data": hificnv.cnv_vcf, "data_index": hificnv.cnv_vcf_index}
Expand Down Expand Up @@ -624,8 +624,8 @@ task paraphase {
RuntimeAttributes runtime_attributes
}
Int threads = 4
Int mem_gb = 4
Int threads = 8
Int mem_gb = 16
Int disk_size = ceil(size(bam, "GB") + 20)
command <<<
Expand All @@ -638,17 +638,21 @@ task paraphase {
--bam ~{bam} \
--reference ~{reference} \
--out ~{out_directory}
cd ~{out_directory} \
&& tar zcvf ~{out_directory}.tar.gz ~{sample_id}_vcfs/*.vcf \
&& mv ~{out_directory}.tar.gz ../
>>>
output {
File output_json = "~{out_directory}/~{sample_id}.json"
File realigned_bam = "~{out_directory}/~{sample_id}_realigned_tagged.bam"
File realigned_bam_index = "~{out_directory}/~{sample_id}_realigned_tagged.bam.bai"
Array[File] paraphase_vcfs = glob("~{out_directory}/~{sample_id}_vcfs/*.vcf")
File paraphase_vcfs = "~{out_directory}.tar.gz"
}

runtime {
docker: "~{runtime_attributes.container_registry}/paraphase@sha256:186dec5f6dabedf8c90fe381cd8f934d31fe74310175efee9ca4f603deac954d"
docker: "~{runtime_attributes.container_registry}/paraphase@sha256:b9852d1a43485b13c563aaddcb32bacc7f0c9088c2ca007051b9888e9fe5617d"
cpu: threads
memory: mem_gb + " GB"
disk: disk_size + " GB"
Expand Down

0 comments on commit 982de43

Please sign in to comment.