Skip to content

Commit

Permalink
adding viloca to the workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
LaraFuhrmann committed Feb 5, 2024
1 parent 25fd3a9 commit 0688a2e
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 5 deletions.
11 changes: 11 additions & 0 deletions workflow/envs/viloca.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- libshorah
- python=3.10.4
- pip
- pip:
- pandas
- git+https://github.com/cbg-ethz/VILOCA@master
56 changes: 54 additions & 2 deletions workflow/rules/snv.smk
Original file line number Diff line number Diff line change
Expand Up @@ -286,11 +286,63 @@ rule lofreq:
{params.LOFREQ} call {params.EXTRA} --call-indels -f {input.REF} -o {output.SNVs} --verbose {output.BAM} >> {log.outfile} 2> >(tee -a {log.errfile} >&2)
"""

rule viloca:
input:
REF=(
cohortdir("cohort_consensus.fasta")
if config.lofreq["consensus"]
else reference_file
),
BAM=alignment_wildcard,
output:
SNVs="{dataset}/variants/SNVs/snvs.vcf",
CSV="{dataset}/variants/SNVs/snv/cooccurring_mutations.csv",
params:
READ_LEN=read_len,
INSERT_FILE=config.viloca["insert_bedfile"],
MODE=config.viloca["mode"],
SHIFT=config.viloca["shift"],
OUTDIR="{dataset}/variants/SNVs",
EXTRA=config.viloca["extra"],
VILOCA=config.applications["viloca"],
log:
outfile="{dataset}/variants/SNVs/viloca.out.log",
errfile="{dataset}/variants/SNVs/viloca.err.log",
conda:
config.viloca["conda"]
benchmark:
"{dataset}/variants/SNVs/viloca.benchmark"
threads: config.viloca["threads"]
resources:
disk_mb=2000,
mem_mb=config.viloca["mem"],
runtime=config.viloca["time"],
shell:
"""
let "WINDOW_SHIFTS=({params.READ_LEN} * 4/5 + {params.SHIFT}) / {params.SHIFT}"
let "WINDOW_LEN=WINDOW_SHIFTS * {params.SHIFT}"
# Run VILOCA
echo "Running VILOCA" >> {log.outfile}
if [[ "{params.INSERT_FILE}" == "None" ]]; then
{params.VILOCA} {params.EXTRA} -t {threads} --mode {params.MODE} -w ${{WINDOW_LEN}} -s {params.SHIFT} -f {input.REF} -b {input.BAM} >> {log.outfile} 2> >(tee -a {log.errfile} >&2)
else
{params.VILOCA} {params.EXTRA} -t {threads} --mode {params.MODE} -z {params.INSERT_FILE} -f {input.REF} -b {input.BAM} >> {log.outfile} 2> >(tee -a {log.errfile} >&2)
fi
# rename viloca output snv/SNVs_0.010000_final.vcf --> snvs.vcf
cp "${params.OUTDIR}/snv/SNVs_0.010000_final.vcf" {output.SNVs}
"""


if config.general["snv_caller"] == "shorah":

ruleorder: snv > lofreq
ruleorder: snv > lofreq > viloca

elif config.general["snv_caller"] == "lofreq":

ruleorder: lofreq > snv
ruleorder: lofreq > snv > viloca

elif config.general["snv_caller"] == "viloca":

ruleorder: viloca > lofreq > snv
57 changes: 54 additions & 3 deletions workflow/schemas/config_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@
"snv_caller": {
"type": "string",
"default": "shorah",
"enum": ["shorah","lofreq"],
"description": "There are two options available for calling single nucleotide variants, either using [ShoRAH (`shorah`)](https://github.com/cbg-ethz/shorah) [^5] or [LoFreq (`lofreq`)](https://csb5.github.io/lofreq/) [^6]. ShoRAH is used by default. If you prefer to use LoFreq, then indicate so in the configuration file as in the example\n\n[^5]: Zagordi, O. et al. ShoRAH: estimating the genetic diversity of a mixed sample from next-generation sequencing data. BMC Bioinformatics. 2011.\n[^6]: Wilm, A. et al. LoFreq: A sequence-quality aware, ultra-sensitive variant caller for uncovering cell-population heterogeneity from high-throughput sequencing datasets. Nucleic Acids Res. 2012.",
"enum": ["shorah","lofreq", "viloca"],
"description": "There are three options available for calling single nucleotide variants, either using [ShoRAH (`shorah`)](https://github.com/cbg-ethz/shorah) [^5], [LoFreq (`lofreq`)](https://csb5.github.io/lofreq/) [^6] or [VILOCA (`viloca`)](https://github.com/cbg-ethz/viloca) [^7] . ShoRAH is used by default. If you prefer to use LoFreq, then indicate so in the configuration file as in the example\n\n[^5]: Zagordi, O. et al. ShoRAH: estimating the genetic diversity of a mixed sample from next-generation sequencing data. BMC Bioinformatics. 2011.\n[^6]: Wilm, A. et al. LoFreq: A sequence-quality aware, ultra-sensitive variant caller for uncovering cell-population heterogeneity from high-throughput sequencing datasets. Nucleic Acids Res. 2012.",
"examples": ["lofreq"]
},
"haplotype_reconstruction": {
Expand Down Expand Up @@ -228,7 +228,7 @@
"local": {
"type": "boolean",
"default": false,
"description": "This option activates local haplotype reconstruction (only available when using ShoRAH).",
"description": "This option activates local haplotype reconstruction (only available when using ShoRAH or VILOCA).",
"examples": [true]
},
"global": {
Expand Down Expand Up @@ -386,6 +386,10 @@
"type": "string",
"default": "shorah shotgun"
},
"viloca": {
"type": "string",
"default": "viloca run"
},
"lofreq": {
"type": "string",
"default": "lofreq"
Expand Down Expand Up @@ -1185,6 +1189,53 @@
"default": {},
"type": "object"
},
"viloca": {
"properties": {
"mem": {
"type": "integer",
"default": 10000
},
"threads": {
"type": "integer"
},
"time": {
"type": "integer",
"default": 2880
},
"conda": {
"type": "string",
"default": "{VPIPE_BASEDIR}/envs/viloca.yaml"
},
"consensus": {
"type": "boolean",
"default": false,
"description": "Indicate whether to use the cohort-consensus sequence from the analyzed samples (output from `minor_variants` rule located in the cohort-wide output `results/cohort_onsensus.fasta`) or the reference sequence by setting this option to False.",
"examples": [false]
},
"shift": {
"type": "integer",
"default": 3,
"description": "VILOCA performs local haplotype reconstruction on windows of the read alignment. The overlap between these windows is defined by the window shifts. By default, it is set to 3, i.e., apart from flanking regions each position is covered by 3 windows."
},
"insert_bedfile": {
"type": "string",
"default": "None",
"description": "VILOCA performs local haplotype reconstruction on windows of the read alignment. In a first step the alignment is tiled into local regions. This can be done uniformly then set this value None, otherwise path to an (optional) insert file (primer tiling strategy)"
},
"mode": {
"type": "string",
"default": "use_quality_scores",
"description": "Mode in which to run VILOCA: shorah, learn_error_params, use_quality_scores. If quality scores are available, we recommend this option"
},
"extra": {
"type": "string",
"default": "",
"description": "Pass additional options to run `lofreq call`"
}
},
"default": {},
"type": "object"
},
"lofreq": {
"properties": {
"mem": {
Expand Down

0 comments on commit 0688a2e

Please sign in to comment.