diff --git a/resources/auxiliary_workflows/benchmark/workflow/Snakefile b/resources/auxiliary_workflows/benchmark/workflow/Snakefile index 72e498ea..df0e7fb3 100644 --- a/resources/auxiliary_workflows/benchmark/workflow/Snakefile +++ b/resources/auxiliary_workflows/benchmark/workflow/Snakefile @@ -557,15 +557,19 @@ rule bed_file_single_amplicon: rule run_method_local: input: script=srcdir("../resources/method_definitions/{method}.py"), - fname_bam=lambda wildcards: f"results/simulated_reads/{paramspace.wildcard_pattern}/replicates/{{replicate}}/reads.{{seq_mode}}.bam" - if wildcards.seq_mode == "amplicon" - else f"results/simulated_reads/{paramspace.wildcard_pattern}/replicates/{{replicate}}/reads.{{seq_mode}}.bam", + fname_bam=lambda wildcards: ( + f"results/simulated_reads/{paramspace.wildcard_pattern}/replicates/{{replicate}}/reads.{{seq_mode}}.bam" + if wildcards.seq_mode == "amplicon" + else f"results/simulated_reads/{paramspace.wildcard_pattern}/replicates/{{replicate}}/reads.{{seq_mode}}.bam" + ), fname_bam_index=f"results/simulated_reads/{paramspace.wildcard_pattern}/replicates/{{replicate}}/reads.{{seq_mode}}.bam.bai", fname_reference=f"results/simulated_reads/{paramspace.wildcard_pattern}/replicates/{{replicate}}/reference.fasta", - fname_insert_bed=lambda wildcards: f"results/simulated_reads/{paramspace.wildcard_pattern}/replicates/{{replicate}}/scheme/reference.insert.bed" - if wildcards.seq_mode == "amplicon" - or wildcards.seq_mode_param == "single_amplicon" - else [], + fname_insert_bed=lambda wildcards: ( + f"results/simulated_reads/{paramspace.wildcard_pattern}/replicates/{{replicate}}/scheme/reference.insert.bed" + if wildcards.seq_mode == "amplicon" + or wildcards.seq_mode_param == "single_amplicon" + else [] + ), output: fname_result=f"results/method_runs/{paramspace.wildcard_pattern}/{{method}}/replicates/{{replicate}}/snvs.vcf", fname_status=touch( diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk index 7d683272..9885cc69 100644 --- a/workflow/rules/align.smk +++ b/workflow/rules/align.smk @@ -13,9 +13,11 @@ rule initial_vicuna: global_ref=reference_file, R1="{dataset}/preprocessed_data/R1.fastq", R2=( - lambda wildcards: wildcards.dataset + "/preprocessed_data/R2.fastq" - if config.input["paired"] - else [] + lambda wildcards: ( + wildcards.dataset + "/preprocessed_data/R2.fastq" + if config.input["paired"] + else [] + ) ), output: "{dataset}/references/vicuna_consensus.fasta", @@ -667,9 +669,11 @@ elif config.general["aligner"] == "minimap": SEED="--seed 42", EXTRA=config.minimap_align["extra"], PRESET=config.minimap_align["preset"], - SECONDARY="--secondary=yes --secondary-seq" - if config.minimap_align["secondary"] - else "--secondary=no", + SECONDARY=( + "--secondary=yes --secondary-seq" + if config.minimap_align["secondary"] + else "--secondary=no" + ), FILTER="-f 2" if config.input["paired"] else "-F 4", MINIMAP=config.applications["minimap"], SAMTOOLS=config.applications["samtools"], diff --git a/workflow/rules/dehuman.smk b/workflow/rules/dehuman.smk index c9abb544..9a9c47d3 100644 --- a/workflow/rules/dehuman.smk +++ b/workflow/rules/dehuman.smk @@ -10,9 +10,11 @@ rule dh_reuse_alignreject: # this rule re-use the rejected reads in align.smk (e.g. ngshmmalign's /alignments/rejects.sam) # (useful when in parallel with the main processing) input: - reject_aln=rules.hmm_align.output.reject_aln - if config["general"]["aligner"] == "ngshmmalign" - else temp_prefix("{dataset}/alignments/tmp_aln.sam"), + reject_aln=( + rules.hmm_align.output.reject_aln + if config["general"]["aligner"] == "ngshmmalign" + else temp_prefix("{dataset}/alignments/tmp_aln.sam") + ), output: reject_1=temp_with_prefix("{dataset}/alignments/reject_R1.fastq.gz"), reject_2=temp_with_prefix("{dataset}/alignments/reject_R2.fastq.gz"), @@ -115,12 +117,16 @@ rule dh_hostalign: input: host_ref=config.dehuman["ref_host"], ref_index=multiext(config.dehuman["ref_host"], *bwa_idx_ext), - reject_1=rules.dh_redo_alignreject.output.reject_1 - if config["dehuman"]["catchup"] - else rules.dh_reuse_alignreject.output.reject_1, - reject_2=rules.dh_redo_alignreject.output.reject_2 - if config["dehuman"]["catchup"] - else rules.dh_reuse_alignreject.output.reject_2, + reject_1=( + rules.dh_redo_alignreject.output.reject_1 + if config["dehuman"]["catchup"] + else rules.dh_reuse_alignreject.output.reject_1 + ), + reject_2=( + rules.dh_redo_alignreject.output.reject_2 + if config["dehuman"]["catchup"] + else rules.dh_reuse_alignreject.output.reject_2 + ), output: host_aln=temp_with_prefix("{dataset}/alignments/host_aln.sam"), params: @@ -157,12 +163,16 @@ rule dh_hostalign: rule dh_filter: input: host_aln=temp_prefix("{dataset}/alignments/host_aln.sam"), - R1=partial(raw_data_file, pair=1) - if config["dehuman"]["catchup"] - else temp_prefix("{dataset}/extracted_data/R1.fastq"), - R2=partial(raw_data_file, pair=2) - if config["dehuman"]["catchup"] - else temp_prefix("{dataset}/extracted_data/R2.fastq"), + R1=( + partial(raw_data_file, pair=1) + if config["dehuman"]["catchup"] + else temp_prefix("{dataset}/extracted_data/R1.fastq") + ), + R2=( + partial(raw_data_file, pair=2) + if config["dehuman"]["catchup"] + else temp_prefix("{dataset}/extracted_data/R2.fastq") + ), output: filter_count="{dataset}/alignments/dehuman.count", filter_list=temp_with_prefix("{dataset}/alignments/dehuman.filter"), diff --git a/workflow/rules/publish.smk b/workflow/rules/publish.smk index 05b69d1c..20aa2a8c 100644 --- a/workflow/rules/publish.smk +++ b/workflow/rules/publish.smk @@ -16,35 +16,43 @@ rule prepare_upload: input: R1=partial(raw_data_file, pair=1) if config.upload["orig_fastq"] else [], R2=partial(raw_data_file, pair=2) if config.upload["orig_fastq"] else [], - orig_cram=[ - "{dataset}/raw_uploads/raw_reads.cram", - "{dataset}/raw_uploads/raw_reads.cram.%s" % config.general["checksum"], - ] - if config.upload["orig_cram"] - else [], - dehuman=[ - "{dataset}/raw_uploads/dehuman.cram", - "{dataset}/raw_uploads/dehuman.cram.%s" % config.general["checksum"], - ] - if config.output["dehumanized_raw_reads"] - else [], + orig_cram=( + [ + "{dataset}/raw_uploads/raw_reads.cram", + "{dataset}/raw_uploads/raw_reads.cram.%s" % config.general["checksum"], + ] + if config.upload["orig_cram"] + else [] + ), + dehuman=( + [ + "{dataset}/raw_uploads/dehuman.cram", + "{dataset}/raw_uploads/dehuman.cram.%s" % config.general["checksum"], + ] + if config.output["dehumanized_raw_reads"] + else [] + ), consensus_indels="{dataset}/references/consensus%s.bcftools.fasta" % bcft_suffix(), consensus_indels_chain="{dataset}/references/consensus%s.bcftools.chain" % bcft_suffix(), consensus_aligned="{dataset}/references/ref_%s_dels.fasta" % config.upload["consensus"], - csum=[ - "{dataset}/references/consensus%(suf)s.bcftools.fasta.%(csum)s" - % {"suf": bcft_suffix(), "csum": config.general["checksum"]}, - "{dataset}/references/ref_majority_dels.fasta.%(csum)s" - % {"suf": bcft_suffix(), "csum": config.general["checksum"]}, - ] - if config.upload["checksum"] - else [], - frameshift_deletions_check="{dataset}/references/frameshift_deletions_check.tsv" - if config.output["QA"] - else [], + csum=( + [ + "{dataset}/references/consensus%(suf)s.bcftools.fasta.%(csum)s" + % {"suf": bcft_suffix(), "csum": config.general["checksum"]}, + "{dataset}/references/ref_majority_dels.fasta.%(csum)s" + % {"suf": bcft_suffix(), "csum": config.general["checksum"]}, + ] + if config.upload["checksum"] + else [] + ), + frameshift_deletions_check=( + "{dataset}/references/frameshift_deletions_check.tsv" + if config.output["QA"] + else [] + ), output: upload_prepared_touch="{dataset}/upload_prepared.touch", params: diff --git a/workflow/rules/signatures.smk b/workflow/rules/signatures.smk index d6085995..78b03a16 100644 --- a/workflow/rules/signatures.smk +++ b/workflow/rules/signatures.smk @@ -61,9 +61,11 @@ rule cooc: COJAC=config.applications["cojac"], name=ID, sep=config.general["id_separator"], - out_format="--multiindex" - if config.cooc["out_format"] == "columns" - else "--multiindex --lines", + out_format=( + "--multiindex" + if config.cooc["out_format"] == "columns" + else "--multiindex --lines" + ), log: outfile="{dataset}/signatures/cooc.out.log", errfile="{dataset}/signatures/cooc.err.log", @@ -104,9 +106,11 @@ rule cohort_cooc: params: COJAC=config.applications["cojac"], sep=config.general["id_separator"], - out_format="--multiindex" - if config.cooc["out_format"] == "columns" - else "--multiindex --lines", + out_format=( + "--multiindex" + if config.cooc["out_format"] == "columns" + else "--multiindex --lines" + ), log: outfile=cohortdir("cohort_cooc.{proto}.out.log"), errfile=cohortdir("cohort_cooc.{proto}.err.log"), @@ -222,26 +226,36 @@ if config.timeline["local"]: rule timeline: input: samples_tsv=config.input["samples_file"], - locations=config.timeline["locations_table"] - if config.timeline["locations_table"] - else [], + locations=( + config.timeline["locations_table"] + if config.timeline["locations_table"] + else [] + ), regex=config.timeline["regex_yaml"] if config.timeline["regex_yaml"] else [], output: timeline=cohortdir("timeline.tsv"), - locations_list=cohortdir("locations_list.yaml") - if config.timeline["locations_table"] - else [], + locations_list=( + cohortdir("locations_list.yaml") + if config.timeline["locations_table"] + else [] + ), params: maketimeline=cachepath(config.timeline["script"], executable=True), - locations=f"--locations {config.timeline['locations_table']}" - if config.timeline["locations_table"] - else "", - regex=f"--regex-config {config.timeline['regex_yaml']}" - if config.timeline["regex_yaml"] - else "", - out_locations=f"--out-locations {cohortdir('locations_list.yaml')}" - if config.timeline["locations_table"] or config.timeline["regex_yaml"] - else "", + locations=( + f"--locations {config.timeline['locations_table']}" + if config.timeline["locations_table"] + else "" + ), + regex=( + f"--regex-config {config.timeline['regex_yaml']}" + if config.timeline["regex_yaml"] + else "" + ), + out_locations=( + f"--out-locations {cohortdir('locations_list.yaml')}" + if config.timeline["locations_table"] or config.timeline["regex_yaml"] + else "" + ), options=config.timeline["options"], log: outfile=cohortdir("timeline.out.log"), @@ -296,20 +310,24 @@ rule deconvolution: input: tallymut=cohortdir("tallymut.tsv.zst"), deconv_conf=config.deconvolution["deconvolution_config"], - var_conf=config.deconvolution["variants_config"] - if config.deconvolution["variants_config"] - else cohortdir("variants_pangolin.yaml"), - var_dates=config.deconvolution["variants_dates"] - if config.deconvolution["variants_dates"] - else [], + var_conf=( + config.deconvolution["variants_config"] + if config.deconvolution["variants_config"] + else cohortdir("variants_pangolin.yaml") + ), + var_dates=( + config.deconvolution["variants_dates"] + if config.deconvolution["variants_dates"] + else [] + ), output: deconvoluted=cohortdir("deconvoluted.tsv.zst"), deconv_json=cohortdir("deconvoluted_upload.json"), params: LOLLIPOP=config.applications["lollipop"], - out_format="--fmt-columns" - if config.deconvolution["out_format"] == "columns" - else "", + out_format=( + "--fmt-columns" if config.deconvolution["out_format"] == "columns" else "" + ), seed="--seed=42", log: outfile=cohortdir("deconvoluted.out.log"),