From 92f628a061487fd64c818f3b4139e6c1fd3061da Mon Sep 17 00:00:00 2001
From: Susana Posada-Cespedes
Date: Mon, 6 Apr 2020 01:08:58 +0200
Subject: [PATCH] Refactor rules for cleaning previous outputs

Move the *clean rules out of rules/align.smk, rules/haplotypes.smk,
rules/quality_assurance.smk and rules/snv.smk into a new file,
rules/clean.smk, which vpipe.snake includes ahead of
rules/quality_assurance.smk.

bwaclean and bowtieclean additionally remove
references/ref_ambig*.fasta and references/ref_majority*.fasta.

haplocliqueclean calls `rm -f` instead of a bare `rm`, matching the other
clean rules, so that cleaning does not fail when there are no
quasispecies.* files to delete.
---
 rules/align.smk             |  57 ------------------
 rules/clean.smk             | 112 ++++++++++++++++++++++++++++++++++++
 rules/haplotypes.smk        |  15 -----
 rules/quality_assurance.smk |  15 -----
 rules/snv.smk               |   7 ---
 vpipe.snake                 |   1 +
 6 files changed, 113 insertions(+), 94 deletions(-)
 create mode 100644 rules/clean.smk

diff --git a/rules/align.smk b/rules/align.smk
index bc8716cad..ed6799fa2 100644
--- a/rules/align.smk
+++ b/rules/align.smk
@@ -209,18 +209,6 @@ rule create_denovo_initial:
         sed -i -e "s/>.*/>${{CONSENSUS_NAME}}/" {output}
         """
 
-rule vicunaclean:
-    params:
-        DIR = config.input['datadir']
-    shell:
-        """
-        rm -rf {params.DIR}/*/*/initial_consensus
-        rm -rf {params.DIR}/*/*/references/vicuna_consensus.fasta
-        rm -rf {params.DIR}/*/*/references/initial_consensus.fasta
-        rm -rf references/initial_aln.fasta
-        rm -rf references/initial_aln_gap_removed.fasta
-        rm -rf references/MAFFT_initial_aln.*
-        """
 
 # change this to switch between VICUNA and creating a simple initial
 # initial reference
@@ -348,13 +336,6 @@ rule msa:
         rm ALL_{wildcards.kind}.fasta
         """
 
-rule msaclean:
-    shell:
-        """
-        rm -rf references/ALL_aln_*.fasta
-        rm -rf references/MAFFT_*_cohort.*
-        """
-
 
 # 4. convert alignments to REF alignment
 def get_reference_name(wildcards):
@@ -396,18 +377,6 @@ rule convert_to_ref:
         """
 
 
-rule alignclean:
-    params:
-        DIR = config.input['datadir']
-    shell:
-        """
-        rm -rf {params.DIR}/*/*/alignments
-        rm -rf {params.DIR}/*/*/QA_alignments
-        rm -rf {params.DIR}/*/*/references/ref_ambig.fasta
-        rm -rf {params.DIR}/*/*/references/ref_majority.fasta
-        rm -rf {params.DIR}/*/*/references/initial_consensus.fasta
-        """
-
 # 2-4. Alternative: align reads using bwa or bowtie
 if config.general["aligner"] == "bwa":
     rule ref_bwa_index:
@@ -571,32 +540,6 @@ elif config.general["aligner"] == "bowtie":
             rm {params.TMP_SAM}
             """
 
-rule bwaclean:
-    input:
-        "{}.bwt".format(reference_file)
-    params:
-        DIR = config.input['datadir']
-    shell:
-        """
-        rm -f {input}
-        rm -rf {params.DIR}/*/*/alignments
-        """
-
-rule bowtieclean:
-    input:
-        INDEX1 = "{}.1.bt2".format(reference_file),
-        INDEX2 = "{}.2.bt2".format(reference_file),
-        INDEX3 = "{}.3.bt2".format(reference_file),
-        INDEX4 = "{}.4.bt2".format(reference_file),
-        INDEX5 = "{}.rev.1.bt2".format(reference_file),
-        INDEX6 = "{}.rev.2.bt2".format(reference_file)
-    params:
-        DIR = config.input['datadir']
-    shell:
-        """
-        rm -f {input}
-        rm -rf {params.DIR}/*/*/alignments
-        """
 
 
 rule consensus_sequences:
diff --git a/rules/clean.smk b/rules/clean.smk
new file mode 100644
index 000000000..f5b26caa7
--- /dev/null
+++ b/rules/clean.smk
@@ -0,0 +1,112 @@
+rule extractclean:
+    params:
+        DIR = config.input['datadir']
+    shell:
+        """
+        rm -rf {params.DIR}/*/*/extracted_data
+        """
+
+
+rule trimmingclean:
+    params:
+        DIR = config.input['datadir']
+    shell:
+        """
+        rm -rf {params.DIR}/*/*/preprocessed_data
+        """
+
+
+rule vicunaclean:
+    params:
+        DIR = config.input['datadir']
+    shell:
+        """
+        rm -rf {params.DIR}/*/*/initial_consensus
+        rm -rf {params.DIR}/*/*/references/vicuna_consensus.fasta
+        rm -rf {params.DIR}/*/*/references/initial_consensus.fasta
+        rm -rf references/initial_aln.fasta
+        rm -rf references/initial_aln_gap_removed.fasta
+        rm -rf references/MAFFT_initial_aln.*
+        """
+
+
+rule msaclean:
+    shell:
+        """
+        rm -rf references/ALL_aln_*.fasta
+        rm -rf references/MAFFT_*_cohort.*
+        """
+
+
+rule alignclean:
+    params:
+        DIR = config.input['datadir']
+    shell:
+        """
+        rm -rf {params.DIR}/*/*/alignments
+        rm -rf {params.DIR}/*/*/QA_alignments
+        rm -rf {params.DIR}/*/*/references/ref_ambig.fasta
+        rm -rf {params.DIR}/*/*/references/ref_majority.fasta
+        rm -rf {params.DIR}/*/*/references/initial_consensus.fasta
+        """
+
+
+rule bwaclean:
+    input:
+        "{}.bwt".format(reference_file)
+    params:
+        DIR = config.input['datadir']
+    shell:
+        """
+        rm -f {input}
+        rm -rf {params.DIR}/*/*/alignments
+        rm -rf {params.DIR}/*/*/references/ref_ambig*.fasta
+        rm -rf {params.DIR}/*/*/references/ref_majority*.fasta
+        """
+
+
+rule bowtieclean:
+    input:
+        INDEX1 = "{}.1.bt2".format(reference_file),
+        INDEX2 = "{}.2.bt2".format(reference_file),
+        INDEX3 = "{}.3.bt2".format(reference_file),
+        INDEX4 = "{}.4.bt2".format(reference_file),
+        INDEX5 = "{}.rev.1.bt2".format(reference_file),
+        INDEX6 = "{}.rev.2.bt2".format(reference_file)
+    params:
+        DIR = config.input['datadir']
+    shell:
+        """
+        rm -f {input}
+        rm -rf {params.DIR}/*/*/alignments
+        rm -rf {params.DIR}/*/*/references/ref_ambig*.fasta
+        rm -rf {params.DIR}/*/*/references/ref_majority*.fasta
+        """
+
+
+rule snvclean:
+    params:
+        DIR = config.input['datadir']
+    shell:
+        """
+        rm -rf {params.DIR}/*/*/variants/SNVs
+        """
+
+
+rule savageclean:
+    params:
+        DIR = config.input['datadir']
+    shell:
+        """
+        rm -rf {params.DIR}/*/*/variants/global/contigs_stage_?.fasta
+        rm -rf {params.DIR}/*/*/variants/global/stage_?
+        """
+
+
+rule haplocliqueclean:
+    params:
+        DIR = config.input['datadir']
+    shell:
+        """
+        rm -f {params.DIR}/*/*/variants/global/quasispecies.*
+        """
diff --git a/rules/haplotypes.smk b/rules/haplotypes.smk
index 37aa04e87..9354e4a6a 100644
--- a/rules/haplotypes.smk
+++ b/rules/haplotypes.smk
@@ -63,13 +63,6 @@ rule haploclique_visualization:
         {params.COMPUTE_MDS} -q {params.INPREFIX} -s {params.REGION_START} -e {params.REGION_END} {params.USE_MSA} {params.MSA} -p {output.PDF} -o {params.TSV} > {log.output} 2> >(tee {log.errfile} >&2)
         """
 
-rule haplocliqueclean:
-    params:
-        DIR = config.input['datadir']
-    shell:
-        """
-        rm {params.DIR}/*/*/variants/global/quasispecies.*
-        """
 
 if config.input['paired']:
     rule savage:
@@ -141,12 +134,4 @@ else:
             {params.SAVAGE} -t {threads} --split {params.SPLIT} -s ${{R1}} -o {params.OUTDIR} 2> >(tee -a {log.errfile} >&2)
             """
 
-rule savageclean:
-    params:
-        DIR = config.input['datadir']
-    shell:
-        """
-        rm -rf {params.DIR}/*/*/variants/global/contigs_stage_?.fasta
-        rm -rf {params.DIR}/*/*/variants/global/stage_?
-        """
-
diff --git a/rules/quality_assurance.smk b/rules/quality_assurance.smk
index 02a5cb8e7..41a2b169d 100644
--- a/rules/quality_assurance.smk
+++ b/rules/quality_assurance.smk
@@ -69,14 +69,6 @@ rule extract:
         cat {input} | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > {output} 2> >(tee {log.errfile} >&2)
         """
 
-rule extractclean:
-    params:
-        DIR = config.input['datadir']
-    shell:
-        """
-        rm -rf {params.DIR}/*/*/extracted_data
-        """
-
 
 # 2. clipping
 def len_cutoff(wildcards):
@@ -175,11 +167,4 @@ else:
             gzip {wildcards.dataset}/preprocessed_data/R1.fastq
             """
 
-rule trimmingclean:
-    params:
-        DIR = config.input['datadir']
-    shell:
-        """
-        rm -rf {params.DIR}/*/*/preprocessed_data
-        """
 
diff --git a/rules/snv.smk b/rules/snv.smk
index b00a52165..1fb6773e3 100644
--- a/rules/snv.smk
+++ b/rules/snv.smk
@@ -201,13 +201,6 @@ rule snv:
         fi
         """
 
-rule snvclean:
-    params:
-        DIR = config.input['datadir']
-    shell:
-        """
-        rm -rf {params.DIR}/*/*/variants/SNVs
-        """
 
 rule lofreq:
     input:
diff --git a/vpipe.snake b/vpipe.snake
index d3c248a8c..3721deae9 100644
--- a/vpipe.snake
+++ b/vpipe.snake
@@ -465,6 +465,7 @@ rule alltrimmed:
         trimmed_files
 
 
+include: "rules/clean.smk"
 include: "rules/quality_assurance.smk"
 include: "rules/align.smk"
 include: "rules/mafs.smk"