From 6fe9f6322aa4312f06ab1bf994f458d0d959cfda Mon Sep 17 00:00:00 2001 From: jvfe Date: Mon, 4 Sep 2023 08:03:56 -0300 Subject: [PATCH] refactor: Make assemblyqc (quast) run by default --- subworkflows/local/assembly.nf | 16 -------------- subworkflows/local/assemblyqc.nf | 38 ++++++++++---------------------- workflows/arete.nf | 19 ++++++++++++++-- 3 files changed, 29 insertions(+), 44 deletions(-) diff --git a/subworkflows/local/assembly.nf b/subworkflows/local/assembly.nf index 30a0f3c2..590f10b6 100644 --- a/subworkflows/local/assembly.nf +++ b/subworkflows/local/assembly.nf @@ -76,7 +76,6 @@ workflow ASSEMBLE_SHORTREADS{ /* * MODULE: Assembly */ - // unicycler can accept short reads and long reads. For now, shortread only: Pass empty list for optional file args ch_unicycler_input = FASTP.out.reads.map { it -> it + [[]]} UNICYCLER(ch_unicycler_input) @@ -84,26 +83,11 @@ workflow ASSEMBLE_SHORTREADS{ // Unicycler outputs not quite right for QUAST. Need to re-arrange // pattern adapted from nf-core/bacass - ch_assembly = Channel.empty() - ch_assembly = ch_assembly.mix(UNICYCLER.out.scaffolds.dump(tag: 'unicycler')) - ch_assembly - .map { meta, fasta -> fasta.toString() } - .collectFile(name:'assemblies.txt', newLine: true) - .set { ch_to_quast } - /* - * Module: Evaluate Assembly - */ - QUAST(ch_to_quast, ch_reference_genome, [], use_reference_genome, false) - ch_software_versions = ch_software_versions.mix(QUAST.out.versions.ifEmpty(null)) - ch_multiqc_files = ch_multiqc_files.mix(RAW_FASTQC.out.zip.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(TRIM_FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QUAST.out.tsv.collect()) emit: - assemblies = ch_assembly scaffolds = UNICYCLER.out.scaffolds - quast_report = QUAST.out.transposed_report assembly_software = ch_software_versions multiqc = ch_multiqc_files diff --git a/subworkflows/local/assemblyqc.nf b/subworkflows/local/assemblyqc.nf index d0ab17e6..6fc502cd 100644 --- a/subworkflows/local/assemblyqc.nf +++ b/subworkflows/local/assemblyqc.nf @@ -6,7 +6,6 @@ include { CHECKM_LINEAGEWF } from '../../modules/nf-core/checkm/lineagewf/main' workflow CHECK_ASSEMBLIES { take: assemblies - krakendb_cache reference_genome use_reference_genome @@ -15,22 +14,6 @@ workflow CHECK_ASSEMBLIES { ch_multiqc_files = Channel.empty() ch_software_versions = Channel.empty() - ///* - // * MODULE: Run Kraken2 - // */ - if (!params.skip_kraken) { - if(krakendb_cache) { - GET_DB_CACHE(krakendb_cache) - KRAKEN2_RUN(assemblies, GET_DB_CACHE.out.minikraken, false, true) - } else { - KRAKEN2_DB() - KRAKEN2_RUN(assemblies, KRAKEN2_DB.out.minikraken, false, true) - } - - ch_software_versions = ch_software_versions.mix(KRAKEN2_RUN.out.versions.first().ifEmpty(null)) - ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_RUN.out.report.collect{it[1]}.ifEmpty([])) - } - fasta_extension = assemblies.map{ id, path -> path.getExtension() }.first() /* @@ -41,20 +24,23 @@ workflow CHECK_ASSEMBLIES { /* * Module: QUAST quality check */ - // Need to reformat assembly channel for QUAST - // pattern adapted from nf-core/bacass - ch_assembly = Channel.empty() - ch_assembly = ch_assembly.mix(assemblies.dump(tag: 'assembly')) - ch_assembly - .map { meta, fasta -> fasta } //QUAST doesn't take the meta tag + assemblies + .map { meta, fasta -> fasta.toString() } .collectFile(name:'assemblies.txt', newLine: true) - .set { ch_to_quast } - QUAST(ch_to_quast, reference_genome, [], use_reference_genome, false) + .set { quast_input } + + QUAST ( + quast_input, + reference_genome, + [], + use_reference_genome, + false + ) ch_software_versions = ch_software_versions.mix(QUAST.out.versions.ifEmpty(null)) - ch_multiqc_files = ch_multiqc_files.mix(QUAST.out.tsv.collect()) emit: + quast_report = QUAST.out.transposed_report assemblyqc_software = ch_software_versions multiqc = ch_multiqc_files } diff --git a/workflows/arete.nf b/workflows/arete.nf index 7da61780..967264ba 100755 --- a/workflows/arete.nf +++ b/workflows/arete.nf @@ -146,6 +146,13 @@ workflow ARETE { ASSEMBLE_SHORTREADS.out.scaffolds.set { assemblies } + CHECK_ASSEMBLIES ( + assemblies, + ch_reference_genome, + use_reference_genome + ) + ch_software_versions = ch_software_versions.mix(CHECK_ASSEMBLIES.out.assemblyqc_software) + if (db_cache) { /////////////////// ANNOTATION /////////////////////////// ANNOTATE_ASSEMBLIES( @@ -198,7 +205,7 @@ workflow ARETE { RECOMBINATION ( assemblies, RUN_POPPUNK.out.clusters, - ASSEMBLE_SHORTREADS.out.quast_report + CHECK_ASSEMBLIES.out.quast_report ) } } @@ -240,6 +247,7 @@ workflow ARETE { ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(GET_SOFTWARE_VERSIONS.out.yaml.collect()) ch_multiqc_files = ch_multiqc_files.mix(ASSEMBLE_SHORTREADS.out.multiqc) + ch_multiqc_files = ch_multiqc_files.mix(CHECK_ASSEMBLIES.out.multiqc) ch_multiqc_files = ch_multiqc_files.mix(ANNOTATE_ASSEMBLIES.out.multiqc) MULTIQC( @@ -285,6 +293,13 @@ workflow ASSEMBLY { ch_software_versions = ch_software_versions.mix(ASSEMBLE_SHORTREADS.out.assembly_software) + CHECK_ASSEMBLIES( + ASSEMBLE_SHORTREADS.out.scaffolds, + ch_reference_genome, + use_reference_genome + ) + ch_software_versions = ch_software_versions.mix(CHECK_ASSEMBLIES.out.assemblyqc_software) + // Get unique list of files containing version information ch_software_versions .map { it -> if (it) [ it.baseName, it ] } @@ -300,6 +315,7 @@ workflow ASSEMBLY { ch_workflow_summary = Channel.value(workflow_summary) ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(CHECK_ASSEMBLIES.out.multiqc) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ASSEMBLE_SHORTREADS.out.multiqc) @@ -480,7 +496,6 @@ workflow QUALITYCHECK { CHECK_ASSEMBLIES( ANNOTATION_INPUT_CHECK.out.genomes, - db_cache, ch_reference_genome, use_reference_genome )