Merge pull request #388 from nschcolnicov/fix_igenome

Updating workflow files to work with Igenome
nf-core · Aug 28, 2024 · 228212c · 228212c
2 parents 17c027e + 7d534cd
commit 228212c
Show file tree

Hide file tree

Showing 8 changed files with 46 additions and 33 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [[#383]](https://github.com/nf-core/smrnaseq/pull/383) - Fix [parameter `--skip_fastp` throws an error](https://github.com/nf-core/smrnaseq/issues/263) - Fix parameter --skip_fastp.
 - [[#384]](https://github.com/nf-core/smrnaseq/pull/384) - Fix [filter status bug fix](https://github.com/nf-core/smrnaseq/issues/360) - Fix filter stats module and add filter contaminants test profile.
 - [[#387]](https://github.com/nf-core/smrnaseq/pull/387) - Add nf-test to local module `blat_mirna` and fixes [contaminant filter failure because the Docker image for BLAT cannot be pulled](https://github.com/nf-core/smrnaseq/issues/354). Adds a small test profile to test contaminant filter results.
+- [[#388]](https://github.com/nf-core/smrnaseq/pull/388) - Fix [igenomes fix](https://github.com/nf-core/smrnaseq/issues/360) - Fix workflow scripts so that they can use iGenomes parameters.
 - [[#391]](https://github.com/nf-core/smrnaseq/pull/391) - Change `.bai` index for `.csi` index in `samtools_index` to fix [error because of large chromosomes](https://github.com/nf-core/smrnaseq/issues/132).
 
 ## v2.3.1 - 2024-04-18 - Gray Zinc Dalmation Patch

diff --git a/main.nf b/main.nf
@@ -61,6 +61,9 @@ workflow {
     NFCORE_SMRNASEQ (
         Channel.of(file(params.input, checkIfExists: true)),
         PIPELINE_INITIALISATION.out.samplesheet,
+        params.fasta,
+        params.mirtrace_species,
+        params.bowtie_index,
         ch_versions
     )
 

diff --git a/modules/local/mirtop_quant.nf b/modules/local/mirtop_quant.nf
@@ -10,6 +10,7 @@ process MIRTOP_QUANT {
     path ("bams/*")
     path hairpin
     path gtf
+    val mirtrace_species
 
     output:
     path "mirtop/mirtop.gff"        , emit: mirtop_gff
@@ -22,7 +23,7 @@ process MIRTOP_QUANT {
     task.ext.when == null || task.ext.when
 
     script:
-    def filter_species = params.mirgenedb ? params.mirgenedb_species : params.mirtrace_species
+    def filter_species = params.mirgenedb ? params.mirgenedb_species : mirtrace_species
     """
     #Cleanup the GTF if mirbase html form is broken
     GTF="$gtf"

diff --git a/modules/local/mirtrace.nf b/modules/local/mirtrace.nf
@@ -9,6 +9,7 @@ process MIRTRACE_RUN {
     input:
     tuple val(adapter), val(ids), path(reads)
     path(mirtrace_config)
+    val mirtrace_species
 
     output:
     path "mirtrace/*"  , emit: mirtrace
@@ -30,7 +31,7 @@ process MIRTRACE_RUN {
     export mirtracejar=\$(dirname \$(which mirtrace))
 
     java $java_mem -jar \$mirtracejar/mirtrace.jar --mirtrace-wrapper-name mirtrace qc  \\
-        --species $params.mirtrace_species \\
+        --species $mirtrace_species \\
         $protocol \\
         --config $mirtrace_config \\
         --write-fasta \\

diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf
@@ -8,6 +8,7 @@ process PARSE_FASTA_MIRNA {
 
     input:
     tuple val(meta2), path(fasta)
+    val filter_species
 
     output:
     tuple val(meta2), path('*_igenome.fa'), emit: parsed_fasta
@@ -17,7 +18,6 @@ process PARSE_FASTA_MIRNA {
     task.ext.when == null || task.ext.when
 
     script:
-    def filter_species = params.mirgenedb ? params.mirgenedb_species : params.mirtrace_species
     """
     # Uncompress FASTA reference files if necessary
     FASTA="$fasta"

diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf
@@ -25,15 +25,16 @@ include { EDGER_QC             } from '../../modules/local/edger_qc/edger_qc.nf'
 
 workflow MIRNA_QUANT {
     take:
-    mature     // channel: [ val(meta), fasta file]
-    hairpin    // channel: [ val(meta), fasta file]
-    gtf        // channle: GTF file
-    reads      // channel: [ val(meta), [ reads ] ]
+    mature           // channel: [ val(meta), fasta file]
+    hairpin          // channel: [ val(meta), fasta file]
+    gtf              // channel: path GTF file
+    reads            // channel: [ val(meta), [ reads ] ]
+    mirtrace_species // val: params.mirtrace_species
 
     main:
     ch_versions = Channel.empty()
-
-    PARSE_MATURE ( mature ).parsed_fasta.set { mirna_parsed }
+    parse_species_input = params.mirgenedb ? Channel.value(params.mirgenedb_species) : Channel.value(mirtrace_species)
+    PARSE_MATURE ( mature, parse_species_input ).parsed_fasta.set { mirna_parsed }
     ch_versions = ch_versions.mix(PARSE_MATURE.out.versions)
 
     FORMAT_MATURE ( mirna_parsed )
@@ -58,7 +59,7 @@ workflow MIRNA_QUANT {
     BAM_STATS_MATURE ( BOWTIE_MAP_MATURE.out.bam, FORMAT_MATURE.out.formatted_fasta )
     ch_versions = ch_versions.mix(BAM_STATS_MATURE.out.versions)
 
-    PARSE_HAIRPIN ( hairpin ).parsed_fasta.set { hairpin_parsed }
+    PARSE_HAIRPIN ( hairpin, parse_species_input ).parsed_fasta.set { hairpin_parsed }
     ch_versions = ch_versions.mix(PARSE_HAIRPIN.out.versions)
 
     FORMAT_HAIRPIN ( hairpin_parsed )
@@ -95,8 +96,8 @@ workflow MIRNA_QUANT {
     ch_versions = ch_versions.mix(BOWTIE_MAP_SEQCLUSTER.out.versions)
 
     ch_mirtop_logs = Channel.empty()
-    if (params.mirtrace_species){
-        MIRTOP_QUANT ( BOWTIE_MAP_SEQCLUSTER.out.bam.collect{it[1]}, FORMAT_HAIRPIN.out.formatted_fasta.collect{it[1]}, gtf )
+    if (mirtrace_species){
+        MIRTOP_QUANT ( BOWTIE_MAP_SEQCLUSTER.out.bam.collect{it[1]}, FORMAT_HAIRPIN.out.formatted_fasta.collect{it[1]}, gtf, Channel.value(mirtrace_species) )
         ch_mirtop_logs = MIRTOP_QUANT.out.logs
         ch_versions = ch_versions.mix(MIRTOP_QUANT.out.versions)
 

diff --git a/subworkflows/local/mirtrace.nf b/subworkflows/local/mirtrace.nf
@@ -6,7 +6,8 @@ include { MIRTRACE_RUN } from '../../modules/local/mirtrace'
 
 workflow MIRTRACE {
     take:
-    reads      // channel: [ val(adapterseq), [ val(ids) ], [ path(reads) ] ]
+    reads            // channel: [ val(adapterseq), [ val(ids) ], [ path(reads) ] ]
+    mirtrace_species // value  : params.mirtrace_species
 
     main:
 
@@ -20,7 +21,8 @@ workflow MIRTRACE {
 
     MIRTRACE_RUN (
         reads,
-        ch_mirtrace_config
+        ch_mirtrace_config,
+        mirtrace_species
     )
 
     emit:

diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf
@@ -41,20 +41,23 @@ ch_fastp_adapters                     = Channel.fromPath(params.fastp_known_mirn
 workflow NFCORE_SMRNASEQ {
 
     take:
-    ch_input            // channel: samplesheet file as specified to --input
-    ch_samplesheet      // channel: sample fastqs parsed from --input
-    ch_versions         // channel: [ path(versions.yml) ]
+    ch_input             // channel: samplesheet file as specified to --input
+    ch_samplesheet       // channel: sample fastqs parsed from --input
+    val_fasta            // params.fasta
+    val_mirtrace_species // params.mirtrace_species
+    bowtie_index         // params.bowtie_index
+    ch_versions          // channel: [ path(versions.yml) ]
 
     main:
     //Config checks
     // Check optional parameters
-    if (!params.mirgenedb && !params.mirtrace_species) {
+    if (!params.mirgenedb && !val_mirtrace_species) {
             exit 1, "Reference species for miRTrace is not defined via the --mirtrace_species parameter."
         }
 
     // Genome options
-    def mirna_gtf_from_species = params.mirtrace_species ? (params.mirtrace_species == 'hsa' ? "https://github.com/nf-core/test-datasets/raw/smrnaseq/miRBase/hsa.gff3" : "https://mirbase.org/download/CURRENT/genomes/${params.mirtrace_species}.gff3") : false
-    def mirna_gtf = params.mirna_gtf ?: mirna_gtf_from_species
+    mirna_gtf_from_species = val_mirtrace_species ? (val_mirtrace_species == 'hsa' ? "https://github.com/nf-core/test-datasets/raw/smrnaseq/miRBase/hsa.gff3" : "https://mirbase.org/download/CURRENT/genomes/${val_mirtrace_species}.gff3") : false
+    mirna_gtf = params.mirna_gtf ?: mirna_gtf_from_species
 
     if (!params.mirgenedb) {
         if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" }
@@ -108,23 +111,23 @@ workflow NFCORE_SMRNASEQ {
     )
     ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
 
-    ch_fasta = params.fasta ? file(params.fasta): []
+    ch_fasta = val_fasta ? file(val_fasta): []
     ch_reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
 
     // even if bowtie index is specified, there still needs to be a fasta.
     // without fasta, no genome analysis.
-    if(params.fasta) {
+    if(val_fasta) {
         //Prepare bowtie index, unless specified
         //This needs to be done here as the index is used by GENOME_QUANT
         if(params.bowtie_index) {
-            ch_fasta = Channel.fromPath(params.fasta)
+            ch_fasta = Channel.fromPath(val_fasta)
             if (params.bowtie_index.endsWith(".tar.gz")) {
                 UNTAR_BOWTIE_INDEX ( [ [], params.bowtie_index ]).files.map { it[1] }.set {ch_bowtie_index}
                 ch_versions  = ch_versions.mix(UNTAR_BOWTIE_INDEX.out.versions)
             } else {
                 Channel.fromPath("${params.bowtie_index}**ebwt", checkIfExists: true).ifEmpty{ error "Bowtie1 index directory not found: ${params.bowtie_index}" }.filter { it != null }.set { ch_bowtie_index }
             }
-            } else {
+        } else {
             INDEX_GENOME ( [ [:], ch_fasta ] )
             ch_versions = ch_versions.mix(INDEX_GENOME.out.versions)
             ch_bowtie_index = INDEX_GENOME.out.index
@@ -181,8 +184,8 @@ workflow NFCORE_SMRNASEQ {
     //
     // SUBWORKFLOW: MIRTRACE
     //
-    if (params.mirtrace_species) {
-            MIRTRACE(ch_mirtrace_inputs)
+    if (val_mirtrace_species) {
+            MIRTRACE(ch_mirtrace_inputs, val_mirtrace_species)
             ch_versions = ch_versions.mix(MIRTRACE.out.versions)
         } else {
             log.warn "The parameter --mirtrace_species is absent. MIRTRACE quantification skipped."
@@ -209,20 +212,21 @@ workflow NFCORE_SMRNASEQ {
         ch_reads_for_mirna = CONTAMINANT_FILTER.out.filtered_reads
 
     }
-
+    //MIRNA_QUANT process should still run even if mirtrace_species is null when mirgenedb is true
     MIRNA_QUANT (
-        [ [:], reference_mature],
-        [ [:], reference_hairpin],
-        mirna_gtf,
-        ch_reads_for_mirna
+    [ [:], reference_mature],
+    [ [:], reference_hairpin],
+    mirna_gtf,
+    ch_reads_for_mirna,
+    val_mirtrace_species
     )
     ch_versions = ch_versions.mix(MIRNA_QUANT.out.versions)
 
     //
     // GENOME
     //
     genome_stats = Channel.empty()
-    if (params.fasta){
+    if (val_fasta){
         GENOME_QUANT ( ch_bowtie_index, ch_fasta, MIRNA_QUANT.out.unmapped )
         genome_stats = GENOME_QUANT.out.stats
         ch_versions = ch_versions.mix(GENOME_QUANT.out.versions)
@@ -306,7 +310,7 @@ workflow NFCORE_SMRNASEQ {
         ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mature_stats.collect({it[1]}).ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.hairpin_stats.collect({it[1]}).ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mirtop_logs.collect().ifEmpty([]))
-        if (params.mirtrace_species) {
+        if (val_mirtrace_species) {
         ch_multiqc_files = ch_multiqc_files.mix(MIRTRACE.out.results.collect().ifEmpty([]))
             }