Updated channels #398

Merged · 12 commits · Sep 3, 2024
59 changes: 36 additions & 23 deletions .github/workflows/ci.yml
@@ -4,14 +4,20 @@ on:
push:
branches:
- dev
- master
pull_request:
branches:
- dev
- master
release:
types: [published]

env:
NXF_ANSI_LOG: false
CAPSULE_LOG: none
NFTEST_VER: "0.9.0"
NFT_VER: "0.9.0"
NFT_WORKDIR: "~"
NFT_DIFF: "pdiff"
NFT_DIFF_ARGS: "--line-numbers --expand-tabs=2"

concurrency:
group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
@@ -26,42 +32,49 @@ jobs:
strategy:
fail-fast: false
matrix:
shard: [1, 2, 3, 4]
NXF_VER:
- "23.04.0"
- "latest-everything"
profile:
- "test_umi"
- "test_mirgenedb"
- "test_contamination_tech_reps"
- "test_skipfastp"
profile: ["docker"]
env:
SHARDS: "4"
steps:
- name: Check out pipeline code
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4

- name: Install Nextflow
uses: nf-core/setup-nextflow@v2
with:
version: "${{ matrix.NXF_VER }}"
fetch-depth: 0

- name: Disk space cleanup
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
- uses: actions/setup-python@v4
with:
python-version: "3.11"
architecture: "x64"

- name: Install nf-test
- name: Install pdiff to see diff between nf-test snapshots
run: |
wget -qO- https://code.askimed.com/install/nf-test | bash -s $NFTEST_VER
sudo mv nf-test /usr/local/bin/
python -m pip install --upgrade pip
pip install pdiff

- name: Run nf-test
run: nf-test test tests/${{ matrix.profile }}.nf.test --profile +docker --junitxml=test.xml
- uses: nf-core/setup-nextflow@v2
with:
version: "${{ matrix.NXF_VER }}"

- uses: nf-core/setup-nf-test@v1
with:
version: ${{ env.NFT_VER }}

- name: Output log on failure
if: failure()
- name: Run Tests (Shard ${{ matrix.shard }}/${{ env.SHARDS }})
run: |
sudo apt install bat > /dev/null
batcat --decorations=always --color=always .nf-test/tests/*/output/pipeline_info/software_versions.yml
nf-test test \
--ci \
--shard ${{ matrix.shard }}/${{ env.SHARDS }} \
--changed-since HEAD^ \
--profile "+${{ matrix.profile }}" \
--filter pipeline \
--junitxml=test.xml

- name: Publish Test Report
uses: mikepenz/action-junit-report@v3
if: always() # always run even if the previous step fails
with:
report_paths: test.xml
annotate_only: true
3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -22,7 +22,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [[#387]](https://github.com/nf-core/smrnaseq/pull/387) - Add [contaminant filter failure because the Docker image for BLAT cannot be pulled](https://github.com/nf-core/smrnaseq/issues/354) - Add nf-test to local module `blat_mirna` and fixes . Adds a small test profile to test contaminant filter results.
- [[#388]](https://github.com/nf-core/smrnaseq/pull/388) - Fix [igenomes fix](https://github.com/nf-core/smrnaseq/issues/360) - Fix workflow scripts so that they can use igenome parameters.
- [[#391]](https://github.com/nf-core/smrnaseq/pull/391) - Fix [error because of large chromosomes](https://github.com/nf-core/smrnaseq/issues/132) - Change `.bai` index for `.csi` index in `samtools_index` to fix .
- [[#392]](https://github.com/nf-core/smrnaseq/pull/392) - Update [Reduce tests](https://github.com/orgs/nf-core/projects/74/views/7?pane=issue&itemId=76437974) - Combine and optimize tests, and reduce samplesheets sizes.
- [[#392]](https://github.com/nf-core/smrnaseq/pull/392) - Update [Reduce tests](https://github.com/nf-core/smrnaseq/issues/389) - Combine and optimize tests, and reduce samplesheets sizes.
- [[#398]](https://github.com/nf-core/smrnaseq/pull/398) - Update [Input channels](https://github.com/nf-core/smrnaseq/issues/390) - Updated channel and params handling through workflows.

## v2.3.1 - 2024-04-18 - Gray Zinc Dalmation Patch

6 changes: 3 additions & 3 deletions conf/test_mirgenedb.config
@@ -25,9 +25,9 @@ params {

mirgenedb = true

mirgenedb_mature = "https://mirgenedb.org/fasta/hsa?mat=1"
mirgenedb_hairpin = "https://mirgenedb.org/static/data/hsa/hsa-hg38-pri-30-30.fas"
mirgenedb_gff = "https://mirgenedb.org/gff/hsa?sort=pos&all=1"
mirgenedb_mature = "https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_mature.fa"
mirgenedb_hairpin = "https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_hairpin.fa"
mirgenedb_gff = "https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa.gff"
mirgenedb_species = "Hsa"

skip_mirdeep = true
53 changes: 45 additions & 8 deletions main.nf
@@ -18,6 +18,7 @@ nextflow.enable.dsl = 2
*/

include { NFCORE_SMRNASEQ } from './workflows/smrnaseq'
include { PREPARE_GENOME } from './subworkflows/local/prepare_genome'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_smrnaseq_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_smrnaseq_pipeline'
include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_smrnaseq_pipeline'
@@ -28,9 +29,17 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_smrn
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

params.fasta = getGenomeAttribute('fasta')
params.mirtrace_species = getGenomeAttribute('mirtrace_species')
params.bowtie_index = getGenomeAttribute('bowtie')
params.fasta = getGenomeAttribute('fasta')
params.mirtrace_species = getGenomeAttribute('mirtrace_species')
params.bowtie_index = getGenomeAttribute('bowtie')
params.mirna_gtf = getGenomeAttribute('mirna_gtf') //not in igenomes yet
params.rrna = getGenomeAttribute('rrna') //not in igenomes yet
params.trna = getGenomeAttribute('trna') //not in igenomes yet
params.cdna = getGenomeAttribute('cdna') //not in igenomes yet
params.ncrna = getGenomeAttribute('ncrna') //not in igenomes yet
params.pirna = getGenomeAttribute('pirna') //not in igenomes yet
params.other_contamination = getGenomeAttribute('other_contamination') //not in igenomes yet


/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -42,6 +51,23 @@ workflow {
main:
ch_versions = Channel.empty()

//
// SUBWORKFLOW : Prepare reference genome files
//
PREPARE_GENOME (
params.fasta,
params.bowtie_index,
params.mirtrace_species,
params.rrna,
params.trna,
params.cdna,
params.ncrna,
params.pirna,
params.other_contamination,
params.fastp_known_mirna_adapters,
params.mirna_gtf
)

//
// SUBWORKFLOW: Run initialisation tasks
//
@@ -59,12 +85,23 @@
// WORKFLOW: Run main workflow
//
NFCORE_SMRNASEQ (
Channel.of(file(params.input, checkIfExists: true)),
PREPARE_GENOME.out.has_fasta,
PREPARE_GENOME.out.has_mirtrace_species,
PREPARE_GENOME.out.mirna_adapters,
PREPARE_GENOME.out.mirtrace_species,
PREPARE_GENOME.out.reference_mature,
PREPARE_GENOME.out.reference_hairpin,
PREPARE_GENOME.out.mirna_gtf,
PREPARE_GENOME.out.fasta,
PREPARE_GENOME.out.bowtie_index,
PREPARE_GENOME.out.rrna,
PREPARE_GENOME.out.trna,
PREPARE_GENOME.out.cdna,
PREPARE_GENOME.out.ncrna,
PREPARE_GENOME.out.pirna,
PREPARE_GENOME.out.other_contamination,
ch_versions,
PIPELINE_INITIALISATION.out.samplesheet,
params.fasta,
params.mirtrace_species,
params.bowtie_index,
ch_versions
)

//
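
For readers unfamiliar with the pattern above: `getGenomeAttribute` is the nf-core helper that pulls a per-genome attribute out of the iGenomes config, which is why the attributes marked "not in igenomes yet" simply resolve to null. A minimal sketch of what such a helper typically looks like, assuming the standard `params.genomes` layout (the actual implementation lives in `subworkflows/local/utils_nfcore_smrnaseq_pipeline` and may differ in detail):

```groovy
// Sketch only: assumes the usual nf-core iGenomes structure in params.genomes.
def getGenomeAttribute(attribute) {
    if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
        if (params.genomes[ params.genome ].containsKey(attribute)) {
            return params.genomes[ params.genome ][ attribute ]
        }
    }
    // Attribute missing for this genome (e.g. 'rrna', 'trna') -> param stays null.
    return null
}
```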
2 changes: 1 addition & 1 deletion modules/local/mirdeep2_run.nf
@@ -2,7 +2,7 @@ def VERSION = '2.0.1'

process MIRDEEP2_RUN {
label 'process_medium'
errorStrategy 'ignore'
errorStrategy 'ignore' //TODO why was it set like this?

conda 'bioconda::mirdeep2=2.0.1.2'
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
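
On the `errorStrategy 'ignore'` question flagged in the TODO above: a blanket ignore hides every miRDeep2 failure. If the maintainers decide to tighten it, one hypothetical alternative (not part of this PR, names are illustrative) is to retry transient failures first and only then ignore:

```groovy
// Hypothetical illustration, not pipeline code: retry twice, then ignore,
// so configuration errors surface in the early attempts while an unstable
// tool run cannot abort the whole pipeline.
process EXAMPLE_TOLERANT_TASK {
    errorStrategy { task.attempt <= 2 ? 'retry' : 'ignore' }
    maxRetries 2

    output:
    stdout

    script:
    """
    echo "attempt ${task.attempt}"
    """
}

workflow {
    EXAMPLE_TOLERANT_TASK()
    EXAMPLE_TOLERANT_TASK.out.view()
}
```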
2 changes: 1 addition & 1 deletion nextflow_schema.json
@@ -258,7 +258,7 @@
"exists": true,
"mimetype": "text/plain",
"default": "${projectDir}/assets/known_adapters.fa",
"description": "FastA with known miRNA adapter sequences for adapter trimming",
"description": "Fasta with known miRNA adapter sequences for adapter trimming",
"fa_icon": "far fa-question-circle"
},
"min_trimmed_reads": {
46 changes: 22 additions & 24 deletions subworkflows/local/contaminant_filter.nf
@@ -25,30 +25,28 @@ include { FILTER_STATS } from '../../modules/local/filter_stats'

workflow CONTAMINANT_FILTER {
take:
mirna
rrna
trna
cdna
ncrna
pirna
other
reads // channel: [ val(meta), [ reads ] ]
ch_reference_hairpin // channel: [ val(meta), fasta file]
ch_rrna // channel: [ path(fasta) ]
ch_trna // channel: [ path(fasta) ]
ch_cdna // channel: [ path(fasta) ]
ch_ncrna // channel: [ path(fasta) ]
ch_pirna // channel: [ path(fasta) ]
ch_other_contamination // channel: [ path(fasta) ]
ch_reads_for_mirna // channel: [ val(meta), [ reads ] ]

main:

ch_versions = Channel.empty()
ch_filter_stats = Channel.empty()
ch_mqc_results = Channel.empty()

rrna_reads = reads

reads.set { rrna_reads }
ch_reads_for_mirna.set { rrna_reads }

if (params.rrna) {
// Index DB and filter $reads emit: $rrna_reads
INDEX_RRNA ( rrna )
INDEX_RRNA ( ch_rrna )
ch_versions = ch_versions.mix(INDEX_RRNA.out.versions)
MAP_RRNA ( reads, INDEX_RRNA.out.index, 'rRNA' )
MAP_RRNA ( ch_reads_for_mirna, INDEX_RRNA.out.index.first(), Channel.value('rRNA') )
ch_versions = ch_versions.mix(MAP_RRNA.out.versions)
ch_filter_stats = ch_filter_stats.mix(MAP_RRNA.out.stats.ifEmpty(null))
MAP_RRNA.out.unmapped.set { rrna_reads }
@@ -58,9 +56,9 @@

if (params.trna) {
// Index DB and filter $rrna_reads emit: $trna_reads
INDEX_TRNA ( trna )
INDEX_TRNA ( ch_trna )
ch_versions = ch_versions.mix(INDEX_TRNA.out.versions)
MAP_TRNA ( rrna_reads, INDEX_TRNA.out.index, 'tRNA')
MAP_TRNA ( rrna_reads, INDEX_TRNA.out.index.first(), Channel.value("tRNA") )
ch_versions = ch_versions.mix(MAP_TRNA.out.versions)
ch_filter_stats = ch_filter_stats.mix(MAP_TRNA.out.stats.ifEmpty(null))
MAP_TRNA.out.unmapped.set { trna_reads }
@@ -70,11 +68,11 @@


if (params.cdna) {
BLAT_CDNA ( 'cdna', mirna, cdna )
BLAT_CDNA ( Channel.value( 'cdna' ), ch_reference_hairpin, ch_cdna )
ch_versions = ch_versions.mix(BLAT_CDNA.out.versions)
INDEX_CDNA ( BLAT_CDNA.out.filtered_set )
ch_versions = ch_versions.mix(INDEX_CDNA.out.versions)
MAP_CDNA ( trna_reads, INDEX_CDNA.out.index, 'cDNA' )
MAP_CDNA ( trna_reads, INDEX_CDNA.out.index.first(), Channel.value('cDNA'))
ch_versions = ch_versions.mix(MAP_CDNA.out.versions)
ch_filter_stats = ch_filter_stats.mix(MAP_CDNA.out.stats.ifEmpty(null))
MAP_CDNA.out.unmapped.set { cdna_reads }
@@ -83,11 +81,11 @@
cdna_reads.set { ncrna_reads }

if (params.ncrna) {
BLAT_NCRNA ( 'ncrna', mirna, ncrna )
BLAT_NCRNA ( Channel.value( 'ncrna' ), ch_reference_hairpin, ch_ncrna )
ch_versions = ch_versions.mix(BLAT_NCRNA.out.versions)
INDEX_NCRNA ( BLAT_NCRNA.out.filtered_set )
ch_versions = ch_versions.mix(INDEX_NCRNA.out.versions)
MAP_NCRNA ( cdna_reads, INDEX_NCRNA.out.index, 'ncRNA' )
MAP_NCRNA ( cdna_reads, INDEX_NCRNA.out.index.first(), Channel.value('ncRNA') )
ch_versions = ch_versions.mix(MAP_NCRNA.out.versions)
ch_filter_stats = ch_filter_stats.mix(MAP_NCRNA.out.stats.ifEmpty(null))
MAP_NCRNA.out.unmapped.set { ncrna_reads }
@@ -96,24 +94,24 @@
ncrna_reads.set { pirna_reads }

if (params.pirna) {
BLAT_PIRNA ( 'other', mirna, pirna )
BLAT_PIRNA ( Channel.value( 'other' ), ch_reference_hairpin, ch_pirna )
ch_versions = ch_versions.mix(BLAT_PIRNA.out.versions)
INDEX_PIRNA ( BLAT_PIRNA.out.filtered_set )
ch_versions = ch_versions.mix(INDEX_PIRNA.out.versions)
MAP_PIRNA ( ncrna_reads, INDEX_PIRNA.out.index, 'piRNA' )
MAP_PIRNA ( ncrna_reads, INDEX_PIRNA.out.index.first(), Channel.value('piRNA'))
ch_versions = ch_versions.mix(MAP_PIRNA.out.versions)
ch_filter_stats = ch_filter_stats.mix(MAP_PIRNA.out.stats.ifEmpty(null))
MAP_PIRNA.out.unmapped.set { pirna_reads }
}

pirna_reads.set { other_cont_reads }

if (other) {
BLAT_OTHER ( 'other', mirna, other)
if (params.other_contamination) {
BLAT_OTHER ( Channel.value( 'other' ), ch_reference_hairpin, ch_other_contamination)
ch_versions = ch_versions.mix(BLAT_OTHER.out.versions)
INDEX_OTHER ( BLAT_OTHER.out.filtered_set )
ch_versions = ch_versions.mix(INDEX_OTHER.out.versions)
MAP_OTHER ( ncrna_reads, INDEX_OTHER.out.index, 'other' )
MAP_OTHER ( ncrna_reads, INDEX_OTHER.out.index.first(), Channel.value('other'))
ch_versions = ch_versions.mix(MAP_OTHER.out.versions)
ch_filter_stats = ch_filter_stats.mix(MAP_OTHER.out.stats.ifEmpty(null))
MAP_OTHER.out.unmapped.set { other_cont_reads }
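
The `.first()` calls added above are the functional core of this hunk: a bowtie index produced once per run arrives on a queue channel, and a single-item queue channel is drained after the first sample, leaving later mapping tasks without an index. `.first()` returns a value channel that is re-read for every read set, and the same reasoning applies to the `Channel.value('rRNA')`-style labels. A standalone sketch of the behaviour (names are illustrative, not from the pipeline):

```groovy
// Illustrative only. ALIGN runs once per sample because 'index' is a value
// channel; with a plain single-item queue channel it would run exactly once.
process ALIGN {
    input:
    val sample
    val idx

    output:
    stdout

    script:
    """
    echo "mapping ${sample} against ${idx}"
    """
}

workflow {
    samples = Channel.of('sampleA', 'sampleB', 'sampleC')  // queue channel, 3 items
    index   = Channel.of('/some/index/prefix').first()     // value channel, reusable

    ALIGN(samples, index)
    ALIGN.out.view()
}
```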
15 changes: 7 additions & 8 deletions subworkflows/local/genome_quant.nf
@@ -7,23 +7,22 @@ include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie

workflow GENOME_QUANT {
take:
bowtie_index
fasta_formatted // fasta as generated by bowtie index step
reads // channel: [ val(meta), [ reads ] ]
ch_bowtie_index // channel: [genome.1.ebwt, genome.2.ebwt, genome.3.ebwt, genome.4.ebwt, genome.rev.1.ebwt, genome.rev.2.ebwt]
ch_fasta // channel: [ val(meta), path(fasta) ]
ch_reads // channel: [ val(meta), [ reads ] ]

main:
ch_versions = Channel.empty()

BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() )
BOWTIE_MAP_GENOME ( ch_reads, ch_bowtie_index )
ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions)

ch_fasta_formatted_for_sort = fasta_formatted .map { file -> tuple(file.baseName, file) }
BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, ch_fasta_formatted_for_sort )
BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, ch_fasta )
ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)

emit:
fasta = fasta_formatted
index = bowtie_index
fasta = ch_fasta //TODO: This fasta is the same one that was used as input, ask the original developer, if they meant to have something else here
index = ch_bowtie_index //TODO: Same here, are we outputting the right files? We can remove these channels if we are.
stats = BAM_SORT_STATS_SAMTOOLS.out.stats

versions = ch_versions
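
GENOME_QUANT now expects the fasta to arrive already shaped as `[ val(meta), path(fasta) ]`, which is why the old `file -> tuple(file.baseName, file)` remapping before `BAM_SORT_STATS_SAMTOOLS` could be dropped. A hedged sketch of building a channel of that shape upstream (parameter names are assumptions, not pipeline code):

```groovy
// Illustrative only: produce the [ val(meta), path(fasta) ] tuples that
// GENOME_QUANT now consumes directly.
workflow {
    ch_fasta = Channel
        .fromPath(params.fasta, checkIfExists: true)        // assumes --fasta was given
        .map { fasta -> [ [ id: fasta.baseName ], fasta ] }  // attach a minimal meta map

    ch_fasta.view()   // e.g. [[id:genome], /path/to/genome.fa]
}
```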
10 changes: 5 additions & 5 deletions subworkflows/local/mirdeep2.nf
@@ -9,10 +9,10 @@ include { MIRDEEP2_RUN } from '../../modules/local/mirdeep2_run'
workflow MIRDEEP2 {
take:
reads // channel: [ val(meta), [ reads ] ]
fasta
index
hairpin
mature
fasta // channel: [ val(meta), path(fasta) ]
index // channel: [genome.1.ebwt, genome.2.ebwt, genome.3.ebwt, genome.4.ebwt, genome.rev.1.ebwt, genome.rev.2.ebwt]
hairpin // channel: [ path(hairpin.fa) ]
mature // channel: [ path(mature.fa) ]

main:
ch_versions = Channel.empty()
@@ -23,7 +23,7 @@ workflow MIRDEEP2 {
MIRDEEP2_MAPPER ( MIRDEEP2_PIGZ.out.reads, index )
ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions.first())

MIRDEEP2_RUN ( fasta, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature )
MIRDEEP2_RUN ( fasta.map{meta,file->file}, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature )
ch_versions = ch_versions.mix(MIRDEEP2_RUN.out.versions.first())

emit:
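
The counterpart of the change above: MIRDEEP2 now receives the fasta as a `[ val(meta), path(fasta) ]` tuple, but `MIRDEEP2_RUN` still takes the bare file, hence the `fasta.map{meta,file->file}` at the call site. The pattern in isolation (values are illustrative):

```groovy
// Illustrative only: strip the meta map when a module accepts just the path.
workflow {
    ch_fasta = Channel.of( [ [ id: 'genome' ], file('genome.fa') ] )

    ch_fasta
        .map { meta, fasta -> fasta }   // keep only the path element
        .view()                         // prints the bare path, e.g. .../genome.fa
}
```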