diff --git a/CHANGELOG.md b/CHANGELOG.md index f719cd01..e90969fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#439]](https://github.com/nf-core/smrnaseq/pull/439) - Fix [Fix paired end samples processing](https://github.com/nf-core/smrnaseq/issues/415) - Fix paired end sample handling and add test profile. - [[#441]](https://github.com/nf-core/smrnaseq/pull/441) - Migrate [local contaminant bowtie to nf-core](https://github.com/nf-core/smrnaseq/issues/436) - Replace local processes with `BOWTIE2_ALIGN`. - [[#443]](https://github.com/nf-core/smrnaseq/pull/443) - Migrate [mirna and genome_quant bowtie to nf-core](https://github.com/nf-core/smrnaseq/issues/436) - Replace local processes with `BOWTIE_ALIGN`. +- [[#448]](https://github.com/nf-core/smrnaseq/pull/448) - Migrate local mirdeep to [nf-core mirdeep2 modules and subworkflow](https://github.com/nf-core/smrnaseq/issues/443) and generate [test profile for mirdeep2](https://github.com/nf-core/smrnaseq/issues/399). ## v2.3.1 - 2024-04-18 - Gray Zinc Dalmation Patch diff --git a/conf/modules.config b/conf/modules.config index 56151f76..5867e9c3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -539,24 +539,19 @@ process { // MIRDEEP // - withName: 'PIGZ_UNCOMPRESS' { - tag = {"${meta.id}"} - stageInMode = 'copy' + withName: 'MIRDEEP2_MAPPER' { + ext.args = "-c -j -m -v" + publishDir = [ enabled: false ] } - withName: 'NFCORE_SMRNASEQ:MIRDEEP2:MIRDEEP2_MAPPER' { - publishDir = [ - path: { "${params.outdir}/mirdeep2/mapper" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] + withName: 'SEQKIT_REPLACE' { + ext.args = "-p '\\s.+'" + ext.suffix = "fasta" + publishDir = [ enabled: false ] } - withName: 'NFCORE_SMRNASEQ:MIRDEEP2:MIRDEEP2_RUN' { - publishDir = [ - path: { "${params.outdir}/mirdeep2/run" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + + withName: 'MIRDEEP2_MIRDEEP2' { + errorStrategy = { task.exitStatus == 255 ? 'ignore' : 'terminate' } /* exit 255: miRDeep2 produced empty output (see docs/usage.md) and is skipped; any other failure terminates as usual */ } // diff --git a/conf/test_mirgenedb.config b/conf/test_mirgenedb.config index ea7babb4..9567dad4 100644 --- a/conf/test_mirgenedb.config +++ b/conf/test_mirgenedb.config @@ -11,8 +11,8 @@ */ params { - config_profile_name = 'Test profile with mirgeneDB inputs' - config_profile_description = 'Minimal test dataset to check pipeline function with mirgeneDB inputs' + config_profile_name = 'Test profile with mirgeneDB inputs and run mirdeep2' + config_profile_description = 'Minimal test dataset to check pipeline function with mirgeneDB inputs and run mirdeep2' // Limit resources so that this can run on GitHub Actions max_cpus = 2 @@ -30,7 +30,7 @@ params { mirgenedb_gff = "https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa.gff" mirgenedb_species = "Hsa" - skip_mirdeep = true + skip_mirdeep = false save_intermediates = true } diff --git a/docs/output.md b/docs/output.md index 1b771609..603ef4b6 100644 --- a/docs/output.md +++ b/docs/output.md @@ -151,9 +151,9 @@ If `--save_intermediates` is specified, these files will be placed in this direc **Output directory: `results/mirdeep2`** -- `mirdeep/timestamp_sample.bed` File with the known and novel miRNAs in bed format. -- `mirdeep/timestamp_sample.csv` File with an overview of all detected miRNAs (known and novel) in csv format. -- `mirdeep/timestamp_sample.html` A HTML report with an overview of all detected miRNAs (known and novel) in html format. +- `mirdeep2/result_{sample.id}.bed` File with the known and novel miRNAs in bed format. 
+- `mirdeep2/result_{sample.id}.csv` File with an overview of all detected miRNAs (known and novel) in csv format. +- `mirdeep2/result_{sample.id}.html` An HTML report with an overview of all detected miRNAs (known and novel) in html format. ## miRTrace diff --git a/docs/usage.md b/docs/usage.md index 06115967..caaca2f9 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -63,6 +63,12 @@ Contamination filtering of the sequencing reads is optional and can be invoked u - `pirna`: Used to supply a FASTA file containing piRNA contamination sequence. e.g. The FASTA file is first compared to the available miRNA sequences and overlaps are removed. - `other_contamination`: Used to supply an additional filtering set. The FASTA file is first compared to the available miRNA sequences and overlaps are removed. +## miRDeep2 + +If miRDeep2 fails with exit status 255, the error is ignored and the pipeline continues to completion. In such cases, the pipeline will log a note that includes the path to the work directory where the issue occurred. You can inspect this work directory to examine your input data and troubleshoot the issue. + +Exit status 255 is typically caused by the core miRDeep2 algorithm generating empty output files. This often happens when the reads being processed do not correspond to putative mature miRNA sequences, or when the provided precursors do not meet the criteria for valid miRNA precursors, both of which may stem from the input reads used. A common cause of this error is running the pipeline with a small subset of the input reads. + ### UMI handling The pipeline handles UMIs with two tools. Umicollapse to deduplicate on entire read sequence after 3'adapter removal. Followed by Umitools-extract to extract the miRNA adapter and UMI. 
This can be achieved by using the parameters for UMI handling as follows (in this case for QIAseq miRNA Library Kit): diff --git a/modules.json b/modules.json index 7ed3095d..52abe170 100644 --- a/modules.json +++ b/modules.json @@ -66,6 +66,16 @@ "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, + "mirdeep2/mapper": { + "branch": "master", + "git_sha": "757f60e5656283122cd6ec37d4679483bebb7312", + "installed_by": ["fastq_find_mirna_mirdeep2"] + }, + "mirdeep2/mirdeep2": { + "branch": "master", + "git_sha": "757f60e5656283122cd6ec37d4679483bebb7312", + "installed_by": ["fastq_find_mirna_mirdeep2"] + }, "mirtop/counts": { "branch": "master", "git_sha": "196062335bb9ec979075bf2212f64a369b927b0d", @@ -131,12 +141,22 @@ "git_sha": "407ff4b579f5ae5a3b842b675cd75005d112b8ba", "installed_by": ["modules"] }, + "seqkit/fq2fa": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fastq_find_mirna_mirdeep2"] + }, "seqkit/grep": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"], "patch": "modules/nf-core/seqkit/grep/seqkit-grep.diff" }, + "seqkit/replace": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["fastq_find_mirna_mirdeep2"] + }, "umicollapse": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", @@ -176,6 +196,11 @@ "git_sha": "46eca555142d6e597729fcb682adcc791796f514", "installed_by": ["subworkflows"] }, + "fastq_find_mirna_mirdeep2": { + "branch": "master", + "git_sha": "757f60e5656283122cd6ec37d4679483bebb7312", + "installed_by": ["subworkflows"] + }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", diff --git a/modules/local/mirdeep2_mapper.nf b/modules/local/mirdeep2_mapper.nf deleted file mode 100644 index e810c736..00000000 --- a/modules/local/mirdeep2_mapper.nf +++ /dev/null @@ -1,43 +0,0 @@ -def 
VERSION = '2.0.1' - -process MIRDEEP2_MAPPER { - label 'process_medium' - tag "$meta.id" - - conda 'bioconda::mirdeep2=2.0.1.2' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.3--hdfd78af_1' : - 'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" - - input: - tuple val(meta), path(reads) - path index - - output: - tuple val(meta), path('*_collapsed.fa'), path('*reads_vs_refdb.arf'), emit: mirdeep2_inputs - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def index_base = index.toString().tokenize(' ')[0].tokenize('.')[0] - """ - mapper.pl \\ - $reads \\ - -e \\ - -h \\ - -i \\ - -j \\ - -m \\ - -p $index_base \\ - -s ${meta.id}_collapsed.fa \\ - -t ${meta.id}_reads_vs_refdb.arf \\ - -o 4 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mapper: \$(echo "$VERSION") - END_VERSIONS - """ -} diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf deleted file mode 100644 index a65dd175..00000000 --- a/modules/local/mirdeep2_run.nf +++ /dev/null @@ -1,42 +0,0 @@ -def VERSION = '2.0.1' - -process MIRDEEP2_RUN { - label 'process_medium' - errorStrategy 'ignore' //TODO why was it set like this? - - conda 'bioconda::mirdeep2=2.0.1.2' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.3--hdfd78af_1' : - 'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" - - input: - path(fasta) - tuple val(meta), path(reads), path(arf) - path(hairpin) - path(mature) - - output: - tuple val(meta), path('result*.{bed,csv,html}'), emit: result - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - miRDeep2.pl \\ - $reads \\ - $fasta \\ - $arf \\ - $mature \\ - none \\ - $hairpin \\ - -d \\ - -z _${reads.simpleName} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mirdeep2: \$(echo "$VERSION") - END_VERSIONS - """ -} diff --git a/modules/nf-core/mirdeep2/mapper/environment.yml b/modules/nf-core/mirdeep2/mapper/environment.yml new file mode 100644 index 00000000..fafc6663 --- /dev/null +++ b/modules/nf-core/mirdeep2/mapper/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::mirdeep2=2.0.1.2" diff --git a/modules/nf-core/mirdeep2/mapper/main.nf b/modules/nf-core/mirdeep2/mapper/main.nf new file mode 100644 index 00000000..d52820a3 --- /dev/null +++ b/modules/nf-core/mirdeep2/mapper/main.nf @@ -0,0 +1,53 @@ +process MIRDEEP2_MAPPER { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.2--0': + 'biocontainers/mirdeep2:2.0.1.2--0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index, stageAs: '*') + + output: + tuple val(meta), path('*.fa'), path('*.arf'), emit: outputs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.0.1' + + """ + mapper.pl \\ + ${reads} \\ + $args \\ + -p ${index}/${meta2.id} \\ + -s ${prefix}_collapsed.fa \\ + -t ${prefix}_reads_collapsed_vs_${meta2.id}_genome.arf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mirdeep2: \$(echo "$VERSION") + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.0.1' + """ + touch ${prefix}.fa + touch ${prefix}reads_vs_refdb.arf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mirdeep2: \$(echo "$VERSION") + END_VERSIONS + """ +} diff --git a/modules/nf-core/mirdeep2/mapper/meta.yml b/modules/nf-core/mirdeep2/mapper/meta.yml new file mode 100644 index 00000000..5844344c --- /dev/null +++ b/modules/nf-core/mirdeep2/mapper/meta.yml @@ -0,0 +1,50 @@ +name: "mirdeep2_mapper" +description: | + miRDeep2 Mapper is a tool that prepares deep sequencing reads for downstream miRNA detection by collapsing reads, mapping them to a genome, and outputting the required files for miRNA discovery. +keywords: + - mirdeep2 + - mapper + - RNA sequencing +tools: + - "mirdeep2": + description: | + miRDeep2 Mapper (`mapper.pl`) is part of the miRDeep2 suite. It collapses identical reads, maps them to a reference genome, and outputs both collapsed FASTA and ARF files for downstream miRNA detection and analysis. 
+ homepage: "https://www.mdc-berlin.de/content/mirdeep2-documentation" + documentation: "https://www.mdc-berlin.de/content/mirdeep2-documentation" + tool_dev_url: "https://github.com/rajewsky-lab/mirdeep2" + doi: "10.1093/nar/gkn491" + licence: ["GPL V3"] + identifier: biotools:mirdeep2 + +input: + - - meta: + type: map + description: Groovy Map containing sample information, e.g. `[ id:'sample1', + single_end:false ]` + - reads: + type: file + description: File containing the raw sequencing reads that need to be collapsed + and mapped to a reference genome. + pattern: "*.fa" + - - meta2: + type: map + description: Groovy Map containing information about the genome index. + - index: + type: file + description: Path to the genome index file used for mapping the reads to the + genome. + pattern: "*" +output: + - outputs: + - meta: {} + - "*.fa": {} + - "*.arf": {} + - versions: + - versions.yml: + type: file + description: File containing software versions for tracking. + pattern: "versions.yml" +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/modules/nf-core/mirdeep2/mapper/tests/main.nf.test b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test new file mode 100644 index 00000000..62e3e615 --- /dev/null +++ b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test @@ -0,0 +1,141 @@ + +nextflow_process { + + name "Test Process MIRDEEP2_MAPPER" + script "../main.nf" + process "MIRDEEP2_MAPPER" + + tag "modules" + tag "modules_nfcore" + tag "mirdeep2" + tag "bowtie/build" + tag "mirdeep2/mapper" + tag "seqkit/fq2fa" + tag "seqkit/replace" + + + setup { + run("BOWTIE_BUILD") { + script "../../../bowtie/build/main.nf" + process { + """ + input[0] = [ + [ id:'genome_cel_cluster' ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true) + ] + """ + } + } + + run("SEQKIT_FQ2FA") { + script "../../../seqkit/fq2fa/main.nf" + process { + """ + input[0] = [ + [ id:'small_Clone1_N1' ], // meta map 
+ file('https://github.com/nf-core/test-datasets/raw/smrnaseq/testdata/trimmed/small_Clone1_N1.fastp.fastq.gz', checkIfExists: true) + ] + """ + } + } + + run("SEQKIT_REPLACE") { + script "../../../seqkit/replace/main.nf" + config "./nextflow.config" + process { + """ + input[0] = SEQKIT_FQ2FA.out.fasta + """ + } + } + + } + + test("mirdeep2 - mapper - fasta celegans") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test_reads', single_end:false ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true) + ] + input[1] = BOWTIE_BUILD.out.index + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + + // md5sum not stable - IDs change while sequences are the same + + // Assert TCACCGGGGGTACATCAGCTAA occurs once + { assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("TCACCGGGGGTACATCAGCTAA") }.size() == 1 }, + + // Assert seq_347479_x287 occurs once + { assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("seq_347479_x287") }.size() == 1 }, + + // Assert that specific content occurs 4 times + { assert file(process.out.outputs[0][2]).readLines().findAll { it.contains("21\t1\t21\ttcaccgggtgtaaatcagctt\tchrII:11534525-11540624\t21\t3535\t3555\ttcaccgggtgtaaatcagctt\t+\t0\tmmmmmmmmmmmmmmmmmmmmm") }.size() == 4 } + ) + } + + } + + test("mirdeep2 - mapper - fasta smrnaseq") { + config "./nextflow.config" + + when { + process { + """ + input[0] = SEQKIT_REPLACE.out.fastx + input[1] = BOWTIE_BUILD.out.index + """ + } + } + + then { + assertAll( + { assert process.success }, + + // Assert reads occurs once + { assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("TACCTGAGGTAGCAGGTTGTATAGTTGGGG") }.size() == 1 }, + + // Assert ID occurs once + { assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("seq_996152_x1") }.size() == 1 } + + ) + 
} + + } + + test("mirdeep2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_reads', single_end:false ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true) + ] + input[1] = BOWTIE_BUILD.out.index + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap new file mode 100644 index 00000000..4c3697d9 --- /dev/null +++ b/modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap @@ -0,0 +1,51 @@ +{ + "mirdeep2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_reads", + "single_end": false + }, + "test_reads.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_readsreads_vs_refdb.arf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,33c794292d6772d67fa8001439394614" + ], + "outputs": [ + [ + { + "id": "test_reads", + "single_end": false + }, + "test_reads.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_readsreads_vs_refdb.arf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,33c794292d6772d67fa8001439394614" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-20T20:58:19.544297445" + }, + "mirdeep2 - mapper - fasta celegans": { + "content": [ + [ + "versions.yml:md5,33c794292d6772d67fa8001439394614" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-17T17:41:05.101661825" + } +} \ No newline at end of file diff --git a/modules/nf-core/mirdeep2/mapper/tests/nextflow.config b/modules/nf-core/mirdeep2/mapper/tests/nextflow.config new file mode 100644 index 00000000..ec097561 --- /dev/null +++ b/modules/nf-core/mirdeep2/mapper/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + withName: 
'MIRDEEP2_MAPPER' { + ext.args = "-c -j -k TCGTATGCCGTCTTCTGCTTGT -l 18 -m -v" + } + + withName: 'SEQKIT_REPLACE' { + ext.args = "-p '\s.+'" + ext.suffix = "fasta" + } + +} diff --git a/modules/nf-core/mirdeep2/mirdeep2/environment.yml b/modules/nf-core/mirdeep2/mirdeep2/environment.yml new file mode 100644 index 00000000..fafc6663 --- /dev/null +++ b/modules/nf-core/mirdeep2/mirdeep2/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::mirdeep2=2.0.1.2" diff --git a/modules/nf-core/mirdeep2/mirdeep2/main.nf b/modules/nf-core/mirdeep2/mirdeep2/main.nf new file mode 100644 index 00000000..66c85968 --- /dev/null +++ b/modules/nf-core/mirdeep2/mirdeep2/main.nf @@ -0,0 +1,64 @@ +process MIRDEEP2_MIRDEEP2 { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.2--0': + 'biocontainers/mirdeep2:2.0.1.2--0' }" + + input: + tuple val(meta), path(processed_reads), path(genome_mappings) + tuple val(meta2), path(fasta) + tuple val(meta3), path(mature), path(hairpin), path(mature_other_species) + + output: + tuple val(meta), path("result*.{bed,csv,html}") , emit: outputs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.0.1' + def mature_species = mature ? "${mature}" : "none" + def mature_other = mature_other_species ? "${mature_other_species}": "none" + def precursors = hairpin ? 
"${hairpin}" : "none" + + """ + miRDeep2.pl \\ + $processed_reads \\ + $fasta \\ + $genome_mappings \\ + $mature_species \\ + $mature_other \\ + $precursors \\ + $args + + mv result_*.bed result_${prefix}.bed + mv result_*.csv result_${prefix}.csv + mv result_*.html result_${prefix}.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mirdeep2: \$(echo "$VERSION") + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.0.1' + """ + touch result_${prefix}.html + touch result_${prefix}.bed + touch result_${prefix}.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mirdeep2: \$(echo "$VERSION") + END_VERSIONS + """ +} diff --git a/modules/nf-core/mirdeep2/mirdeep2/meta.yml b/modules/nf-core/mirdeep2/mirdeep2/meta.yml new file mode 100644 index 00000000..adf14101 --- /dev/null +++ b/modules/nf-core/mirdeep2/mirdeep2/meta.yml @@ -0,0 +1,76 @@ +name: "mirdeep2_mirdeep2" +description: | + miRDeep2 is a tool for identifying known and novel miRNAs in deep sequencing data by analyzing sequenced RNAs. It integrates the mapping of sequencing reads to the genome and predicts miRNA precursors and mature miRNAs. +keywords: + - mirdeep2 + - miRNA + - RNA sequencing +tools: + - "mirdeep2": + description: | + miRDeep2 is a tool that discovers microRNA genes by analyzing sequenced RNAs. + It includes three main scripts: `miRDeep2.pl`, `mapper.pl`, and `quantifier.pl` for comprehensive miRNA detection and quantification. + homepage: "https://www.mdc-berlin.de/content/mirdeep2-documentation" + documentation: "https://www.mdc-berlin.de/content/mirdeep2-documentation" + tool_dev_url: "https://github.com/rajewsky-lab/mirdeep2" + doi: "10.1093/nar/gkn491" + licence: ["GPL V3"] + identifier: biotools:mirdeep2 + +input: + - - meta: + type: map + description: Groovy Map containing sample information, e.g. 
`[ id:'sample1', + single_end:false ]` + - processed_reads: + type: file + description: FASTA file containing the processed sequencing reads. + pattern: "*.fa" + - genome_mappings: + type: file + description: ARF format file with mapped reads to the genome. + pattern: "*.arf" + - - meta2: + type: map + description: Groovy Map for genome FASTA file metadata, e.g. `[ id:'genome']` + - fasta: + type: file + description: FASTA file of the corresponding genome. + pattern: "*.fa" + - - meta3: + type: map + description: Groovy Map for miRNA metadata, e.g. `[ id:'mirbase', single_end:false + ]` + - mature: + type: file + description: FASTA file containing known mature miRNAs of the species being + analyzed. + pattern: "*.fa" + - hairpin: + type: file + description: FASTA file containing hairpin sequences (miRNA precursors). + pattern: "*.fa" + - mature_other_species: + type: file + description: FASTA file containing known mature miRNAs of other species. + pattern: "*.fa" +output: + - outputs: + - meta: + type: map + description: Groovy Map containing sample information e.g. `[ id:'sample1', + single_end:false ]` + - result*.{bed,csv,html}: + type: file + description: Output files, including BED, CSV, and HTML results files with an + overview of detected miRNAs. 
+ pattern: "result*.{bed,csv,html}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test new file mode 100644 index 00000000..b7b73ec1 --- /dev/null +++ b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process MIRDEEP2_MIRDEEP2" + script "../main.nf" + process "MIRDEEP2_MIRDEEP2" + + tag "modules" + tag "modules_nfcore" + tag "mirdeep2" + tag "mirdeep2/mirdeep2" + tag "bowtie/build" + tag "mirdeep2/mapper" + + + setup { + run("BOWTIE_BUILD") { + script "../../../bowtie/build/main.nf" + process { + """ + input[0] = [ + [ id:'genome_cel_cluster' ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true) + ] + """ + } + } + + run("MIRDEEP2_MAPPER") { + script "../../../mirdeep2/mapper/main.nf" + config "./nextflow.config" + + process { + """ + input[0] = [ + [ id:'test_reads', single_end:false ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true) + ] + input[1] = BOWTIE_BUILD.out.index + """ + } + } + + } + + test("mirdeep2 - mirdeep2 - fa") { + + when { + process { + """ + input[0] = MIRDEEP2_MAPPER.out.outputs + input[1] = [ + [ id:'genome_cel_cluster' ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true) + ] + input[2] = [ + [ id:'hairpin_mature'], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/mature_ref_this_species.fa', checkIfExists: true), + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/precursors_ref_this_species.fa', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { assert snapshot(process.out.versions, + path(process.out.outputs.get(0).get(1)[2]).readLines().last().contains('</html>'), + process.out.outputs.get(0).get(1)[0], + path(process.out.outputs.get(0).get(1)[1]).readLines().first().contains('miRDeep2 score') + ).match() }, + // Assert .html + { assert path(process.out.outputs.get(0).get(1)[2]).readLines().last().contains('</html>') } + ) + } + + } + + test("mirdeep - mirdeep2 - stub") { + + options "-stub" + + when { + process { + """ + input[0] = MIRDEEP2_MAPPER.out.outputs + input[1] = [ + [ id:'genome_cel_cluster' ], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true) + ] + input[2] = [ + [ id:'hairpin_mature'], // meta map + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/mature_ref_this_species.fa', checkIfExists: true), + file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/mature_ref_other_species.fa', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap new file mode 100644 index 00000000..f8ffcf01 --- /dev/null +++ b/modules/nf-core/mirdeep2/mirdeep2/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "mirdeep - mirdeep2 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_reads", + "single_end": false + }, + [ + "result_test_reads.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"result_test_reads.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "result_test_reads.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,8984ad2f1e8bdd148da051e2e6b569bf" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-20T21:04:53.304188615" + }, + "mirdeep2 - mirdeep2 - fa": { + "content": [ + [ + "versions.yml:md5,8984ad2f1e8bdd148da051e2e6b569bf" + ], + true, + "result_test_reads.bed:md5,ba5ef5782e40d7219ca064dd68865d74", + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-23T15:08:50.660562955" + } +} \ No newline at end of file diff --git a/modules/nf-core/mirdeep2/mirdeep2/tests/nextflow.config b/modules/nf-core/mirdeep2/mirdeep2/tests/nextflow.config new file mode 100644 index 00000000..6a33ae05 --- /dev/null +++ b/modules/nf-core/mirdeep2/mirdeep2/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MIRDEEP2_MAPPER' { + ext.args = "-c -j -k TCGTATGCCGTCTTCTGCTTGT -l 18 -m -v" + } +} diff --git a/modules/nf-core/seqkit/fq2fa/environment.yml b/modules/nf-core/seqkit/fq2fa/environment.yml new file mode 100644 index 00000000..41f3e7de --- /dev/null +++ b/modules/nf-core/seqkit/fq2fa/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::seqkit=2.8.1 diff --git a/modules/nf-core/seqkit/fq2fa/main.nf b/modules/nf-core/seqkit/fq2fa/main.nf new file mode 100644 index 00000000..77462ad0 --- /dev/null +++ b/modules/nf-core/seqkit/fq2fa/main.nf @@ -0,0 +1,48 @@ +process SEQKIT_FQ2FA { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/seqkit:2.8.1--h9ee0642_0' : + 'biocontainers/seqkit:2.8.1--h9ee0642_0' }" + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), path("*.fa.gz"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + seqkit \\ + fq2fa \\ + $args \\ + -j $task.cpus \\ + -o ${prefix}.fa.gz \\ + $fastq + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.fa.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/seqkit/fq2fa/meta.yml b/modules/nf-core/seqkit/fq2fa/meta.yml new file mode 100644 index 00000000..2241fda9 --- /dev/null +++ b/modules/nf-core/seqkit/fq2fa/meta.yml @@ -0,0 +1,44 @@ +name: "seqkit_fq2fa" +description: Convert FASTQ to FASTA format +keywords: + - fastq + - fasta + - convert +tools: + - "seqkit": + description: "Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, + written by Wei Shen." + homepage: "https://github.com/shenwei356/seqkit" + documentation: "https://bioinf.shenwei.me/seqkit/" + doi: "10.1371/journal.pone.0163962" + licence: ["MIT"] + identifier: biotools:seqkit + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - fastq: + type: file + description: Sequence file in fastq format + pattern: "*.{fastq,fq}.gz" +output: + - fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - "*.fa.gz": + type: file + description: Sequence file in fasta format + pattern: "*.{fasta,fa}.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@d-jch" diff --git a/modules/nf-core/seqkit/fq2fa/tests/main.nf.test b/modules/nf-core/seqkit/fq2fa/tests/main.nf.test new file mode 100644 index 00000000..08f399e7 --- /dev/null +++ b/modules/nf-core/seqkit/fq2fa/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process SEQKIT_FQ2FA" + script "../main.nf" + process "SEQKIT_FQ2FA" + + tag "modules" + tag "modules_nfcore" + tag "seqkit" + tag "seqkit/fq2fa" + + test("sarscov2 - bam") { + + when { + process { + """ + input[0] = [[ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [[ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/seqkit/fq2fa/tests/main.nf.test.snap b/modules/nf-core/seqkit/fq2fa/tests/main.nf.test.snap new file mode 100644 index 00000000..b10ff751 --- /dev/null +++ b/modules/nf-core/seqkit/fq2fa/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,70efc6839fd6443ee9116c082a730f72" + ], + "fasta": [ + [ + { + "id": "test", 
+ "single_end": false + }, + "test.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,70efc6839fd6443ee9116c082a730f72" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-13T08:56:21.234724552" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa.gz:md5,f0c5c9110ce19e9ebbc9a6b6baf9e105" + ] + ], + "1": [ + "versions.yml:md5,70efc6839fd6443ee9116c082a730f72" + ], + "fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa.gz:md5,f0c5c9110ce19e9ebbc9a6b6baf9e105" + ] + ], + "versions": [ + "versions.yml:md5,70efc6839fd6443ee9116c082a730f72" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-13T08:55:54.648865102" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqkit/fq2fa/tests/tags.yml b/modules/nf-core/seqkit/fq2fa/tests/tags.yml new file mode 100644 index 00000000..004f102d --- /dev/null +++ b/modules/nf-core/seqkit/fq2fa/tests/tags.yml @@ -0,0 +1,2 @@ +seqkit/fq2fa: + - "modules/nf-core/seqkit/fq2fa/**" diff --git a/modules/nf-core/seqkit/replace/environment.yml b/modules/nf-core/seqkit/replace/environment.yml new file mode 100644 index 00000000..41f3e7de --- /dev/null +++ b/modules/nf-core/seqkit/replace/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::seqkit=2.8.1 diff --git a/modules/nf-core/seqkit/replace/main.nf b/modules/nf-core/seqkit/replace/main.nf new file mode 100644 index 00000000..70811c8b --- /dev/null +++ b/modules/nf-core/seqkit/replace/main.nf @@ -0,0 +1,59 @@ +process SEQKIT_REPLACE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/seqkit:2.8.1--h9ee0642_0': + 'biocontainers/seqkit:2.8.1--h9ee0642_0' }" + + input: + tuple val(meta), path(fastx) + + output: + tuple val(meta), path("*.fast*"), emit: fastx + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/) { + extension = "fasta" + } + def endswith = task.ext.suffix ?: "${extension}.gz" + """ + seqkit \\ + replace \\ + ${args} \\ + --threads ${task.cpus} \\ + -i ${fastx} \\ + -o ${prefix}.${endswith} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit version | sed 's/seqkit v//' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/) { + extension = "fasta" + } + def endswith = task.ext.suffix ?: "${extension}.gz" + + """ + echo "" | gzip > ${prefix}.${endswith} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit version | sed 's/seqkit v//' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/seqkit/replace/meta.yml b/modules/nf-core/seqkit/replace/meta.yml new file mode 100644 index 00000000..1be01079 --- /dev/null +++ b/modules/nf-core/seqkit/replace/meta.yml @@ -0,0 +1,47 @@ +name: seqkit_replace +description: Use seqkit to find/replace strings within sequences and sequence headers +keywords: + - seqkit + - replace + - sequence + - sequence headers + - fasta +tools: + - seqkit: + description: Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, + written by Wei Shen. 
+ homepage: https://bioinf.shenwei.me/seqkit/usage/ + documentation: https://bioinf.shenwei.me/seqkit/usage/ + tool_dev_url: https://github.com/shenwei356/seqkit/ + doi: "10.1371/journal.pone.016396" + identifier: biotools:seqkit +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastx: + type: file + description: fasta/q file + pattern: "*.{fasta,fastq,fa,fq,fas,fna,faa}*" +output: + - fastx: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fast*": + type: file + description: fasta/q file with replaced values + pattern: "*.{fasta,fastq,fa,fq,fas,fna,faa}*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@mjcipriano" +maintainers: + - "@mjcipriano" diff --git a/modules/nf-core/seqkit/replace/tests/main.nf.test b/modules/nf-core/seqkit/replace/tests/main.nf.test new file mode 100644 index 00000000..759974c1 --- /dev/null +++ b/modules/nf-core/seqkit/replace/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process SEQKIT_REPLACE" + script "../main.nf" + process "SEQKIT_REPLACE" + + tag "modules" + tag "modules_nfcore" + tag "seqkit" + tag "seqkit/replace" + + test("sarscov2 - fasta - replace") { + + config "./replace.config" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fasta - uncomp") { + + config "./uncomp.config" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + ] + """ + } + } + + then { 
+ assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/seqkit/replace/tests/main.nf.test.snap b/modules/nf-core/seqkit/replace/tests/main.nf.test.snap new file mode 100644 index 00000000..24e1887f --- /dev/null +++ b/modules/nf-core/seqkit/replace/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,d0b955de076997af3989d2ce5b5417b6" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d0b955de076997af3989d2ce5b5417b6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T11:10:12.100214525" + }, + "sarscov2 - fasta - replace": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,b1518908253a4997fcad98270751112e" + ] + ], + "1": [ + "versions.yml:md5,d0b955de076997af3989d2ce5b5417b6" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,b1518908253a4997fcad98270751112e" + ] + ], + "versions": [ + "versions.yml:md5,d0b955de076997af3989d2ce5b5417b6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-07T16:23:57.895160549" + }, + "sarscov2 - fasta - uncomp": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test..fasta:md5,05d3294a62c72f5489f067c1da3c2f6c" + ] + ], + "1": [ + "versions.yml:md5,d0b955de076997af3989d2ce5b5417b6" 
+ ], + "fastx": [ + [ + { + "id": "test" + }, + "test..fasta:md5,05d3294a62c72f5489f067c1da3c2f6c" + ] + ], + "versions": [ + "versions.yml:md5,d0b955de076997af3989d2ce5b5417b6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-07T16:24:09.142463316" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqkit/replace/tests/replace.config b/modules/nf-core/seqkit/replace/tests/replace.config new file mode 100644 index 00000000..8766447c --- /dev/null +++ b/modules/nf-core/seqkit/replace/tests/replace.config @@ -0,0 +1,5 @@ + process { + withName: 'SEQKIT_REPLACE' { + ext.args = "-s -p 'A' -r 'N'" + } + } diff --git a/modules/nf-core/seqkit/replace/tests/tags.yml b/modules/nf-core/seqkit/replace/tests/tags.yml new file mode 100644 index 00000000..b42ee48d --- /dev/null +++ b/modules/nf-core/seqkit/replace/tests/tags.yml @@ -0,0 +1,2 @@ +seqkit/replace: + - "modules/nf-core/seqkit/replace/**" diff --git a/modules/nf-core/seqkit/replace/tests/uncomp.config b/modules/nf-core/seqkit/replace/tests/uncomp.config new file mode 100644 index 00000000..dbd892b5 --- /dev/null +++ b/modules/nf-core/seqkit/replace/tests/uncomp.config @@ -0,0 +1,6 @@ + process { + withName: 'SEQKIT_REPLACE' { + ext.args = "-s -p 'T' -r 'N'" + ext.suffix = ".fasta" + } + } diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf deleted file mode 100644 index 40dd9f29..00000000 --- a/subworkflows/local/mirdeep2.nf +++ /dev/null @@ -1,31 +0,0 @@ -// -// Quantify mirna with bowtie and mirtop -// - -include { PIGZ_UNCOMPRESS } from '../../modules/nf-core/pigz/uncompress/main' -include { MIRDEEP2_MAPPER } from '../../modules/local/mirdeep2_mapper' -include { MIRDEEP2_RUN } from '../../modules/local/mirdeep2_run' - -workflow MIRDEEP2 { - take: - reads // channel: [ val(meta), [ reads ] ] - fasta // channel: [ val(meta), path(fasta) ] - index // channel: [genome.1.ebwt, genome.2.ebwt, genome.3.ebwt, genome.4.ebwt, 
genome.rev.1.ebwt, genome.rev.2.ebwt] - hairpin // channel: [ path(hairpin.fa) ] - mature // channel: [ path(mature.fa) ] - - main: - ch_versions = Channel.empty() - - PIGZ_UNCOMPRESS ( reads ) - ch_versions = ch_versions.mix(PIGZ_UNCOMPRESS.out.versions.first()) - - MIRDEEP2_MAPPER ( PIGZ_UNCOMPRESS.out.file, index ) - ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions.first()) - - MIRDEEP2_RUN ( fasta.map{meta,file->file}, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature ) - ch_versions = ch_versions.mix(MIRDEEP2_RUN.out.versions.first()) - - emit: - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf new file mode 100644 index 00000000..f8c3da93 --- /dev/null +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/main.nf @@ -0,0 +1,33 @@ +include { SEQKIT_FQ2FA } from '../../../modules/nf-core/seqkit/fq2fa/main' +include { SEQKIT_REPLACE } from '../../../modules/nf-core/seqkit/replace/main' +include { MIRDEEP2_MAPPER } from '../../../modules/nf-core/mirdeep2/mapper/main' +include { MIRDEEP2_MIRDEEP2 } from '../../../modules/nf-core/mirdeep2/mirdeep2/main' + +workflow FASTQ_FIND_MIRNA_MIRDEEP2 { + + take: + ch_reads // channel: [ val(meta), fastq ] + ch_genome_fasta // channel: [ val(meta), genome_fasta ] + ch_bowtie_index // channel: [ val(meta), index ] + ch_mirna_mature_hairpin // channel: [ val(meta), mature_mirna, hairpin_mirna ] + + main: + + ch_versions = Channel.empty() + + SEQKIT_FQ2FA ( ch_reads ) + ch_versions = ch_versions.mix(SEQKIT_FQ2FA.out.versions) + + SEQKIT_REPLACE ( SEQKIT_FQ2FA.out.fasta ) + ch_versions = ch_versions.mix(SEQKIT_REPLACE.out.versions) + + MIRDEEP2_MAPPER ( SEQKIT_REPLACE.out.fastx, ch_bowtie_index ) + ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions) + + MIRDEEP2_MIRDEEP2 ( MIRDEEP2_MAPPER.out.outputs, ch_genome_fasta, ch_mirna_mature_hairpin ) + ch_versions = 
ch_versions.mix(MIRDEEP2_MIRDEEP2.out.versions) + + emit: + outputs = MIRDEEP2_MIRDEEP2.out.outputs // channel: [ val(meta), [ bed, csv, html ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/meta.yml b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/meta.yml new file mode 100644 index 00000000..22a475b3 --- /dev/null +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/meta.yml @@ -0,0 +1,51 @@ +name: "fastq_find_mirna_mirdeep2" +description: | + This subworkflow identifies miRNAs from FASTQ files using miRDeep2. The workflow converts FASTQ to FASTA, processes and replaces any whitespace in sequence IDs, builds a Bowtie index of the genome, and then maps reads using miRDeep2 mapper before identifying known and novel miRNAs. +keywords: + - miRNA + - FASTQ + - FASTA + - Bowtie + - miRDeep2 +components: + - seqkit/fq2fa + - seqkit/replace + - bowtie/build + - mirdeep2/mapper + - mirdeep2/mirdeep2 +input: + - ch_reads: + type: file + description: | + The input channel containing the FASTQ files to process and identify miRNAs. + Structure: [ val(meta), path(fastq) ] + pattern: "*.fastq.gz" + - ch_genome_fasta: + type: file + description: | + The input channel containing the genome FASTA files used to build the Bowtie index. + Structure: [ val(meta), path(fasta) ] + pattern: "*.fa" + - ch_mirna_mature_hairpin: + type: file + description: | + The input channel containing the mature and hairpin miRNA sequences for miRNA identification. + Structure: [ val(meta), path(mature_fasta), path(hairpin_fasta) ] + pattern: "*.fa" +output: + - outputs: + type: file + description: | + The output channel containing the BED, CSV, and HTML files with the identified miRNAs. 
+ Structure: [ val(meta), path(bed), path(csv), path(html) ] + pattern: "*.{bed,csv,html}" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test new file mode 100644 index 00000000..13c10e52 --- /dev/null +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test @@ -0,0 +1,80 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_FIND_MIRNA_MIRDEEP2" + script "../main.nf" + workflow "FASTQ_FIND_MIRNA_MIRDEEP2" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_find_mirna_mirdeep2" + tag "mirdeep2/mapper" + tag "mirdeep2/mirdeep2" + tag "seqkit/fq2fa" + tag "seqkit/replace" + tag "bowtie/build" + + + test("smrnaseq - fasta - single_end") { + config "./nextflow.config" + + setup { + run("SEQKIT_REPLACE") { + script "modules/nf-core/seqkit/replace/main.nf" + config "./nextflow.config" + + process { + """ + input[0] = [ + [ id:'genome' ], // meta map + file('https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa', checkIfExists: true) + ] + """ + } + } + + run("BOWTIE_BUILD") { + script "modules/nf-core/bowtie/build/main.nf" + process { + """ + input[0] = SEQKIT_REPLACE.out.fastx + """ + } + } + } + + when { + workflow { + """ + input[0] = [ + [ id:'small_Clone1_N1', single_end:false ], // meta map + file('https://github.com/nf-core/test-datasets/raw/smrnaseq/testdata/trimmed/small_Clone1_N1.fastp.fastq.gz', checkIfExists: true) + ] + + input[1] = SEQKIT_REPLACE.out.fastx + + input[2] = BOWTIE_BUILD.out.index + + input[3] = [ + [ id:'mirna_mature_hairpin'], // meta map + file('https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_mature.fa', checkIfExists: true), + 
file('https://github.com/nf-core/test-datasets/raw/smrnaseq/MirGeneDB/mirgenedb_hsa_hairpin.fa', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.versions, + path(workflow.out.outputs.get(0).get(1)[2]).readLines().last().contains('</html>'), + workflow.out.outputs.get(0).get(1)[0], + path(workflow.out.outputs.get(0).get(1)[1]).readLines().first().contains('miRDeep2 score') + ).match()}, + // Assert .html + { assert path(workflow.out.outputs.get(0).get(1)[2]).readLines().last().contains('</html>') } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap new file mode 100644 index 00000000..c48df3d7 --- /dev/null +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/main.nf.test.snap @@ -0,0 +1,20 @@ +{ + "smrnaseq - fasta - single_end": { + "content": [ + [ + "versions.yml:md5,10138b74aed5b2658c26ddf80ff391d5", + "versions.yml:md5,631c0428c28d5355f0e3e9bd790bd77d", + "versions.yml:md5,706a3f609ec9d66162576d93a6f6a67b", + "versions.yml:md5,756eee52b4a45f7a9effe33b1cd3cb92" + ], + true, + "result_small_Clone1_N1.bed:md5,98a74ac6dd16ee876e9a3f54d2695c88", + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-23T14:56:03.274059331" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config new file mode 100644 index 00000000..ec097561 --- /dev/null +++ b/subworkflows/nf-core/fastq_find_mirna_mirdeep2/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + withName: 'MIRDEEP2_MAPPER' { + ext.args = "-c -j -k TCGTATGCCGTCTTCTGCTTGT -l 18 -m -v" + } + + withName: 'SEQKIT_REPLACE' { + ext.args = "-p '\s.+'" + ext.suffix = "fasta" + } + +} diff --git a/tests/test_mirgenedb.nf.test b/tests/test_mirgenedb.nf.test index
b7ff0079..9433f837 100644 --- a/tests/test_mirgenedb.nf.test +++ b/tests/test_mirgenedb.nf.test @@ -19,7 +19,8 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, - { assert workflow.trace.succeeded().size() == 79 }, + { assert workflow.trace.succeeded().size() == 90 }, + { assert workflow.trace.failed().size() == 1 }, { assert snapshot( path("$outputDir/mirna_quant/bam/mature/Clone1_N1_mature.sorted.flagstat").exists(), @@ -73,6 +74,15 @@ nextflow_pipeline { path("$outputDir/genome_quant/bam/Control_N1_mature_hairpin_genome.sorted.flagstat") ).match("genome_quant_bam") }, + { assert snapshot( + path("$outputDir/mirdeep2/result_Clone1_N1.csv").exists(), + path("$outputDir/mirdeep2/result_Control_N1.csv").exists(), + path("$outputDir/mirdeep2/result_Clone1_N1.bed").exists(), + path("$outputDir/mirdeep2/result_Control_N1.bed").exists(), + path("$outputDir/mirdeep2/result_Clone1_N1.html").exists(), + path("$outputDir/mirdeep2/result_Control_N1.html").exists() + ).match("mirdeep2") }, + { assert snapshot( path("$outputDir/multiqc/multiqc_report.html").exists() ).match("multiqc") }, diff --git a/tests/test_mirgenedb.nf.test.snap b/tests/test_mirgenedb.nf.test.snap index 4c5cff01..1aa062d4 100644 --- a/tests/test_mirgenedb.nf.test.snap +++ b/tests/test_mirgenedb.nf.test.snap @@ -19,13 +19,13 @@ }, "software_versions": { "content": [ - "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, 
SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" + "{BOWTIE_MAP_GENOME={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_MATURE={bowtie=1.3.0, samtools=1.16.1}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.0, samtools=1.16.1}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, MIRDEEP2_MAPPER={mirdeep2=2.0.1}, MIRDEEP2_MIRDEEP2={mirdeep2=2.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_FQ2FA={seqkit=2.8.0}, SEQKIT_REPLACE={seqkit=2.8.0}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" ], "meta": { - "nf-test": "0.9.0", + "nf-test": "0.8.4", "nextflow": "24.04.4" }, - "timestamp": "2024-09-26T00:23:04.015921" + "timestamp": "2024-09-26T18:15:04.369221417" }, "mirna_quant_bam": { "content": [ @@ -54,6 +54,21 @@ }, "timestamp": "2024-09-19T15:14:06.231457002" }, + "mirdeep2": { + "content": [ + true, + true, + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T18:15:04.45050483" + }, "mirna_quant_edger_qc": { "content": [ true, diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 0492b2c3..fd3e31aa 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -17,11 +17,11 @@ include { MIRTRACE_QC } from '../modules/nf-core/mirtrace/q include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp' include { paramsSummaryMultiqc } from 
'../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { FASTQ_FIND_MIRNA_MIRDEEP2 } from '../subworkflows/nf-core/fastq_find_mirna_mirdeep2/main' // local subworkflows include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter/main' include { GENOME_QUANT } from '../subworkflows/local/genome_quant' include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' -include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' // plugins include { paramsSummaryMap } from 'plugin/nf-validation' @@ -220,15 +220,21 @@ workflow NFCORE_SMRNASEQ { hairpin_clean = MIRNA_QUANT.out.fasta_hairpin.map { it -> it[1] } mature_clean = MIRNA_QUANT.out.fasta_mature.map { it -> it[1] } + ch_mature_hairpin = mature_clean + .combine(hairpin_clean) + .map { mature, hairpin -> + [[id: 'mature_hairpin'], mature, hairpin, []] + } + .first() + if (!params.skip_mirdeep) { - MIRDEEP2 ( - ch_reads_for_mirna, - GENOME_QUANT.out.fasta, - GENOME_QUANT.out.index.collect(), - hairpin_clean, - mature_clean - ) - ch_versions = ch_versions.mix(MIRDEEP2.out.versions) + FASTQ_FIND_MIRNA_MIRDEEP2 ( + ch_reads_for_mirna, + ch_fasta, + ch_bowtie_index, + ch_mature_hairpin, + ) + ch_versions = ch_versions.mix(FASTQ_FIND_MIRNA_MIRDEEP2.out.versions) } }