diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe3..4a9bc5c7 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -18,11 +18,11 @@ "python.linting.flake8Path": "/opt/conda/bin/flake8", "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.linting.pylintPath": "/opt/conda/bin/pylint", }, // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } - } + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], + }, + }, } diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 4d09370b..bd134c88 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e278390b..3cdbf2b7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/smrn - [ ] If necessary, also make a PR on the nf-core/smrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 45ad5219..51c3ee37 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,9 +29,10 @@ jobs: profile: - "test" - "test_no_genome" + - "test_umi" steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index ae6a0d7b..0cd44ed6 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd214..905c58e4 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -27,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @@ -40,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check code lints with Black uses: psf/black@stable @@ -71,7 +71,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml similarity index 100% rename from .github/workflows/release-announcments.yml rename to .github/workflows/release-announcements.yml diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc..acf72695 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,7 +4,9 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update - + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - codezombiech.gitignore # Language support for .gitignore files diff --git a/CHANGELOG.md b/CHANGELOG.md index 47cb7b72..10fc5720 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,9 +3,38 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [dev](https://github.com/nf-core/smrnaseq/branch/dev) +## v2.3dev - [date] -- _nothing yet done_ +- [[#299]](https://github.com/nf-core/smrnaseq/issues/299) - Bugfix for missing inputs in BAM stats (`genome_quant.r`) +- [[#164]](https://github.com/nf-core/smrnaseq/pull/164) - UMI Handling Feature implemented in the pipeline +- [[#302]](https://github.com/nf-core/smrnaseq/pull/302) - Merged in nf-core template v2.11.1 +- [[#294]](https://github.com/nf-core/smrnaseq/pull/294) - Fixed contamination screening issues + +### Parameters + +| Old parameter | New parameter | +| ------------- | --------------------------- | +| | `--with_umi` | +| | `--umitools_extract_method` | +| | `--umitools_method` | +| | `--skip_umi_extract` | +| | `--umitools_bc_pattern` | +| | `--umi_discard_read` | +| | `--save_umi_intermeds` | + +### Software dependencies + +| Dependency | Old version | New version | +| ------------ | ----------- | ----------- | +| `multiqc` | 1.15 | 1.19 | +| `edgeR` | 3.36.0 | 4.0.2 | +| `limma` | 3.50.0 | 3.58.1 | +| `bioconvert` | 0.4.3 | 1.1.1 | +| `mirdeep` | 2.0.1 | 2.0.1.3 | +| `seqkit` | 2.3.1 | 2.6.1 | +| `fastqc` | 0.11.4 | 0.12.1 | +| `samtools` | 1.17 | 1.18 | +| `umitools` | | 1.1.4 | ## [v2.2.4](https://github.com/nf-core/smrnaseq/releases/tag/2.2.4) - 2023-11-03 @@ -64,23 +93,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#188](https://github.com/nf-core/smrnaseq/pull/188)] - Dropped TrimGalore in favor of fastp QC and adapter trimming, improved handling of adapters and trimming parameters - [[#194](https://github.com/nf-core/smrnaseq/issues/194)] - Added default adapters file for FastP improved miRNA adapter trimming -### Parameters - -| Old parameter | New parameter | -| ------------- | ------------------------ | -| | `--mirgenedb` | -| | `--mirgenedb_species` | -| | `--mirgenedb_gff` | -| | `--mirgenedb_mature` | -| | `--mirgenedb_hairpin` | -| | `--contamination_filter` | -| | `--rrna` | -| | `--trna` | -| | `--cdna` | -| | `--ncrna` | -| | `--pirna` | -| | `--other_contamination` | - ## [v2.0.0](https://github.com/nf-core/smrnaseq/releases/tag/2.0.0) - 2022-05-31 Aqua Zinc Chihuahua ### Major enhancements @@ -98,20 +110,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Other enhancements & fixes - [#134](https://github.com/nf-core/smrnaseq/issues/134) - Fixed colSum of zero issues for edgeR_miRBase.R script +- [#49](https://github.com/nf-core/smrnaseq/issues/49) - Integrated the existing umitools modules into the pipeline and extend the deduplication step. - [#55](https://github.com/lpantano/seqcluster/pull/55) - update seqcluster to fix UMI-detecting bug -### Parameters - -| Old parameter | New parameter | -| -------------------- | ---------------- | -| `--conda` | `--enable_conda` | -| `--clusterOptions` | | -| `--publish_dir_mode` | | - -> **NB:** Parameter has been **updated** if both old and new parameter information is present. -> **NB:** Parameter has been **added** if just the new parameter information is present. -> **NB:** Parameter has been **removed** if parameter information isn't present. - ### Software dependencies Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. diff --git a/README.md b/README.md index 7c010c00..1b4a1655 100644 --- a/README.md +++ b/README.md @@ -27,37 +27,45 @@ You can find numerous talks on the nf-core events page from various topics inclu ## Pipeline summary -1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) - 1. Insert Size calculation - 2. Collapse reads ([`seqcluster`](https://seqcluster.readthedocs.io/mirna_annotation.html#processing-of-reads)) -3. Contamination filtering ([`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) -4. Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) -5. Alignment against miRBase hairpin - 1. Unaligned reads from step 3 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) - 2. Collapsed reads from step 2.2 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) -6. Post-alignment processing of miRBase hairpin - 1. Basic statistics from step 3 and step 4.1 ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - 2. Analysis on miRBase, or MirGeneDB hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html)) - - TMM normalization and a table of top expression hairpin - - MDS plot clustering samples - - Heatmap of sample similarities - 3. miRNA and isomiR annotation from step 4.1 ([`mirtop`](https://github.com/miRTop/mirtop)) -7. Alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) - 1. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) -8. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation)) +1. Quality check and triming + 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) + 2. Adapter trimming ([`fastp`](https://github.com/OpenGene/fastp)) + 3. UMI barcode extraction ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools)) + 4. Trim read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +2. miRNA QC ([`miRTrace`](https://github.com/friedlanderlab/mirtrace)) +3. Contamination filtering ([`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) (Optional) + 1. rRNA filtration + 2. tRNA filtration + 3. cDNA filtration + 4. ncRNA filtration + 5. piRNA filtration + 6. Others filtration +4. UMI barcode deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools)) +5. miRNA quantification + - EdgeR + 1. Reads alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) + 2. Post-alignment processing of alignment against Mature miRNA ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 3. Unmapped reads (from reads vs mature miRNA) alignment against miRBase hairpin + 4. Post-alignment processing of alignment against Hairpin ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 5. Analysis on miRBase, or MirGeneDB hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html)) + - TMM normalization and a table of top expression hairpin + - MDS plot clustering samples + - Heatmap of sample similarities + - Mirtop quantification + 1. Read collapsing ([`seqcluster`](https://github.com/lpantano/seqcluster)) + 2. miRNA and isomiR annotation ([`mirtop`](https://github.com/miRTop/mirtop)) +6. Genome Quantification (Optional) + 1. Reads alignment against host reference genome ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) + 2. Post-alignment processing of alignment against host reference genome ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) +7. Novel miRNAs and known miRNAs discovery ([`MiRDeep2`](https://www.mdc-berlin.de/content/mirdeep2-documentation)) (Optional) 1. Mapping against reference genome with the mapper module 2. Known and novel miRNA discovery with the mirdeep2 module -9. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace)) -10. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) +8. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) ## Usage -:::note -If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -with `-profile test` before running the workflow on actual data. -::: +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. First, prepare a samplesheet with your input data that looks as follows: @@ -89,11 +97,9 @@ nextflow run nf-core/smrnaseq \ --outdir ``` -:::warning -Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -::: +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/smrnaseq/usage) and the [parameter documentation](https://nf-co.re/smrnaseq/parameters). diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 74c7de72..11c1b997 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,8 @@ report_comment: > - This report has been generated by the nf-core/smrnaseq + This report has been generated by the nf-core/smrnaseq analysis pipeline. For information about how to interpret these results, please see the documentation. + report_section_order: "nf-core-smrnaseq-methods-description": order: -1000 diff --git a/assets/slackreport.json b/assets/slackreport.json index 214c7fa9..1884b21a 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/smrnaseq v${version} - ${runName}", + "author_name": "nf-core/smrnaseq ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/conf/modules.config b/conf/modules.config index b06d0055..127e0a34 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -77,10 +77,11 @@ process { // // Read QC and trimming options // + process { withName: 'MIRTRACE_RUN' { publishDir = [ - path: { "${params.outdir}/mirtrace/${meta.id}" }, + path: { "${params.outdir}/mirtrace" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -89,7 +90,7 @@ process { if (!(params.skip_fastqc)) { process { - withName: '.*:FASTQC_FASTP:FASTQC_.*' { + withName: '.*:FASTQC_UMITOOLS_FASTP:FASTQC_.*' { ext.args = '--quiet' } } @@ -97,7 +98,7 @@ if (!(params.skip_fastqc)) { if (!params.skip_fastp) { process { - withName: 'FASTP' { + withName: '.*:FASTQC_UMITOOLS_FASTP:FASTP' { ext.args = [ "", params.trim_fastq ? "" : "--disable_adapter_trimming", params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1. @@ -130,7 +131,15 @@ if (!params.skip_fastp) { if (!params.skip_fastqc) { process { - withName: '.*:.*:FASTQC_FASTP:FASTQC_TRIM' { + withName: '.*:.*:FASTQC_UMITOOLS_FASTP:FASTQC_RAW' { + ext.args = '--quiet' + publishDir = [ + path: { "${params.outdir}/fastqc/raw" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*:.*:FASTQC_UMITOOLS_FASTP:FASTQC_TRIM' { ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/fastqc/trim" }, @@ -142,6 +151,83 @@ if (!params.skip_fastp) { } } +if (params.with_umi && !params.skip_umi_extract) { + process { + withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:UMITOOLS_EXTRACT' { + ext.args = [ + params.umitools_extract_method ? "--extract-method=${params.umitools_extract_method}" : '', + params.umitools_bc_pattern ? "--bc-pattern='${params.umitools_bc_pattern}'" : '', + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/umitools" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/umitools" }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + enabled: params.save_umi_intermeds + ] + ] + } + } +} + +// +// UMI tools deduplication +// + +if (params.with_umi) { + process { + withName: '.*:DEDUPLICATE_UMIS:UMICOLLAPSE' { + ext.args = { meta.single_end ? "--algo ${params.umitools_method} --two-pass" : "--method ${params.umitools_method} --two-pass --paired --remove-unpaired --remove-chimeric" } + ext.prefix = { "${meta.id}.umi_dedup.sorted" } + publishDir = [ + path: { "${params.outdir}/umi_dedup" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: ( + params.save_umi_intermeds + ) + ] + } + + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/umi_dedup" }, + mode: params.publish_dir_mode, + pattern: '*.{bam}', + enabled: ( + params.save_umi_intermeds + ) + ] + } + + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/umi_dedup" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}', + enabled: ( + params.save_umi_intermeds + ) + ] + } + + withName: '.*:DEDUPLICATE_UMIS:BAM_SORT_STATS_SAMTOOLS:.*' { + publishDir = [ + path: { "${params.outdir}/umi_dedup/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] + } + } +} + // // Quantification // @@ -238,7 +324,14 @@ if (!params.skip_mirdeep) { process { withName: 'MIRDEEP2_MAPPER' { publishDir = [ - path: { "${params.outdir}/mirdeep" }, + path: { "${params.outdir}/mirdeep2/mapper" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'MIRDEEP2_RUN' { + publishDir = [ + path: { "${params.outdir}/mirdeep2/run" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/conf/test.config b/conf/test.config index 1a81afee..e699776b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,9 +23,7 @@ params { input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv' fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa' - mature = 'https://mirbase.org/download/CURRENT/mature.fa' - hairpin = 'https://mirbase.org/download/CURRENT/hairpin.fa' - mirna_gtf = 'https://mirbase.org/download/hsa.gff3' + mirtrace_species = 'hsa' protocol = 'illumina' skip_mirdeep = true diff --git a/conf/test_umi.config b/conf/test_umi.config new file mode 100644 index 00000000..48f61181 --- /dev/null +++ b/conf/test_umi.config @@ -0,0 +1,36 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/smrnaseq -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + + input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet_umi.csv' + fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa' + + mirtrace_species = 'hsa' + protocol = 'illumina' + skip_mirdeep = true + + //UMI Specific testcase + with_umi = true + umitools_extract_method = 'regex' + umitools_bc_pattern = '.+AACTGTAGGCACCATCAAT{s<=2}(?P.{12})(?P.*)' + save_umi_intermeds = true +} diff --git a/docs/output.md b/docs/output.md index 706930a1..537f17c5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -13,6 +13,8 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [FastQC](#fastqc) - read quality control +- [UMI-tools extract](#umi-tools-extract) - UMI barcode extraction +- [UMI-tools deduplicate](#umi-tools-deduplicate) - read deduplication - [FastP](#fastp) - adapter trimming - [Bowtie2](#bowtie2) - contamination filtering - [Bowtie](#bowtie) - alignment against mature miRNAs and miRNA precursors (hairpins) @@ -40,6 +42,21 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) +## UMI-tools extract + +
+Output files + +- `umitools/` + - `*.fastq.gz`: If `--save_umi_intermeds` is specified, FastQ files **after** UMI extraction will be placed in this directory. + - `*.log`: Log file generated by the UMI-tools `extract` command. + +
+ +[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name. Secondly, reads are deduplicated based on UMI identifier after mapping as highlighted in the [UMI-tools deduplicate](#umi-tools-deduplicate) section. + +To facilitate processing of input data which has the UMI barcode already embedded in the read name from the start, `--skip_umi_extract` can be specified in conjunction with `--with_umi`. + ## FastP [FastP](https://github.com/OpenGene/fastp) is used for removal of adapter contamination and trimming of low quality regions. @@ -55,6 +72,19 @@ Contains FastQ files with quality and adapter trimmed reads for each sample, alo FastP can automatically detect adapter sequences when not specified directly by the user - the pipeline also comes with a feature and a supplied miRNA adapters file to ensure adapters auto-detected are more accurate. If there are needs to add more known miRNA adapters to this list, please open a pull request. +## UMI-tools deduplicate + +
+Output files + +- `umi_dedup/` + - `*.tsv`: Results statistics files detailing the UMI deduplication results. + - `*.bam`: If `--save_umi_intermeds` is specified, the deduplicated bam files **after** UMI deduplication will be placed in this directory. In addition the sorted and indexed files will be placed there as well. + - `samtools_stats/` - `*.{stats,flagstat,idxstats}:` Statistics on the mappings underlying the UMI deduplication. +
+ +[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name as highlighted in the [UMI-tools extract](#umi-tools-extract) section. The reads are deduplicated based on an alignment against the full genome of the species. The deduplicated reads are then converted into fastq format. The resulting fastq files are used in the remaining steps of the pipeline. + ## Bowtie2 [Bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) is used to align the reads to user-defined databases of contaminants. diff --git a/docs/usage.md b/docs/usage.md index 2884df74..f659727b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -27,10 +27,10 @@ It should point to the 3-letter species name used by [miRBase](https://www.mirba Different parameters can be set for the two supported databases. By default `miRBase` will be used with the parameters below. - `mirna_gtf`: If not supplied by the user, then `mirna_gtf` will point to the latest GFF3 file in miRbase: `https://mirbase.org/download/CURRENT/genomes/${params.mirtrace_species}.gff3` -- `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/download/CURRENT/mature.fa` -- `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/download/CURRENT/hairpin.fa` +- `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/download/mature.fa` +- `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/download/hairpin.fa` -If MirGeneDB should be used instead it needs to be specified using `--mirgenedb` and use the parameters below . +If MirGeneDB should be used instead it needs to be specified using `--mirgenedb` and use the parameters below. - `mirgenedb_gff`: The data can not be downloaded automatically (URLs are created with short term tokens in it), thus the user needs to supply the gff file for either his species, or all species downloaded from `https://mirgenedb.org/download`. The total set will automatically be subsetted to the species specified with `--mirgenedb_species`. - `mirgenedb_mature`: points to the FASTA file of mature miRNA sequences. Download from `https://mirgenedb.org/download`. diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 01b8653d..e248e4c3 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -4,6 +4,7 @@ import org.yaml.snakeyaml.Yaml import groovy.json.JsonOutput +import nextflow.extension.FilesEx class NfcoreTemplate { @@ -141,12 +142,14 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -155,14 +158,16 @@ class NfcoreTemplate { } // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() } // @@ -227,15 +232,14 @@ class NfcoreTemplate { // Dump pipeline parameters in a json file // public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() } // diff --git a/modules.json b/modules.json index 1250670b..8236fcfd 100644 --- a/modules.json +++ b/modules.json @@ -5,55 +5,75 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "cat/cat": { + "branch": "master", + "git_sha": "81f27e75847087865299cc46605deb3b09b4e0a2", + "installed_by": ["modules"] + }, "cat/fastq": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", "installed_by": ["modules"] }, "fastp": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules"] + "git_sha": "1799e452de650f6fb8890d25829bca23014b0728", + "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] }, "fastqc": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules"] + "git_sha": "617777a807a1770f73deb38c80004bac06807eef", + "installed_by": ["fastq_fastqc_umitools_fastp"] }, "multiqc": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", + "installed_by": ["modules"] + }, + "samtools/bam2fq": { + "branch": "master", + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_stats_samtools"] + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_stats_samtools"] + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["bam_stats_samtools", "modules"] }, "samtools/index": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_sort_stats_samtools"] + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_sort_stats_samtools"] + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["bam_sort_stats_samtools", "modules"] }, "samtools/stats": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", - "installed_by": ["modules", "bam_stats_samtools"] + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["bam_stats_samtools", "modules"] + }, + "umicollapse": { + "branch": "master", + "git_sha": "b71a681e9ec41cefd330e12b8566b5f5aff4941c", + "installed_by": ["modules"] + }, + "umitools/extract": { + "branch": "master", + "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", + "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] } } }, @@ -61,13 +81,18 @@ "nf-core": { "bam_sort_stats_samtools": { "branch": "master", - "git_sha": "7c8eeb2b37a6c6d3ffba0aef55ff60c8718c0ba6", + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", "installed_by": ["subworkflows"] }, "bam_stats_samtools": { "branch": "master", - "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows", "bam_sort_stats_samtools"] + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["bam_sort_stats_samtools", "subworkflows"] + }, + "fastq_fastqc_umitools_fastp": { + "branch": "master", + "git_sha": "d20574ce7420a54fe47b46a91c5f2215d1a2777c", + "installed_by": ["subworkflows"] } } } diff --git a/modules/local/blat_mirna.nf b/modules/local/blat_mirna.nf index 7f8a2324..aa0d3d51 100644 --- a/modules/local/blat_mirna.nf +++ b/modules/local/blat_mirna.nf @@ -53,7 +53,7 @@ process BLAT_MIRNA { blat -out=blast8 $mirna $contaminants /dev/stdout | awk 'BEGIN{FS="\t"}{if(\$11 < 1e-5)print \$1;}' | uniq > mirnahit.txt awk 'BEGIN { while((getline<"mirnahit.txt")>0) l[">"\$1]=1 } /^>/ {x = l[\$1]} {if(!x) print }' $contaminants > filtered.fa -cat <<-END_VERSIONS > versions.yml + cat <<-END_VERSIONS > versions.yml "${task.process}": blat: \$(echo \$(blat) | grep Standalone | awk '{ if (match(\$0,/[0-9]*[0-9]/,m)) print m[0] }') END_VERSIONS diff --git a/modules/local/bowtie_contaminants.nf b/modules/local/bowtie_contaminants.nf index e6a594a7..cf02de31 100644 --- a/modules/local/bowtie_contaminants.nf +++ b/modules/local/bowtie_contaminants.nf @@ -4,7 +4,7 @@ process INDEX_CONTAMINANTS { conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'biocontainers/bowtie2:2.4.5--py36hfca12d5_2'}" + 'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2'}" input: path fasta diff --git a/modules/local/bowtie_genome.nf b/modules/local/bowtie_genome.nf index 91a6cd53..17ea9253 100644 --- a/modules/local/bowtie_genome.nf +++ b/modules/local/bowtie_genome.nf @@ -2,10 +2,10 @@ process INDEX_GENOME { tag "$fasta" label 'process_medium' - conda 'bioconda::bowtie=1.3.1-4' + conda 'bioconda::bowtie=1.3.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bowtie%3A1.3.1--py39hd400a0c_2' : - 'biocontainers/bowtie:1.3.1--py310h4070885_4' }" + 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' : + 'biocontainers/bowtie:1.3.1--py310h7b97f60_6' }" input: tuple val(meta2), path(fasta) diff --git a/modules/local/bowtie_map_contaminants.nf b/modules/local/bowtie_map_contaminants.nf index d10f13b5..c9863ab3 100644 --- a/modules/local/bowtie_map_contaminants.nf +++ b/modules/local/bowtie_map_contaminants.nf @@ -4,7 +4,7 @@ process BOWTIE_MAP_CONTAMINANTS { conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }" + 'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2' }" input: tuple val(meta), path(reads) @@ -21,17 +21,20 @@ process BOWTIE_MAP_CONTAMINANTS { task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: "" + """ - INDEX=`find -L ./ -name "*.3.ebwt" | sed 's/.3.ebwt//'` + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` bowtie2 \\ + -x \$INDEX \\ + -U ${reads} \\ --threads ${task.cpus} \\ + --un ${meta.id}.${contaminant_type}.filter.unmapped.contaminant.fastq \\ --very-sensitive-local \\ -k 1 \\ - -x \$INDEX \\ - --un ${meta.id}.${contaminant_type}.filter.unmapped.contaminant.fastq \\ - ${reads} \\ + -S ${meta.id}.filter.contaminant.sam \\ ${args} \\ - -S ${meta.id}.filter.contaminant.sam > ${meta.id}.contaminant_bowtie.log 2>&1 + > ${meta.id}.contaminant_bowtie.log 2>&1 # extracting number of reads from bowtie logs awk -v type=${contaminant_type} 'BEGIN{tot=0} {if(NR==4 || NR == 5){tot += \$1}} END {print "\\""type"\\": "tot }' ${meta.id}.contaminant_bowtie.log | tr -d , > filtered.${meta.id}_${contaminant_type}.stats diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf index 2be45bb8..733d816e 100644 --- a/modules/local/bowtie_mirna.nf +++ b/modules/local/bowtie_mirna.nf @@ -1,10 +1,10 @@ process INDEX_MIRNA { label 'process_medium' - conda 'bioconda::bowtie=1.3.0-2' + conda 'bioconda::bowtie=1.3.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bowtie%3A1.3.1--py39hd400a0c_2' : - 'biocontainers/bowtie:1.3.1--py310h4070885_4' }" + 'https://depot.galaxyproject.org/singularity/bowtie:1.3.1--py310h7b97f60_6' : + 'biocontainers/bowtie:1.3.1--py310h7b97f60_6' }" input: tuple val(meta2), path(fasta) diff --git a/modules/local/edger_qc.nf b/modules/local/edger_qc.nf index 729d5eed..8c311457 100644 --- a/modules/local/edger_qc.nf +++ b/modules/local/edger_qc.nf @@ -1,10 +1,10 @@ process EDGER_QC { label 'process_medium' - conda 'bioconda::bioconductor-limma=3.50.0 bioconda::bioconductor-edger=3.36.0 conda-forge::r-data.table=1.14.2 conda-forge::r-gplots=3.1.1 conda-forge::r-statmod=1.4.36' + conda 'bioconda::bioconductor-limma=3.58.1 bioconda::bioconductor-edger=4.0.2 conda-forge::r-data.table=1.14.10 conda-forge::r-gplots=3.1.3 conda-forge::r-statmod=1.5.0' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' : - 'biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:f5ff7de321749bc7ae12f7e79a4b581497f4c8ce-0' : + 'biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:f5ff7de321749bc7ae12f7e79a4b581497f4c8ce-0' }" input: path input_files diff --git a/modules/local/filter_stats.nf b/modules/local/filter_stats.nf index 18e7016b..4c46f51d 100644 --- a/modules/local/filter_stats.nf +++ b/modules/local/filter_stats.nf @@ -4,7 +4,7 @@ process FILTER_STATS { conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }" + 'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2' }" input: tuple val(meta), path(reads) @@ -13,6 +13,7 @@ process FILTER_STATS { output: path "*_mqc.yaml" , emit: stats tuple val(meta), path('*.filtered.fastq.gz'), emit: reads + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,5 +25,12 @@ process FILTER_STATS { tr '\n' ', ' | \\ awk -v sample=${meta.id} -v readnumber=\$readnumber '{ print "id: \\"my_pca_section\\"\\nsection_name: \\"Contamination Filtering\\"\\ndescription: \\"This plot shows the amount of reads filtered by contaminant type.\\"\\nplot_type: \\"bargraph\\"\\npconfig:\\n id: \\"contamination_filter_plot\\"\\n title: \\"Contamination Plot\\"\\n ylab: \\"Number of reads\\"\\ndata:\\n "sample": {"\$0"\\"remaining reads\\": "readnumber"}" }' > ${meta.id}.contamination_mqc.yaml gzip -c ${reads} > ${meta.id}.filtered.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(cat --version | grep 'cat ' |sed 's/cat (GNU coreutils) //') + gzip: \$(gzip --version | grep "gzip" | sed 's/gzip //') + tr: \$(tr --version | grep 'tr ' |sed 's/tr (GNU coreutils) //') + END_VERSIONS """ } diff --git a/modules/local/format_fasta_mirna.nf b/modules/local/format_fasta_mirna.nf index 489879a5..67461d64 100644 --- a/modules/local/format_fasta_mirna.nf +++ b/modules/local/format_fasta_mirna.nf @@ -4,10 +4,10 @@ process FORMAT_FASTA_MIRNA { tag "$fasta" label 'process_medium' - conda 'bioconda::fastx_toolkit=0.0.14-9' + conda 'bioconda::fastx_toolkit=0.0.14' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastx_toolkit:0.0.14--he1b5a44_8' : - 'biocontainers/fastx_toolkit:0.0.14--he1b5a44_8' }" + 'https://depot.galaxyproject.org/singularity/fastx_toolkit:0.0.14--hdbdd923_11' : + 'biocontainers/fastx_toolkit:0.0.14--hdbdd923_11' }" input: tuple val(meta2), path(fasta) diff --git a/modules/local/mirdeep2_mapper.nf b/modules/local/mirdeep2_mapper.nf index 842af6e6..19a9c5dc 100644 --- a/modules/local/mirdeep2_mapper.nf +++ b/modules/local/mirdeep2_mapper.nf @@ -4,7 +4,7 @@ process MIRDEEP2_MAPPER { label 'process_medium' tag "$meta.id" - conda 'bioconda::mirdeep2=2.0.1' + conda 'bioconda::mirdeep2=2.0.1.3' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.3--hdfd78af_1' : 'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" diff --git a/modules/local/mirdeep2_prepare.nf b/modules/local/mirdeep2_prepare.nf index 7e2f2437..ce66b9f1 100644 --- a/modules/local/mirdeep2_prepare.nf +++ b/modules/local/mirdeep2_prepare.nf @@ -3,10 +3,10 @@ process MIRDEEP2_PIGZ { tag "$meta.id" // TODO maybe create a mulled container and uncompress within mirdeep2_mapper? - conda 'bioconda::bioconvert=0.4.3' + conda 'bioconda::bioconvert=1.1.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bioconvert:0.4.3--py_0' : - 'biocontainers/bioconvert:0.4.3--py_0' }" + 'https://depot.galaxyproject.org/singularity/bioconvert:1.1.1--pyhdfd78af_0' : + 'biocontainers/bioconvert:1.1.1--pyhdfd78af_0' }" input: tuple val(meta), path(reads) @@ -23,7 +23,7 @@ process MIRDEEP2_PIGZ { pigz -f -d -p $task.cpus $reads cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) END_VERSIONS """ diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf index 26635a4f..442f26f3 100644 --- a/modules/local/mirdeep2_run.nf +++ b/modules/local/mirdeep2_run.nf @@ -4,7 +4,7 @@ process MIRDEEP2_RUN { label 'process_medium' errorStrategy 'ignore' - conda 'bioconda::mirdeep2=2.0.1' + conda 'bioconda::mirdeep2=2.0.1.3' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.3--hdfd78af_1' : 'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" @@ -35,9 +35,8 @@ process MIRDEEP2_RUN { -z _${reads.simpleName} cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": mirdeep2: \$(echo "$VERSION") END_VERSIONS """ } - diff --git a/modules/local/mirtop_quant.nf b/modules/local/mirtop_quant.nf index e97d6a09..ab38c93d 100644 --- a/modules/local/mirtop_quant.nf +++ b/modules/local/mirtop_quant.nf @@ -34,7 +34,7 @@ process MIRTOP_QUANT { mv mirtop/stats/mirtop_stats.log mirtop/stats/full_mirtop_stats.log cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": mirtop: \$(echo \$(mirtop --version 2>&1) | sed 's/^.*mirtop //') END_VERSIONS """ diff --git a/modules/local/mirtrace.nf b/modules/local/mirtrace.nf index f576ebc0..500de058 100644 --- a/modules/local/mirtrace.nf +++ b/modules/local/mirtrace.nf @@ -43,8 +43,8 @@ process MIRTRACE_RUN { --force cat <<-END_VERSIONS > versions.yml - ${task.process}": - mirtrace: \$(echo \$(mirtrace -v 2>&1)) + "${task.process}": + mirtrace: \$(echo \$(mirtrace -v)) END_VERSIONS """ diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf index a0bbc75e..60665251 100644 --- a/modules/local/parse_fasta_mirna.nf +++ b/modules/local/parse_fasta_mirna.nf @@ -1,10 +1,10 @@ process PARSE_FASTA_MIRNA { label 'process_medium' - conda 'bioconda::seqkit=2.3.1' + conda 'bioconda::seqkit=2.6.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/seqkit:2.3.1--h9ee0642_0' : - 'biocontainers/seqkit:2.3.1--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/seqkit:2.6.1--h9ee0642_0' : + 'biocontainers/seqkit:2.6.1--h9ee0642_0' }" input: tuple val(meta2), path(fasta) @@ -34,7 +34,7 @@ process PARSE_FASTA_MIRNA { seqkit seq --rna2dna \${FASTA}_sps.fa > \${FASTA}_igenome.fa cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": seqkit: \$(echo \$(seqkit 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS """ diff --git a/modules/local/seqcluster_collapse.nf b/modules/local/seqcluster_collapse.nf index 39f6ce85..4379654c 100644 --- a/modules/local/seqcluster_collapse.nf +++ b/modules/local/seqcluster_collapse.nf @@ -2,10 +2,10 @@ process SEQCLUSTER_SEQUENCES { label 'process_medium' tag "$meta.id" - conda 'bioconda::seqcluster=1.2.9-0' + conda 'bioconda::seqcluster=1.2.9' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/seqcluster:1.2.9--pyh5e36f6f_0' : - 'biocontainers/seqcluster:1.2.8--pyh5e36f6f_0' }" + 'biocontainers/seqcluster:1.2.9--pyh5e36f6f_0' }" input: tuple val(meta), path(reads) @@ -25,7 +25,7 @@ process SEQCLUSTER_SEQUENCES { mv collapsed/*.fastq.gz final/. cat <<-END_VERSIONS > versions.yml - ${task.process}": + "${task.process}": seqcluster: \$(echo \$(seqcluster --version 2>&1) | sed 's/^.*seqcluster //') END_VERSIONS """ diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 00000000..17a04ef2 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,7 @@ +name: cat_cat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 00000000..adbdbd7b --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,79 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} + diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 00000000..00a8db0b --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,36 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - file_out: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 00000000..aaae04f9 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,177 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_name_conflict") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") } + ) + } + } + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} + ) + } + } +} + diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 00000000..0c9bfe8d --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,145 @@ +{ + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2024-01-12T14:02:02.999254641" + }, + "test_cat_one_file_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:21.39642399" + }, + "test_cat_unzipped_zipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2024-01-12T14:08:26.948048418" + }, + "test_cat_one_file_unzipped_zipped_size": { + "content": [ + 374 + ], + "timestamp": "2024-01-12T14:10:22.445700266" + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 00000000..ec26b0fd --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 00000000..fbc79783 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 00000000..37b578f5 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml index 222b301f..bff93add 100644 --- a/modules/nf-core/cat/fastq/environment.yml +++ b/modules/nf-core/cat/fastq/environment.yml @@ -1,3 +1,4 @@ +name: cat_fastq channels: - conda-forge - bioconda diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index b75a2e73..3d963784 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -2,7 +2,7 @@ process CAT_FASTQ { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/cat/fastq/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test index f5f94182..dab2e14c 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -16,11 +16,11 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -28,8 +28,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -42,13 +41,13 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -56,8 +55,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -70,11 +68,11 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -82,8 +80,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -96,13 +93,13 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -110,8 +107,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -124,10 +120,10 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -135,8 +131,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap index ec2342e5..43dfe28f 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test.snap +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -1,78 +1,169 @@ { "test_cat_fastq_single_end": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:12.990284837" + "timestamp": "2024-01-17T17:30:39.816981" }, "test_cat_fastq_single_end_same_name": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:31.554568147" + "timestamp": "2024-01-17T17:32:35.229332" }, "test_cat_fastq_single_end_single_file": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:49.629360033" + "timestamp": "2024-01-17T17:34:00.058829" }, "test_cat_fastq_paired_end_same_name": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, + { + "0": [ [ - "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66", - "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e" + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:40.711617539" + "timestamp": "2024-01-17T17:33:33.031555" }, "test_cat_fastq_paired_end": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ [ - "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d", - "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda" + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-18T07:53:20.923560211" + "timestamp": "2024-01-17T17:32:02.270935" } } \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml index 7ca22161..9b3272bc 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -1,6 +1,7 @@ +name: custom_dumpsoftwareversions channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::multiqc=1.15 + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 60a19e0e..f2187611 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda 'modules/nf-core/custom/dumpsoftwareversions/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index 9414c32d..5f15a5fd 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap index 8713b921..5f59a936 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -1,27 +1,33 @@ { "Should run without failures": { "content": [ - { - "0": [ - "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" - ], - "1": [ - "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" - ], - "2": [ - "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" - ], - "mqc_yml": [ - "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" - ], - "versions": [ - "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" - ], - "yml": [ - "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" - ] - } + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] ], - "timestamp": "2023-10-11T17:10:02.930699" + "timestamp": "2024-01-09T23:01:18.710682" } -} +} \ No newline at end of file diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml index 19ccec25..70389e66 100644 --- a/modules/nf-core/fastp/environment.yml +++ b/modules/nf-core/fastp/environment.yml @@ -1,3 +1,4 @@ +name: fastp channels: - conda-forge - bioconda diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index ca5f100f..2a3b679e 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,7 +2,7 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda 'modules/nf-core/fastp/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : 'biocontainers/fastp:0.23.4--h5f740d0_0' }" @@ -45,7 +45,7 @@ process FASTP { $adapter_list \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ | gzip -c > ${prefix}.fastp.fastq.gz cat <<-END_VERSIONS > versions.yml @@ -66,7 +66,7 @@ process FASTP { $adapter_list \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log + 2> >(tee ${prefix}.fastp.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -91,7 +91,7 @@ process FASTP { --thread $task.cpus \\ --detect_adapter_for_pe \\ $args \\ - 2> ${prefix}.fastp.log + 2> >(tee ${prefix}.fastp.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -99,4 +99,22 @@ process FASTP { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : "" + """ + touch $touch_reads + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + $touch_merged + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ } diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test index f610b735..dcf4fd6f 100644 --- a/modules/nf-core/fastp/tests/main.nf.test +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -19,11 +19,10 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] - + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -57,6 +56,66 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_single_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-for_stub_match") + }, { assert snapshot(process.out.versions).match("versions") } ) } @@ -74,12 +133,11 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] - + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -127,6 +185,66 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end-for_stub_match") + }, { assert snapshot(process.out.versions).match("versions") } ) } @@ -144,10 +262,10 @@ nextflow_process { save_trimmed_fail = false save_merged = false - input[0] = [ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] - ] - + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -181,6 +299,66 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("fastp test_fastp_interleaved-stub") { + + options '-stub' + + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-for_stub_match") + }, { assert snapshot(process.out.versions).match("versions") } ) } @@ -198,9 +376,10 @@ nextflow_process { save_trimmed_fail = true save_merged = false - input[0] = [ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -258,13 +437,11 @@ nextflow_process { save_trimmed_fail = true save_merged = false - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -337,11 +514,11 @@ nextflow_process { adapter_fasta = [] save_trimmed_fail = false save_merged = true - - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged @@ -399,6 +576,66 @@ nextflow_process { { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } } }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged-for_stub_match") + }, { assert snapshot(process.out.versions).match("versions") } ) } @@ -412,14 +649,15 @@ nextflow_process { } process { """ - adapter_fasta = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true) + adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) save_trimmed_fail = false save_merged = true - input[0] = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) input[1] = adapter_fasta input[2] = save_trimmed_fail input[3] = save_merged diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap index 0fa68c7d..6a71b680 100644 --- a/modules/nf-core/fastp/tests/main.nf.test.snap +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -1,4 +1,19 @@ { + "test_fastp_paired_end-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=false}" + ] + ], + "timestamp": "2024-01-17T18:07:15.398827" + }, "fastp test_fastp_interleaved_json": { "content": [ [ @@ -11,7 +26,23 @@ ] ] ], - "timestamp": "2023-10-17T11:04:45.794175881" + "timestamp": "2024-01-17T18:08:06.123035" + }, + "test_fastp_paired_end_merged-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "test.merged.fastq.gz", + "{id=test, single_end=false}" + ] + ], + "timestamp": "2024-01-17T18:10:13.467574" }, "test_fastp_single_end_json": { "content": [ @@ -25,7 +56,7 @@ ] ] ], - "timestamp": "2023-10-17T11:04:10.566343705" + "timestamp": "2024-01-17T18:06:00.223817" }, "versions": { "content": [ @@ -33,7 +64,31 @@ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], - "timestamp": "2023-10-17T11:04:10.582076024" + "timestamp": "2024-01-17T18:06:00.248422" + }, + "test_fastp_interleaved-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "timestamp": "2024-01-17T18:08:06.127974" + }, + "test_fastp_single_end-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "timestamp": "2024-01-17T18:06:00.244202" }, "test_fastp_single_end_trim_fail_json": { "content": [ @@ -47,6 +102,6 @@ ] ] ], - "timestamp": "2023-10-17T11:05:00.379878948" + "timestamp": "2024-01-17T18:08:41.942317" } } \ No newline at end of file diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index f52a53a0..1787b38a 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,3 +1,4 @@ +name: fastqc channels: - conda-forge - bioconda diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 5def8818..9e19a74c 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,7 +2,7 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda 'modules/nf-core/fastqc/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 6437a144..b9e8f926 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -38,4 +38,72 @@ nextflow_process { ) } } +// TODO +// // +// // Test with paired-end data +// // +// workflow test_fastqc_paired_end { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with interleaved data +// // +// workflow test_fastqc_interleaved { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with bam data +// // +// workflow test_fastqc_bam { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with multiple samples +// // +// workflow test_fastqc_multiple { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with custom prefix +// // +// workflow test_fastqc_custom_prefix { +// input = [ +// [ id:'mysample', single_end:true ], // meta map +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } } diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index 9d0e6b20..7625b752 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,6 +1,7 @@ +name: multiqc channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::multiqc=1.17 + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 485b3ba8..1b9f7c43 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda 'modules/nf-core/multiqc/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : - 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml @@ -41,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index a61223ed..45a9bc35 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..d0438eda --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,83 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match() } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..d37e7304 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,21 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "timestamp": "2024-01-09T23:02:49.911994" + }, + "sarscov2 single-end [fastqc] - stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "timestamp": "2024-01-09T23:03:14.524346" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/samtools/bam2fq/environment.yml b/modules/nf-core/samtools/bam2fq/environment.yml new file mode 100644 index 00000000..5297496f --- /dev/null +++ b/modules/nf-core/samtools/bam2fq/environment.yml @@ -0,0 +1,7 @@ +name: samtools_bam2fq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 diff --git a/modules/nf-core/samtools/bam2fq/main.nf b/modules/nf-core/samtools/bam2fq/main.nf new file mode 100644 index 00000000..55ffd0cf --- /dev/null +++ b/modules/nf-core/samtools/bam2fq/main.nf @@ -0,0 +1,56 @@ +process SAMTOOLS_BAM2FQ { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" + + input: + tuple val(meta), path(inputbam) + val split + + output: + tuple val(meta), path("*.fq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + if (split){ + """ + samtools \\ + bam2fq \\ + $args \\ + -@ $task.cpus \\ + -1 ${prefix}_1.fq.gz \\ + -2 ${prefix}_2.fq.gz \\ + -0 ${prefix}_other.fq.gz \\ + -s ${prefix}_singleton.fq.gz \\ + $inputbam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } else { + """ + samtools \\ + bam2fq \\ + $args \\ + -@ $task.cpus \\ + $inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/samtools/bam2fq/meta.yml b/modules/nf-core/samtools/bam2fq/meta.yml new file mode 100644 index 00000000..7769046b --- /dev/null +++ b/modules/nf-core/samtools/bam2fq/meta.yml @@ -0,0 +1,51 @@ +name: samtools_bam2fq +description: | + The module uses bam2fq method from samtools to + convert a SAM, BAM or CRAM file to FASTQ format +keywords: + - bam2fq + - samtools + - fastq +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + documentation: http://www.htslib.org/doc/1.1/samtools.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - inputbam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - split: + type: boolean + description: | + TRUE/FALSE value to indicate if reads should be separated into + /1, /2 and if present other, or singleton. + Note: choosing TRUE will generate 4 different files. + Choosing FALSE will produce a single file, which will be interleaved in case + the input contains paired reads. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: | + FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) + or a single interleaved .fq.gz file if the user chooses not to split the reads. + pattern: "*.fq.gz" +authors: + - "@lescai" +maintainers: + - "@lescai" diff --git a/modules/nf-core/samtools/bam2fq/tests/main.nf.test b/modules/nf-core/samtools/bam2fq/tests/main.nf.test new file mode 100644 index 00000000..cd65abbe --- /dev/null +++ b/modules/nf-core/samtools/bam2fq/tests/main.nf.test @@ -0,0 +1,71 @@ +nextflow_process { + + name "Test Process SAMTOOLS_BAM2FQ" + script "../main.nf" + process "SAMTOOLS_BAM2FQ" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/bam2fq" + + config "./nextflow.config" + + test("homo_sapiens - bam, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_converted_bam'], checkIfExists: true) + ] + input[1] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.reads[0][1]).linesGzip[0..6], + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens - bam, true") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_converted_bam'], checkIfExists: true) + ] + input[1] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.reads[0][1].collect{ + if(it ==~ /.*(other|singleton)\.fq\.gz$/) { + return file(it).name + } + return path(it).linesGzip[0..6] + }, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/samtools/bam2fq/tests/main.nf.test.snap b/modules/nf-core/samtools/bam2fq/tests/main.nf.test.snap new file mode 100644 index 00000000..1f824503 --- /dev/null +++ b/modules/nf-core/samtools/bam2fq/tests/main.nf.test.snap @@ -0,0 +1,49 @@ +{ + "homo_sapiens - bam, false": { + "content": [ + [ + "@922332/1\tRX:Z:ATTTCAG-TATTATT", + "GAGAGGATCTCGTGTAGAAATTGCTTTGAGCTGTTCTTTGTCATTTTCCCTTAATTCATTGTCTCTAGCTAGTCTGTTACTCTGTAAAATAAAATAATAAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTTAAGGTCAGTG", + "+", + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE versions.yml + "${task.process}": + umicollapse: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.dedup.bam + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umicollapse: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/umicollapse/meta.yml b/modules/nf-core/umicollapse/meta.yml new file mode 100644 index 00000000..c45d6932 --- /dev/null +++ b/modules/nf-core/umicollapse/meta.yml @@ -0,0 +1,55 @@ +--- +name: "umicollapse" +description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. +keywords: + - umicollapse + - deduplication + - genomics +tools: + - "umicollapse": + description: "UMICollapse contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)." + homepage: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse" + documentation: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse" + tool_dev_url: "https://github.com/Daniel-Liu-c0deb0t/UMICollapse" + doi: "10.7717/peerj.8275" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file containing reads to be deduplicated via UMIs. + pattern: "*.{bam}" + - bai: + type: file + description: | + BAM index files corresponding to the input BAM file. + pattern: "*.{bai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file with deduplicated UMIs. + pattern: "*.{bam}" + - log: + type: file + description: A log file with the deduplication statistics. + pattern: "*_{UMICollapse.log}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@CharlotteAnne" + - "@chris-cheshire" +maintainers: + - "@CharlotteAnne" + - "@chris-cheshire" diff --git a/modules/nf-core/umicollapse/tests/main.nf.test b/modules/nf-core/umicollapse/tests/main.nf.test new file mode 100644 index 00000000..1b8bf7e9 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/main.nf.test @@ -0,0 +1,153 @@ +nextflow_process { + + name "Test Process UMICOLLAPSE" + script "../main.nf" + process "UMICOLLAPSE" + + tag "modules" + tag "modules_nfcore" + tag "umicollapse" + tag "umitools/extract" + tag "samtools/index" + tag "bwa/index" + tag "bwa/mem" + + test("umicollapse single end test") { + setup{ + run("UMITOOLS_EXTRACT"){ + script "../../umitools/extract/main.nf" + config "./nextflow_SE.config" + process{ + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + run("BWA_INDEX"){ + script "../../bwa/index/main.nf" + process{ + """ + input[0] = [ + [ id:'sarscov2'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + run("BWA_MEM"){ + script "../../bwa/mem/main.nf" + process{ + """ + input[0] = UMITOOLS_EXTRACT.out.reads + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + run("SAMTOOLS_INDEX"){ + script "../../samtools/index/main.nf" + process{ + """ + input[0] = BWA_MEM.out.bam + """ + } + } + } + + when { + config "./nextflow_SE.config" + process { + """ + input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.versions).match() } + ) + } + + } + + test("umicollapse paired tests") { + setup{ + run("UMITOOLS_EXTRACT"){ + script "../../umitools/extract/main.nf" + config "./nextflow_PE.config" + process{ + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + run("BWA_INDEX"){ + script "../../bwa/index/main.nf" + process{ + """ + input[0] = [ + [ id:'sarscov2'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + run("BWA_MEM"){ + script "../../bwa/mem/main.nf" + process{ + """ + input[0] = UMITOOLS_EXTRACT.out.reads + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + run("SAMTOOLS_INDEX"){ + script "../../samtools/index/main.nf" + process{ + """ + input[0] = BWA_MEM.out.bam + """ + } + } + } + + when { + config "./nextflow_PE.config" + process { + """ + input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.versions).match() } + ) + } + + } + +} diff --git a/modules/nf-core/umicollapse/tests/main.nf.test.snap b/modules/nf-core/umicollapse/tests/main.nf.test.snap new file mode 100644 index 00000000..60250530 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/main.nf.test.snap @@ -0,0 +1,36 @@ +{ + "umicollapse single end test": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.dedup.bam:md5,4e86d60aa82242889ab5f9031418ab2e" + ] + ], + [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ] + ], + "timestamp": "2024-01-24T13:57:02.801573999" + }, + "umicollapse paired tests": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.dedup.bam:md5,54be836ec246073e60212445b4369a91" + ] + ], + [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ] + ], + "timestamp": "2024-01-24T13:57:24.797928099" + } +} \ No newline at end of file diff --git a/modules/nf-core/umicollapse/tests/nextflow.config b/modules/nf-core/umicollapse/tests/nextflow.config new file mode 100644 index 00000000..844edbdc --- /dev/null +++ b/modules/nf-core/umicollapse/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } + withName: UMICOLLAPSE { + ext.prefix = { "${meta.id}.dedup" } + } +} \ No newline at end of file diff --git a/modules/nf-core/umicollapse/tests/nextflow_PE.config b/modules/nf-core/umicollapse/tests/nextflow_PE.config new file mode 100644 index 00000000..ae4c9632 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/nextflow_PE.config @@ -0,0 +1,10 @@ +process { + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN" --bc-pattern2="NNNN"' + } + + withName: UMICOLLAPSE { + ext.prefix = { "${meta.id}.dedup" } + } +} diff --git a/modules/nf-core/umicollapse/tests/nextflow_SE.config b/modules/nf-core/umicollapse/tests/nextflow_SE.config new file mode 100644 index 00000000..d4b94436 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/nextflow_SE.config @@ -0,0 +1,10 @@ +process { + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } + + withName: UMICOLLAPSE { + ext.prefix = { "${meta.id}.dedup" } + } +} diff --git a/modules/nf-core/umicollapse/tests/tags.yml b/modules/nf-core/umicollapse/tests/tags.yml new file mode 100644 index 00000000..912879c4 --- /dev/null +++ b/modules/nf-core/umicollapse/tests/tags.yml @@ -0,0 +1,2 @@ +umicollapse: + - "modules/nf-core/umicollapse/**" diff --git a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml new file mode 100644 index 00000000..7d08ac0e --- /dev/null +++ b/modules/nf-core/umitools/extract/environment.yml @@ -0,0 +1,7 @@ +name: umitools_extract +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::umi_tools=1.1.4 diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf new file mode 100644 index 00000000..4bd79e79 --- /dev/null +++ b/modules/nf-core/umitools/extract/main.nf @@ -0,0 +1,56 @@ +process UMITOOLS_EXTRACT { + tag "$meta.id" + label "process_single" + label "process_long" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : + 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + """ + umi_tools \\ + extract \\ + -I $reads \\ + -S ${prefix}.umi_extract.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) + END_VERSIONS + """ + } else { + """ + umi_tools \\ + extract \\ + -I ${reads[0]} \\ + --read2-in=${reads[1]} \\ + -S ${prefix}.umi_extract_1.fastq.gz \\ + --read2-out=${prefix}.umi_extract_2.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml new file mode 100644 index 00000000..7695b271 --- /dev/null +++ b/modules/nf-core/umitools/extract/meta.yml @@ -0,0 +1,48 @@ +name: umitools_extract +description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place +keywords: + - UMI + - barcode + - extract + - umitools +tools: + - umi_tools: + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: "MIT" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: list + description: | + List of input FASTQ files whose UMIs will be extracted. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: > + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + pattern: "*.{fastq.gz}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test new file mode 100644 index 00000000..22242d1d --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process UMITOOLS_EXTRACT" + script "../main.nf" + process "UMITOOLS_EXTRACT" + config "./nextflow.config" + tag "modules_nfcore" + tag "modules" + tag "umitools" + tag "umitools/extract" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap new file mode 100644 index 00000000..6d5944f1 --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,5a18da2d3a5a4de15e7aaae9082d7abb" + ] + ], + "timestamp": "2023-12-08T09:41:43.540658352" + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config new file mode 100644 index 00000000..c866f5a0 --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } + +} diff --git a/modules/nf-core/umitools/extract/tests/tags.yml b/modules/nf-core/umitools/extract/tests/tags.yml new file mode 100644 index 00000000..c3fb23de --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/tags.yml @@ -0,0 +1,2 @@ +umitools/extract: + - modules/nf-core/umitools/extract/** diff --git a/nextflow.config b/nextflow.config index 2038c633..0ea71080 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,19 +16,29 @@ params { // Workflow flags protocol = 'illumina' + // References // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false mirna_gtf = null - mature = "https://mirbase.org/download/CURRENT/mature.fa" - hairpin = "https://mirbase.org/download/CURRENT/hairpin.fa" + mature = "https://mirbase.org/download/mature.fa" + hairpin = "https://mirbase.org/download/hairpin.fa" mirgenedb = false mirgenedb_mature = null mirgenedb_hairpin = null mirgenedb_gff = null mirgenedb_species = null + // UMI handling + with_umi = false + skip_umi_extract = false + umitools_extract_method = 'string' + umitools_bc_pattern = null + umi_discard_read = null + umitools_method = 'dir' + save_umi_intermeds = false + // Trimming options clip_r1 = null three_prime_clip_r1 = null @@ -37,12 +47,14 @@ params { fastp_min_length = 17 fastp_known_mirna_adapters = "$projectDir/assets/known_adapters.fa" save_trimmed_fail = false + save_merged = false skip_fastqc = false skip_multiqc = false skip_mirdeep = false skip_fastp = false save_reference = false fastp_max_length = 40 + min_trimmed_reads = 10 // Contamination filtering filter_contamination = false @@ -127,6 +139,7 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -157,9 +170,10 @@ profiles { shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true @@ -216,6 +230,7 @@ profiles { } test { includeConfig 'conf/test.config' } + test_umi { includeConfig 'conf/test_umi.config' } test_no_genome { includeConfig 'conf/test_no_genome.config' } test_full { includeConfig 'conf/test_full.config' } } @@ -231,8 +246,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet - + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -259,8 +273,13 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] // Set default registry for Docker and Podman independent of -profile // Will not be used unless Docker / Podman are enabled // Set to your registry if you have a mirror of containers -docker.registry = 'quay.io' -podman.registry = 'quay.io' +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { diff --git a/nextflow_schema.json b/nextflow_schema.json index 40360e49..204376ab 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -50,6 +50,55 @@ } } }, + "umi_options": { + "title": "UMI options", + "type": "object", + "description": "Options for processing reads with unique molecular identifiers", + "default": "", + "properties": { + "with_umi": { + "type": "boolean", + "fa_icon": "fas fa-barcode", + "description": "Enable UMI-based read deduplication." + }, + "umitools_extract_method": { + "type": "string", + "default": "string", + "fa_icon": "fas fa-barcode", + "description": "UMI pattern to use. Can be either 'string' (default) or 'regex'.", + "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).\n" + }, + "umitools_method": { + "type": "string", + "default": "dir", + "description": "UMI grouping method", + "fa_icon": "fas fa-layer-group", + "help_text": "Available options are dir, cc, adj" + }, + "skip_umi_extract": { + "type": "boolean", + "fa_icon": "fas fa-compress-alt", + "description": "Skip the UMI extraction from the read in case the UMIs have been moved to the headers in advance of the pipeline run." + }, + "umitools_bc_pattern": { + "type": "string", + "fa_icon": "fas fa-barcode", + "help_text": "More details can be found in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/extract.html#extract-method).", + "description": "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI." + }, + "umi_discard_read": { + "type": "integer", + "fa_icon": "fas fa-barcode", + "description": "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively." + }, + "save_umi_intermeds": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "If this option is specified, intermediate FastQ and BAM files produced by UMI-tools are also saved in the results directory." + } + }, + "fa_icon": "fas fa-barcode" + }, "reference_genome_options": { "title": "Reference genome options", "type": "object", @@ -104,7 +153,7 @@ "description": "Path to FASTA file with mature miRNAs.", "fa_icon": "fas fa-wheelchair", "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", - "default": "https://mirbase.org/download/CURRENT/mature.fa" + "default": "https://mirbase.org/download/mature.fa" }, "mirgenedb_mature": { "type": "string", @@ -116,7 +165,7 @@ "description": "Path to FASTA file with miRNAs precursors.", "fa_icon": "fab fa-cuttlefish", "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", - "default": "https://mirbase.org/download/CURRENT/hairpin.fa" + "default": "https://mirbase.org/download/hairpin.fa" }, "mirgenedb_hairpin": { "type": "string", @@ -162,7 +211,6 @@ }, "three_prime_adapter": { "type": "string", - "default": "TGGAATTCTCGGGTGCCAAGG", "fa_icon": "fas fa-text-width", "description": "Sequencing adapter sequence to use for trimming." }, @@ -194,6 +242,12 @@ "default": "${projectDir}/assets/known_adapters.fa", "description": "FastA with known miRNA adapter sequences for adapter trimming", "fa_icon": "far fa-question-circle" + }, + "min_trimmed_reads": { + "type": "integer", + "default": 10, + "fa_icon": "far fa-window-minimize", + "description": "Minimum number of reads required in input file to use it" } } }, @@ -361,14 +415,12 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "publish_dir_mode": { @@ -392,7 +444,6 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -407,7 +458,6 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, "hidden": true }, "hook_url": { @@ -446,7 +496,6 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", - "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, @@ -454,7 +503,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", - "default": false, "hidden": true, "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, @@ -462,9 +510,11 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", - "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + }, + "save_merged": { + "type": "boolean" } } } @@ -473,6 +523,9 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/umi_options" + }, { "$ref": "#/definitions/reference_genome_options" }, diff --git a/subworkflows/local/contaminant_filter.nf b/subworkflows/local/contaminant_filter.nf index 383c85ad..dede115d 100644 --- a/subworkflows/local/contaminant_filter.nf +++ b/subworkflows/local/contaminant_filter.nf @@ -123,6 +123,6 @@ workflow CONTAMINANT_FILTER { emit: filtered_reads = FILTER_STATS.out.reads - versions = ch_versions + versions = ch_versions.mix(FILTER_STATS.out.versions) filter_stats = FILTER_STATS.out.stats -} \ No newline at end of file +} diff --git a/subworkflows/local/fastqc_fastp.nf b/subworkflows/local/fastqc_fastp.nf deleted file mode 100644 index 9e4d952e..00000000 --- a/subworkflows/local/fastqc_fastp.nf +++ /dev/null @@ -1,116 +0,0 @@ -// -// Read QC and trimming -// - -include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/fastqc/main' -include { FASTP } from '../../modules/nf-core/fastp/main' - -// -// Function that parses fastp json output file to get total number of reads after trimming -// -import groovy.json.JsonSlurper - -def getFastpReadsAfterFiltering(json_file) { - return new JsonSlurper().parseText(json_file.text) - ?.get('summary') - ?.get('after_filtering') - ?.get('total_reads') - ?.toInteger() -} - -String getFastpAdapterSequence(json_file){ - return new JsonSlurper().parseText(json_file.text) - ?.get('adapter_cutting') - ?.get('read1_adapter_sequence') -} - -workflow FASTQC_FASTP { - take: - reads // channel: [ val(meta), [ reads ] ] - adapter_list // channel: [ path/to/adapters.fa ] - save_trimmed_fail // value: boolean - save_merged // value: boolean - - - main: - - ch_versions = Channel.empty() - fastqc_raw_html = Channel.empty() - fastqc_raw_zip = Channel.empty() - adapterseq = reads.map { meta, _ -> [meta, null] } - if (!params.skip_fastqc) { - FASTQC_RAW ( - reads - ) - fastqc_raw_html = FASTQC_RAW.out.html - fastqc_raw_zip = FASTQC_RAW.out.zip - ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) - } - - trim_reads = reads - trim_json = Channel.empty() - trim_html = Channel.empty() - trim_log = Channel.empty() - trim_reads_fail = Channel.empty() - trim_reads_merged = Channel.empty() - fastqc_trim_html = Channel.empty() - fastqc_trim_zip = Channel.empty() - if (!params.skip_fastp) { - FASTP ( - reads, - adapter_list, - save_trimmed_fail, - save_merged - ) - trim_reads = FASTP.out.reads - trim_json = FASTP.out.json - trim_html = FASTP.out.html - trim_log = FASTP.out.log - trim_reads_fail = FASTP.out.reads_fail - trim_reads_merged = FASTP.out.reads_merged - ch_versions = ch_versions.mix(FASTP.out.versions.first()) - - // - // Filter empty FastQ files after adapter trimming so FastQC doesn't fail - // - trim_reads - .join(trim_json) - .map { - meta, reads, json -> - if (getFastpReadsAfterFiltering(json) > 0) { - [ meta, reads ] - } - } - .set { trim_reads } - - trim_json - .map { meta, json -> [meta, getFastpAdapterSequence(json)] } - .set { adapterseq } - - if (!params.skip_fastqc) { - FASTQC_TRIM ( - trim_reads - ) - fastqc_trim_html = FASTQC_TRIM.out.html - fastqc_trim_zip = FASTQC_TRIM.out.zip - ch_versions = ch_versions.mix(FASTQC_TRIM.out.versions.first()) - } - } - - emit: - reads = trim_reads // channel: [ val(meta), [ reads ] ] - trim_json // channel: [ val(meta), [ json ] ] - trim_html // channel: [ val(meta), [ html ] ] - trim_log // channel: [ val(meta), [ log ] ] - trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ] - trim_reads_merged // channel: [ val(meta), [ fastq.gz ] ] - adapterseq // channel: [ val(meta), [ adapterseq ] ] - - fastqc_raw_html // channel: [ val(meta), [ html ] ] - fastqc_raw_zip // channel: [ val(meta), [ zip ] ] - fastqc_trim_html // channel: [ val(meta), [ html ] ] - fastqc_trim_zip // channel: [ val(meta), [ zip ] ] - - versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] -} diff --git a/subworkflows/local/genome_quant.nf b/subworkflows/local/genome_quant.nf index 967b2757..0f97f16b 100644 --- a/subworkflows/local/genome_quant.nf +++ b/subworkflows/local/genome_quant.nf @@ -8,30 +8,19 @@ include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie workflow GENOME_QUANT { take: - fasta - index + bowtie_index + fasta_formatted // fasta as generated by bowtie index step reads // channel: [ val(meta), [ reads ] ] main: ch_versions = Channel.empty() - if (!index){ - INDEX_GENOME ( [ [:], fasta ] ) - bowtie_index = INDEX_GENOME.out.index - fasta_formatted = INDEX_GENOME.out.fasta - ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) - } else { - bowtie_index = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${index}" } - fasta_formatted = fasta - } + BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() ) + ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions) - if (bowtie_index){ - BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() ) - ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions) - - BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() ) - ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) - } + ch_fasta_formatted_for_sort = fasta_formatted .map { file -> tuple(file.baseName, file) } + BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, ch_fasta_formatted_for_sort ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) emit: fasta = fasta_formatted diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf index dfa16ab4..fc6942fe 100644 --- a/subworkflows/local/mirna_quant.nf +++ b/subworkflows/local/mirna_quant.nf @@ -39,18 +39,9 @@ workflow MIRNA_QUANT { FORMAT_MATURE ( mirna_parsed ) ch_versions = ch_versions.mix(FORMAT_MATURE.out.versions) - PARSE_HAIRPIN ( hairpin ).parsed_fasta.set { hairpin_parsed } - ch_versions = ch_versions.mix(PARSE_HAIRPIN.out.versions) - - FORMAT_HAIRPIN ( hairpin_parsed ) - ch_versions = ch_versions.mix(FORMAT_HAIRPIN.out.versions) - INDEX_MATURE ( FORMAT_MATURE.out.formatted_fasta ).index.set { mature_bowtie } ch_versions = ch_versions.mix(INDEX_MATURE.out.versions) - INDEX_HAIRPIN ( FORMAT_HAIRPIN.out.formatted_fasta ).index.set { hairpin_bowtie } - ch_versions = ch_versions.mix(INDEX_HAIRPIN.out.versions) - reads .map { add_suffix(it, "mature") } .dump (tag:'msux') @@ -64,15 +55,27 @@ workflow MIRNA_QUANT { .dump (tag:'hsux') .set { reads_hairpin } - BOWTIE_MAP_HAIRPIN ( reads_hairpin, hairpin_bowtie.collect() ) - ch_versions = ch_versions.mix(BOWTIE_MAP_HAIRPIN.out.versions) - BAM_STATS_MATURE ( BOWTIE_MAP_MATURE.out.bam, FORMAT_MATURE.out.formatted_fasta ) ch_versions = ch_versions.mix(BAM_STATS_MATURE.out.versions) + + + PARSE_HAIRPIN ( hairpin ).parsed_fasta.set { hairpin_parsed } + ch_versions = ch_versions.mix(PARSE_HAIRPIN.out.versions) + + FORMAT_HAIRPIN ( hairpin_parsed ) + ch_versions = ch_versions.mix(FORMAT_HAIRPIN.out.versions) + + INDEX_HAIRPIN ( FORMAT_HAIRPIN.out.formatted_fasta ).index.set { hairpin_bowtie } + ch_versions = ch_versions.mix(INDEX_HAIRPIN.out.versions) + + BOWTIE_MAP_HAIRPIN ( reads_hairpin, hairpin_bowtie.collect() ) + ch_versions = ch_versions.mix(BOWTIE_MAP_HAIRPIN.out.versions) + BAM_STATS_HAIRPIN ( BOWTIE_MAP_HAIRPIN.out.bam, FORMAT_HAIRPIN.out.formatted_fasta ) ch_versions = ch_versions.mix(BAM_STATS_HAIRPIN.out.versions) + BAM_STATS_MATURE.out.idxstats.collect{it[1]} .mix(BAM_STATS_HAIRPIN.out.idxstats.collect{it[1]}) .dump(tag:'edger') diff --git a/subworkflows/local/umi_dedup.nf b/subworkflows/local/umi_dedup.nf new file mode 100644 index 00000000..9f65fa8e --- /dev/null +++ b/subworkflows/local/umi_dedup.nf @@ -0,0 +1,41 @@ +// +// Deduplicate the UMI reads by mapping them to the complete genome. +// + +include { INDEX_GENOME } from '../../modules/local/bowtie_genome' +include { BOWTIE_MAP_SEQ as UMI_MAP_GENOME } from '../../modules/local/bowtie_map_mirna' +include { BAM_SORT_STATS_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_stats_samtools' +include { UMICOLLAPSE } from '../../modules/nf-core/umicollapse/main' +include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/samtools/bam2fq/main' +include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' + + +workflow DEDUPLICATE_UMIS { + take: + bt_index + reads // channel: [ val(meta), [ reads ] ] + + main: + + ch_versions = Channel.empty() + + UMI_MAP_GENOME ( reads, bt_index.collect() ) + ch_versions = ch_versions.mix(UMI_MAP_GENOME.out.versions) + + BAM_SORT_STATS_SAMTOOLS ( UMI_MAP_GENOME.out.bam, Channel.empty() ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + ch_umi_dedup = BAM_SORT_STATS_SAMTOOLS.out.bam.join(BAM_SORT_STATS_SAMTOOLS.out.bai) + UMICOLLAPSE(ch_umi_dedup) + ch_versions = ch_versions.mix(UMICOLLAPSE.out.versions) + + SAMTOOLS_BAM2FQ ( UMICOLLAPSE.out.bam, false ) + ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions) + + ch_dedup_reads = SAMTOOLS_BAM2FQ.out.reads + + emit: + reads = ch_dedup_reads + indices = bt_index + versions = ch_versions +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test index a8a13f2a..59b749d8 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test @@ -5,7 +5,9 @@ nextflow_workflow { workflow "BAM_SORT_STATS_SAMTOOLS" tag "subworkflows" tag "subworkflows_nfcore" + tag "subworkflows/bam_sort_stats_samtools" tag "bam_sort_stats_samtools" + tag "subworkflows/bam_stats_samtools" tag "bam_stats_samtools" tag "samtools" tag "samtools/index" @@ -35,7 +37,11 @@ nextflow_workflow { then { assertAll( { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_single_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_single_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_single_end_idxstats") } ) } } @@ -61,7 +67,11 @@ nextflow_workflow { then { assertAll( { assert workflow.success}, - { assert snapshot(workflow.out).match()} + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_paired_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_paired_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_paired_end_idxstats") } ) } } diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap index 50ffde60..77afbf17 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap @@ -1,236 +1,86 @@ { - "test_bam_sort_stats_samtools_single_end": { + "test_bam_sort_stats_samtools_paired_end_flagstats": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,2cf8fe8dbba3da7eb4fb251c79f428dc" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.bai:md5,002488588110dcee464e65f68c4726e8" - ] - ], - "2": [ - - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test.stats:md5,796f45f791f06291b76329528fae0a54" - ] - ], - "4": [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" - ] - ], - "5": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" - ] - ], - "6": [ - "versions.yml:md5,176f12ceae81f76341e481988c799c15", - "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", - "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", - "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", - "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" - ], - "bai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.bai:md5,002488588110dcee464e65f68c4726e8" - ] - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,2cf8fe8dbba3da7eb4fb251c79f428dc" - ] - ], - "csi": [ - - ], - "flagstat": [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" - ] - ], - "idxstats": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" - ] - ], - "stats": [ - [ - { - "id": "test", - "single_end": false - }, - "test.stats:md5,796f45f791f06291b76329528fae0a54" - ] - ], - "versions": [ - "versions.yml:md5,176f12ceae81f76341e481988c799c15", - "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", - "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", - "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", - "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" ] - } + ] ], - "timestamp": "2023-10-18T09:34:31.989804787" + "timestamp": "2023-10-22T20:25:03.687121177" }, - "test_bam_sort_stats_samtools_paired_end": { + "test_bam_sort_stats_samtools_paired_end_idxstats": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,81adec7882577c0ad17962599acf7745" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.bai:md5,9e6427a796975290b1110c9d542ac79d" - ] - ], - "2": [ - - ], - "3": [ - [ - { - "id": "test", - "single_end": false - }, - "test.stats:md5,f3f0e5aad236aae678ac5361b529a664" - ] - ], - "4": [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" - ] - ], - "5": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" - ] - ], - "6": [ - "versions.yml:md5,176f12ceae81f76341e481988c799c15", - "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", - "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", - "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", - "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" - ], - "bai": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam.bai:md5,9e6427a796975290b1110c9d542ac79d" - ] - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,81adec7882577c0ad17962599acf7745" - ] - ], - "csi": [ - - ], - "flagstat": [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" - ] - ], - "idxstats": [ - [ - { - "id": "test", - "single_end": false - }, - "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" - ] - ], - "stats": [ - [ - { - "id": "test", - "single_end": false - }, - "test.stats:md5,f3f0e5aad236aae678ac5361b529a664" - ] - ], - "versions": [ - "versions.yml:md5,176f12ceae81f76341e481988c799c15", - "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", - "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", - "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", - "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" ] - } + ] ], - "timestamp": "2023-10-18T09:34:57.682759147" + "timestamp": "2023-10-22T20:25:03.709648916" + }, + "test_bam_sort_stats_samtools_single_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,f281507081517414eb1a04b2d9c855b2" + ] + ] + ], + "timestamp": "2023-12-04T11:06:50.951881479" + }, + "test_bam_sort_stats_samtools_paired_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,e32e7e49dce1fbe327a89e0fb7bc01b1" + ] + ] + ], + "timestamp": "2023-12-04T11:06:59.253905951" + }, + "test_bam_sort_stats_samtools_single_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ] + ], + "timestamp": "2023-10-22T20:25:58.451364604" + }, + "test_bam_sort_stats_samtools_single_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ] + ], + "timestamp": "2023-10-22T20:25:58.416859285" } } \ No newline at end of file diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml index a8274109..30b69d6a 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml @@ -1,2 +1,2 @@ -bam_sort_stats_samtools: +subworkflows/bam_sort_stats_samtools: - subworkflows/nf-core/bam_sort_stats_samtools/** diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test new file mode 100644 index 00000000..97210890 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test @@ -0,0 +1,102 @@ +nextflow_workflow { + + name "Test Workflow BAM_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/stats" + + test("test_bam_stats_samtools_single_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true) + ] + input[1] = [ [ id:'genome' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_single_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_single_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_single_end_idxstats") } + ) + } + } + + test("test_bam_stats_samtools_paired_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + input[1] = [ [ id:'genome' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_idxstats") } + ) + } + } + + test("test_bam_stats_samtools_paired_end_cram") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + input[1] = [ [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_cram_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_cram_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_cram_idxstats") } + ) + } + } + +} diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..d3af1376 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,128 @@ +{ + "test_bam_stats_samtools_paired_end_cram_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,a53f3d26e2e9851f7d528442bbfe9781" + ] + ] + ], + "timestamp": "2023-11-06T09:31:26.194017574" + }, + "test_bam_stats_samtools_paired_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,49e2b43344ff92bc4c02463a58f7ba4a" + ] + ] + ], + "timestamp": "2023-12-04T11:07:13.965061942" + }, + "test_bam_stats_samtools_paired_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "timestamp": "2023-11-06T09:31:11.668517251" + }, + "test_bam_stats_samtools_single_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ] + ], + "timestamp": "2023-11-06T09:26:10.340046381" + }, + "test_bam_stats_samtools_paired_end_cram_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15" + ] + ] + ], + "timestamp": "2023-11-06T09:31:26.207052003" + }, + "test_bam_stats_samtools_single_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,5a6667d97806e5002731e9cf23674fad" + ] + ] + ], + "timestamp": "2023-12-04T11:07:06.676820877" + }, + "test_bam_stats_samtools_paired_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "timestamp": "2023-11-06T09:31:11.68246157" + }, + "test_bam_stats_samtools_single_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ] + ], + "timestamp": "2023-11-06T09:26:10.349439801" + }, + "test_bam_stats_samtools_paired_end_cram_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,2cf2fe93596ee3d74f946097b204a629" + ] + ] + ], + "timestamp": "2023-12-04T11:07:22.30295557" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml new file mode 100644 index 00000000..ec2f2d68 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_stats_samtools: + - subworkflows/nf-core/bam_stats_samtools/** diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf new file mode 100644 index 00000000..764ce013 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf @@ -0,0 +1,157 @@ +// +// Read QC, UMI extraction and trimming +// + +include { FASTQC as FASTQC_RAW } from '../../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIM } from '../../../modules/nf-core/fastqc/main' +include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main' +include { FASTP } from '../../../modules/nf-core/fastp/main' + +// +// Function that parses fastp json output file to get total number of reads after trimming +// +import groovy.json.JsonSlurper + +def getFastpReadsAfterFiltering(json_file) { + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') + return json['after_filtering']['total_reads'].toLong() +} + +def getFastpAdapterSequence(json_file){ + def Map json = (Map) new JsonSlurper().parseText(json_file.text) + try{ + adapter = json['adapter_cutting']['read1_adapter_sequence'] + } catch(Exception ex){ + adapter = "" + } + return adapter +} + +workflow FASTQ_FASTQC_UMITOOLS_FASTP { + take: + reads // channel: [ val(meta), [ reads ] ] + skip_fastqc // boolean: true/false + with_umi // boolean: true/false + skip_umi_extract // boolean: true/false + umi_discard_read // integer: 0, 1 or 2 + skip_trimming // boolean: true/false + adapter_fasta // file: adapter.fasta + save_trimmed_fail // boolean: true/false + save_merged // boolean: true/false + min_trimmed_reads // integer: > 0 + + main: + ch_versions = Channel.empty() + fastqc_raw_html = Channel.empty() + fastqc_raw_zip = Channel.empty() + if (!skip_fastqc) { + FASTQC_RAW ( + reads + ) + fastqc_raw_html = FASTQC_RAW.out.html + fastqc_raw_zip = FASTQC_RAW.out.zip + ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) + } + + umi_reads = reads + umi_log = Channel.empty() + if (with_umi && !skip_umi_extract) { + UMITOOLS_EXTRACT ( + reads + ) + umi_reads = UMITOOLS_EXTRACT.out.reads + umi_log = UMITOOLS_EXTRACT.out.log + ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) + + // Discard R1 / R2 if required + if (umi_discard_read in [1,2]) { + UMITOOLS_EXTRACT + .out + .reads + .map { + meta, reads -> + meta.single_end ? [ meta, reads ] : [ meta + [single_end: true], reads[umi_discard_read % 2] ] + } + .set { umi_reads } + } + } + + trim_reads = umi_reads + trim_json = Channel.empty() + trim_html = Channel.empty() + trim_log = Channel.empty() + trim_reads_fail = Channel.empty() + trim_reads_merged = Channel.empty() + fastqc_trim_html = Channel.empty() + fastqc_trim_zip = Channel.empty() + trim_read_count = Channel.empty() + adapter_seq = Channel.empty() + + if (!skip_trimming) { + FASTP ( + umi_reads, + adapter_fasta, + save_trimmed_fail, + save_merged + ) + trim_json = FASTP.out.json + trim_html = FASTP.out.html + trim_log = FASTP.out.log + trim_reads_fail = FASTP.out.reads_fail + trim_reads_merged = FASTP.out.reads_merged + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + + // + // Filter FastQ files based on minimum trimmed read count after adapter trimming + // + FASTP + .out + .reads + .join(trim_json) + .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json) ] } + .set { ch_num_trimmed_reads } + + ch_num_trimmed_reads + .filter { meta, reads, num_reads -> num_reads >= min_trimmed_reads.toLong() } + .map { meta, reads, num_reads -> [ meta, reads ] } + .set { trim_reads } + + ch_num_trimmed_reads + .map { meta, reads, num_reads -> [ meta, num_reads ] } + .set { trim_read_count } + + trim_json + .map { meta, json -> [meta, getFastpAdapterSequence(json)] } + .set { adapter_seq } + + if (!skip_fastqc) { + FASTQC_TRIM ( + trim_reads + ) + fastqc_trim_html = FASTQC_TRIM.out.html + fastqc_trim_zip = FASTQC_TRIM.out.zip + ch_versions = ch_versions.mix(FASTQC_TRIM.out.versions.first()) + } + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + + fastqc_raw_html // channel: [ val(meta), [ html ] ] + fastqc_raw_zip // channel: [ val(meta), [ zip ] ] + + umi_log // channel: [ val(meta), [ log ] ] + adapter_seq // channel: [ val(meta), [ adapter_seq] ] + + trim_json // channel: [ val(meta), [ json ] ] + trim_html // channel: [ val(meta), [ html ] ] + trim_log // channel: [ val(meta), [ log ] ] + trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ] + trim_reads_merged // channel: [ val(meta), [ fastq.gz ] ] + trim_read_count // channel: [ val(meta), val(count) ] + + fastqc_trim_html // channel: [ val(meta), [ html ] ] + fastqc_trim_zip // channel: [ val(meta), [ zip ] ] + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml new file mode 100644 index 00000000..9308fe9b --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml @@ -0,0 +1,129 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +# yaml-language-server: $schema=yaml-schema.json +name: "fastq_fastqc_umitools_fastp" +description: Read QC, UMI extraction and trimming +keywords: + - fastq + - fastqc + - qc + - UMI + - trimming + - fastp +components: + - fastqc + - umitools/extract + - fastp +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - skip_fastqc: + type: boolean + description: | + Skip fastqc process + - with_umi: + type: boolean + description: | + With or without umi detection + - skip_umi_extract: + type: boolean + description: | + With or without umi extrection + - umi_discard_read: + type: integer + description: | + Discard R1 / R2 if required + - skip_trimming: + type: boolean + description: | + Allows to skip FastP execution + - adapter_fasta: + type: file + description: | + Fasta file of adapter sequences + - save_trimmed_fail: + type: boolean + description: | + Save trimmed fastqs of failed samples + - save_merged: + type: boolean + description: | + Save merged fastqs + - min_trimmed_reads: + type: integer + description: | + Inputs with fewer than this reads will be filtered out of the "reads" output channel +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: > + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + pattern: "*.{fastq.gz}" + - fastqc_html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - fastqc_zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - trim_json: + type: file + description: FastP Trimming report + pattern: "*.{fastp.json}" + - trim_html: + type: file + description: FastP Trimming report + pattern: "*.{fastp.html}" + - log: + type: file + description: Logfile FastP + pattern: "*.{fastp.log}" + - trim_reads_fail: + type: file + description: Trimmed fastq files failing QC + pattern: "*.{fastq.gz}" + - trim_reads_merged: + type: file + description: Trimmed and merged fastq files + pattern: "*.{fastq.gz}" + - trim_read_count: + type: integer + description: Number of reads after trimming + - fastqc_trim_html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - fastqc_trim_zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - adapter_seq: + type: string + description: | + Adapter Sequence found in read1 + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@robsyme" +maintainers: + - "@robsyme" diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test new file mode 100644 index 00000000..91dec8c5 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_workflow { + + name "Test Workflow FASTQ_FASTQC_UMITOOLS_FASTP" + script "../main.nf" + workflow "FASTQ_FASTQC_UMITOOLS_FASTP" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_fastqc_umitools_fastp" + tag "fastq_fastqc_umitools_fastp" + tag "fastqc" + tag "umitools/extract" + tag "fastp" + + + test("sarscov2 paired-end [fastq]") { + + when { + workflow { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = false // skip_fastqc + input[2] = false // with_umi + input[3] = false // skip_umi_extract + input[4] = 1 // umi_discard_read + input[5] = false // skip_trimming + input[6] = [] // adapter_fasta + input[7] = false // save_trimmed_fail + input[8] = false // save_merged + input[9] = 1 // min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.reads).match("reads") }, + { assert snapshot(workflow.out.umi_log).match("umi_log") }, + { assert snapshot(workflow.out.trim_json).match("trim_json") }, + { assert snapshot(workflow.out.trim_reads_fail).match("trim_reads_fail") }, + { assert snapshot(workflow.out.trim_reads_merged).match("trim_reads_merged") }, + { assert snapshot(workflow.out.adapter_seq).match("adapter_seq") }, + { assert snapshot(workflow.out.trim_read_count).match("trim_read_count") }, + { assert snapshot(workflow.out.versions).match("versions") }, + + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, + { assert workflow.out.fastqc_trim_html }, + { assert workflow.out.fastqc_trim_zip } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..973746a3 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap @@ -0,0 +1,95 @@ +{ + "trim_reads_merged": { + "content": [ + [ + + ] + ], + "timestamp": "2024-01-12T08:38:50.041635573" + }, + "trim_reads_fail": { + "content": [ + [ + + ] + ], + "timestamp": "2024-01-12T08:38:50.033284693" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + ], + "timestamp": "2024-01-12T08:38:50.121510557" + }, + "trim_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + ] + ] + ], + "timestamp": "2024-01-12T08:38:50.024410724" + }, + "adapter_seq": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "unspecified" + ] + ] + ], + "timestamp": "2024-01-12T08:38:50.08674429" + }, + "reads": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] + ] + ] + ], + "timestamp": "2024-01-12T08:38:49.994419936" + }, + "umi_log": { + "content": [ + [ + + ] + ], + "timestamp": "2024-01-12T08:38:50.017720214" + }, + "trim_read_count": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 198 + ] + ] + ], + "timestamp": "2024-01-12T08:38:50.102326089" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml new file mode 100644 index 00000000..84a4b567 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fastq_fastqc_umitools_fastp: + - subworkflows/nf-core/fastq_fastqc_umitools_fastp/** diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index b6c2d3a2..58a4e8ad 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -64,13 +64,15 @@ if (!params.mirgenedb) { if (params.mirgenedb_gff) { mirna_gtf = file(params.mirgenedb_gff, checkIfExists: true) } else { exit 1, "MirGeneDB gff file not found: ${params.mirgenedb_gff}"} } -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { FASTQC_FASTP } from '../subworkflows/local/fastqc_fastp' -include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter' -include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' -include { GENOME_QUANT } from '../subworkflows/local/genome_quant' -include { MIRTRACE } from '../subworkflows/local/mirtrace' -include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastq_fastqc_umitools_fastp' +include { DEDUPLICATE_UMIS } from '../subworkflows/local/umi_dedup' +include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter' +include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' +include { GENOME_QUANT } from '../subworkflows/local/genome_quant' +include { MIRTRACE } from '../subworkflows/local/mirtrace' +include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' +include { INDEX_GENOME } from '../modules/local/bowtie_genome' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -131,23 +133,64 @@ workflow SMRNASEQ { ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) // - // SUBWORKFLOW: Read QC and trim adapters + // SUBWORKFLOW: Read QC, extract UMI and trim adapters & dedup UMIs if necessary / desired by the user // - FASTQC_FASTP ( + FASTQ_FASTQC_UMITOOLS_FASTP ( ch_cat_fastq, - ch_fastp_adapters, - false, - false + params.skip_fastqc, + params.with_umi, + params.skip_umi_extract, + params.umi_discard_read, + params.skip_fastp, + params.fastp_known_mirna_adapters, + params.save_trimmed_fail, + params.save_merged, + params.min_trimmed_reads ) - ch_versions = ch_versions.mix(FASTQC_FASTP.out.versions) + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) + + if(params.with_umi && !params.fasta) { + error "Specifying a genome fasta is required for UMI deduplication" + } + ch_fasta = params.fasta ? file(params.fasta): [] + ch_reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads + + // even if bowtie index is specified, there still needs to be a fasta. + // without fasta, no genome analysis. + if(params.fasta) { + //Prepare bowtie index, unless specified + //This needs to be done here as the index is used by both UMI deduplication and GENOME_QUANT + if(params.bowtie_index) { + ch_bowtie_index = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { error "Bowtie1 index directory not found: ${index}" } + } else { + INDEX_GENOME ( [ [:], ch_fasta ] ) + ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) + ch_bowtie_index = INDEX_GENOME.out.index + // set to reformatted fasta as generated by `bowtie index` + ch_fasta = INDEX_GENOME.out.fasta + } + } + + // + // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome + // + if (params.with_umi){ + DEDUPLICATE_UMIS ( + ch_bowtie_index, + ch_reads_for_mirna, + ) + ch_reads_for_mirna = DEDUPLICATE_UMIS.out.reads + ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions) + } + // // SUBWORKFLOW: mirtrace QC // - FASTQC_FASTP.out.adapterseq - .join( FASTQC_FASTP.out.reads ) - .map { meta, adapterseq, reads -> [adapterseq, meta.id, reads] } + FASTQ_FASTQC_UMITOOLS_FASTP.out.adapter_seq + .join( ch_reads_for_mirna ) + .map { meta, adapter_seq, reads -> [adapter_seq, meta.id, reads] } .groupTuple() .set { ch_mirtrace_inputs } @@ -159,7 +202,6 @@ workflow SMRNASEQ { // SUBWORKFLOW: remove contaminants from reads // contamination_stats = Channel.empty() - mirna_reads = FASTQC_FASTP.out.reads if (params.filter_contamination){ CONTAMINANT_FILTER ( reference_hairpin, @@ -169,12 +211,12 @@ workflow SMRNASEQ { params.ncrna, params.pirna, params.other_contamination, - FASTQC_FASTP.out.reads + ch_reads_for_mirna ) contamination_stats = CONTAMINANT_FILTER.out.filter_stats ch_versions = ch_versions.mix(CONTAMINANT_FILTER.out.versions) - mirna_reads = CONTAMINANT_FILTER.out.filtered_reads + ch_reads_for_mirna = CONTAMINANT_FILTER.out.filtered_reads } @@ -182,7 +224,7 @@ workflow SMRNASEQ { [ [:], reference_mature], [ [:], reference_hairpin], mirna_gtf, - mirna_reads + ch_reads_for_mirna ) ch_versions = ch_versions.mix(MIRNA_QUANT.out.versions.ifEmpty(null)) @@ -191,17 +233,20 @@ workflow SMRNASEQ { // genome_stats = Channel.empty() if (params.fasta){ - GENOME_QUANT ( file(params.fasta), params.bowtie_index, MIRNA_QUANT.out.unmapped ) + GENOME_QUANT ( ch_bowtie_index, ch_fasta, MIRNA_QUANT.out.unmapped ) genome_stats = GENOME_QUANT.out.stats ch_versions = ch_versions.mix(GENOME_QUANT.out.versions) + hairpin_clean = MIRNA_QUANT.out.fasta_hairpin.map { it -> it[1] } + mature_clean = MIRNA_QUANT.out.fasta_mature.map { it -> it[1] } + if (!params.skip_mirdeep) { MIRDEEP2 ( - FASTQC_FASTP.out.reads, + ch_reads_for_mirna, GENOME_QUANT.out.fasta, GENOME_QUANT.out.index.collect(), - MIRNA_QUANT.out.fasta_hairpin, - MIRNA_QUANT.out.fasta_mature + hairpin_clean, + mature_clean ) ch_versions = ch_versions.mix(MIRDEEP2.out.versions) } @@ -226,8 +271,8 @@ workflow SMRNASEQ { ch_multiqc_files = Channel.empty() ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.trim_json.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(contamination_stats.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(genome_stats.collect({it[1]}).ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mature_stats.collect({it[1]}).ifEmpty([]))
Process Name \\", + " \\ Software Version
CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
yaml5.4.1
TOOL1tool10.11.9
TOOL2tool21.9
WorkflowNextflow