diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..4ecfbfe3 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,28 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + "runArgs": ["--privileged"], + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", + "python.formatting.yapfPath": "/opt/conda/bin/yapf", + "python.linting.flake8Path": "/opt/conda/bin/flake8", + "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", + "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", + "python.linting.pylintPath": "/opt/conda/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.gitattributes b/.gitattributes index 050bb120..7a2dabc2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 4f0eaee3..4d09370b 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,7 +9,9 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/smrnaseq then the best place to ask is on the nf-core Slack [#smrnaseq](https://nfcore.slack.com/channels/smrnaseq) channel ([join our Slack here](https://nf-co.re/join/slack)). +:::info +If you need help using or modifying nf-core/smrnaseq then the best place to ask is on the nf-core Slack [#smrnaseq](https://nfcore.slack.com/channels/smrnaseq) channel ([join our Slack here](https://nf-co.re/join/slack)). +::: ## Contribution workflow @@ -101,3 +103,18 @@ If you are using a new feature from core Nextflow, you may bump the minimum requ ### Images and figures For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/smrnaseq/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index af04ee73..335a06b1 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,9 +42,9 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 21.10.3)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ * Version of nf-core/smrnaseq _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d103b361..e278390b 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,8 +15,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/smrn - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/smrnaseq/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/smrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/smrnaseq/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/smrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index c3bb0832..2ac5826b 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,14 +14,23 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/smrnaseq/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/smrnaseq/results-${{ github.sha }}" } - profiles: test_full,aws_tower + profiles: test_full + + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index ea00f80e..84d22f1c 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,14 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/smrnaseq/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/smrnaseq/results-test-${{ github.sha }}" } - profiles: test,aws_tower + profiles: test + + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index afde4c08..96080391 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,7 +13,7 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/smrnaseq' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/smrnaseq ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/smrnaseq ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 31a2a000..45ad5219 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,10 @@ on: env: NXF_ANSI_LOG: false - CAPSULE_LOG: none + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true jobs: test: @@ -20,29 +23,21 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - # Nextflow versions - include: - # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" - NXF_EDGE: "" - # Test latest edge release of Nextflow - - NXF_VER: "" - NXF_EDGE: "1" + NXF_VER: + - "23.04.0" + - "latest-everything" + profile: + - "test" + - "test_no_genome" steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow - env: - NXF_VER: ${{ matrix.NXF_VER }} - # Uncomment only if the edge release is more recent than the latest stable release - # See https://github.com/nextflow-io/nextflow/issues/2467 - # NXF_EDGE: ${{ matrix.NXF_EDGE }} - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" - name: Run pipeline with test data run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results - nextflow run ${GITHUB_WORKSPACE} -profile test_no_genome,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker --outdir ./results diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..694e90ec --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v7 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 8f76fa7f..ae6a0d7b 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php @@ -34,9 +34,9 @@ jobs: id: prettier_status run: | if prettier --check ${GITHUB_WORKSPACE}; then - echo "::set-output name=result::pass" + echo "result=pass" >> $GITHUB_OUTPUT else - echo "::set-output name=result::fail" + echo "result=fail" >> $GITHUB_OUTPUT fi - name: Run 'prettier --write' diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8a092d87..b8bdd214 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -4,6 +4,8 @@ name: nf-core linting # that the code meets the nf-core guidelines. on: push: + branches: + - dev pull_request: release: types: [published] @@ -12,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -25,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @@ -35,28 +37,55 @@ jobs: - name: Run Prettier --check run: prettier --check ${GITHUB_WORKSPACE} + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## Python linting (`black`) is failing + + To keep the code consistent with lots of contributors, we run automated code consistency checks. + To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v4 with: - python-version: "3.6" + python-version: "3.11" architecture: "x64" - name: Install dependencies run: | python -m pip install --upgrade pip pip install nf-core + - name: Run nf-core lint env: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} @@ -70,7 +99,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 04758f61..0bbcd30f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -18,7 +18,7 @@ jobs: - name: Get PR number id: pr_number - run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment uses: marocchino/sticky-pull-request-comment@v2 diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcments.yml new file mode 100644 index 00000000..6ad33927 --- /dev/null +++ b/.github/workflows/release-announcments.yml @@ -0,0 +1,68 @@ +name: release-announcements +# Automatic release toot and tweet anouncements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@v0.0.2 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..25488dcc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,4 +1,9 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/.nf-core.yml b/.nf-core.yml index 9de90de2..3805dc81 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,8 +1 @@ repository_type: pipeline -lint: - files_unchanged: - - .github/workflows/linting.yml - - .github/workflows/branch.yml - - .github/workflows/ci.yml - - .github/workflows/linting.yml - - .github/workflows/linting_comment.yml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..0c31cdb9 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v2.7.1" + hooks: + - id: prettier diff --git a/.prettierignore b/.prettierignore index d0e7ae58..437d763d 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,6 @@ email_template.html +adaptivecard.json +slackreport.json .nextflow* work/ data/ @@ -7,3 +9,4 @@ results/ testing/ testing* *.pyc +bin/ diff --git a/CHANGELOG.md b/CHANGELOG.md index cc0ba654..fb799c9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,33 +3,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unpublished Version / DEV] - -### Enhancements & fixes - -- Trimmed output was not as documented and not correctly published [#161](https://github.com/nf-core/smrnaseq/issues/161) - -### Other enhancements - -- [#55](https://github.com/nf-core/smrnaseq/issues/12) - Enabled the use of `MirGeneDB` as an alternative database insted of `miRBase` -- [#113](https://github.com/nf-core/smrnaseq/issues/113) - Added a optional contamination filtering step, including MultiQC plot. +## [dev](https://github.com/nf-core/smrnaseq/branch/dev) ### Parameters | Old parameter | New parameter | | ------------- | --------------------------- | -| | `--mirGeneDB` | -| | `--mirGeneDB_species` | -| | `--mirGeneDB_gff` | -| | `--mirGeneDB_mature` | -| | `--mirGeneDB_hairpin` | -| | `--contamination_filter` | -| | `--rrna` | -| | `--trna` | -| | `--cdna` | -| | `--ncrna` | -| | `--pirna` | -| | `--other_contamination` | | | `--with_umi` | | | `--umitools_extract_method` | | | `--umitools_bc_pattern` | @@ -37,6 +16,65 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | `--save_umi_intermeds` | | | `--umi_merge_unmapped` | + +## [v2.2.4](https://github.com/nf-core/smrnaseq/releases/tag/2.2.4) - 2023-11-03 + +- Update template to 2.10 +- [[#289]](https://github.com/nf-core/smrnaseq/issues/289) - Bugfix for issue with mirdeep2 channels () +- [[#288]](https://github.com/nf-core/smrnaseq/issues/288) - Bugfix for issue with handling malformed GFF3 from mirbase +- Updated dependencies, including FASTQC, MultiQC 1.17, fastP and samtools to latest versions + +## [v2.2.3](https://github.com/nf-core/smrnaseq/releases/tag/2.2.3) - 2023-09-06 + +- [[#271]](https://github.com/nf-core/smrnaseq/issues/271) - Bugfix for parsing hairpin and mature fasta files + +## [v2.2.2](https://github.com/nf-core/smrnaseq/releases/tag/2.2.2) - 2023-09-04 + +- [[#253]](https://github.com/nf-core/smrnaseq/pull/253) - Remove globs from process alias when using ECR containers +- [[#237]](https://github.com/nf-core/smrnaseq/issues/237) - Fix illumina protocol clip parameters to default +- Remove public_aws_ecr profile +- [[#269]](https://github.com/nf-core/smrnaseq/pull/269) - Updated miRBase URLs with new location (old ones were broken) + +### Software dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `multiqc` | 1.13 | 1.15 | +| `fastp` | 0.23.2 | 0.23.4 | + +## [v2.2.1](https://github.com/nf-core/smrnaseq/releases/tag/2.2.1) - 2023-05-12 + +### Enhancements & fixes + +- [[#238](https://github.com/nf-core/smrnaseq/issues/238)] - Restored the missing mirtop outputs +- [[#242](https://github.com/nf-core/smrnaseq/issues/242)] - Fixed mirtrace using wrong input fastq files (untrimmed) + +## [v2.2.0](https://github.com/nf-core/smrnaseq/releases/tag/2.2.0) - 2023-04-26 + +- [[#220](https://github.com/nf-core/smrnaseq/issues/220)] - Fixed an issue where miRTrace reports fastq basename instead of sample ID +- [[#208](https://github.com/nf-core/smrnaseq/issues/208)] - Fixed usability of `--skip_qc` parameter +- Updated FASTP module to allow direct addition of adapter_fasta file to it +- [[#205](https://github.com/nf-core/smrnaseq/issues/205)] - Fix mirTrace Report to be a single report again instead of per sample reports +- [[#206](https://github.com/nf-core/smrnaseq/issues/206)] - Use % as separator in sed commands to enable conda working properly on OSX and Linux +- [[#207](https://github.com/nf-core/smrnaseq/issues/224)] - Fix Samplesheet print error +- Group samples by adapter sequence before running mirtrace +- Remove `--skip_qc` parameter + +## [v2.1.0](https://github.com/nf-core/smrnaseq/releases/tag/2.1.0) - 2022-10-20 Maroon Tin Dalmatian + +### Enhancements & fixes + +- [[#12](https://github.com/nf-core/smrnaseq/issues/12)] - Enabled the use of `MirGeneDB` as an alternative database insted of `miRBase` +- [[#113](https://github.com/nf-core/smrnaseq/issues/113)] - Added a optional contamination filtering step, including MultiQC plot +- [[#137](https://github.com/nf-core/smrnaseq/issues/137)] - Fixed issue with mirTop and MultiQC by upgrading to MultiQC V1.13dev +- [[#159](https://github.com/nf-core/smrnaseq/issues/159)] - Index files were not collected when `bowtie_index` was used and thus mapping was failing +- [[#161](https://github.com/nf-core/smrnaseq/issues/161)] - Trimmed output was not as documented and not correctly published +- [[#168](https://github.com/nf-core/smrnaseq/issues/168)] - Removed `mirtrace_protocol` as the parameter was redundant and `params.protocol` is entirely sufficient +- Updated pipeline template to [nf-core/tools 2.6.0](https://github.com/nf-core/tools/releases/tag/2.6.0) +- [[#188](https://github.com/nf-core/smrnaseq/pull/188)] - Dropped TrimGalore in favor of fastp QC and adapter trimming, improved handling of adapters and trimming parameters +- [[#194](https://github.com/nf-core/smrnaseq/issues/194)] - Added default adapters file for FastP improved miRNA adapter trimming + + ## [v2.0.0](https://github.com/nf-core/smrnaseq/releases/tag/2.0.0) - 2022-05-31 Aqua Zinc Chihuahua ### Major enhancements diff --git a/CITATIONS.md b/CITATIONS.md index 11f752b9..4996b65f 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,33 +12,35 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) -* [trimgalore](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. -* [samtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) +- [trimgalore](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) + +- [samtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PMID: 19505943; PMCID: PMC2723002. -* [mirtop](https://pubmed.ncbi.nlm.nih.gov/31504201/) +- [mirtop](https://pubmed.ncbi.nlm.nih.gov/31504201/) > Desvignes T, Loher P, Eilbeck K, Ma J, Urgese G, Fromm B, Sydes J, Aparicio-Puerta E, Barrera V, Espín R, Thibord F, Bofill-De Ros X, Londin E, Telonis AG, Ficarra E, Friedländer MR, Postlethwait JH, Rigoutsos I, Hackenberg M, Vlachos IS, Halushka MK, Pantano L. Unification of miRNA and isomiR research: the mirGFF3 format and the mirtop API. Bioinformatics. 2020 Feb 1;36(3):698-703. doi: 10.1093/bioinformatics/btz675. PMID: 31504201; PMCID: PMC7566869. -* [seqcluster](https://pubmed.ncbi.nlm.nih.gov/21976421/) +- [seqcluster](https://pubmed.ncbi.nlm.nih.gov/21976421/) > Pantano L, Estivill X, Martí E. A non-biased framework for the annotation and classification of the non-miRNA small RNA transcriptome. Bioinformatics. 2011 Nov 15;27(22):3202-3. doi: 10.1093/bioinformatics/btr527. Epub 2011 Oct 5. PMID: 21976421. -* [mirdeep2](https://pubmed.ncbi.nlm.nih.gov/21911355/) +- [mirdeep2](https://pubmed.ncbi.nlm.nih.gov/21911355/) > Friedländer MR, Mackowiak SD, Li N, Chen W, Rajewsky N. miRDeep2 accurately identifies known and hundreds of novel microRNA genes in seven animal clades. Nucleic Acids Res. 2012 Jan;40(1):37-52. doi: 10.1093/nar/gkr688. Epub 2011 Sep 12. PMID: 21911355; PMCID: PMC3245920. -* [mirtrace](https://pubmed.ncbi.nlm.nih.gov/30514392/) +- [mirtrace](https://pubmed.ncbi.nlm.nih.gov/30514392/) > Kang W, Eldfjell Y, Fromm B, Estivill X, Biryukova I, Friedländer MR. miRTrace reveals the organismal origins of microRNA sequencing data. Genome Biol. 2018 Dec 4;19(1):213. doi: 10.1186/s13059-018-1588-9. PMID: 30514392; PMCID: PMC6280396. -* [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. -* [Bioconductor](https://www.bioconductor.org/) and [R](https://cran.r-project.org/) +- [Bioconductor](https://www.bioconductor.org/) and [R](https://cran.r-project.org/) ## Software packaging/containerisation tools @@ -56,5 +58,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052f..c089ec78 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/LICENSE b/LICENSE index ad773211..d6a83450 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) P. Ewels , C. Wang , R. Hammarén , L. Pantano +Copyright (c) P. Ewels, C. Wang, R. Hammarén, L. Pantano, A. Peltzer Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index f6198652..5c5bf3a2 100644 --- a/README.md +++ b/README.md @@ -2,29 +2,29 @@ [![GitHub Actions CI Status](https://github.com/nf-core/smrnaseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/smrnaseq/actions?query=workflow%3A%22nf-core+CI%22) [![GitHub Actions Linting Status](https://github.com/nf-core/smrnaseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/smrnaseq/actions?query=workflow%3A%22nf-core+linting%22) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/smrnaseq/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.4956678) - -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/) -[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/) -[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/smrnaseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3456879-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3456879) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/smrnaseq) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23smrnaseq-4A154B?logo=slack)](https://nfcore.slack.com/channels/smrnaseq) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core) -[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core) - -[![DOI](https://zenodo.org/badge/140590861.svg)](https://zenodo.org/badge/latestdoi/140590861) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23smrnaseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/smrnaseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction -**nf-core/smrnaseq** is a bioinformatics best-practice analysis pipeline for Small RNA-Seq Best Practice Analysis Pipeline. +**nf-core/smrnaseq** is a bioinformatics best-practice analysis pipeline for Small RNA-Seq. The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/smrnaseq/results). +## Online videos + +A short talk about the history, current status and functionality on offer in this pipeline was given by Lorena Pantano (@lpantano) on [9th November 2021](https://youtu.be/4YLQ2VwpCJE) as part of the nf-core/bytesize series. + +You can find numerous talks on the nf-core events page from various topics including writing pipelines/modules in Nextflow DSL2, using nf-core tooling, running nf-core pipelines as well as more generic content like contributing to Github. Please check them out! + ## Pipeline summary 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) @@ -53,38 +53,63 @@ On release, automated continuous integration tests run the pipeline on a full-si 13. miRNA quality control ([`mirtrace`](https://github.com/friedlanderlab/mirtrace)) 14. Present QC for raw read, alignment, and expression results ([`MultiQC`](http://multiqc.info/)) -## Quick Start +## Usage + +:::note +If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how +to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) +with `-profile test` before running the workflow on actual data. +::: -1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) +First, prepare a samplesheet with your input data that looks as follows: -2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. +`samplesheet.csv`: -3. Download the pipeline and test it on a minimal dataset with a single command: +```csv +sample,fastq_1 +Clone1_N1,s3://ngi-igenomes/test-data/smrnaseq/C1-N1-R1_S4_L001_R1_001.fastq.gz +Clone1_N3,s3://ngi-igenomes/test-data/smrnaseq/C1-N3-R1_S6_L001_R1_001.fastq.gz +Clone9_N1,s3://ngi-igenomes/test-data/smrnaseq/C9-N1-R1_S7_L001_R1_001.fastq.gz +Clone9_N2,s3://ngi-igenomes/test-data/smrnaseq/C9-N2-R1_S8_L001_R1_001.fastq.gz +Clone9_N3,s3://ngi-igenomes/test-data/smrnaseq/C9-N3-R1_S9_L001_R1_001.fastq.gz +Control_N1,s3://ngi-igenomes/test-data/smrnaseq/Ctl-N1-R1_S1_L001_R1_001.fastq.gz +Control_N2,s3://ngi-igenomes/test-data/smrnaseq/Ctl-N2-R1_S2_L001_R1_001.fastq.gz +Control_N3,s3://ngi-igenomes/test-data/smrnaseq/Ctl-N3-R1_S3_L001_R1_001.fastq.gz +``` - ```console - nextflow run nf-core/smrnaseq -profile test,YOURPROFILE --outdir - ``` +Each row represents a fastq file (single-end). - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. +Now, you can run the pipeline using: - > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. +```bash +nextflow run nf-core/smrnaseq \ + -profile \ + --input samplesheet.csv \ + --genome 'GRCh37' \ + --mirtrace_species 'hsa' \ + --protocol 'illumina' \ + --outdir +``` -4. Start running your own analysis! +:::warning +Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those +provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). +::: - ```console - nextflow run nf-core/smrnaseq --input samplesheet.csv --outdir --genome GRCh37 -profile - ``` +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/smrnaseq/usage) and the [parameter documentation](https://nf-co.re/smrnaseq/parameters). -## Documentation +## Pipeline output -The nf-core/smrnaseq pipeline comes with documentation about the pipeline [usage](https://nf-co.re/smrnaseq/usage), [parameters](https://nf-co.re/smrnaseq/parameters) and [output](https://nf-co.re/smrnaseq/output). +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/smrnaseq/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/smrnaseq/output). ## Credits -nf-core/smrnaseq was originally written for use at the [National Genomics Infrastructure](https://portal.scilifelab.se/genomics/) at [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden, by Phil Ewels ([@ewels](https://github.com/ewels)), Chuan Wang ([@chuan-wang](https://github.com/chuan-wang)) and Rickard Hammarén ([@Hammarn](https://github.com/hammarn)). +nf-core/smrnaseq was originally written by P. Ewels, C. Wang, R. Hammarén, L. Pantano, A. Peltzer. + +We thank the following people for their extensive assistance in the development of this pipeline: Lorena Pantano ([@lpantano](https://github.com/lpantano)) from MIT updated the pipeline to Nextflow DSL2. diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..0b804f3d --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/smrnaseq v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.txt b/assets/email_template.txt index 6e8b6393..654e78db 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -6,7 +6,6 @@ `._,._,' nf-core/smrnaseq v${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ diff --git a/assets/known_adapters.fa b/assets/known_adapters.fa new file mode 100644 index 00000000..6b54be80 --- /dev/null +++ b/assets/known_adapters.fa @@ -0,0 +1,6 @@ +> QIAseq miRNA adapter +AACTGTAGGCACCATCAAT +> Illumina miRNA adapter +TGGAATTCTCGGGTGCCAAGG +> Nextflex miRNA adapter +TGGAATTCTCGGGTGCCAAGG diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 00000000..a88003eb --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,28 @@ +id: "nf-core-smrnaseq-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/smrnaseq Methods Description" +section_href: "https://github.com/nf-core/smrnaseq" +plot_type: "html" + +data: | +

Methods

+

Data was processed using nf-core/smrnaseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

${tool_citations}

+

References

+
    +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography} +
+
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index f7715e34..74c7de72 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,11 +1,13 @@ report_comment: > - This report has been generated by the nf-core/smrnaseq + This report has been generated by the nf-core/smrnaseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: - software_versions: + "nf-core-smrnaseq-methods-description": order: -1000 - "nf-core-smrnaseq-summary": + software_versions: order: -1001 + "nf-core-smrnaseq-summary": + order: -1002 export_plots: true diff --git a/assets/nf-core-smrnaseq_logo_light.png b/assets/nf-core-smrnaseq_logo_light.png index 2ecc50a7..262fa1b0 100644 Binary files a/assets/nf-core-smrnaseq_logo_light.png and b/assets/nf-core-smrnaseq_logo_light.png differ diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 00000000..214c7fa9 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/smrnaseq v${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 00a4c93a..94077cd4 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -50,60 +50,49 @@ def check_samplesheet(file_in, file_out): sample_mapping_dict = {} with open(file_in, "r") as fin: - ## Check header MIN_COLS = 2 HEADER = ["sample", "fastq_1"] header = [x.strip('"') for x in fin.readline().strip().split(",")] - if header[: len(HEADER)] != HEADER: - print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) + if any([item not in header for item in HEADER]): + missing = [item for item in HEADER if item not in header] + print_error("ERROR: Please check samplesheet header. Missing columns: '{}'".format(",".join(missing))) sys.exit(1) ## Check sample entries - for line in fin: + for line_number, line in enumerate(fin): lspl = [x.strip().strip('"') for x in line.strip().split(",")] + row = {k: v for k, v in zip(header, lspl)} # Check valid number of columns per row - if len(lspl) < len(HEADER): - print_error( - "Invalid number of columns (minimum = {})!".format(len(HEADER)), - "Line", - line, - ) - num_cols = len([x for x in lspl if x]) - if num_cols < MIN_COLS: + if len(lspl) != len(header): print_error( - "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), - "Line", + "Invalid number of columns found - header has {} columns, content has {}. Number of rows {})".format( + len(header), len(lspl), line_number + ), + f"Line #{line_number+2}", line, ) ## Check sample name entries - sample, fastq_1 = lspl[: len(HEADER)] - sample = sample.replace(" ", "_") + sample = row.get("sample", "").replace(" ", "_") if not sample: - print_error("Sample entry has not been specified!", "Line", line) + print_error("Sample entry has not been specified!", f"Line #{line_number+2}", line) ## Check FastQ file extension - for fastq in [fastq_1]: - if fastq: - if fastq.find(" ") != -1: - print_error("FastQ file contains spaces!", "Line", line) - if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): - print_error( - "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", - "Line", - line, - ) - - ## Auto-detect paired-end/single-end - sample_info = [] ## [single_end, fastq_1, fastq_2] - if sample and fastq_1: ## Single-end short reads - sample_info = ["1", fastq_1] - else: - print_error("Invalid combination of columns provided!", "Line", line) - - ## Create sample mapping dictionary = { sample: [ single_end, fastq_1 ] } + fastq = row.get("fastq_1", None) + if fastq: + if fastq.find(" ") != -1: + print_error("FastQ file contains spaces!", f"Line #{line_number+2}", line) + if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): + print_error( + "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", + "Line", + line, + ) + + ## Create sample mapping dictionary + sample_info = {"single_end": "1", "fastq_1": fastq} if sample not in sample_mapping_dict: sample_mapping_dict[sample] = [sample_info] else: @@ -113,19 +102,17 @@ def check_samplesheet(file_in, file_out): sample_mapping_dict[sample].append(sample_info) ## Write validated samplesheet with appropriate columns + output_cols = ["id", "intrasample_id", "single_end", "fastq_1"] if len(sample_mapping_dict) > 0: out_dir = os.path.dirname(file_out) make_dir(out_dir) with open(file_out, "w") as fout: - fout.write(",".join(["sample", "single_end", "fastq_1"]) + "\n") + fout.write(",".join(output_cols) + "\n") for sample in sorted(sample_mapping_dict.keys()): - - ## Check that multiple runs of the same sample are of the same datatype - if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): - print_error("Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample)) - - for idx, val in enumerate(sample_mapping_dict[sample]): - fout.write(",".join(["{}_T{}".format(sample, idx + 1)] + val) + "\n") + for intrasample_id, val in enumerate(sample_mapping_dict[sample]): + sample_info = {**{"id": sample, "intrasample_id": str(intrasample_id)}, **val} + outrow = [sample_info.get(colname, None) for colname in output_cols] + fout.write(",".join(outrow) + "\n") else: print_error("No entries to process!", "Samplesheet: {}".format(file_in)) diff --git a/bin/edgeR_miRBase.r b/bin/edgeR_miRBase.r index d6647a31..5be691fc 100755 --- a/bin/edgeR_miRBase.r +++ b/bin/edgeR_miRBase.r @@ -43,7 +43,7 @@ for (i in 1:2) { # Only subset if at least one sample is remaining nr_keep <- sum(row_sub) if (nr_keep > 0){ - data<-data[!row_sub,] + data<-data[!row_sub,, drop=FALSE] } #Also check for colSums > 0, otherwise DGEList will fail if samples have entirely colSum == 0 #Fixes #134 drop_colsum_zero <- (colSums(data, na.rm=T) != 0) # T if colSum is not 0, F otherwise diff --git a/conf/base.config b/conf/base.config index ada4c36f..544ed42d 100644 --- a/conf/base.config +++ b/conf/base.config @@ -14,7 +14,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -23,6 +23,11 @@ process { // If possible, it would be nice to keep the same label naming convention when // adding in your processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } diff --git a/conf/igenomes.config b/conf/igenomes.config index b55f7a2b..d608b45b 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -38,6 +38,14 @@ params { blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" mirtrace_species = "hsa" } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" diff --git a/conf/modules.config b/conf/modules.config index 0d1bd11c..4c8ee5aa 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -81,38 +81,65 @@ process { process { withName: 'MIRTRACE_RUN' { publishDir = [ - path: { "${params.outdir}/mirtrace" }, + path: { "${params.outdir}/mirtrace/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } } -if (!(params.skip_fastqc || params.skip_qc)) { +if (!(params.skip_fastqc)) { process { - withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:FASTQC' { + withName: '.*:FASTQC_UMITOOLS_FASTP:FASTQC_.*' { ext.args = '--quiet' } } } -if (!params.skip_trimming) { +if (!params.skip_fastp) { process { - withName: '.*:FASTQC_UMITOOLS_TRIMGALORE:TRIMGALORE' { - ext.args = '--fastqc' + withName: '.*:FASTQC_UMITOOLS_FASTP:FASTP' { + ext.args = [ "", + params.trim_fastq ? "" : "--disable_adapter_trimming", + params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1. + params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed. + params.fastp_min_length > 0 ? "-l ${params.fastp_min_length}" : "", + params.fastp_max_length > 0 ? "--max_len1 ${params.fastp_max_length}" : "", + params.three_prime_adapter == null ? '' : "--adapter_sequence ${params.three_prime_adapter}" + ].join(" ").trim() publishDir = [ [ - path: { "${params.outdir}/trimmed/fastqc" }, + path: { "${params.outdir}/fastp" }, mode: params.publish_dir_mode, - pattern: "*.{html,zip}" + pattern: "*.{json,html}" ], [ - path: { "${params.outdir}/trimmed" }, + path: { "${params.outdir}/fastp/log" }, mode: params.publish_dir_mode, - pattern: "*.{txt,fastq.gz}" + pattern: "*.log" + ], + [ + path: { "${params.outdir}/fastp" }, + mode: params.publish_dir_mode, + pattern: "*.fail.fastq.gz", + enabled: params.save_trimmed_fail ] ] } + + } + + if (!params.skip_fastqc) { + process { + withName: '.*:.*:FASTQC_FASTP:FASTQC_TRIM' { + ext.args = '--quiet' + publishDir = [ + path: { "${params.outdir}/fastqc/trim" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } } } @@ -264,47 +291,43 @@ process { } } -def fasta_from_species = false -def fasta = false -fasta_from_species = params.genome ? params.genomes[ params.genome ].fasta ?: false : false -fasta = params.fasta ?: fasta_from_species -if (fasta) { + +process { + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/samtools" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/samtools/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + } + withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BOWTIE_MAP_.*' { + publishDir = [ + path: { "${params.outdir}/unmapped/fastq" }, + mode: params.publish_dir_mode, + pattern: "unmapped/*.gz" + ] + } +} + +if (!params.skip_mirdeep) { process { - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BAM_SORT_SAMTOOLS:SAMTOOLS_.*' { - ext.prefix = { "${meta.id}.sorted" } + withName: 'MIRDEEP2_MAPPER' { publishDir = [ - path: { "${params.outdir}/samtools" }, + path: { "${params.outdir}/mirdeep" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { - ext.prefix = { "${meta.id}.sorted" } - publishDir = [ - path: { "${params.outdir}/samtools/samtools_stats" }, - mode: params.publish_dir_mode, - pattern: "*.{stats,flagstat,idxstats}" - ] - } - withName: 'NFCORE_SMRNASEQ:SMRNASEQ:GENOME_QUANT:BOWTIE_MAP_.*' { - publishDir = [ - path: { "${params.outdir}/unmapped/fastq" }, - mode: params.publish_dir_mode, - pattern: "unmapped/*.gz" - ] - } - } - - if (!params.skip_mirdeep) { - process { - withName: 'MIRDEEP2_MAPPER' { - publishDir = [ - path: { "${params.outdir}/mirdeep" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } } } @@ -319,4 +342,5 @@ if (!params.skip_multiqc) { ] } } + } diff --git a/conf/test.config b/conf/test.config index 36333670..1a81afee 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,10 +23,11 @@ params { input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv' fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa' - mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/mature.fa' - hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hairpin.fa' - mirna_gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hsa.gff3' - mirtrace_species = "hsa" + mature = 'https://mirbase.org/download/CURRENT/mature.fa' + hairpin = 'https://mirbase.org/download/CURRENT/hairpin.fa' + mirna_gtf = 'https://mirbase.org/download/hsa.gff3' + mirtrace_species = 'hsa' + protocol = 'illumina' skip_mirdeep = true } diff --git a/conf/test_full.config b/conf/test_full.config index c5df616a..964dc5b2 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -11,15 +11,14 @@ */ params { - max_memory = 12.GB - max_cpus = 8 config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet-full.csv' genome = 'GRCh37' - mirtrace_species = "hsa" + mirtrace_species = 'hsa' + protocol = 'illumina' } diff --git a/conf/test_no_genome.config b/conf/test_no_genome.config index 4883ab51..485870de 100644 --- a/conf/test_no_genome.config +++ b/conf/test_no_genome.config @@ -16,15 +16,16 @@ params { // Limit resources so that this can run on GitHub Actions max_cpus = 2 - max_memory = 6.GB - max_time = 6.h + max_memory = '6.GB' + max_time = '6.h' // Input data input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv' mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq-better-input/reference/mature.fa' hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq-better-input/reference/hairpin.fa' mirna_gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq-better-input/reference/hsa.gff3' - mirtrace_species = "hsa" + mirtrace_species = 'hsa' skip_mirdeep = true + protocol = 'illumina' } diff --git a/docs/images/cutadapt_plot.png b/docs/images/cutadapt_plot.png deleted file mode 100644 index 8b4c978a..00000000 Binary files a/docs/images/cutadapt_plot.png and /dev/null differ diff --git a/docs/output.md b/docs/output.md index 7b30c08b..fc9d14ef 100644 --- a/docs/output.md +++ b/docs/output.md @@ -14,8 +14,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [FastQC](#fastqc) - read quality control - [UMI-tools extract](#umi-tools-extract) - UMI barcode extraction -- [TrimGalore](#trimgalore) - adapter trimming - [UMI-tools deduplicate](#umi-tools-deduplicate) - read deduplication +- [FastP](#fastp) - adapter trimming - [Bowtie2](#bowtie2) - contamination filtering - [Bowtie](#bowtie) - alignment against mature miRNAs and miRNA precursors (hairpins) - [SAMtools](#samtools) - alignment result processing and feature counting @@ -57,24 +57,20 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d To facilitate processing of input data which has the UMI barcode already embedded in the read name from the start, `--skip_umi_extract` can be specified in conjunction with `--with_umi`. -## TrimGalore +## FastP -[TrimGalore](http://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) is used for removal of adapter contamination and trimming of low quality regions. TrimGalore uses [Cutadapt](https://github.com/marcelm/cutadapt) for adapter trimming and runs FastQC after it finishes. +[FastP](https://github.com/OpenGene/fastp) is used for removal of adapter contamination and trimming of low quality regions. -MultiQC reports the percentage of bases removed by TrimGalore in the _General Statistics_ table, along with a line plot showing where reads were trimmed. +MultiQC reports the percentage of bases removed by FastP in the _General Statistics_ table, along some further information on the results. -**Output directory: `results/trimmed`** +**Output directory: `results/fastp`** Contains FastQ files with quality and adapter trimmed reads for each sample, along with a log file describing the trimming. -- `sample_trimmed.fq.gz` Trimmed FastQ data -- `sample.fastq.gz_trimming_report.txt` Trimming report (describes which parameters that were used) -- `sample_trimmed_fastqc.html` -- `sample_trimmed_fastqc.zip` FastQC report for trimmed reads +- `sample_fastp.json` - JSON report file with information on parameters and trimming metrics +- `sample_fastp.html` - HTML report with some visualizations of trimming metrics -This is an example of the output we can get: - -![cutadapt](images/cutadapt_plot.png) +FastP can automatically detect adapter sequences when not specified directly by the user - the pipeline also comes with a feature and a supplied miRNA adapters file to ensure adapters auto-detected are more accurate. If there are needs to add more known miRNA adapters to this list, please open a pull request. ## UMI-tools deduplicate @@ -176,7 +172,9 @@ Refer to the [tool manual](https://github.com/friedlanderlab/mirtrace/blob/maste ![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +:::note +The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +::: ### MultiQC @@ -203,6 +201,7 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. diff --git a/docs/usage.md b/docs/usage.md index 8e4015ba..2884df74 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -12,27 +12,29 @@ This option indicates the experimental protocol used for the sample preparation. - 'illumina': adapter (`TGGAATTCTCGGGTGCCAAGG`) - 'nextflex': adapter (`TGGAATTCTCGGGTGCCAAGG), clip_r1 (`4`), three_prime_clip_r1 (`4`) -- 'qiaseq': adapter (`AACTGTAGGCACCATCAAT) +- 'qiaseq': adapter (`AACTGTAGGCACCATCAAT`) - 'cats': adapter (`GATCGGAAGAGCACACGTCTG), clip_r1(`3) -- 'custom' (where the ser can indicate the `three_prime_adapter`, `clip_r1` and three_prime_clip_r1`) +- 'custom' (where the user can indicate the `three_prime_adapter`, `clip_r1` and `three_prime_clip_r1` manually) -### `mirtrace_species or mirGeneDB_species` +:warning: At least the `custom` protocol has to be specified, otherwise the pipeline won't run. In case you specify the `custom` protocol, ensure that the parameters above are set accordingly or the defaults will be applied. If you want to auto-detect the adapters using `fastp`, please set `--three_prime_adapter` to `""`. -It should point to the 3-letter species name used by `miRBase`, or `MirGeneDB`. Note the difference in case for the two databases. +### `mirtrace_species` or `mirgenedb_species` + +It should point to the 3-letter species name used by [miRBase](https://www.mirbase.org/help/genome_summary.shtml) or [MirGeneDB](https://www.mirgenedb.org/browse). Note the difference in case for the two databases. ### miRNA related files -Different parameters can be set for the two supported datbases. By default `miRBase` will be used with the parameters below. +Different parameters can be set for the two supported databases. By default `miRBase` will be used with the parameters below. -- `mirna_gtf`: If not supplied by the user, then `mirna_gtf` will point to the latest GFF3 file in miRbase: `https://mirbase.org/ftp/CURRENT/genomes/${params.mirtrace_species}.gff3` -- `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/ftp/CURRENT/mature.fa.gz` -- `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/ftp/CURRENT/hairpin.fa.gz` +- `mirna_gtf`: If not supplied by the user, then `mirna_gtf` will point to the latest GFF3 file in miRbase: `https://mirbase.org/download/CURRENT/genomes/${params.mirtrace_species}.gff3` +- `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/download/CURRENT/mature.fa` +- `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/download/CURRENT/hairpin.fa` -If `MirGeneDB` should be used instead it needs to be specified using `--mirGeneDB` and use the parameters below . +If MirGeneDB should be used instead it needs to be specified using `--mirgenedb` and use the parameters below . -- `mirGeneDB_gff`: The data can not be downloaded automatically, thus the user needs to supply the gff file for either his species, or all species downloaded from `https://mirgenedb.org/download`. The total set will automatically be subsetted to the species specified with `mirGeneDB_species`. -- `mirGeneDB_mature`: points to the FASTA file of mature miRNA sequences. Download from `https://mirgenedb.org/download`. -- `mirGeneDB_hairpin`: points to the FASTA file of precursor miRNA sequences. Download from `https://mirgenedb.org/download`. Note that `MirGeneDB` does not have a dedicated `hairpin` file, but the `Precursor sequences` are to be used. +- `mirgenedb_gff`: The data can not be downloaded automatically (URLs are created with short term tokens in it), thus the user needs to supply the gff file for either his species, or all species downloaded from `https://mirgenedb.org/download`. The total set will automatically be subsetted to the species specified with `--mirgenedb_species`. +- `mirgenedb_mature`: points to the FASTA file of mature miRNA sequences. Download from `https://mirgenedb.org/download`. +- `mirgenedb_hairpin`: points to the FASTA file of precursor miRNA sequences. Download from `https://mirgenedb.org/download`. Note that MirGeneDB does not have a dedicated `hairpin` file, but the `Precursor sequences` are to be used. ### Genome @@ -43,7 +45,7 @@ If `MirGeneDB` should be used instead it needs to be specified using `--mirGeneD This step has, until now, only been tested for human data. Unexpected behaviour can occur when using it with a different species. -Contamination filtering of the sequencing reads is optional and can be invoked using `filter_contamination`. FASTA files with contamination sequences to use need to be supplied using the following commands. Otherwise the contamination filtering of the specific type will be omitted. +Contamination filtering of the sequencing reads is optional and can be invoked using the `filter_contamination` parameter. FASTA files with - `rrna`: Used to supply a FASTA file containing rRNA contamination sequence. - `trna`: Used to supply a FASTA file containing tRNA contamination sequence. e.g. `http://gtrnadb.ucsc.edu/genomes/eukaryota/Hsapi38/hg38-tRNAs.fa` @@ -54,9 +56,11 @@ Contamination filtering of the sequencing reads is optional and can be invoked u ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 2 columns ("sample" and "fastq_1"), and a header row as shown in the examples below. -```console +If a second fastq file is provided using another column, the extra data are ignored by this pipeline. The smRNA species should be sufficiently contained in the first read, and so the second read is superfluous data in this smRNA context. + +```bash --input '[path to samplesheet file]' ``` @@ -99,26 +103,51 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: -```console -nextflow run nf-core/smrnaseq --input samplesheet.csv --outdir --genome GRCh37 -profile docker +```bash +nextflow run nf-core/smrnaseq --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. Note that the pipeline will create the following files in your working directory: -```console +```bash work # Directory containing the nextflow working files - # Finished results in specified location (defined with --outdir) + # Finished results in specified location (defined with --outdir) .nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run nf-core/smrnaseq -profile docker -params-file params.yaml +``` + +with `params.yaml` containing: + +```yaml +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: -```console +```bash nextflow pull nf-core/smrnaseq ``` @@ -126,7 +155,7 @@ nextflow pull nf-core/smrnaseq It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/smrnaseq releases page](https://github.com/nf-core/smrnaseq/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. +First, go to the [nf-core/smrnaseq releases page](https://github.com/nf-core/smrnaseq/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. @@ -135,26 +164,40 @@ This version number will be logged in reports when you run the pipeline, so that The `bin` directory contains some scripts used by the pipeline which may also be run manually: - `edgeR_miRBase.r`: R script using for processing reads counts of mature miRNAs and miRNA precursors (hairpins). + This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. + +To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. + +:::tip +If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +::: ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline. Different profiles instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from `quay.io`, e.g [FastQC](https://quay.io/repository/biocontainers/fastqc). The Singularity profile directly downloads Singularity images via HTTPS hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/), while the Conda profile downloads and installs software locally from [Bioconda](https://bioconda.github.io/). +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters - `docker` - A generic configuration profile to be used with [Docker](https://docker.com/) - `singularity` @@ -165,11 +208,10 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. -- `test` - - A profile with a complete configuration for automated testing - - Includes links to test data so needs no other parameters + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. ### `-resume` @@ -187,96 +229,19 @@ Specify the path to a specific config file (this is a core Nextflow command). Se Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: - -```console -[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' - -Caused by: - Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) - -Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. \ - - -Command exit status: - 137 - -Command output: - (empty) - -Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. -Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb - -Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` -``` - -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. -If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). -The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. -The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. -Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. -The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. - -```nextflow -process { - withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { - memory = 100.GB - } -} -``` - -> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. -> -> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. - -### Updating containers - -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. - -1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) -2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) -3. Create the custom config accordingly: +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. - - For Docker: +### Custom Containers - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date. - - For Singularity: +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +### Custom Tool Arguments - - For Conda: +A pipeline might not always support every possible argument or option of a particular tool used in pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` - -> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. ### nf-core/configs @@ -286,6 +251,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). +## Azure Resource Requests + +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. + +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). + ## Running in the background Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. @@ -300,6 +273,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): -```console +```bash NXF_OPTS='-Xms1g -Xmx4g' ``` diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index b3d092f8..00000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,529 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. -// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - 'version', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... )" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... - // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 2fc0a9b9..01b8653d 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -3,6 +3,7 @@ // import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput class NfcoreTemplate { @@ -32,6 +33,25 @@ class NfcoreTemplate { } } + // + // Generate version string + // + public static String version(workflow) { + String version_string = "" + + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string + } + // // Construct and send completion email // @@ -61,7 +81,7 @@ class NfcoreTemplate { misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp def email_fields = [:] - email_fields['version'] = workflow.manifest.version + email_fields['version'] = NfcoreTemplate.version(workflow) email_fields['runName'] = workflow.runName email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete @@ -109,7 +129,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) @@ -145,6 +165,79 @@ class NfcoreTemplate { output_tf.withWriter { w -> w << email_txt } } + // + // Construct and send a notification to a web server as JSON + // e.g. Microsoft Teams and Slack + // + public static void IM_notification(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = NfcoreTemplate.version(workflow) + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("$projectDir/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + + // + // Dump pipeline parameters in a json file + // + public static void dump_parameters(workflow, params) { + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def output_pf = new File(output_d, "params_${timestamp}.json") + def jsonStr = JsonOutput.toJson(params) + output_pf.text = JsonOutput.prettyPrint(jsonStr) + } + // // Print pipeline summary on completion // @@ -154,7 +247,7 @@ class NfcoreTemplate { if (workflow.stats.ignoredCount == 0) { log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" } } else { log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" @@ -242,6 +335,7 @@ class NfcoreTemplate { // public static String logo(workflow, monochrome_logs) { Map colors = logColours(monochrome_logs) + String workflow_version = NfcoreTemplate.version(workflow) String.format( """\n ${dashedLine(monochrome_logs)} @@ -250,7 +344,7 @@ class NfcoreTemplate { ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} ${dashedLine(monochrome_logs)} """.stripIndent() ) diff --git a/lib/Utils.groovy b/lib/Utils.groovy old mode 100755 new mode 100644 index 28567bd7..8d030f4e --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -21,19 +21,26 @@ class Utils { } // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } - if (conda_check_failed) { + if (channels_missing | channel_priority_violation) { log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " There is a problem with your Conda configuration!\n\n" + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index d6574da6..ed5a126c 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the main.nf workflow in the nf-core/smrnaseq pipeline // +import nextflow.Nextflow + class WorkflowMain { // @@ -17,54 +19,33 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Print help to screen if required - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Print parameter summary log to screen - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { + + //Detect Protocol setting, set this early before help so help shows proper adapters etc pp + WorkflowSmrnaseq.formatProtocol(params,log) + // Print help to screen if required if (params.help) { - log.info help(workflow, params, log) + log.info help(workflow, params) System.exit(0) } - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) + // Print workflow version and exit on --version + if (params.version) { + String workflow_version = NfcoreTemplate.version(workflow) + log.info "${workflow.manifest.name} ${workflow_version}" + System.exit(0) } - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) // Check that conda channels are set-up correctly - if (params.enable_conda) { + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { Utils.checkCondaChannels(log) } @@ -73,21 +54,19 @@ class WorkflowMain { // Check input has been provided if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) + Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") } - } + } // // Get attribute from genome config file e.g. fasta // - public static String getGenomeAttribute(params, attribute) { - def val = '' + public static Object getGenomeAttribute(params, attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { - val = params.genomes[ params.genome ][ attribute ] + return params.genomes[ params.genome ][ attribute ] } } - return val + return null } } diff --git a/lib/WorkflowSmrnaseq.groovy b/lib/WorkflowSmrnaseq.groovy index 38ed7217..2575fd9b 100755 --- a/lib/WorkflowSmrnaseq.groovy +++ b/lib/WorkflowSmrnaseq.groovy @@ -2,18 +2,17 @@ // This file holds several functions specific to the workflow/smrnaseq.nf in the nf-core/smrnaseq pipeline // +import nextflow.Nextflow +import groovy.text.SimpleTemplateEngine + class WorkflowSmrnaseq { // // Check and validate parameters // public static void initialise(params, log) { - genomeExistsError(params, log) - // if (!params.fasta) { - // log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - // System.exit(1) - // } + genomeExistsError(params, log) } // @@ -43,17 +42,119 @@ class WorkflowSmrnaseq { return yaml_file_text } + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + // TODO nf-core: Optionally add in-text citation tools to this list. + + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = run_workflow.toMap() + meta["manifest_map"] = run_workflow.manifest.toMap() + + // Pipeline DOI + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + //meta["tool_bibliography"] = toolBibliographyText(params) + + + def methods_text = mqc_methods_yaml.text + + def engine = new SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html + } + // // Exit pipeline if incorrect --genome key provided // private static void genomeExistsError(params, log) { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - System.exit(1) + Nextflow.error(error_string) } } + + /* + * Format the protocol + * Given the protocol parameter (params.protocol), + * this function formats the protocol such that it is fit for the respective + * subworkflow + */ + public static String formatProtocol(params,log) { + + switch(params.protocol){ + case 'illumina': + params.putIfAbsent("clip_r1", 0); + params.putIfAbsent("three_prime_clip_r1",0); + params.putIfAbsent("three_prime_adapter", "TGGAATTCTCGGGTGCCAAGG"); + break + case 'nextflex': + params.putIfAbsent("clip_r1", 4); + params.putIfAbsent("three_prime_clip_r1", 4); + params.putIfAbsent("three_prime_adapter", "TGGAATTCTCGGGTGCCAAGG"); + break + case 'qiaseq': + params.putIfAbsent("clip_r1",0); + params.putIfAbsent("three_prime_clip_r1",0); + params.putIfAbsent("three_prime_adapter","AACTGTAGGCACCATCAAT"); + break + case 'cats': + params.putIfAbsent("clip_r1",3); + params.putIfAbsent("three_prime_clip_r1", 0); + params.putIfAbsent("three_prime_adapter", "AAAAAAAA"); + break + case 'custom': + params.putIfAbsent("clip_r1", params.clip_r1) + params.putIfAbsent("three_prime_clip_r1", params.three_prime_clip_r1) + default: + log.warn "Please make sure to specify all required clipping and trimming parameters, otherwise only adapter detection will be performed." + } + + log.warn "Running with Protocol ${params.protocol}" + log.warn "Therefore using Adapter: ${params.three_prime_adapter}" + log.warn "Clipping ${params.clip_r1} bases from R1" + log.warn "And clipping ${params.three_prime_clip_r1} bases from 3' end" + } } diff --git a/main.nf b/main.nf index 63e7e8b9..1e4232eb 100644 --- a/main.nf +++ b/main.nf @@ -17,12 +17,34 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.mirtrace_species = WorkflowMain.getGenomeAttribute(params, 'mirtrace_species') +params.bowtie_index = WorkflowMain.getGenomeAttribute(params, 'bowtie') + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE & PRINT PARAMETER SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} + WorkflowMain.initialise(workflow, params, log) /* diff --git a/modules.json b/modules.json index c74fa935..1250670b 100644 --- a/modules.json +++ b/modules.json @@ -2,48 +2,74 @@ "name": "nf-core/smrnaseq", "homePage": "https://github.com/nf-core/smrnaseq", "repos": { - "nf-core/modules": { - "cat/cat": { - "git_sha": "eeda4136c096688d04cc40bb3c70d948213ed641" + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "cat/fastq": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules"] + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules"] + }, + "fastqc": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules"] + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules", "bam_stats_samtools"] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules", "bam_stats_samtools"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules", "bam_sort_stats_samtools"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules", "bam_sort_stats_samtools"] + }, + "samtools/stats": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules", "bam_stats_samtools"] + } + } }, - "cat/fastq": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "fastqc": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "multiqc": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "samtools/bam2fq": { - "git_sha": "5510ea39fe638594bc26ac34cadf4a84bf27d159" - }, - "samtools/flagstat": { - "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" - }, - "samtools/idxstats": { - "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" - }, - "samtools/index": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "samtools/sort": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "samtools/stats": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "trimgalore": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "umitools/dedup": { - "git_sha": "f425aa3cea10015fe9b345b9d6dcc2336b53155f" - }, - "umitools/extract": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "subworkflows": { + "nf-core": { + "bam_sort_stats_samtools": { + "branch": "master", + "git_sha": "7c8eeb2b37a6c6d3ffba0aef55ff60c8718c0ba6", + "installed_by": ["subworkflows"] + }, + "bam_stats_samtools": { + "branch": "master", + "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "installed_by": ["subworkflows", "bam_sort_stats_samtools"] + } + } } } } diff --git a/modules/local/blat_mirna.nf b/modules/local/blat_mirna.nf index b0037565..7f8a2324 100644 --- a/modules/local/blat_mirna.nf +++ b/modules/local/blat_mirna.nf @@ -2,10 +2,10 @@ process BLAT_MIRNA { tag "$fasta" label 'process_medium' - conda (params.enable_conda ? 'bioconda::blat=36' : null) + conda 'bioconda::blat=36' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/blat:36--0' : - 'quay.io/biocontainers/blat:36--0' }" + 'biocontainers/blat:36--0' }" input: val db_type @@ -17,8 +17,10 @@ process BLAT_MIRNA { path 'filtered.fa' , emit: filtered_set path "versions.yml" , emit: versions - script: + when: + task.ext.when == null || task.ext.when + script: if ( db_type == "cdna" ) """ echo $db_type @@ -50,7 +52,8 @@ process BLAT_MIRNA { echo $db_type blat -out=blast8 $mirna $contaminants /dev/stdout | awk 'BEGIN{FS="\t"}{if(\$11 < 1e-5)print \$1;}' | uniq > mirnahit.txt awk 'BEGIN { while((getline<"mirnahit.txt")>0) l[">"\$1]=1 } /^>/ {x = l[\$1]} {if(!x) print }' $contaminants > filtered.fa - cat <<-END_VERSIONS > versions.yml + +cat <<-END_VERSIONS > versions.yml "${task.process}": blat: \$(echo \$(blat) | grep Standalone | awk '{ if (match(\$0,/[0-9]*[0-9]/,m)) print m[0] }') END_VERSIONS diff --git a/modules/local/bowtie_contaminants.nf b/modules/local/bowtie_contaminants.nf index 7a86cd1d..e6a594a7 100644 --- a/modules/local/bowtie_contaminants.nf +++ b/modules/local/bowtie_contaminants.nf @@ -1,18 +1,21 @@ process INDEX_CONTAMINANTS { label 'process_medium' - conda (params.enable_conda ? 'bowtie2=2.4.5' : null) + conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'quay.io/biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }" + 'biocontainers/bowtie2:2.4.5--py36hfca12d5_2'}" input: path fasta output: - path 'fasta_bidx*' , emit: bt_indices + path 'fasta_bidx*' , emit: index path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: """ bowtie2-build ${fasta} fasta_bidx --threads ${task.cpus} diff --git a/modules/local/bowtie_genome.nf b/modules/local/bowtie_genome.nf index 723100db..91a6cd53 100644 --- a/modules/local/bowtie_genome.nf +++ b/modules/local/bowtie_genome.nf @@ -2,19 +2,22 @@ process INDEX_GENOME { tag "$fasta" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bowtie=1.3.0-2' : null) + conda 'bioconda::bowtie=1.3.1-4' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bowtie:1.3.0--py38hcf49a77_2' : - 'quay.io/biocontainers/bowtie:1.3.0--py38hcf49a77_2' }" + 'https://depot.galaxyproject.org/singularity/bowtie%3A1.3.1--py39hd400a0c_2' : + 'biocontainers/bowtie:1.3.1--py310h4070885_4' }" input: - path fasta + tuple val(meta2), path(fasta) output: - path 'genome*ebwt' , emit: bowtie_indices + path 'genome*ebwt' , emit: index path 'genome.edited.fa', emit: fasta path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: """ # Remove any special base characters from reference genome FASTA file diff --git a/modules/local/bowtie_map_contaminants.nf b/modules/local/bowtie_map_contaminants.nf index a041e512..d10f13b5 100644 --- a/modules/local/bowtie_map_contaminants.nf +++ b/modules/local/bowtie_map_contaminants.nf @@ -1,10 +1,10 @@ process BOWTIE_MAP_CONTAMINANTS { label 'process_medium' - conda (params.enable_conda ? 'bowtie2=2.4.5' : null) + conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'quay.io/biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }" + 'biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }" input: tuple val(meta), path(reads) @@ -17,16 +17,20 @@ process BOWTIE_MAP_CONTAMINANTS { path "versions.yml" , emit: versions path "filtered.*.stats" , emit: stats + when: + task.ext.when == null || task.ext.when + script: - def index_base = index.toString().tokenize(' ')[0].tokenize('.')[0] """ + INDEX=`find -L ./ -name "*.3.ebwt" | sed 's/.3.ebwt//'` bowtie2 \\ --threads ${task.cpus} \\ --very-sensitive-local \\ -k 1 \\ - -x $index_base \\ + -x \$INDEX \\ --un ${meta.id}.${contaminant_type}.filter.unmapped.contaminant.fastq \\ ${reads} \\ + ${args} \\ -S ${meta.id}.filter.contaminant.sam > ${meta.id}.contaminant_bowtie.log 2>&1 # extracting number of reads from bowtie logs diff --git a/modules/local/bowtie_map_mirna.nf b/modules/local/bowtie_map_mirna.nf index 9a1301fb..148b47f5 100644 --- a/modules/local/bowtie_map_mirna.nf +++ b/modules/local/bowtie_map_mirna.nf @@ -2,10 +2,10 @@ process BOWTIE_MAP_SEQ { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bowtie=1.3.0-2 bioconda::samtools=1.13' : null) + conda 'bowtie=1.3.0-2 bioconda::samtools=1.13' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' : - 'quay.io/biocontainers/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' }" + 'biocontainers/mulled-v2-ffbf83a6b0ab6ec567a336cf349b80637135bca3:40128b496751b037e2bd85f6789e83d4ff8a4837-0' }" input: tuple val(meta), path(reads) @@ -16,11 +16,14 @@ process BOWTIE_MAP_SEQ { tuple val(meta), path('unmapped/*fq.gz'), emit: unmapped path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: - def index_base = index.toString().tokenize(' ')[0].tokenize('.')[0] """ + INDEX=`find -L ./ -name "*.3.ebwt" | sed 's/.3.ebwt//'` bowtie \\ - -x $index_base \\ + -x \$INDEX \\ -q <(zcat $reads) \\ -p ${task.cpus} \\ -t \\ diff --git a/modules/local/bowtie_mirna.nf b/modules/local/bowtie_mirna.nf index 1f340cdf..2be45bb8 100644 --- a/modules/local/bowtie_mirna.nf +++ b/modules/local/bowtie_mirna.nf @@ -1,18 +1,21 @@ process INDEX_MIRNA { label 'process_medium' - conda (params.enable_conda ? 'bioconda::bowtie=1.3.0-2' : null) + conda 'bioconda::bowtie=1.3.0-2' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bowtie:1.3.0--py38hcf49a77_2' : - 'quay.io/biocontainers/bowtie:1.3.0--py38hcf49a77_2' }" + 'https://depot.galaxyproject.org/singularity/bowtie%3A1.3.1--py39hd400a0c_2' : + 'biocontainers/bowtie:1.3.1--py310h4070885_4' }" input: - path fasta + tuple val(meta2), path(fasta) output: - path 'fasta_bidx*' , emit: bowtie_indices + path 'fasta_bidx*' , emit: index path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: """ bowtie-build ${fasta} fasta_bidx --threads ${task.cpus} diff --git a/modules/local/datatable_merge.nf b/modules/local/datatable_merge.nf index 3fec291b..c71b9c4d 100644 --- a/modules/local/datatable_merge.nf +++ b/modules/local/datatable_merge.nf @@ -1,10 +1,10 @@ process TABLE_MERGE { label 'process_medium' - conda (params.enable_conda ? 'conda-base::r-data.table=1.12.2' : null) + conda 'conda-base::r-data.table=1.12.2' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-data.table:1.12.2' : - 'quay.io/biocontainers/r-data.table:1.12.2' }" + 'biocontainers/r-data.table:1.12.2' }" input: path mirtop @@ -13,6 +13,9 @@ process TABLE_MERGE { path "mirna.tsv" , emit: mirna_tsv path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: """ collapse_mirtop.r ${mirtop} diff --git a/modules/local/edger_qc.nf b/modules/local/edger_qc.nf index d8a4c520..729d5eed 100644 --- a/modules/local/edger_qc.nf +++ b/modules/local/edger_qc.nf @@ -1,10 +1,10 @@ process EDGER_QC { label 'process_medium' - conda (params.enable_conda ? 'bioconda::bioconductor-limma=3.50.0 bioconda::bioconductor-edger=3.36.0 conda-forge::r-data.table=1.14.2 conda-forge::r-gplots=3.1.1 conda-forge::r-statmod=1.4.36' : null) + conda 'bioconda::bioconductor-limma=3.50.0 bioconda::bioconductor-edger=3.36.0 conda-forge::r-data.table=1.14.2 conda-forge::r-gplots=3.1.1 conda-forge::r-statmod=1.4.36' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' : - 'quay.io/biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' }" + 'biocontainers/mulled-v2-419bd7f10b2b902489ac63bbaafc7db76f8e0ae1:709335c37934db1b481054cbec637c6e5b5971cb-0' }" input: path input_files @@ -13,6 +13,9 @@ process EDGER_QC { path '*.{txt,pdf,csv}', emit: edger_files path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: """ edgeR_miRBase.r $input_files diff --git a/modules/local/filter_stats.nf b/modules/local/filter_stats.nf index f0819e99..18e7016b 100644 --- a/modules/local/filter_stats.nf +++ b/modules/local/filter_stats.nf @@ -1,18 +1,21 @@ process FILTER_STATS { label 'process_medium' - conda (params.enable_conda ? 'bowtie2=2.4.5' : null) + conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'quay.io/biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }" + 'biocontainers/bowtie2:2.4.5--py36hfca12d5_2' }" input: tuple val(meta), path(reads) path stats_files output: - path "*_mqc.yaml" , emit: stats - tuple val(meta), path('*.filtered.fastq.gz') , emit: reads + path "*_mqc.yaml" , emit: stats + tuple val(meta), path('*.filtered.fastq.gz'), emit: reads + + when: + task.ext.when == null || task.ext.when script: """ diff --git a/modules/local/format_fasta_mirna.nf b/modules/local/format_fasta_mirna.nf index ae8a3fda..489879a5 100644 --- a/modules/local/format_fasta_mirna.nf +++ b/modules/local/format_fasta_mirna.nf @@ -4,17 +4,20 @@ process FORMAT_FASTA_MIRNA { tag "$fasta" label 'process_medium' - conda (params.enable_conda ? 'bioconda::fastx_toolkit=0.0.14-9' : null) + conda 'bioconda::fastx_toolkit=0.0.14-9' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastx_toolkit:0.0.14--he1b5a44_8' : - 'quay.io/biocontainers/fastx_toolkit:0.0.14--he1b5a44_8' }" + 'biocontainers/fastx_toolkit:0.0.14--he1b5a44_8' }" input: - path fasta + tuple val(meta2), path(fasta) output: - path '*_idx.fa' , emit: formatted_fasta - path "versions.yml", emit: versions + tuple val(meta2), path('*_idx.fa') , emit: formatted_fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: """ diff --git a/modules/local/mirdeep2_mapper.nf b/modules/local/mirdeep2_mapper.nf index 7368f4b0..842af6e6 100644 --- a/modules/local/mirdeep2_mapper.nf +++ b/modules/local/mirdeep2_mapper.nf @@ -4,10 +4,10 @@ process MIRDEEP2_MAPPER { label 'process_medium' tag "$meta.id" - conda (params.enable_conda ? 'bioconda::mirdeep2=2.0.1' : null) + conda 'bioconda::mirdeep2=2.0.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.3--hdfd78af_1' : - 'quay.io/biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" + 'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" input: tuple val(meta), path(reads) @@ -17,6 +17,8 @@ process MIRDEEP2_MAPPER { tuple path('*_collapsed.fa'), path('*reads_vs_refdb.arf'), emit: mirdeep2_inputs path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when script: def index_base = index.toString().tokenize(' ')[0].tokenize('.')[0] diff --git a/modules/local/mirdeep2_prepare.nf b/modules/local/mirdeep2_prepare.nf index a62a5845..7e2f2437 100644 --- a/modules/local/mirdeep2_prepare.nf +++ b/modules/local/mirdeep2_prepare.nf @@ -3,20 +3,22 @@ process MIRDEEP2_PIGZ { tag "$meta.id" // TODO maybe create a mulled container and uncompress within mirdeep2_mapper? - conda (params.enable_conda ? 'bioconda::bioconvert=0.4.3' : null) + conda 'bioconda::bioconvert=0.4.3' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bioconvert:0.4.3--py_0' : - 'quay.io/biocontainers/bioconvert:0.4.3--py_0' }" + 'biocontainers/bioconvert:0.4.3--py_0' }" input: tuple val(meta), path(reads) output: - tuple val(meta), path("*.fq"), emit: reads - path "versions.yml" , emit: versions + tuple val(meta), path("*.{fastq,fq}"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: - def unzip = reads.toString() - '.gz' """ pigz -f -d -p $task.cpus $reads diff --git a/modules/local/mirdeep2_run.nf b/modules/local/mirdeep2_run.nf index 1045e9a7..98fb16ad 100644 --- a/modules/local/mirdeep2_run.nf +++ b/modules/local/mirdeep2_run.nf @@ -4,21 +4,24 @@ process MIRDEEP2_RUN { label 'process_medium' errorStrategy 'ignore' - conda (params.enable_conda ? 'bioconda::mirdeep2=2.0.1' : null) + conda 'bioconda::mirdeep2=2.0.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.3--hdfd78af_1' : - 'quay.io/biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" + 'biocontainers/mirdeep2:2.0.1.3--hdfd78af_1' }" input: - path fasta + path(fasta) tuple path(reads), path(arf) - path hairpin - path mature + tuple val(meta2), path(hairpin) + tuple val(meta2), path(mature) output: path 'result*.{bed,csv,html}', emit: result path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: """ miRDeep2.pl \\ diff --git a/modules/local/mirtop_quant.nf b/modules/local/mirtop_quant.nf index ffbfc0a0..e97d6a09 100644 --- a/modules/local/mirtop_quant.nf +++ b/modules/local/mirtop_quant.nf @@ -1,30 +1,35 @@ process MIRTOP_QUANT { label 'process_medium' - conda (params.enable_conda ? 'mirtop=0.4.25 bioconda::samtools=1.15.1 conda-base::r-base=4.1.1 conda-base::r-data.table=1.14.2' : null) + conda 'mirtop=0.4.25 bioconda::samtools=1.15.1 conda-base::r-base=4.1.1 conda-base::r-data.table=1.14.2' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-0c13ef770dd7cc5c76c2ce23ba6669234cf03385:63be019f50581cc5dfe4fc0f73ae50f2d4d661f7-0' : - 'quay.io/biocontainers/mulled-v2-0c13ef770dd7cc5c76c2ce23ba6669234cf03385:63be019f50581cc5dfe4fc0f73ae50f2d4d661f7-0' }" + 'biocontainers/mulled-v2-0c13ef770dd7cc5c76c2ce23ba6669234cf03385:63be019f50581cc5dfe4fc0f73ae50f2d4d661f7-0' }" input: path ("bams/*") path hairpin path gtf - //if (!params.mirGeneDB) {params.filterSpecies = params.mirtrace_species} else {params.filterSpecies = params.mirGeneDB_species} - output: - path "mirtop/mirtop.gff" + path "mirtop/mirtop.gff" , emit: mirtop_gff path "mirtop/mirtop.tsv" , emit: mirtop_table - path "mirtop/mirtop_rawData.tsv" + path "mirtop/mirtop_rawData.tsv", emit: mirtop_rawdata path "mirtop/stats/*" , emit: logs path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: + def filter_species = params.mirgenedb ? params.mirgenedb_species : params.mirtrace_species """ - mirtop gff --hairpin $hairpin --gtf $gtf -o mirtop --sps $params.filterSpecies ./bams/* - mirtop counts --hairpin $hairpin --gtf $gtf -o mirtop --sps $params.filterSpecies --add-extra --gff mirtop/mirtop.gff - mirtop export --format isomir --hairpin $hairpin --gtf $gtf --sps $params.filterSpecies -o mirtop mirtop/mirtop.gff + #Cleanup the GTF if mirbase html form is broken + GTF="$gtf" + sed 's/>/>/g' \$GTF | sed 's#
    #\\n#g' | sed 's#

    ##g' | sed 's#

    ##g' | sed -e :a -e '/^\\n*\$/{\$d;N;};/\\n\$/ba' > \${GTF}_html_cleaned.gtf + mirtop gff --hairpin $hairpin --gtf \${GTF}_html_cleaned.gtf -o mirtop --sps $filter_species ./bams/* + mirtop counts --hairpin $hairpin --gtf \${GTF}_html_cleaned.gtf -o mirtop --sps $filter_species --add-extra --gff mirtop/mirtop.gff + mirtop export --format isomir --hairpin $hairpin --gtf \${GTF}_html_cleaned.gtf --sps $filter_species -o mirtop mirtop/mirtop.gff mirtop stats mirtop/mirtop.gff --out mirtop/stats mv mirtop/stats/mirtop_stats.log mirtop/stats/full_mirtop_stats.log diff --git a/modules/local/mirtrace.nf b/modules/local/mirtrace.nf index 6dd553e9..f576ebc0 100644 --- a/modules/local/mirtrace.nf +++ b/modules/local/mirtrace.nf @@ -1,58 +1,42 @@ process MIRTRACE_RUN { label 'process_medium' - conda (params.enable_conda ? 'bioconda::mirtrace=1.0.1' : null) + conda 'bioconda::mirtrace=1.0.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mirtrace:1.0.1--hdfd78af_1' : - 'quay.io/biocontainers/mirtrace:1.0.1--hdfd78af_1' }" + 'biocontainers/mirtrace:1.0.1--hdfd78af_1' }" input: - path reads + tuple val(adapter), val(ids), path(reads) output: path "mirtrace/*" , emit: mirtrace path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: - def three_prime_adapter = params.three_prime_adapter - // Presets - if (params.protocol == "illumina"){ - three_prime_adapter = "TGGAATTCTCGGGTGCCAAGG" - } else if (params.protocol == "nextflex"){ - three_prime_adapter = "TGGAATTCTCGGGTGCCAAGG" - } else if (params.protocol == "qiaseq"){ - three_prime_adapter = "AACTGTAGGCACCATCAAT" - } else if (params.protocol == "cats"){ - three_prime_adapter = "AAAAAAAA" - } - if (params.three_prime_adapter){ - // to allow replace of 3' primer using one of the previous protocols - three_prime_adapter = params.three_prime_adapter - } // mirtrace protocol defaults to 'params.protocol' if not set - def mirtrace_protocol = params.mirtrace_protocol - if (!params.mirtrace_protocol){ - mirtrace_protocol = params.protocol - } - def primer = (mirtrace_protocol=="cats") ? " " : " --adapter $three_prime_adapter " + def primer = adapter ? "--adapter ${adapter}" : "" + def protocol = params.protocol == 'custom' ? '' : "--protocol $params.protocol" def java_mem = '' if(task.memory){ tmem = task.memory.toBytes() java_mem = "-Xms${tmem} -Xmx${tmem}" } + def config_lines = [ids,reads] + .transpose() + .collect({ id, path -> "echo '${path},${id}' >> mirtrace_config" }) """ export mirtracejar=\$(dirname \$(which mirtrace)) - for i in $reads - do - path=\$(realpath \${i}) - prefix=\$(echo \${i} | sed -e 's/.gz//' -e 's/.fastq//' -e 's/.fq//' -e 's/_val_1//' -e 's/_trimmed//' -e 's/_R1//' -e 's/.R1//') - echo \$path","\$prefix - done > mirtrace_config + + ${config_lines.join("\n ")} java $java_mem -jar \$mirtracejar/mirtrace.jar --mirtrace-wrapper-name mirtrace qc \\ --species $params.mirtrace_species \\ $primer \\ - --protocol $mirtrace_protocol \\ + $protocol \\ --config mirtrace_config \\ --write-fasta \\ --output-dir mirtrace \\ diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf index 18b51066..a0bbc75e 100644 --- a/modules/local/parse_fasta_mirna.nf +++ b/modules/local/parse_fasta_mirna.nf @@ -1,21 +1,23 @@ process PARSE_FASTA_MIRNA { label 'process_medium' - conda (params.enable_conda ? 'bioconda::seqkit=2.0.0' : null) + conda 'bioconda::seqkit=2.3.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/seqkit:2.0.0--h9ee0642_0' : - 'quay.io/biocontainers/seqkit:2.0.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/seqkit:2.3.1--h9ee0642_0' : + 'biocontainers/seqkit:2.3.1--h9ee0642_0' }" input: - path fasta - - //if (!params.mirGeneDB) {params.filterSpecies = params.mirtrace_species} else {params.filterSpecies = params.mirGeneDB_species} + tuple val(meta2), path(fasta) output: - path '*_igenome.fa', emit: parsed_fasta - path "versions.yml", emit: versions + tuple val(meta2), path('*_igenome.fa'), emit: parsed_fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: + def filter_species = params.mirgenedb ? params.mirgenedb_species : params.mirtrace_species """ # Uncompress FASTA reference files if necessary FASTA="$fasta" @@ -23,13 +25,12 @@ process PARSE_FASTA_MIRNA { gunzip -f \$FASTA FASTA=\${FASTA%%.gz} fi + sed 's/>/>/g' \$FASTA | sed 's#
    #\\n#g' | sed 's#

    ##g' | sed 's#

    ##g' | sed -e :a -e '/^\\n*\$/{\$d;N;};/\\n\$/ba' > \${FASTA}_html_cleaned.fa # Remove spaces from miRBase FASTA files - # sed -i 's, ,_,g' \$FASTA - sed '/^[^>]/s/[^AUGCaugc]/N/g' \$FASTA > \${FASTA}_parsed.fa - # TODO perl -ane 's/[ybkmrsw]/N/ig;print;' \${FASTA}_parsed_tmp.fa > \${FASTA}_parsed.fa + sed '#^[^>]#s#[^AUGCaugc]#N#g' \${FASTA}_html_cleaned.fa > \${FASTA}_parsed.fa - sed -i 's/\s.*//' \${FASTA}_parsed.fa - seqkit grep -r --pattern \".*${params.filterSpecies}-.*\" \${FASTA}_parsed.fa > \${FASTA}_sps.fa + sed -i 's#\s.*##' \${FASTA}_parsed.fa + seqkit grep -r --pattern \".*${filter_species}-.*\" \${FASTA}_parsed.fa > \${FASTA}_sps.fa seqkit seq --rna2dna \${FASTA}_sps.fa > \${FASTA}_igenome.fa cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index c0265a72..351d2322 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -1,11 +1,11 @@ process SAMPLESHEET_CHECK { - label 'process_low' tag "$samplesheet" + label 'process_single' - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" + 'biocontainers/python:3.8.3' }" input: path samplesheet @@ -14,6 +14,9 @@ process SAMPLESHEET_CHECK { path '*.csv' , emit: csv path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: // This script is bundled with the pipeline, in nf-core/smrnaseq/bin/ """ check_samplesheet.py \\ diff --git a/modules/local/seqcluster_collapse.nf b/modules/local/seqcluster_collapse.nf index f430e65f..39f6ce85 100644 --- a/modules/local/seqcluster_collapse.nf +++ b/modules/local/seqcluster_collapse.nf @@ -2,10 +2,10 @@ process SEQCLUSTER_SEQUENCES { label 'process_medium' tag "$meta.id" - conda (params.enable_conda ? 'bioconda::seqcluster=1.2.9-0' : null) + conda 'bioconda::seqcluster=1.2.9-0' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/seqcluster:1.2.9--pyh5e36f6f_0' : - 'quay.io/biocontainers/seqcluster:1.2.8--pyh5e36f6f_0' }" + 'biocontainers/seqcluster:1.2.8--pyh5e36f6f_0' }" input: tuple val(meta), path(reads) @@ -14,6 +14,9 @@ process SEQCLUSTER_SEQUENCES { tuple val(meta), path("final/*.fastq.gz"), emit: collapsed path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: """ seqcluster collapse -f $reads -m 1 --min_size 15 -o collapsed diff --git a/modules/local/trimgalore.nf b/modules/local/trimgalore.nf deleted file mode 100644 index 98519584..00000000 --- a/modules/local/trimgalore.nf +++ /dev/null @@ -1,74 +0,0 @@ -process TRIMGALORE { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? 'bioconda::trim-galore=0.6.7' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' : - 'quay.io/biocontainers/trim-galore:0.6.7--hdfd78af_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.fq.gz") , emit: reads - tuple val(meta), path("*report.txt"), emit: log - path "versions.yml" , emit: versions - - tuple val(meta), path("*.html"), emit: html optional true - tuple val(meta), path("*.zip") , emit: zip optional true - - script: - def args = task.ext.args ?: '' - def cores = 1 - if (task.cpus) { - cores = (task.cpus as int) - 4 - if (meta.single_end) cores = (task.cpus as int) - 3 - if (cores < 1) cores = 1 - if (cores > 4) cores = 4 - } - def prefix = "${meta.id}" - // Define regular variables so that they can be overwritten - def clip_r1 = params.clip_r1 - def three_prime_clip_r1 = params.three_prime_clip_r1 - def three_prime_adapter = params.three_prime_adapter - def protocol = params.protocol - // Presets - if (params.protocol == "illumina"){ - clip_r1 = 0 - three_prime_clip_r1 = 0 - three_prime_adapter = "TGGAATTCTCGGGTGCCAAGG" - } else if (params.protocol == "nextflex"){ - clip_r1 = 4 - three_prime_clip_r1 = 4 - three_prime_adapter = "TGGAATTCTCGGGTGCCAAGG" - } else if (params.protocol == "qiaseq"){ - clip_r1 = 0 - three_prime_clip_r1 = 0 - three_prime_adapter = "AACTGTAGGCACCATCAAT" - } else if (params.protocol == "cats"){ - clip_r1 = 3 - three_prime_clip_r1 = 0 - // three_prime_adapter = "GATCGGAAGAGCACACGTCTG" - three_prime_adapter = "AAAAAAAA" - } else { - //custom protocol - clip_r1 = params.clip_r1 - three_prime_clip_r1 = params.three_prime_clip_r1 - three_prime_adapter = params.three_prime_adapter - protocol = params.protocol - } - def tg_length = "--length ${params.min_length}" - def c_r1 = clip_r1 > 0 ? "--clip_r1 ${clip_r1}" : '' - def tpc_r1 = three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${three_prime_clip_r1}" : '' - """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - trim_galore $args --cores $cores --adapter ${three_prime_adapter} $tg_length $c_r1 $tpc_r1 --max_length ${params.trim_galore_max_length} --gzip ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - ${task.process}": - trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') - cutadapt: \$(cutadapt --version) - END_VERSIONS - """ -} diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 00000000..222b301f --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/modules/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf similarity index 55% rename from modules/nf-core/modules/cat/fastq/main.nf rename to modules/nf-core/cat/fastq/main.nf index bf0877c3..b75a2e73 100644 --- a/modules/nf-core/modules/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -1,11 +1,11 @@ process CAT_FASTQ { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda 'modules/nf-core/cat/fastq/environment.yml' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(reads, stageAs: "input*/*") @@ -20,9 +20,9 @@ process CAT_FASTQ { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def readList = reads.collect{ it.toString() } + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] if (meta.single_end) { - if (readList.size > 1) { + if (readList.size >= 1) { """ cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz @@ -33,7 +33,7 @@ process CAT_FASTQ { """ } } else { - if (readList.size > 2) { + if (readList.size >= 2) { def read1 = [] def read2 = [] readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } @@ -48,4 +48,33 @@ process CAT_FASTQ { """ } } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size > 1) { + """ + touch ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size > 2) { + """ + touch ${prefix}_1.merged.fastq.gz + touch ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + } diff --git a/modules/nf-core/modules/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml similarity index 93% rename from modules/nf-core/modules/cat/fastq/meta.yml rename to modules/nf-core/cat/fastq/meta.yml index c836598e..db4ac3c7 100644 --- a/modules/nf-core/modules/cat/fastq/meta.yml +++ b/modules/nf-core/cat/fastq/meta.yml @@ -1,6 +1,7 @@ name: cat_fastq description: Concatenates fastq files keywords: + - cat - fastq - concatenate tools: @@ -16,7 +17,7 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - reads: - type: list + type: file description: | List of input FastQ files to be concatenated. output: @@ -33,7 +34,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@joseespinosa" - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 00000000..f5f94182 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,143 @@ +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_single_end_single_file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file mode 100644 index 00000000..ec2342e5 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,78 @@ +{ + "test_cat_fastq_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d" + ] + ] + ], + "timestamp": "2023-10-17T23:19:12.990284837" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66" + ] + ] + ], + "timestamp": "2023-10-17T23:19:31.554568147" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af" + ] + ] + ], + "timestamp": "2023-10-17T23:19:49.629360033" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66", + "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e" + ] + ] + ] + ], + "timestamp": "2023-10-17T23:19:40.711617539" + }, + "test_cat_fastq_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d", + "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda" + ] + ] + ] + ], + "timestamp": "2023-10-18T07:53:20.923560211" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml new file mode 100644 index 00000000..6ac43614 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/tags.yml @@ -0,0 +1,2 @@ +cat/fastq: + - modules/nf-core/cat/fastq/** diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 00000000..7ca22161 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.15 diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf similarity index 79% rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf rename to modules/nf-core/custom/dumpsoftwareversions/main.nf index 327d5100..60a19e0e 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -1,11 +1,11 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' + label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda 'modules/nf-core/custom/dumpsoftwareversions/environment.yml' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml similarity index 85% rename from modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml rename to modules/nf-core/custom/dumpsoftwareversions/meta.yml index 60b546a0..9414c32d 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,7 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: - custom + - dump - version tools: - custom: @@ -14,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -28,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py new file mode 100755 index 00000000..da033408 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python + + +"""Provide functions to merge multiple versions.yml files.""" + + +import yaml +import platform +from textwrap import dedent + + +def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" + html = [ + dedent( + """\\ + + + + + + + + + + """ + ) + ] + for process, tmp_versions in sorted(versions.items()): + html.append("") + for i, (tool, version) in enumerate(sorted(tmp_versions.items())): + html.append( + dedent( + f"""\\ + + + + + + """ + ) + ) + html.append("") + html.append("
    Process Name Software Version
    {process if (i == 0) else ''}{tool}{version}
    ") + return "\\n".join(html) + + +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 00000000..eec1db10 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 00000000..8713b921 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,27 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" + ], + "1": [ + "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" + ], + "2": [ + "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" + ], + "mqc_yml": [ + "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" + ], + "versions": [ + "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" + ], + "yml": [ + "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" + ] + } + ], + "timestamp": "2023-10-11T17:10:02.930699" + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 00000000..405aa24a --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 00000000..19ccec25 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 00000000..ca5f100f --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,102 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda 'modules/nf-core/fastp/environment.yml' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : + 'biocontainers/fastp:0.23.4--h5f740d0_0' }" + + input: + tuple val(meta), path(reads) + path adapter_fasta + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> ${prefix}.fastp.log \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --in1 ${prefix}.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> ${prefix}.fastp.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz + fastp \\ + --in1 ${prefix}_1.fastq.gz \\ + --in2 ${prefix}_2.fastq.gz \\ + --out1 ${prefix}_1.fastp.fastq.gz \\ + --out2 ${prefix}_2.fastp.fastq.gz \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2> ${prefix}.fastp.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 00000000..c22a16ab --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,75 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. + documentation: https://github.com/OpenGene/fastp + doi: 10.1093/bioinformatics/bty560 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" + - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + type: file + description: Results in JSON format + pattern: "*.json" + - html: + type: file + description: Results in HTML format + pattern: "*.html" + - log: + type: file + description: fastq log file + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads_fail: + type: file + description: Reads the failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 00000000..f610b735 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,485 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:true ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)" ] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("fastp test_fastp_interleaved") { + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "paired end (151 cycles + 151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 198"] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { failed_read_lines.each { failed_read_line -> + { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { failed_read2_lines.each { failed_read2_line -> + { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "

    "] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683'] + def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true) + save_trimmed_fail = false + save_merged = true + + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "
    "] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"] + def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..0fa68c7d --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "fastp test_fastp_interleaved_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4" + ] + ] + ], + "timestamp": "2023-10-17T11:04:45.794175881" + }, + "test_fastp_single_end_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + ] + ] + ], + "timestamp": "2023-10-17T11:04:10.566343705" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "timestamp": "2023-10-17T11:04:10.582076024" + }, + "test_fastp_single_end_trim_fail_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + ] + ] + ], + "timestamp": "2023-10-17T11:05:00.379878948" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.config new file mode 100644 index 00000000..0f7849ad --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: FASTP { + ext.args = "--interleaved_in" + } +} diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml new file mode 100644 index 00000000..c1afcce7 --- /dev/null +++ b/modules/nf-core/fastp/tests/tags.yml @@ -0,0 +1,2 @@ +fastp: + - modules/nf-core/fastp/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 00000000..f52a53a0 --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf new file mode 100644 index 00000000..5def8818 --- /dev/null +++ b/modules/nf-core/fastqc/main.nf @@ -0,0 +1,55 @@ +process FASTQC { + tag "$meta.id" + label 'process_medium' + + conda 'modules/nf-core/fastqc/environment.yml' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + + fastqc \\ + $args \\ + --threads $task.cpus \\ + $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml similarity index 95% rename from modules/nf-core/modules/fastqc/meta.yml rename to modules/nf-core/fastqc/meta.yml index 4da5bb5a..ee5507e0 100644 --- a/modules/nf-core/modules/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 00000000..6437a144 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,41 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("Single-Read") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
    Mon 2 Oct 2023
    test.gz
    + // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, + { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + ) + } + } +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 00000000..636a32ce --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-10-09T23:40:54+0000" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 00000000..7834294b --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100644 index d1390392..00000000 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python - -import yaml -import platform -from textwrap import dedent - - -def _make_versions_html(versions): - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
    Process Name Software Version
    {process if (i == 0) else ''}{tool}{version}
    ") - return "\\n".join(html) - - -versions_this_module = {} -versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, -} - -with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - -# aggregate versions by the module name (derived from fully-qualified process name) -versions_by_module = {} -for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - assert versions_by_module[module] == process_versions, ( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - -versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", -} - -versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), -} - -with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) -with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - -with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf deleted file mode 100644 index 05730368..00000000 --- a/modules/nf-core/modules/fastqc/main.nf +++ /dev/null @@ -1,59 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/trimgalore/main.nf b/modules/nf-core/modules/trimgalore/main.nf deleted file mode 100644 index 9487c799..00000000 --- a/modules/nf-core/modules/trimgalore/main.nf +++ /dev/null @@ -1,83 +0,0 @@ -process TRIMGALORE { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? 'bioconda::trim-galore=0.6.7' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' : - 'quay.io/biocontainers/trim-galore:0.6.7--hdfd78af_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.fq.gz") , emit: reads - tuple val(meta), path("*report.txt"), emit: log - path "versions.yml" , emit: versions - - tuple val(meta), path("*.html"), emit: html optional true - tuple val(meta), path("*.zip") , emit: zip optional true - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Calculate number of --cores for TrimGalore based on value of task.cpus - // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019 - // See: https://github.com/nf-core/atacseq/pull/65 - def cores = 1 - if (task.cpus) { - cores = (task.cpus as int) - 4 - if (meta.single_end) cores = (task.cpus as int) - 3 - if (cores < 1) cores = 1 - if (cores > 4) cores = 4 - } - - // Clipping presets have to be evaluated in the context of SE/PE - def c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' - def c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '' - def tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' - def tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' - - // Added soft-links to original fastqs for consistent naming in MultiQC - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - trim_galore \\ - $args \\ - --cores $cores \\ - --gzip \\ - $c_r1 \\ - $tpc_r1 \\ - ${prefix}.fastq.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') - cutadapt: \$(cutadapt --version) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - trim_galore \\ - $args \\ - --cores $cores \\ - --paired \\ - --gzip \\ - $c_r1 \\ - $c_r2 \\ - $tpc_r1 \\ - $tpc_r2 \\ - ${prefix}_1.fastq.gz \\ - ${prefix}_2.fastq.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') - cutadapt: \$(cutadapt --version) - END_VERSIONS - """ - } -} diff --git a/modules/nf-core/modules/trimgalore/meta.yml b/modules/nf-core/modules/trimgalore/meta.yml deleted file mode 100644 index e99a8833..00000000 --- a/modules/nf-core/modules/trimgalore/meta.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: trimgalore -description: Trim FastQ files using Trim Galore! -keywords: - - trimming - - adapters - - sequencing adapters - - fastq -tools: - - trimgalore: - description: | - A wrapper tool around Cutadapt and FastQC to consistently apply quality - and adapter trimming to FastQ files, with some extra functionality for - MspI-digested RRBS-type (Reduced Representation Bisufite-Seq) libraries. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ - documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input adapter trimmed FastQ files of size 1 and 2 for - single-end and paired-end data, respectively. - pattern: "*.{fq.gz}" - - html: - type: file - description: FastQC report (optional) - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive (optional) - pattern: "*_{fastqc.zip}" - - log: - type: file - description: Trim Galore! trimming report - pattern: "*_{report.txt}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..9d0e6b20 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.17 diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/multiqc/main.nf similarity index 64% rename from modules/nf-core/modules/multiqc/main.nf rename to modules/nf-core/multiqc/main.nf index ae019dbf..485b3ba8 100644 --- a/modules/nf-core/modules/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,13 +1,16 @@ process MULTIQC { - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) + conda 'modules/nf-core/multiqc/environment.yml' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : + 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" input: - path multiqc_files + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) output: path "*multiqc_report.html", emit: report @@ -20,8 +23,15 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' """ - multiqc -f $args . + multiqc \\ + --force \\ + $args \\ + $config \\ + $extra_config \\ + . cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml similarity index 64% rename from modules/nf-core/modules/multiqc/meta.yml rename to modules/nf-core/multiqc/meta.yml index 6fa891ef..a61223ed 100644 --- a/modules/nf-core/modules/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: MultiQC description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: @@ -17,13 +18,25 @@ input: type: file description: | List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" output: - report: type: file description: MultiQC report file pattern: "multiqc_report.html" - data: - type: dir + type: directory description: MultiQC data dir pattern: "multiqc_data" - plots: @@ -38,3 +51,9 @@ authors: - "@abhi18av" - "@bunop" - "@drpatelh" + - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml new file mode 100644 index 00000000..04c82f14 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf similarity index 57% rename from modules/nf-core/modules/samtools/flagstat/main.nf rename to modules/nf-core/samtools/flagstat/main.nf index 9e3440ac..b289d151 100644 --- a/modules/nf-core/modules/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -1,11 +1,11 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15" : null) + conda 'modules/nf-core/samtools/flagstat/environment.yml' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : - 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam), path(bai) @@ -19,12 +19,24 @@ process SAMTOOLS_FLAGSTAT { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ samtools \\ flagstat \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ $bam \\ - > ${bam}.flagstat + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml similarity index 93% rename from modules/nf-core/modules/samtools/flagstat/meta.yml rename to modules/nf-core/samtools/flagstat/meta.yml index 95269063..97991358 100644 --- a/modules/nf-core/modules/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -14,7 +14,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -47,3 +47,5 @@ output: pattern: "versions.yml" authors: - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml new file mode 100644 index 00000000..04c82f14 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/modules/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf similarity index 57% rename from modules/nf-core/modules/samtools/idxstats/main.nf rename to modules/nf-core/samtools/idxstats/main.nf index 7d5cee17..97217419 100644 --- a/modules/nf-core/modules/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -1,11 +1,11 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15" : null) + conda 'modules/nf-core/samtools/idxstats/environment.yml' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : - 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam), path(bai) @@ -19,11 +19,26 @@ process SAMTOOLS_IDXSTATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ samtools \\ idxstats \\ + --threads ${task.cpus-1} \\ $bam \\ - > ${bam}.idxstats + > ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml similarity index 93% rename from modules/nf-core/modules/samtools/idxstats/meta.yml rename to modules/nf-core/samtools/idxstats/meta.yml index 3710ab88..344e92a3 100644 --- a/modules/nf-core/modules/samtools/idxstats/meta.yml +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -15,7 +15,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -48,3 +48,5 @@ output: pattern: "versions.yml" authors: - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 00000000..04c82f14 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/modules/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf similarity index 89% rename from modules/nf-core/modules/samtools/index/main.nf rename to modules/nf-core/samtools/index/main.nf index fff6e1b8..af3dbc4c 100644 --- a/modules/nf-core/modules/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.15" : null) + conda 'modules/nf-core/samtools/index/environment.yml' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : - 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/modules/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml similarity index 91% rename from modules/nf-core/modules/samtools/index/meta.yml rename to modules/nf-core/samtools/index/meta.yml index e5cadbc2..01a4ee03 100644 --- a/modules/nf-core/modules/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -51,3 +51,7 @@ authors: - "@drpatelh" - "@ewels" - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 00000000..04c82f14 --- /dev/null +++ b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf similarity index 78% rename from modules/nf-core/modules/samtools/sort/main.nf rename to modules/nf-core/samtools/sort/main.nf index ba46f0c9..77256702 100644 --- a/modules/nf-core/modules/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,16 +2,17 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.15" : null) + conda 'modules/nf-core/samtools/sort/environment.yml' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : - 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam) output: tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.csi"), emit: csi, optional: true path "versions.yml" , emit: versions when: @@ -22,7 +23,13 @@ process SAMTOOLS_SORT { def prefix = task.ext.prefix ?: "${meta.id}" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ - samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam + samtools sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/nf-core/modules/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml similarity index 85% rename from modules/nf-core/modules/samtools/sort/meta.yml rename to modules/nf-core/samtools/sort/meta.yml index a820c55a..2200de72 100644 --- a/modules/nf-core/modules/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -39,6 +39,13 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" authors: - "@drpatelh" - "@ewels" +maintainers: + - "@drpatelh" + - "@ewels" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 00000000..1f72f3b9 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,70 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("test_samtools_sort") { + + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_samtools_sort_stub") { + + config "./nextflow.config" + options "-stub-run" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..a43566da --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,39 @@ +{ + "test_samtools_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,a29570e7607d217c2fa4d75829e09cd7" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,46f7a36082fa1f68285fe30d689244e8" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,a29570e7607d217c2fa4d75829e09cd7" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,46f7a36082fa1f68285fe30d689244e8" + ] + } + ], + "timestamp": "2023-10-17T17:21:46.5427968" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 00000000..d0f35086 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 00000000..cd63ea20 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - tests/modules/nf-core/samtools/sort/** diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 00000000..04c82f14 --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/modules/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf similarity index 76% rename from modules/nf-core/modules/samtools/stats/main.nf rename to modules/nf-core/samtools/stats/main.nf index 85cb64f3..fe30bf89 100644 --- a/modules/nf-core/modules/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -1,15 +1,15 @@ process SAMTOOLS_STATS { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15" : null) + conda 'modules/nf-core/samtools/stats/environment.yml' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : - 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input), path(input_index) - path fasta + tuple val(meta2), path(fasta) output: tuple val(meta), path("*.stats"), emit: stats @@ -20,14 +20,15 @@ process SAMTOOLS_STATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" """ samtools \\ stats \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ ${reference} \\ ${input} \\ - > ${input}.stats + > ${prefix}.stats cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -38,7 +39,7 @@ process SAMTOOLS_STATS { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${input}.stats + touch ${prefix}.stats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml similarity index 68% rename from modules/nf-core/modules/samtools/stats/meta.yml rename to modules/nf-core/samtools/stats/meta.yml index cac50b1c..735ff812 100644 --- a/modules/nf-core/modules/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -13,7 +13,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -23,16 +23,21 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - input: - type: file - description: BAM/CRAM file from alignment - pattern: "*.{bam,cram}" + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" - input_index: - type: file - description: BAI/CRAI file from alignment - pattern: "*.{bai,crai}" + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: - type: optional file - description: Reference file the CRAM was created with + type: file + description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" output: - meta: @@ -51,3 +56,8 @@ output: authors: - "@drpatelh" - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 00000000..e037132c --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,78 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + tag "modules" + tag "modules/nf-core" + tag "samtools" + tag "samtools/stats" + + test("SAMTOOLS STATS Should run without failures") { + + when { + params { + + outdir = "$outputDir" + } + process { + """ + // define inputs of the process here. + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + + ] + input[1] = [[],[]] + """ + + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + + } + + test("SAMTOOLS CRAM Should run without failures") { + + when { + params { + + outdir = "$outputDir" + } + process { + """ + // define inputs of the process here + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true) + + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + + + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + + } + + +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..516b2b01 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,64 @@ +{ + "SAMTOOLS STATS Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,6e768486d5df0257351c5419a79f9c9b" + ] + ], + "1": [ + "versions.yml:md5,08035f3409d934d47a416150884bb0df" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,6e768486d5df0257351c5419a79f9c9b" + ] + ], + "versions": [ + "versions.yml:md5,08035f3409d934d47a416150884bb0df" + ] + } + ], + "timestamp": "2023-10-18T12:12:42.998746" + }, + "SAMTOOLS CRAM Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,7c9ee5747793cceb9d6f4d733345641a" + ] + ], + "1": [ + "versions.yml:md5,08035f3409d934d47a416150884bb0df" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,7c9ee5747793cceb9d6f4d733345641a" + ] + ], + "versions": [ + "versions.yml:md5,08035f3409d934d47a416150884bb0df" + ] + } + ], + "timestamp": "2023-10-18T12:13:30.747222" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml new file mode 100644 index 00000000..7c28e30f --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/stats: + - modules/nf-core/samtools/stats/** diff --git a/nextflow.config b/nextflow.config index 2366b717..a069471c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,27 +10,24 @@ params { // Input options - input = null + + input = null // Workflow flags - protocol = 'illumina' + protocol = 'illumina' // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = false - fasta = null - mirna_gtf = null - bowtie_indices = null - mirtrace_species = null - mirtrace_protocol = 'illumina' - mature = "https://mirbase.org/ftp/CURRENT/mature.fa.gz" - hairpin = "https://mirbase.org/ftp/CURRENT/hairpin.fa.gz" - mirGeneDB = false - mirGeneDB_mature = "/Users/chriskub/Downloads/ALL-mat.fas" - mirGeneDB_hairpin = "/Users/chriskub/Downloads/ALL-pre.fas" - mirGeneDB_gff = "/Users/chriskub/Downloads/ALL.gff" - mirGeneDB_species = null + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_ignore = false + mirna_gtf = null + mature = "https://mirbase.org/download/CURRENT/mature.fa" + hairpin = "https://mirbase.org/download/CURRENT/hairpin.fa" + mirgenedb = false + mirgenedb_mature = null + mirgenedb_hairpin = null + mirgenedb_gff = null + mirgenedb_species = null // UMI handling with_umi = false @@ -42,59 +39,77 @@ params { umi_merge_unmapped = true // Trimming options - clip_r1 = 0 - three_prime_clip_r1 = 0 - three_prime_adapter = "TGGAATTCTCGGGTGCCAAGG" - min_length = 17 - skip_qc = false - skip_fastqc = false - skip_multiqc = false - skip_mirdeep = false - skip_trimming = false - save_reference = false - trim_galore_max_length = 40 + clip_r1 = null + three_prime_clip_r1 = null + three_prime_adapter = null + trim_fastq = true + fastp_min_length = 17 + fastp_known_mirna_adapters = "$projectDir/assets/known_adapters.fa" + save_trimmed_fail = false + skip_fastqc = false + skip_multiqc = false + skip_mirdeep = false + skip_fastp = false + save_reference = false + fastp_max_length = 40 // Contamination filtering - filter_contamination = false - rrna = null - trna = null - cdna = null - ncrna = null - pirna = null - other_contamination = null + filter_contamination = false + rrna = null + trna = null + cdna = null + ncrna = null + pirna = null + other_contamination = null // MultiQC options - multiqc_config = null - multiqc_title = null - max_multiqc_email_size = '25.MB' + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options outdir = null - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null plaintext_email = false monochrome_logs = false + hook_url = null help = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - enable_conda = false + version = false // Config options - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null - config_profile_contact = null - config_profile_url = null - config_profile_name = null + + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_description = null + config_profile_contact = null + config_profile_url = null + config_profile_name = null + // Max resource options // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' + + + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes' + validationShowHiddenParams = false + validate_params = true + + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true } @@ -116,60 +131,119 @@ try { // System.err.println("WARNING: Could not load nf-core/config/smrnaseq profiles: ${params.custom_config_base}/pipeline/smrnaseq.config") // } - profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + } conda { - params.enable_conda = true + conda.enabled = true docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { docker.enabled = true docker.userEmulation = true + docker.runOptions = '-u $(id -u):$(id -g)' + conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + apptainer.enabled = false + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true singularity.autoMounts = true + conda.enabled = false docker.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } podman { podman.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } shifter { shifter.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false charliecloud.enabled = false + apptainer.enabled = false } charliecloud { charliecloud.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + gitpod { + + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB + } test { includeConfig 'conf/test.config' } test_no_genome { includeConfig 'conf/test_no_genome.config' } test_full { includeConfig 'conf/test_full.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + + id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + +} + // Load igenomes.config if required if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' @@ -191,32 +265,39 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Set default registry for Docker and Podman independent of -profile +// Will not be used unless Docker / Podman are enabled +// Set to your registry if you have a mirror of containers +docker.registry = 'quay.io' +podman.registry = 'quay.io' + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { name = 'nf-core/smrnaseq' - author = 'P. Ewels , C. Wang , R. Hammarén , L. Pantano ' + author = """P. Ewels, C. Wang, R. Hammarén, L. Pantano, A. Peltzer""" homePage = 'https://github.com/nf-core/smrnaseq' - description = 'Small RNA-Seq Best Practice Analysis Pipeline.' + description = """Small RNA-Seq Best Practice Analysis Pipeline.""" mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' - version = '2.1.0dev' + nextflowVersion = '!>=23.04.0' + version = '2.3dev' + doi = '10.5281/zenodo.3456879' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index b6696017..33b150b4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,9 +15,9 @@ "input": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/smrnaseq/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -109,50 +109,53 @@ "fa_icon": "fas fa-book", "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, - "mirGeneDB": { + "mirgenedb": { "type": "boolean", - "description": "Boolean wether mirGeneDB should be used instead of miRBase", - "help_text": "This allows you to use mirGeneDB instead of miRBase as the database. \n Note that you will need to set the additional flags `--mirGeneDB_species`, `--mirGeneDB_gff`, `--mirGeneDB_mature` and `--mirGeneDB_hairpin`", - "default": "false" + "description": "Boolean whether MirGeneDB should be used instead of miRBase", + "help_text": "This allows you to use MirGeneDB instead of miRBase as the database. \n Note that you will need to set the additional flags `--mirgenedb_species`, `--mirgenedb_gff`, `--mirgenedb_mature` and `--mirgenedb_hairpin`" }, "mirtrace_species": { "type": "string", "description": "Species for miRTrace.", - "help_text": "This is automatically set when using `--genome`. Example values: `hsa`, `mmu`...\n Note that mirTrace relies on miRBase for its species reference. See available references [here](https://mirbase.org/ftp/CURRENT/genomes/).", + "help_text": "This is automatically set when using `--genome`. Example values: `hsa`, `mmu`...\n Note that mirTrace relies on miRBase for its species reference. See available references [here](https://www.mirbase.org/help/genome_summary.shtml).", "fa_icon": "fas fa-journal-whills" }, - "mirGeneDB_species": { + "mirgenedb_species": { "type": "string", - "description": "Species of mirGeneDB.", - "help_text": "This replaces the value of `--mirtrace_species` if `--mirGeneDB` is used. \n Note the difference in case for species names used in MirGeneDB and miRBase." + "description": "Species of MirGeneDB.", + "help_text": "This replaces the value of `--mirtrace_species` if `--mirgenedb` is used. \n Note the difference in case for species names used in MirGeneDB and miRBase. See https://www.mirgenedb.org/browse for more information." }, "fasta": { "type": "string", - "fa_icon": "fas fa-font", - "description": "Path to reference genome FASTA genome file.", - "help_text": "If you have no genome reference available, the pipeline can build one using a FASTA file. This requires additional time and resources, so it's better to use a pre-build index if possible." + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "description": "Path to FASTA genome file.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "fa_icon": "far fa-file-code" }, "mirna_gtf": { "type": "string", "description": "GFF/GTF file with coordinates positions of precursor and miRNAs.", - "help_text": "miRBase `.gff3` file, typically downloaded from [`https://mirbase.org/ftp/CURRENT/genomes/`](https://mirbase.org/ftp/CURRENT/genomes/)\n\nIf using iGenomes with `--genome` this file will be downloaded from miRBase automatically during the pipeline run.\n\n", + "help_text": "miRBase `.gff3` file, typically downloaded from [`https://mirbase.org/download/CURRENT/genomes/`](https://mirbase.org/download/CURRENT/genomes/)\n\nIf using iGenomes with `--genome` this file will be downloaded from miRBase automatically during the pipeline run.\n\n", "fa_icon": "fas fa-address-book" }, - "mirGeneDB_gff": { + "mirgenedb_gff": { "type": "string", "description": "GFF/GTF file with coordinates positions of precursor and miRNAs.", - "help_text": "mirGeneDB `.gff3` file, typically downloaded from [`https://mirgenedb.org/download`]. This replaces the value of --mirna_gff if --mirGeneDB is used." + "help_text": "MirGeneDB `.gff3` file, typically downloaded from [`https://mirgenedb.org/download`]. This replaces the value of --mirna_gff if --mirgenedb is used." }, "mature": { "type": "string", "description": "Path to FASTA file with mature miRNAs.", "fa_icon": "fas fa-wheelchair", "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", - "default": "https://mirbase.org/ftp/CURRENT/mature.fa.gz" + "default": "https://mirbase.org/download/CURRENT/mature.fa" }, - "mirGeneDB_mature": { + "mirgenedb_mature": { "type": "string", - "description": "Path to FASTA file with mirGeneDB mature miRNAs.", + "description": "Path to FASTA file with MirGeneDB mature miRNAs.", "help_text": "This file needs to be downloaded from [`https://mirgenedb.org/download`]. Can be given either as a plain text `.fa` file or a compressed `.gz` file." }, "hairpin": { @@ -160,14 +163,14 @@ "description": "Path to FASTA file with miRNAs precursors.", "fa_icon": "fab fa-cuttlefish", "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", - "default": "https://mirbase.org/ftp/CURRENT/hairpin.fa.gz" + "default": "https://mirbase.org/download/CURRENT/hairpin.fa" }, - "mirGeneDB_hairpin": { + "mirgenedb_hairpin": { "type": "string", "description": "Path to FASTA file with miRNAs precursors.", - "help_text": "This file needs to be downloaded from [`https://mirgenedb.org/download`]. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\nNote that mirGeneDB does not have a dedicated hairpin file. The equivalent is the `Precursor sequences`." + "help_text": "This file needs to be downloaded from [`https://mirgenedb.org/download`]. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\nNote that MirGeneDB does not have a dedicated hairpin file. The equivalent is the `Precursor sequences`." }, - "bowtie_indices": { + "bowtie_index": { "type": "string", "description": "Path to a Bowtie 1 index directory", "fa_icon": "fas fa-book", @@ -179,18 +182,9 @@ "help_text": "Saving generated references means that you can use them for future pipeline runs, reducing processing times.", "fa_icon": "fas fa-save" }, - "igenomes_base": { - "type": "string", - "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt", - "hidden": true - }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", - "default": false, "fa_icon": "fas fa-ban", "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.", "hidden": true @@ -203,12 +197,6 @@ "description": "Options for trimming reads and primers.", "fa_icon": "fas fa-cut", "properties": { - "min_length": { - "type": "integer", - "default": 17, - "description": "Discard reads that are shorter than this after quality / adapter trimming.", - "fa_icon": "fas fa-ruler" - }, "clip_r1": { "type": "integer", "fa_icon": "fas fa-cut", @@ -225,19 +213,34 @@ "fa_icon": "fas fa-text-width", "description": "Sequencing adapter sequence to use for trimming." }, - "mirtrace_protocol": { - "type": "string", - "default": "illumina", - "fa_icon": "fas fa-vial", - "description": "The miRTrace protocol for QC reporting.", - "help_text": "miRTrace can handle four protocols, each with their own primer-read structure. See the protocol descriptions [here](https://github.com/friedlanderlab/mirtrace/blob/master/release-bundle-includes/doc/manual/mirtrace_manual.pdf).", - "enum": ["illumina", "cats", "qiaseq", "nextflex"] + "trim_fastq": { + "type": "boolean", + "default": true, + "fa_icon": "fas fa-hand-scissors", + "description": "Trim FastQ files" + }, + "fastp_min_length": { + "type": "integer", + "default": 17, + "description": "Minimum filter length for raw reads.", + "fa_icon": "fas fa-ruler-horizontal" }, - "trim_galore_max_length": { + "fastp_max_length": { "type": "integer", "default": 40, - "description": "The max-length parameter used for trimming with TrimGalore!.", - "fa_icon": "fas fa-ruler" + "description": "Maximum filter length for raw reads.", + "fa_icon": "fas fa-ruler-horizontal" + }, + "save_trimmed_fail": { + "type": "boolean", + "fa_icon": "fas fa-cloud-download-alt", + "description": "Save reads failing trimming" + }, + "fastp_known_mirna_adapters": { + "type": "string", + "default": "${projectDir}/assets/known_adapters.fa", + "description": "FastA with known miRNA adapter sequences for adapter trimming", + "fa_icon": "far fa-question-circle" } } }, @@ -249,7 +252,6 @@ "properties": { "filter_contamination": { "type": "boolean", - "default": false, "description": "Enables the contamination filtering." }, "rrna": { @@ -290,35 +292,25 @@ "description": "Switches to skip specific pipeline steps, if desired.", "fa_icon": "fas fa-fast-forward", "properties": { - "skip_qc": { - "type": "boolean", - "fa_icon": "fas fa-fast-forward", - "description": "Skip all QC steps", - "default": false - }, "skip_fastqc": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip FastQC", - "default": false + "description": "Skip FastQC" }, "skip_mirdeep": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip miRDeep", - "default": false + "description": "Skip miRDeep" }, "skip_multiqc": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip MultiQC", - "default": false + "description": "Skip MultiQC" }, - "skip_trimming": { + "skip_fastp": { "type": "boolean", - "fa_icon": "fas fa-fast-forward", - "description": "Skip all trimming steps.", - "default": false + "description": "Skip FastP", + "fa_icon": "fas fa-forward" } } }, @@ -399,7 +391,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -416,6 +408,14 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", + "default": false, + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "default": false, "hidden": true }, "publish_dir_mode": { @@ -439,6 +439,7 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", + "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -453,21 +454,34 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", + "default": false, + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, - "tracedir": { + "multiqc_logo": { "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", + "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", "hidden": true }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -475,18 +489,29 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", + "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, - "enable_conda": { + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "default": false, + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." + }, + "validationLenientMode": { "type": "boolean", - "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient more.", + "default": false, "hidden": true, - "fa_icon": "fas fa-bacon" + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." } } } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..0d62beb6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. +[tool.black] +line-length = 120 +target_version = ["py37", "py38", "py39", "py310"] + +[tool.isort] +profile = "black" +known_first_party = ["nf_core"] +multi_line_output = 3 diff --git a/subworkflows/local/contaminant_filter.nf b/subworkflows/local/contaminant_filter.nf index 848682ea..383c85ad 100644 --- a/subworkflows/local/contaminant_filter.nf +++ b/subworkflows/local/contaminant_filter.nf @@ -44,12 +44,11 @@ workflow CONTAMINANT_FILTER { reads.set { rrna_reads } - if (params.rrna) { // Index DB and filter $reads emit: $rrna_reads INDEX_RRNA ( rrna ) ch_versions = ch_versions.mix(INDEX_RRNA.out.versions) - MAP_RRNA ( reads, INDEX_RRNA.out.bt_indices, 'rRNA' ) + MAP_RRNA ( reads, INDEX_RRNA.out.index, 'rRNA' ) ch_versions = ch_versions.mix(MAP_RRNA.out.versions) ch_filter_stats = ch_filter_stats.mix(MAP_RRNA.out.stats.ifEmpty(null)) MAP_RRNA.out.unmapped.set { rrna_reads } @@ -61,7 +60,7 @@ workflow CONTAMINANT_FILTER { // Index DB and filter $rrna_reads emit: $trna_reads INDEX_TRNA ( trna ) ch_versions = ch_versions.mix(INDEX_TRNA.out.versions) - MAP_TRNA ( rrna_reads, INDEX_TRNA.out.bt_indices, 'tRNA') + MAP_TRNA ( rrna_reads, INDEX_TRNA.out.index, 'tRNA') ch_versions = ch_versions.mix(MAP_TRNA.out.versions) ch_filter_stats = ch_filter_stats.mix(MAP_TRNA.out.stats.ifEmpty(null)) MAP_TRNA.out.unmapped.set { trna_reads } @@ -75,7 +74,7 @@ workflow CONTAMINANT_FILTER { ch_versions = ch_versions.mix(BLAT_CDNA.out.versions) INDEX_CDNA ( BLAT_CDNA.out.filtered_set ) ch_versions = ch_versions.mix(INDEX_CDNA.out.versions) - MAP_CDNA ( trna_reads, INDEX_CDNA.out.bt_indices, 'cDNA' ) + MAP_CDNA ( trna_reads, INDEX_CDNA.out.index, 'cDNA' ) ch_versions = ch_versions.mix(MAP_CDNA.out.versions) ch_filter_stats = ch_filter_stats.mix(MAP_CDNA.out.stats.ifEmpty(null)) MAP_CDNA.out.unmapped.set { cdna_reads } @@ -88,7 +87,7 @@ workflow CONTAMINANT_FILTER { ch_versions = ch_versions.mix(BLAT_NCRNA.out.versions) INDEX_NCRNA ( BLAT_NCRNA.out.filtered_set ) ch_versions = ch_versions.mix(INDEX_NCRNA.out.versions) - MAP_NCRNA ( cdna_reads, INDEX_NCRNA.out.bt_indices, 'ncRNA' ) + MAP_NCRNA ( cdna_reads, INDEX_NCRNA.out.index, 'ncRNA' ) ch_versions = ch_versions.mix(MAP_NCRNA.out.versions) ch_filter_stats = ch_filter_stats.mix(MAP_NCRNA.out.stats.ifEmpty(null)) MAP_NCRNA.out.unmapped.set { ncrna_reads } @@ -101,7 +100,7 @@ workflow CONTAMINANT_FILTER { ch_versions = ch_versions.mix(BLAT_PIRNA.out.versions) INDEX_PIRNA ( BLAT_PIRNA.out.filtered_set ) ch_versions = ch_versions.mix(INDEX_PIRNA.out.versions) - MAP_PIRNA (ncrna_reads, INDEX_PIRNA.out.bt_indices, 'piRNA' ) + MAP_PIRNA ( ncrna_reads, INDEX_PIRNA.out.index, 'piRNA' ) ch_versions = ch_versions.mix(MAP_PIRNA.out.versions) ch_filter_stats = ch_filter_stats.mix(MAP_PIRNA.out.stats.ifEmpty(null)) MAP_PIRNA.out.unmapped.set { pirna_reads } @@ -114,7 +113,7 @@ workflow CONTAMINANT_FILTER { ch_versions = ch_versions.mix(BLAT_OTHER.out.versions) INDEX_OTHER ( BLAT_OTHER.out.filtered_set ) ch_versions = ch_versions.mix(INDEX_OTHER.out.versions) - MAP_OTHER (ncrna_reads, INDEX_OTHER.out.bt_indices, 'other' ) + MAP_OTHER ( ncrna_reads, INDEX_OTHER.out.index, 'other' ) ch_versions = ch_versions.mix(MAP_OTHER.out.versions) ch_filter_stats = ch_filter_stats.mix(MAP_OTHER.out.stats.ifEmpty(null)) MAP_OTHER.out.unmapped.set { other_cont_reads } diff --git a/subworkflows/local/fastqc_fastp.nf b/subworkflows/local/fastqc_fastp.nf new file mode 100644 index 00000000..9e4d952e --- /dev/null +++ b/subworkflows/local/fastqc_fastp.nf @@ -0,0 +1,116 @@ +// +// Read QC and trimming +// + +include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/fastqc/main' +include { FASTP } from '../../modules/nf-core/fastp/main' + +// +// Function that parses fastp json output file to get total number of reads after trimming +// +import groovy.json.JsonSlurper + +def getFastpReadsAfterFiltering(json_file) { + return new JsonSlurper().parseText(json_file.text) + ?.get('summary') + ?.get('after_filtering') + ?.get('total_reads') + ?.toInteger() +} + +String getFastpAdapterSequence(json_file){ + return new JsonSlurper().parseText(json_file.text) + ?.get('adapter_cutting') + ?.get('read1_adapter_sequence') +} + +workflow FASTQC_FASTP { + take: + reads // channel: [ val(meta), [ reads ] ] + adapter_list // channel: [ path/to/adapters.fa ] + save_trimmed_fail // value: boolean + save_merged // value: boolean + + + main: + + ch_versions = Channel.empty() + fastqc_raw_html = Channel.empty() + fastqc_raw_zip = Channel.empty() + adapterseq = reads.map { meta, _ -> [meta, null] } + if (!params.skip_fastqc) { + FASTQC_RAW ( + reads + ) + fastqc_raw_html = FASTQC_RAW.out.html + fastqc_raw_zip = FASTQC_RAW.out.zip + ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) + } + + trim_reads = reads + trim_json = Channel.empty() + trim_html = Channel.empty() + trim_log = Channel.empty() + trim_reads_fail = Channel.empty() + trim_reads_merged = Channel.empty() + fastqc_trim_html = Channel.empty() + fastqc_trim_zip = Channel.empty() + if (!params.skip_fastp) { + FASTP ( + reads, + adapter_list, + save_trimmed_fail, + save_merged + ) + trim_reads = FASTP.out.reads + trim_json = FASTP.out.json + trim_html = FASTP.out.html + trim_log = FASTP.out.log + trim_reads_fail = FASTP.out.reads_fail + trim_reads_merged = FASTP.out.reads_merged + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + + // + // Filter empty FastQ files after adapter trimming so FastQC doesn't fail + // + trim_reads + .join(trim_json) + .map { + meta, reads, json -> + if (getFastpReadsAfterFiltering(json) > 0) { + [ meta, reads ] + } + } + .set { trim_reads } + + trim_json + .map { meta, json -> [meta, getFastpAdapterSequence(json)] } + .set { adapterseq } + + if (!params.skip_fastqc) { + FASTQC_TRIM ( + trim_reads + ) + fastqc_trim_html = FASTQC_TRIM.out.html + fastqc_trim_zip = FASTQC_TRIM.out.zip + ch_versions = ch_versions.mix(FASTQC_TRIM.out.versions.first()) + } + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + trim_json // channel: [ val(meta), [ json ] ] + trim_html // channel: [ val(meta), [ html ] ] + trim_log // channel: [ val(meta), [ log ] ] + trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ] + trim_reads_merged // channel: [ val(meta), [ fastq.gz ] ] + adapterseq // channel: [ val(meta), [ adapterseq ] ] + + fastqc_raw_html // channel: [ val(meta), [ html ] ] + fastqc_raw_zip // channel: [ val(meta), [ zip ] ] + fastqc_trim_html // channel: [ val(meta), [ html ] ] + fastqc_trim_zip // channel: [ val(meta), [ zip ] ] + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/local/genome_quant.nf b/subworkflows/local/genome_quant.nf index 54c7675c..967b2757 100644 --- a/subworkflows/local/genome_quant.nf +++ b/subworkflows/local/genome_quant.nf @@ -3,40 +3,40 @@ // include { INDEX_GENOME } from '../../modules/local/bowtie_genome' -include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' +include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools' include { BOWTIE_MAP_SEQ as BOWTIE_MAP_GENOME } from '../../modules/local/bowtie_map_mirna' workflow GENOME_QUANT { take: - fasta - bt_index - reads // channel: [ val(meta), [ reads ] ] + fasta + index + reads // channel: [ val(meta), [ reads ] ] main: ch_versions = Channel.empty() - if (!bt_index){ - INDEX_GENOME ( fasta ) - bowtie_indices = INDEX_GENOME.out.bowtie_indices + if (!index){ + INDEX_GENOME ( [ [:], fasta ] ) + bowtie_index = INDEX_GENOME.out.index fasta_formatted = INDEX_GENOME.out.fasta ch_versions = ch_versions.mix(INDEX_GENOME.out.versions) } else { - bowtie_indices = Channel.fromPath("${bt_index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${bt_index}" } + bowtie_index = Channel.fromPath("${index}**ebwt", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${index}" } fasta_formatted = fasta } - if (bowtie_indices){ - BOWTIE_MAP_GENOME ( reads, bowtie_indices.collect() ) + if (bowtie_index){ + BOWTIE_MAP_GENOME ( reads, bowtie_index.collect() ) ch_versions = ch_versions.mix(BOWTIE_MAP_GENOME.out.versions) - BAM_SORT_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() ) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) + BAM_SORT_STATS_SAMTOOLS ( BOWTIE_MAP_GENOME.out.bam, Channel.empty() ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) } emit: fasta = fasta_formatted - indices = bowtie_indices - stats = BAM_SORT_SAMTOOLS.out.stats + index = bowtie_index + stats = BAM_SORT_STATS_SAMTOOLS.out.stats versions = ch_versions } diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 136e28be..910afd05 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -20,16 +20,11 @@ workflow INPUT_CHECK { versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] } -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] +// Function to get list of [ meta, [ fastq_1 ] ] def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = 1 - def array = [] + def meta = row.findAll {it.key != "fastq_1"} if (!file(row.fastq_1).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" } - array = [ meta, [ file(row.fastq_1) ] ] - return array + return [ meta, [ file(row.fastq_1) ] ] } diff --git a/subworkflows/local/mirdeep2.nf b/subworkflows/local/mirdeep2.nf index a790189f..f8098ba5 100644 --- a/subworkflows/local/mirdeep2.nf +++ b/subworkflows/local/mirdeep2.nf @@ -10,7 +10,7 @@ workflow MIRDEEP2 { take: reads // channel: [ val(meta), [ reads ] ] fasta - indices + index hairpin mature @@ -20,7 +20,7 @@ workflow MIRDEEP2 { MIRDEEP2_PIGZ ( reads ) ch_versions = ch_versions.mix(MIRDEEP2_PIGZ.out.versions.first()) - MIRDEEP2_MAPPER ( MIRDEEP2_PIGZ.out.reads, indices ) + MIRDEEP2_MAPPER ( MIRDEEP2_PIGZ.out.reads, index ) ch_versions = ch_versions.mix(MIRDEEP2_MAPPER.out.versions.first()) MIRDEEP2_RUN ( fasta, MIRDEEP2_MAPPER.out.mirdeep2_inputs, hairpin, mature ) diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf index a851d729..dfa16ab4 100644 --- a/subworkflows/local/mirna_quant.nf +++ b/subworkflows/local/mirna_quant.nf @@ -15,8 +15,8 @@ include { BOWTIE_MAP_SEQ as BOWTIE_MAP_MATURE BOWTIE_MAP_SEQ as BOWTIE_MAP_HAIRPIN BOWTIE_MAP_SEQ as BOWTIE_MAP_SEQCLUSTER } from '../../modules/local/bowtie_map_mirna' -include { BAM_SORT_SAMTOOLS as BAM_STATS_MATURE - BAM_SORT_SAMTOOLS as BAM_STATS_HAIRPIN } from '../nf-core/bam_sort_samtools' +include { BAM_SORT_STATS_SAMTOOLS as BAM_STATS_MATURE + BAM_SORT_STATS_SAMTOOLS as BAM_STATS_HAIRPIN } from '../nf-core/bam_sort_stats_samtools' include { SEQCLUSTER_SEQUENCES } from '../../modules/local/seqcluster_collapse.nf' include { MIRTOP_QUANT } from '../../modules/local/mirtop_quant.nf' @@ -25,8 +25,8 @@ include { EDGER_QC } from '../../modules/local/edger_qc.nf' workflow MIRNA_QUANT { take: - mature // channel: fasta file - hairpin // channel: fasta file + mature // channel: [ val(meta), fasta file] + hairpin // channel: [ val(meta), fasta file] gtf // channle: GTF file reads // channel: [ val(meta), [ reads ] ] @@ -45,10 +45,10 @@ workflow MIRNA_QUANT { FORMAT_HAIRPIN ( hairpin_parsed ) ch_versions = ch_versions.mix(FORMAT_HAIRPIN.out.versions) - INDEX_MATURE ( FORMAT_MATURE.out.formatted_fasta ).bowtie_indices.set { mature_bowtie } + INDEX_MATURE ( FORMAT_MATURE.out.formatted_fasta ).index.set { mature_bowtie } ch_versions = ch_versions.mix(INDEX_MATURE.out.versions) - INDEX_HAIRPIN ( FORMAT_HAIRPIN.out.formatted_fasta ).bowtie_indices.set { hairpin_bowtie } + INDEX_HAIRPIN ( FORMAT_HAIRPIN.out.formatted_fasta ).index.set { hairpin_bowtie } ch_versions = ch_versions.mix(INDEX_HAIRPIN.out.versions) reads @@ -64,7 +64,6 @@ workflow MIRNA_QUANT { .dump (tag:'hsux') .set { reads_hairpin } - BOWTIE_MAP_HAIRPIN ( reads_hairpin, hairpin_bowtie.collect() ) ch_versions = ch_versions.mix(BOWTIE_MAP_HAIRPIN.out.versions) @@ -93,8 +92,10 @@ workflow MIRNA_QUANT { BOWTIE_MAP_SEQCLUSTER ( reads_collapsed, hairpin_bowtie.collect() ) ch_versions = ch_versions.mix(BOWTIE_MAP_SEQCLUSTER.out.versions) + ch_mirtop_logs = Channel.empty() if (params.mirtrace_species){ - MIRTOP_QUANT ( BOWTIE_MAP_SEQCLUSTER.out.bam.collect{it[1]}, FORMAT_HAIRPIN.out.formatted_fasta, gtf ) + MIRTOP_QUANT ( BOWTIE_MAP_SEQCLUSTER.out.bam.collect{it[1]}, FORMAT_HAIRPIN.out.formatted_fasta.collect{it[1]}, gtf ) + ch_mirtop_logs = MIRTOP_QUANT.out.logs ch_versions = ch_versions.mix(MIRTOP_QUANT.out.versions) TABLE_MERGE ( MIRTOP_QUANT.out.mirtop_table ) @@ -109,22 +110,16 @@ workflow MIRNA_QUANT { fasta_mature = FORMAT_MATURE.out.formatted_fasta fasta_hairpin = FORMAT_HAIRPIN.out.formatted_fasta unmapped = reads_genome - bowtie_versions = BOWTIE_MAP_MATURE.out.versions - samtools_versions = BAM_STATS_MATURE.out.versions - seqcluster_versions = SEQCLUSTER_SEQUENCES.out.versions - mirtop_versions = MIRTOP_QUANT.out.versions mature_stats = BAM_STATS_MATURE.out.stats hairpin_stats = BAM_STATS_HAIRPIN.out.stats - mirtop_logs = MIRTOP_QUANT.out.logs - merge_versions = TABLE_MERGE.out.versions + mirtop_logs = ch_mirtop_logs versions = ch_versions } - def add_suffix(row, suffix) { def meta = [:] - meta.id = "${row[0].id}_${suffix}" + meta.id = "${row[0].id}_${suffix}" def array = [] array = [ meta, row[1] ] return array diff --git a/subworkflows/local/mirtrace.nf b/subworkflows/local/mirtrace.nf index ea4fc3a7..317c4444 100644 --- a/subworkflows/local/mirtrace.nf +++ b/subworkflows/local/mirtrace.nf @@ -6,16 +6,10 @@ include { MIRTRACE_RUN } from '../../modules/local/mirtrace' workflow MIRTRACE { take: - reads // channel: [ val(meta), [ reads ] ] + reads // channel: [ val(adapterseq), [ val(ids) ], [ path(reads) ] ] main: - reads - .map { it[1] } - .flatten() - .dump(tag:'mirtrace') - .set { all_reads } - - MIRTRACE_RUN ( all_reads.collect() ) + reads | MIRTRACE_RUN emit: results = MIRTRACE_RUN.out.mirtrace diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf similarity index 77% rename from subworkflows/nf-core/bam_sort_samtools.nf rename to subworkflows/nf-core/bam_sort_stats_samtools/main.nf index a3f2741a..fc1c652b 100644 --- a/subworkflows/nf-core/bam_sort_samtools.nf +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -2,16 +2,17 @@ // Sort, index BAM file and run samtools stats, flagstat and idxstats // -include { SAMTOOLS_SORT } from '../../modules/nf-core/modules/samtools/sort/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' -workflow BAM_SORT_SAMTOOLS { +workflow BAM_SORT_STATS_SAMTOOLS { take: - ch_bam // channel: [ val(meta), [ bam ] ] - fasta + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ val(meta), path(fasta) ] main: + ch_versions = Channel.empty() SAMTOOLS_SORT ( ch_bam ) @@ -33,7 +34,7 @@ workflow BAM_SORT_SAMTOOLS { } .set { ch_bam_bai } - BAM_STATS_SAMTOOLS ( ch_bam_bai, fasta ) + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) emit: diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml new file mode 100644 index 00000000..e01f9ccf --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -0,0 +1,70 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +components: + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" +maintainers: + - "@drpatelh" + - "@ewels" diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test new file mode 100644 index 00000000..a8a13f2a --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test @@ -0,0 +1,68 @@ +nextflow_workflow { + + name "Test Workflow BAM_SORT_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_SORT_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_sort_stats_samtools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/index" + tag "samtools/sort" + tag "samtools/stats" + tag "samtools/idxstats" + tag "samtools/flagstat" + + test("test_bam_sort_stats_samtools_single_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_bam'], checkIfExists: true) + ] + input[1] = [ [ id:'genome' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("test_bam_sort_stats_samtools_paired_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + input[1] = [ [ id:'genome' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..50ffde60 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,236 @@ +{ + "test_bam_sort_stats_samtools_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,2cf8fe8dbba3da7eb4fb251c79f428dc" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,002488588110dcee464e65f68c4726e8" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,796f45f791f06291b76329528fae0a54" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ], + "6": [ + "versions.yml:md5,176f12ceae81f76341e481988c799c15", + "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", + "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", + "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", + "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,002488588110dcee464e65f68c4726e8" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,2cf8fe8dbba3da7eb4fb251c79f428dc" + ] + ], + "csi": [ + + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,796f45f791f06291b76329528fae0a54" + ] + ], + "versions": [ + "versions.yml:md5,176f12ceae81f76341e481988c799c15", + "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", + "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", + "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", + "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" + ] + } + ], + "timestamp": "2023-10-18T09:34:31.989804787" + }, + "test_bam_sort_stats_samtools_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,81adec7882577c0ad17962599acf7745" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,9e6427a796975290b1110c9d542ac79d" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,f3f0e5aad236aae678ac5361b529a664" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + "6": [ + "versions.yml:md5,176f12ceae81f76341e481988c799c15", + "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", + "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", + "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", + "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,9e6427a796975290b1110c9d542ac79d" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,81adec7882577c0ad17962599acf7745" + ] + ], + "csi": [ + + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,f3f0e5aad236aae678ac5361b529a664" + ] + ], + "versions": [ + "versions.yml:md5,176f12ceae81f76341e481988c799c15", + "versions.yml:md5,7beadfaf6b22ea0ae6e655b41447803f", + "versions.yml:md5,bfcdd8e2d5151a14dac15a9332d73d52", + "versions.yml:md5,dd8f44a9bfef10555ef1c8cc0267ff9c", + "versions.yml:md5,f2eb7aba102adae159006c9a443c301b" + ] + } + ], + "timestamp": "2023-10-18T09:34:57.682759147" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml new file mode 100644 index 00000000..a8274109 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +bam_sort_stats_samtools: + - subworkflows/nf-core/bam_sort_stats_samtools/** diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf deleted file mode 100644 index 857ae2c1..00000000 --- a/subworkflows/nf-core/bam_stats_samtools.nf +++ /dev/null @@ -1,32 +0,0 @@ -// -// Run SAMtools stats, flagstat and idxstats -// - -include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' -include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/modules/samtools/idxstats/main' -include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/modules/samtools/flagstat/main' - -workflow BAM_STATS_SAMTOOLS { - take: - ch_bam_bai // channel: [ val(meta), [ bam ], [bai/csi] ] - fasta - - main: - ch_versions = Channel.empty() - - SAMTOOLS_STATS ( ch_bam_bai, fasta ) - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) - - SAMTOOLS_FLAGSTAT ( ch_bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) - - SAMTOOLS_IDXSTATS ( ch_bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first()) - - emit: - stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf new file mode 100644 index 00000000..44d4c010 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -0,0 +1,32 @@ +// +// Run SAMtools stats, flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml new file mode 100644 index 00000000..809bf736 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +components: + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - ch_bam_bai: + description: | + The input channel containing the BAM/CRAM and it's index + Structure: [ val(meta), path(bam), path(bai) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] +output: + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats)] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/subworkflows/nf-core/fastqc_trimgalore.nf b/subworkflows/nf-core/fastqc_trimgalore.nf deleted file mode 100644 index ba2deecd..00000000 --- a/subworkflows/nf-core/fastqc_trimgalore.nf +++ /dev/null @@ -1,48 +0,0 @@ -// -// Read QC, UMI extraction and trimming -// - -include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' -include { TRIMGALORE } from '../../modules/local/trimgalore' - -workflow FASTQC_TRIMGALORE { - take: - reads // channel: [ val(meta), [ reads ] ] - skip_fastqc // boolean: true/false - skip_trimming // boolean: true/false - - main: - ch_versions = Channel.empty() - fastqc_html = Channel.empty() - fastqc_zip = Channel.empty() - if (!skip_fastqc) { - FASTQC ( reads ).html.set { fastqc_html } - fastqc_zip = FASTQC.out.zip - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - } - - trim_reads = reads - trim_html = Channel.empty() - trim_zip = Channel.empty() - trim_log = Channel.empty() - trimgalore_versions = Channel.empty() - if (!skip_trimming) { - TRIMGALORE ( reads ).reads.set { trim_reads } - trim_html = TRIMGALORE.out.html - trim_zip = TRIMGALORE.out.zip - trim_log = TRIMGALORE.out.log - ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) - } - - emit: - reads = trim_reads // channel: [ val(meta), [ reads ] ] - - fastqc_html // channel: [ val(meta), [ html ] ] - fastqc_zip // channel: [ val(meta), [ zip ] ] - - trim_html // channel: [ val(meta), [ html ] ] - trim_zip // channel: [ val(meta), [ zip ] ] - trim_log // channel: [ val(meta), [ txt ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/tower.yml b/tower.yml new file mode 100644 index 00000000..1a72c200 --- /dev/null +++ b/tower.yml @@ -0,0 +1,29 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + "**/edger/hairpin_counts.csv": + display: "EdgeR Read counts for known hairpins (raw counts)" + "**/edger/hairpin_logtpm.csv": + display: "EdgeR Read counts for known hairpins (log TPM)" + "**/edger/hairpin_normalized_CPM.txt": + display: "EdgeR Read counts for known hairpins (normalised)" + "**/edger/mature_counts.csv": + display: "EdgeR Read counts for known mature miRNAs (raw)" + "**/edger/mature_logtpm.csv": + display: "EdgeR Read counts for known mature miRNAs (log TPM)" + "**/edger/mature_normalized_CPM.txt": + display: "EdgeR Read counts for known mature miRNAs (normalised)" + "**/edger/hairpin_CPM_heatmap.pdf": + display: "EdgeR Heatmap of hairpin sequences for all samples" + "**/edger/hairpin_edgeR_MDS_plot.pdf": + display: "EdgeR Dimensionality reduction plot of samples from hairpin counts" + "**/edger/hairpin_log2CPM_sample_distances_dendrogram.pdf": + display: "EdgeR dendrogram from hairpin counts" + "**/edger/hairpin_log2CPM_sample_distances_heatmap.pdf": + display: "EdgeR distances from hairpin counts" + "**/mirtop/mirna.tsv": + display: "Mirtop miRNA summary table" + "**/mirtop/mirtop.tsv": + display: "Mirtop summary table" + "**/mirtrace/mirtrace-report.html": + display: "Mirtrace report" diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index dc30e875..55f7614b 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -1,12 +1,19 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + PRINT PARAMS SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) +include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' + +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) + +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation + -// Validate input parameters WorkflowSmrnaseq.initialise(params, log) // Check input path parameters to see if they exist @@ -16,23 +23,16 @@ def checkPathParamList = [ ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +WorkflowSmrnaseq.initialise(params, log) // Check optional parameters -if (!params.mirtrace_species){ - exit 1, "Reference species for miRTrace is not defined." +if (!params.mirtrace_species) { + exit 1, "Reference species for miRTrace is not defined via the --mirtrace_species parameter." } -// Genome options -bt_index_from_species = params.genome ? params.genomes[ params.genome ].bowtie ?: false : false -bt_index = params.bowtie_indices ?: bt_index_from_species -mirtrace_species_from_species = params.genome ? params.genomes[ params.genome ].mirtrace_species ?: false : false -mirtrace_species = params.mirtrace_species ?: mirtrace_species_from_species -fasta_from_species = params.genome ? params.genomes[ params.genome ].fasta ?: false : false -fasta = params.fasta ?: fasta_from_species -mirna_gtf_from_species = params.mirtrace_species ? "https://mirbase.org/ftp/CURRENT/genomes/${params.mirtrace_species}.gff3" : false -mirna_gtf = params.mirna_gtf ? params.mirna_gtf : mirna_gtf_from_species +// Genome options +def mirna_gtf_from_species = params.mirtrace_species ? "https://mirbase.org/download/CURRENT/genomes/${params.mirtrace_species}.gff3" : false +def mirna_gtf = params.mirna_gtf ?: mirna_gtf_from_species /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -40,8 +40,11 @@ mirna_gtf = params.mirna_gtf ? params.mirna_gtf : mirna_gtf_from_species ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) +ch_fastp_adapters = Channel.fromPath(params.fastp_known_mirna_adapters, checkIfExists: true).collect() // collect to consume for all incoming samples to FASTP /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -52,25 +55,23 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -if (!params.mirGeneDB) { +if (!params.mirgenedb) { if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } - params.filterSpecies = params.mirtrace_species } else { - if (params.mirGeneDB_mature) { reference_mature = file(params.mirGeneDB_mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mirGeneDB_mature}" } - if (params.mirGeneDB_hairpin) { reference_hairpin = file(params.mirGeneDB_hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.mirGeneDB_hairpin}" } - if (params.mirGeneDB_gff) { mirna_gtf = file(params.mirGeneDB_gff, checkIfExists: true) } else { exit 1, "MirGeneDB gff file not found: ${params.mirGeneDB_gff}"} - params.filterSpecies = params.mirGeneDB_species + if (params.mirgenedb_mature) { reference_mature = file(params.mirgenedb_mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mirgenedb_mature}" } + if (params.mirgenedb_hairpin) { reference_hairpin = file(params.mirgenedb_hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.mirgenedb_hairpin}" } + if (params.mirgenedb_gff) { mirna_gtf = file(params.mirgenedb_gff, checkIfExists: true) } else { exit 1, "MirGeneDB gff file not found: ${params.mirgenedb_gff}"} } -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { FASTQC_UMITOOLS_TRIMGALORE } from '../subworkflows/nf-core/fastqc_umitools_trimgalore' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { FASTQC_UMITOOLS_FASTP } from '../subworkflows/nf-core/fastqc_umitools_trimgalore' include { DEDUPLICATE_UMIS } from '../subworkflows/local/umi_dedup' -include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter' -include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' -include { GENOME_QUANT } from '../subworkflows/local/genome_quant' -include { MIRTRACE } from '../subworkflows/local/mirtrace' -include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' +include { CONTAMINANT_FILTER } from '../subworkflows/local/contaminant_filter' +include { MIRNA_QUANT } from '../subworkflows/local/mirna_quant' +include { GENOME_QUANT } from '../subworkflows/local/genome_quant' +include { MIRTRACE } from '../subworkflows/local/mirtrace' +include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -81,10 +82,9 @@ include { MIRDEEP2 } from '../subworkflows/local/mirdeep2' // // MODULE: Installed directly from nf-core/modules // -include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main' -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -103,15 +103,9 @@ workflow SMRNASEQ { // SUBWORKFLOW: Read in samplesheet, validate and stage input files // INPUT_CHECK ( - ch_input + file(params.input) ) .reads - .map { - meta, fastq -> - meta.id = meta.id.split('_')[0..-2].join('_') - [ meta, fastq ] } - .dump(tag: 'map') - .groupTuple(by: [0]) .dump(tag: 'group') .branch { meta, fastq -> @@ -122,6 +116,9 @@ workflow SMRNASEQ { } .set { ch_fastq } ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") + // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ + // ! There is currently no tooling to help you write a sample sheet schema // // MODULE: Concatenate FastQ files from same sample if required @@ -135,24 +132,22 @@ workflow SMRNASEQ { ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) // - // SUBWORKFLOW: mirtrace QC + // SUBWORKFLOW: Read QC and trim adapters // - MIRTRACE (ch_cat_fastq) - ch_versions = ch_versions.mix(MIRTRACE.out.versions.ifEmpty(null)) // // SUBWORKFLOW: Read QC, extract UMI and trim adapters // - FASTQC_UMITOOLS_TRIMGALORE ( + FASTQC_UMITOOLS_FASTP ( ch_cat_fastq, params.skip_fastqc || params.skip_qc, params.with_umi, params.skip_trimming, params.umi_discard_read ) - ch_versions = ch_versions.mix(FASTQC_UMITOOLS_TRIMGALORE.out.versions) + ch_versions = ch_versions.mix(FASTQC_UMITOOLS_FASTP.out.versions) - reads_for_mirna = FASTQC_UMITOOLS_TRIMGALORE.out.reads + reads_for_mirna = FASTQC_UMITOOLS_FASTP.out.reads // // SUBWORKFLOW: Deduplicate UMIs by mapping them to the genome @@ -163,41 +158,62 @@ workflow SMRNASEQ { DEDUPLICATE_UMIS ( fasta_ch, bt_index, - FASTQC_UMITOOLS_TRIMGALORE.out.reads + FASTQC_UMITOOLS_FASTP.out.reads ) reads_for_mirna = DEDUPLICATE_UMIS.out.reads ch_versions = ch_versions.mix(DEDUPLICATE_UMIS.out.versions) } } + FASTQC_UMITOOLS_FASTP ( + ch_cat_fastq, + ch_fastp_adapters, + false, + false + ) + ch_versions = ch_versions.mix(FASTQC_FASTP.out.versions) + + // + // SUBWORKFLOW: mirtrace QC + // + FASTQC_FASTP.out.adapterseq + .join( FASTQC_FASTP.out.reads ) + .map { meta, adapterseq, reads -> [adapterseq, meta.id, reads] } + .groupTuple() + .set { ch_mirtrace_inputs } + + MIRTRACE(ch_mirtrace_inputs) + ch_versions = ch_versions.mix(MIRTRACE.out.versions.ifEmpty(null)) + + // // SUBWORKFLOW: remove contaminants from reads // contamination_stats = Channel.empty() + mirna_reads = FASTQC_FASTP.out.reads if (params.filter_contamination){ - CONTAMINANT_FILTER ( + CONTAMINANT_FILTER ( reference_hairpin, - params.rrna, - params.trna, - params.cdna, - params.ncrna, - params.pirna, + params.rrna, + params.trna, + params.cdna, + params.ncrna, + params.pirna, params.other_contamination, - FASTQC_TRIMGALORE.out.reads + FASTQC_FASTP.out.reads ) - - reads_for_mirna = CONTAMINANT_FILTER.out.filtered_reads + + contamination_stats = CONTAMINANT_FILTER.out.filter_stats ch_versions = ch_versions.mix(CONTAMINANT_FILTER.out.versions) - CONTAMINANT_FILTER.out.filter_stats - .set { contamination_stats } - + mirna_reads = CONTAMINANT_FILTER.out.filtered_reads + } MIRNA_QUANT ( - reference_mature, - reference_hairpin, + [ [:], reference_mature], + [ [:], reference_hairpin], mirna_gtf, - reads_for_mirna + mirna_reads ) ch_versions = ch_versions.mix(MIRNA_QUANT.out.versions.ifEmpty(null)) @@ -205,15 +221,19 @@ workflow SMRNASEQ { // GENOME // genome_stats = Channel.empty() - if (fasta){ - fasta_ch = file(fasta) - GENOME_QUANT ( fasta_ch, bt_index, MIRNA_QUANT.out.unmapped ) - GENOME_QUANT.out.stats - .set { genome_stats } + if (params.fasta){ + GENOME_QUANT ( file(params.fasta), params.bowtie_index, MIRNA_QUANT.out.unmapped ) + genome_stats = GENOME_QUANT.out.stats ch_versions = ch_versions.mix(GENOME_QUANT.out.versions) if (!params.skip_mirdeep) { - MIRDEEP2 (FASTQC_UMITOOLS_TRIMGALORE.out.reads, GENOME_QUANT.out.fasta, GENOME_QUANT.out.indices, MIRNA_QUANT.out.fasta_hairpin, MIRNA_QUANT.out.fasta_mature) + MIRDEEP2 ( + FASTQC_UMITOOLS_FASTP.out.reads, + GENOME_QUANT.out.fasta, + GENOME_QUANT.out.index.collect(), + MIRNA_QUANT.out.fasta_hairpin, + MIRNA_QUANT.out.fasta_mature + ) ch_versions = ch_versions.mix(MIRDEEP2.out.versions) } } @@ -232,28 +252,31 @@ workflow SMRNASEQ { workflow_summary = WorkflowSmrnaseq.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) + methods_description = WorkflowSmrnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) + ch_methods_description = Channel.value(methods_description) ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - - ch_multiqc_files = ch_multiqc_files.mix(FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_FASTP.out.trim_json.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(contamination_stats.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(genome_stats.collect({it[1]}).ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mature_stats.collect({it[1]}).ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.hairpin_stats.collect({it[1]}).ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(genome_stats.collect({it[1]}).ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(MIRNA_QUANT.out.mirtop_logs.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(MIRTRACE.out.results.collect().ifEmpty([])) MULTIQC ( - ch_multiqc_files.collect() + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() ) - multiqc_report = MULTIQC.out.report.toList() } } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPLETION EMAIL AND SUMMARY @@ -264,7 +287,11 @@ workflow.onComplete { if (params.email || params.email_on_fail) { NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) } + NfcoreTemplate.dump_parameters(workflow, params) NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) + } } /*