diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..ea27a584 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,27 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", + "python.formatting.yapfPath": "/opt/conda/bin/yapf", + "python.linting.flake8Path": "/opt/conda/bin/flake8", + "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", + "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", + "python.linting.pylintPath": "/opt/conda/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.gitattributes b/.gitattributes index 050bb120..7a2dabc2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 7a76df36..90bd1ebb 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -101,3 +101,18 @@ If you are using a new feature from core Nextflow, you may bump the minimum requ ### Images and figures For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/rnavar/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index fb5c200e..b70618d8 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,9 +42,9 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 21.10.3)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ * Version of nf-core/rnavar _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ab5c01d7..80d8e275 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,8 +15,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/rnav - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! 
- - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/rnavar/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/rnavar _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/rnavar/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/rnavar _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index fb5fe695..3e2b977a 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,16 +14,26 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v2 + # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnavar/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnavar/results-${{ github.sha }}" } - profiles: test_full,aws_tower + profiles: test_full + + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index a05594d9..ea0bd288 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,14 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnavar/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnavar/results-test-${{ github.sha }}" } - profiles: test,aws_tower + profiles: test + + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 377dde74..40e9c617 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,7 +13,7 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/rnavar' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/rnavar ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/rnavar ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on
the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2155d459..fc55fff3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,10 @@ on: env: NXF_ANSI_LOG: false - CAPSULE_LOG: none + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true jobs: test: @@ -20,39 +23,31 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - # Nextflow versions - include: - # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" - NXF_EDGE: "" - # Test latest edge release of Nextflow - - NXF_VER: "" - NXF_EDGE: "1" + NXF_VER: + - "23.04.0" + - "latest-everything" test: - "default" - - "annotation" + # - "annotation" - "removeduplicates" - "skipbasecalib" - "bamcsiindex" steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow - env: - NXF_VER: ${{ matrix.NXF_VER }} - # Uncomment only if the edge release is more recent than the latest stable release - # See https://github.com/nextflow-io/nextflow/issues/2467 - # NXF_EDGE: ${{ matrix.NXF_EDGE }} - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - nextflow self-update + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" + - name: Set up Python uses: actions/setup-python@v2 with: python-version: "3.x" + - name: Install dependencies run: python -m pip install --upgrade pip pytest-workflow + - name: Run pipeline with tests settings - run: pytest --tag ${{matrix.test}} --kwdof + run: TMPDIR=~ PROFILE=docker pytest --tag ${{ matrix.test }} --symlink --kwdof --git-aware --color=yes diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..694e90ec --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v7 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." 
+ days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index e67cdc60..aedaa933 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php @@ -34,9 +34,9 @@ jobs: id: prettier_status run: | if prettier --check ${GITHUB_WORKSPACE}; then - echo "::set-output name=result::pass" + echo "result=pass" >> $GITHUB_OUTPUT else - echo "::set-output name=result::fail" + echo "result=fail" >> $GITHUB_OUTPUT fi - name: Run 'prettier --write' diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 77358dee..888cb4bc 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -4,6 +4,8 @@ name: nf-core linting # that the code meets the nf-core guidelines. on: push: + branches: + - dev pull_request: release: types: [published] @@ -12,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -25,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @@ -35,22 +37,48 @@ jobs: - name: Run Prettier --check run: prettier --check ${GITHUB_WORKSPACE} + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## Python linting (`black`) is failing + + To keep the code consistent with lots of contributors, we run automated code consistency checks. + To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! 
+ repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v4 with: - python-version: "3.6" + python-version: "3.8" architecture: "x64" - name: Install dependencies @@ -71,7 +99,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 04758f61..0bbcd30f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -18,7 +18,7 @@ jobs: - name: Get PR number id: pr_number - run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment uses: marocchino/sticky-pull-request-comment@v2 diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..25488dcc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,4 +1,9 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..0c31cdb9 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v2.7.1" + hooks: + - id: prettier diff --git a/.prettierignore b/.prettierignore index d0e7ae58..437d763d 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,6 @@ email_template.html +adaptivecard.json +slackreport.json .nextflow* work/ data/ @@ -7,3 +9,4 @@ results/ testing/ testing* *.pyc +bin/ diff --git a/CITATIONS.md b/CITATIONS.md index fedf7b37..94e9a465 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,6 +12,8 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + - [STAR](https://pubmed.ncbi.nlm.nih.gov/23104886/) > Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905. @@ -60,5 +62,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
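The workflow updates above (in `fix-linting.yml` and `linting_comment.yml`) migrate step outputs from the deprecated `::set-output` workflow command to the `$GITHUB_OUTPUT` file. For reference, the two forms map onto each other as follows; this is a minimal sketch using the `result` output from `fix-linting.yml`:

```bash
# Deprecated workflow command (removed in this diff):
echo "::set-output name=result::pass"

# Current equivalent: append key=value to the output file provided by GitHub Actions:
echo "result=pass" >> "$GITHUB_OUTPUT"
```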
diff --git a/LICENSE b/LICENSE index 2a63dfa9..747ad37f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) @praveenraj2018 +Copyright (c) Praveen Raj, Maxime U Garcia Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 0cbbbe7d..21484ba3 100644 --- a/README.md +++ b/README.md @@ -5,23 +5,17 @@ [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/rnavar/results) [![Cite with Zenodo](https://zenodo.org/badge/DOI/10.5281/zenodo.6669637.svg)](https://doi.org/10.5281/zenodo.6669637) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/) -[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/) -[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/rnavar) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rnavar-4A154B?logo=slack)](https://nfcore.slack.com/channels/rnavar) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core) -[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rnavar-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/rnavar)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction -**nf-core/rnavar** is a bioinformatics best-practice analysis pipeline for GATK4 RNA variant calling. - -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. 
Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! - -On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/rnavar/results). +**nf-core/rnavar** is a bioinformatics pipeline for RNA variant calling analysis following GATK4 best practices. ## Pipeline summary @@ -41,7 +35,7 @@ On release, automated continuous integration tests run the pipeline on a full-si 14. Annotate variants ([`snpEff`](https://pcingola.github.io/SnpEff/se_introduction/), [Ensembl VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)) 15. Present QC for raw read, alignment, gene biotype, sample similarity, and strand-specificity checks ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/)) -### Summary of tools and version used in the pipeline: +### Summary of tools and version used in the pipeline | Tool | Version | | ----------- | ------- | @@ -54,40 +48,53 @@ On release, automated continuous integration tests run the pipeline on a full-si | Ensembl VEP | 104.3 | | MultiQC | 1.12 | -## Quick Start +## Usage + +> **Note** +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how +> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) +> with `-profile test` before running the workflow on actual data. + + - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. +Now, you can run the pipeline using: - > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.
+```console +nextflow run nf-core/rnavar -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR> --genome GRCh38 +``` -4. Start running your own analysis! +> **Warning:** +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those +> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). - ```console - nextflow run nf-core/rnavar -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input samplesheet.csv --genome GRCh38 - ``` +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/rnavar/usage) and the [parameter documentation](https://nf-co.re/rnavar/parameters). -## Documentation +## Pipeline output -The nf-core/rnavar pipeline comes with documentation about the pipeline [usage](https://nf-co.re/rnavar/usage), [parameters](https://nf-co.re/rnavar/parameters) and [output](https://nf-co.re/rnavar/output). +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/rnavar/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/rnavar/output). ## Credits -These scripts were originally written in Nextflow DSL2 for use at the [Barntumörbanken, Karolinska Institutet](https://ki.se/forskning/barntumorbanken), by Praveen Raj ([@praveenraj2018](https://github.com/praveenraj2018)) and Maxime U. Garcia ([@maxulysse](https://github.com/maxulysse)). +These scripts were originally written in Nextflow DSL2 for use at the [Barntumörbanken, Karolinska Institutet](https://ki.se/forskning/barntumorbanken), by Praveen Raj ([@praveenraj2018](https://github.com/praveenraj2018)) and Maxime U Garcia ([@maxulysse](https://github.com/maxulysse)). -The pipeline is primarily maintained by Praveen Raj ([@praveenraj2018](https://github.com/praveenraj2018)) and Maxime U. Garcia ([@maxulysse](https://github.com/maxulysse)) from [Barntumörbanken, Karolinska Institutet](https://ki.se/forskning/barntumorbanken).
+The pipeline is primarily maintained by Praveen Raj ([@praveenraj2018](https://github.com/praveenraj2018)) from [Barntumörbanken, Karolinska Institutet](https://ki.se/forskning/barntumorbanken) and Maxime U Garcia ([@maxulysse](https://github.com/maxulysse)) from [Seqera Labs](https://seqera.io). Many thanks to others who have helped out along the way too, including (but not limited to): [@ewels](https://github.com/ewels), diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..2b1d36cb --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/rnavar v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.txt b/assets/email_template.txt index 2c43ec61..cbc1a918 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -6,7 +6,6 @@ `._,._,' nf-core/rnavar v${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 00000000..46025e30 --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,29 @@ +id: "nf-core-rnavar-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/rnavar Methods Description" +section_href: "https://github.com/nf-core/rnavar" +plot_type: "html" +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: |
+  <h4>Methods</h4>
+  <p>Data was processed using nf-core/rnavar v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
+  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
+  <pre><code>${workflow.commandLine}</code></pre>
+  <p>${tool_citations}</p>
+  <h4>References</h4>
+  <ul>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820</li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x</li>
+    <li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7</li>
+    <li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192</li>
+    ${tool_bibliography}
+  </ul>
+  <div class="alert alert-info">
+    <h5>Notes:</h5>
+    <ul>
+      ${nodoi_text}
+      <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
+      <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
+    </ul>
+  </div>
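The `${...}` placeholders and `<% ... %>` scriptlets in the notification payloads added in this diff (`adaptivecard.json` above, `slackreport.json` below) and in the methods description template are rendered by Nextflow's Groovy templating before the pipeline sends them, e.g. to the webhook configured via `hook_url`. A hedged sketch for smoke-testing a rendered payload against a test webhook (here `HOOK_URL` and `rendered.json` are illustrative stand-ins, not files or variables the pipeline creates):

```bash
# POST an already-rendered notification payload to an incoming webhook.
# HOOK_URL and rendered.json are placeholders for illustration only.
curl -X POST \
  -H 'Content-Type: application/json' \
  --data @rendered.json \
  "$HOOK_URL"
```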
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index e1cb0b2b..e8b24927 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,12 +1,14 @@ report_comment: > - This report has been generated by the nf-core/rnavar + This report has been generated by the nf-core/rnavar analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: - software_versions: + "nf-core-rnavar-methods-description": order: -1000 - "nf-core-rnavar-summary": + software_versions: order: -1001 + "nf-core-rnavar-summary": + order: -1002 export_plots: true diff --git a/assets/nf-core-rnavar_logo_light.png b/assets/nf-core-rnavar_logo_light.png index 9cca37e8..509f923f 100644 Binary files a/assets/nf-core-rnavar_logo_light.png and b/assets/nf-core-rnavar_logo_light.png differ diff --git a/assets/schema_input.json b/assets/schema_input.json index 6fd501da..7cbc45b8 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,15 +10,19 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample ID must be provided and cannot contain spaces", + "meta": ["sample"] }, "fastq_1": { + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", "type": "string", "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "format": "file-path", + "exists": true }, "fastq_2": { "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "dependentRequired": ["fastq_1"], "anyOf": [ { "type": "string", @@ -28,12 +32,15 @@ "type": "string", "maxLength": 0 } - ] + ], + "format": "file-path", + "exists": true }, "strandedness": { "type": "string", "errorMessage": "Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded'", - "enum": ["forward", "reverse", "unstranded"] + "enum": ["forward", "reverse", "unstranded"], + "meta": ["strandedness"] } }, "required": ["sample", "fastq_1", "strandedness"] diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 00000000..6ab75715 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/rnavar v${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? 
("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index e9a7a178..00000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,257 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). - second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). - single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - assert len(row[self._sample_col]) > 0, "Sample input is required." - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required." - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. 
Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - assert ( - Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:] - ), "FASTQ pairs must have the same file extensions." - else: - row[self._single_col] = True - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - assert any(filename.endswith(extension) for extension in self.VALID_FORMATS), ( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique." - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - if not sniffer.has_header(peek): - logger.critical(f"The given sample sheet does not appear to contain a header.") - sys.exit(1) - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `rnavar test samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _rnavar test samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/rnavar/samplesheet/v1.0/samplesheet.csv - - """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. 
- if not required_columns.issubset(reader.fieldnames): - logger.critical(f"The sample sheet **must** contain the column headers: {', '.join(required_columns)}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/conf/base.config b/conf/base.config index ced75d06..6409be0d 100644 --- a/conf/base.config +++ b/conf/base.config @@ -14,7 +14,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -24,6 +24,11 @@ process { // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. 
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } diff --git a/conf/igenomes.config b/conf/igenomes.config index 7a1b3ac6..3f114377 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -36,6 +36,14 @@ params { macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" diff --git a/conf/modules.config b/conf/modules.config index f05b3322..55f7dcc9 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,14 +18,6 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: 'NFCORE_RNAVAR:RNAVAR:INPUT_CHECK:SAMPLESHEET_CHECK' { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, @@ -212,19 +204,19 @@ process { publishDir = [ enabled: false ] } - withName: '.*:MARKDUPLICATES:GATK4_MARKDUPLICATES' { + withName: 'GATK4_MARKDUPLICATES' { ext.args = [ '--ASSUME_SORTED true', '--VALIDATION_STRINGENCY LENIENT', params.remove_duplicates ? '--REMOVE_DUPLICATES true' : '' ].join(' ').trim() - ext.prefix = {"${meta.id}.markdup.sorted"} + ext.prefix = {"${meta.id}.md.bam"} publishDir = [ [ path: { "${params.outdir}/preprocessing/${meta.id}" }, mode: params.publish_dir_mode, enabled: true, - pattern: "*.{bam}" + pattern: "*.{bam,bai,csi}" ], [ path: { "${params.outdir}/reports/stats/${meta.id}" }, @@ -235,9 +227,9 @@ process { ] } - withName: '.*:MARKDUPLICATES:SAMTOOLS_INDEX' { + withName: '.*:BAM_MARKDUPLICATES:SAMTOOLS_INDEX' { ext.args = params.bam_csi_index ? '-c' : '' - ext.prefix = {"${meta.id}.markdup.sorted"} + ext.prefix = {"${meta.id}.md"} publishDir = [ path: { "${params.outdir}/preprocessing/${meta.id}" }, mode: params.publish_dir_mode, @@ -334,7 +326,6 @@ process { withName: GATK4_MERGEVCFS { ext.prefix = {"${meta.id}.haplotypecaller"} - ext.args = params.bam_csi_index ? 
'--CREATE_INDEX false' : '' publishDir = [ path: { "${params.outdir}/variant_calling/${meta.id}" }, mode: params.publish_dir_mode, diff --git a/conf/test.config b/conf/test.config index dd25aa41..68f97def 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,7 +20,7 @@ params { max_time = '6.h' // Input data - input = "https://raw.githubusercontent.com/nf-core/test-datasets/rnavar/samplesheet/v1.0/samplesheet.csv" + input = "${projectDir}/tests/csv/1.0/fastq_single.csv" // Genome references genome = 'WBcel235' diff --git a/docs/usage.md b/docs/usage.md index a59c1965..9bb2a4fe 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -8,26 +8,49 @@ The typical command for running the pipeline is as follows: -```console -nextflow run nf-core/rnavar --input samplesheet.csv --genome GRCh38 -profile docker +```bash +nextflow run nf-core/rnavar --input ./samplesheet.csv --outdir ./results --genome GRCh38 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. Note that the pipeline will create the following files in your working directory: -```console +```bash work # Directory containing the nextflow working files # Finished results in specified location (defined with --outdir) .nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +> ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run nf-core/rnavar -profile docker -params-file params.yaml +``` + +with `params.yaml` containing: + +```yaml +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: -```console +```bash nextflow pull nf-core/rnavar ``` @@ -35,9 +58,13 @@ nextflow pull nf-core/rnavar It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/rnavar releases page](https://github.com/nf-core/rnavar/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. 
+First, go to the [nf-core/rnavar releases page](https://github.com/nf-core/rnavar/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. + +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. +To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. + +> 💡 If you wish to share such a profile (e.g. to upload as supplementary material for academic publications), make sure to NOT include cluster-specific paths to files, nor institution-specific profiles. ## Samplesheet input @@ -210,7 +237,7 @@ nextflow run download_cache.nf --cadd_cache --cadd_version Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/). +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. > We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment.
+- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters - `docker` - A generic configuration profile to be used with [Docker](https://docker.com/) - `singularity` @@ -231,11 +261,10 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. -- `test` - - A profile with a complete configuration for automated testing - - Includes links to test data so needs no other parameters + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. ### `-resume` @@ -253,97 +282,19 @@ Specify the path to a specific config file (this is a core Nextflow command). Se Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnavar/blob/master/conf/base.config#L17) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -For example, if the nf-core/rnavar pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: - -```console -[62/149eb0] NOTE: Process `NFCORE_RNAVAR:RNAVAR:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'NFCORE_RNAVAR:RNAVAR:ALIGN_STAR:STAR_ALIGN (WT_REP1)' - -Caused by: - Process `NFCORE_RNAVAR:RNAVAR:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) - -Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. \ - +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. -Command exit status: - 137 - -Command output: - (empty) - -Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. 
-Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb - -Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` -``` - -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. -The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnavar Github repo](https://github.com/nf-core/rnavar/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so based on the search results the file we want is `modules/nf-core/software/star/align/main.nf`. -If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnavar/blob/master/modules/nf-core/modules/star/align/main.nf#L3). -The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. -The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnavar/blob/master/conf/base.config#L37-L41) which in this case is defined as `72GB`. -Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least `72GB` of memory, in this case increased to `100GB`. -The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. - -```nextflow -process { - withName: 'NFCORE_RNAVAR:RNAVAR:ALIGN_STAR:STAR_ALIGN' { - memory = 100.GB - } -} -``` +### Custom Containers -> **NB:** We specify the full process name i.e. `NFCORE_RNAVAR:RNAVAR:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. -> -> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version may be out of date. -### Updating containers +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration.
For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. +### Custom Tool Arguments -1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) -2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) -3. Create the custom config accordingly: +A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. - - For Docker: - - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` - - - For Singularity: - - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` - - - For Conda: - - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` - -> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. ### nf-core/configs @@ -353,6 +304,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). +## Azure Resource Requests + +To be used with the `azurebatch` profile by specifying `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. + +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). + ## Running in the background Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. @@ -367,6 +326,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo In some cases, the Nextflow Java virtual machines can start to request a large amount of memory.
We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): -```console +```bash NXF_OPTS='-Xms1g -Xmx4g' ``` diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index b3d092f8..00000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,529 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. -// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - 'version', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = 
group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != 
"" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 2fc0a9b9..408951ae 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -32,6 +32,25 @@ class NfcoreTemplate { } } + // + // Generate version string + // + public static String version(workflow) { + String version_string = "" + + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string + } + // // Construct and send completion email // @@ -61,7 +80,7 @@ class NfcoreTemplate { misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp def email_fields = [:] - email_fields['version'] = workflow.manifest.version + email_fields['version'] = NfcoreTemplate.version(workflow) email_fields['runName'] = workflow.runName email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete @@ -109,7 +128,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) @@ -145,6 +164,64 @@ class NfcoreTemplate { output_tf.withWriter { w -> w << email_txt } } + // + // Construct and send a notification to a web server as JSON + // e.g. Microsoft Teams and Slack + // + public static void IM_notification(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = NfcoreTemplate.version(workflow) + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("$projectDir/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + // // Print pipeline summary on completion // @@ -154,7 +231,7 @@ class NfcoreTemplate { if (workflow.stats.ignoredCount == 0) { log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" } } else { log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" @@ -242,6 +319,7 @@ class NfcoreTemplate { // public static String logo(workflow, monochrome_logs) { Map colors = logColours(monochrome_logs) + String workflow_version = NfcoreTemplate.version(workflow) String.format( """\n ${dashedLine(monochrome_logs)} @@ -250,7 +328,7 @@ class NfcoreTemplate { ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} ${dashedLine(monochrome_logs)} """.stripIndent() ) diff --git a/lib/Utils.groovy b/lib/Utils.groovy old mode 100755 new mode 100644 index 28567bd7..8d030f4e --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -21,19 +21,26 @@ class Utils { } // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } - if (conda_check_failed) { + if (channels_missing | channel_priority_violation) { log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " There is a problem with your Conda configuration!\n\n" + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 6b3af280..3bc11291 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the main.nf workflow in the nf-core/rnavar pipeline // +import nextflow.Nextflow + class WorkflowMain { // @@ -17,54 +19,24 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Print help to screen if required - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Print parameter summary log to screen - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) + // Print workflow version and exit on --version + if (params.version) { + String workflow_version = NfcoreTemplate.version(workflow) + log.info "${workflow.manifest.name} ${workflow_version}" + System.exit(0) } - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) - // Check that a -profile or Nextflow config has been provided to run the pipeline 
NfcoreTemplate.checkConfigProvided(workflow, log) // Check that conda channels are set-up correctly - if (params.enable_conda) { + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { Utils.checkCondaChannels(log) } @@ -73,21 +45,18 @@ class WorkflowMain { // Check input has been provided if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) + Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") } } - // // Get attribute from genome config file e.g. fasta // - public static String getGenomeAttribute(params, attribute) { - def val = '' + public static Object getGenomeAttribute(params, attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { - val = params.genomes[ params.genome ][ attribute ] + return params.genomes[ params.genome ][ attribute ] } } - return val + return null } } diff --git a/lib/WorkflowRnavar.groovy b/lib/WorkflowRnavar.groovy index 9f530eb3..a4a924c2 100755 --- a/lib/WorkflowRnavar.groovy +++ b/lib/WorkflowRnavar.groovy @@ -2,17 +2,21 @@ // This file holds several functions specific to the workflow/rnavar.nf in the nf-core/rnavar pipeline // +import nextflow.Nextflow +import groovy.text.SimpleTemplateEngine + class WorkflowRnavar { // // Check and validate parameters // public static void initialise(params, log) { + genomeExistsError(params, log) + if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - System.exit(1) + Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." } if (!params.gtf && !params.gff) { @@ -64,17 +68,76 @@ class WorkflowRnavar { return yaml_file_text } + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + // TODO Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { + // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = run_workflow.toMap() + meta["manifest_map"] = run_workflow.manifest.toMap() + + // Pipeline DOI + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>" + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + //meta["tool_bibliography"] = toolBibliographyText(params) + + + def methods_text = mqc_methods_yaml.text + + def engine = new SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html + } + // // Exit pipeline if incorrect --genome key provided // private static void genomeExistsError(params, log) { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - System.exit(1) + Nextflow.error(error_string) } } } diff --git a/main.nf b/main.nf index 65ee5089..f658cace 100644 --- a/main.nf +++ b/main.nf @@ -38,6 +38,22 @@ params.vep_species = WorkflowMain.getGenomeAttribute(params, 'vep_speci ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} + WorkflowMain.initialise(workflow, params, log) /* diff --git a/modules.json b/modules.json index 6962f631..eb4ff72e 100755 --- a/modules.json +++ b/modules.json @@ -2,105 +2,188 @@ "name": "nf-core/rnavar", "homePage": "https://github.com/nf-core/rnavar", "repos": { - "nf-core/modules": { - "cat/fastq": { - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" - }, - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "ensemblvep": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "fastqc": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "gatk4/applybqsr": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" - }, - "gatk4/baserecalibrator": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" - }, - "gatk4/bedtointervallist": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" - }, - "gatk4/createsequencedictionary": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" - }, - "gatk4/haplotypecaller": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" - }, - "gatk4/indexfeaturefile": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" - }, - "gatk4/intervallisttools": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" - }, - "gatk4/markduplicates": { - "git_sha": "df2620cfc7e4c21b14ed03c1c928f09fbabf83c4" - }, - "gatk4/mergevcfs": { - "git_sha":
"169b2b96c1167f89ab07127b7057c1d90a6996c7" - }, - "gatk4/splitncigarreads": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" - }, - "gatk4/variantfiltration": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" - }, - "gffread": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "gunzip": { - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" - }, - "multiqc": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "bedtools/merge": { - "git_sha": "4bb1d4e362a38642e877afe41aaf58ded9e56c86" - }, - "bedtools/sort": { - "git_sha": "4bb1d4e362a38642e877afe41aaf58ded9e56c86" - }, - "samtools/faidx": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/flagstat": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/idxstats": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/index": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/merge": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/sort": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/stats": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "snpeff": { - "git_sha": "40dd662fd26c3eb3160b7c8cbbe9bff80bbe2c30" - }, - "star/align": { - "git_sha": "1dddf1ce9443e3d93853d86e7a7aab52e5b4d614" - }, - "star/genomegenerate": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "tabix/bgziptabix": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "tabix/tabix": { - "git_sha": "b3e9b88e80880f450ad79a95b2b7aa05e1de5484" - }, - "untar": { - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "bedtools/merge": { + "branch": "master", + "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", + "installed_by": ["modules"] + }, + "bedtools/sort": { + "branch": "master", + "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", + "installed_by": ["modules"] + }, + "cat/fastq": { + "branch": "master", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "installed_by": ["modules"] + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", + "installed_by": ["modules"] + }, + "ensemblvep/vep": { + "branch": "master", + "git_sha": "9f9e1fc31cb35876922070c0e601ae05abae5cae", + "installed_by": ["modules"] + }, + "fastqc": { + "branch": "master", + "git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53", + "installed_by": ["modules"] + }, + "gatk4/applybqsr": { + "branch": "master", + "git_sha": "240937a2a9c30298110753292be041188891f2cb", + "installed_by": ["modules"] + }, + "gatk4/baserecalibrator": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "gatk4/bedtointervallist": { + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["modules"] + }, + "gatk4/combinegvcfs": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "gatk4/createsequencedictionary": { + "branch": "master", + "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", + "installed_by": ["modules"] + }, + "gatk4/haplotypecaller": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "gatk4/indexfeaturefile": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + 
"installed_by": ["modules"] + }, + "gatk4/intervallisttools": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "gatk4/markduplicates": { + "branch": "master", + "git_sha": "0a261469640941da2488e1a5aa023b64db837c70", + "installed_by": ["modules"] + }, + "gatk4/mergevcfs": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "gatk4/splitncigarreads": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "gatk4/variantfiltration": { + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["modules"] + }, + "gffread": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "gunzip": { + "branch": "master", + "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", + "installed_by": ["modules"] + }, + "mosdepth": { + "branch": "master", + "git_sha": "ebb27711cd5f4de921244bfa81c676504072d31c", + "installed_by": ["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", + "installed_by": ["modules"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", + "installed_by": ["modules"] + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "570ec5bcfe19c49e16c9ca35a7a116563af6cc1c", + "installed_by": ["modules"] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "e662ab16e0c11f1e62983e21de9871f59371a639", + "installed_by": ["modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/merge": { + "branch": "master", + "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "installed_by": ["modules"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", + "installed_by": ["modules"] + }, + "samtools/stats": { + "branch": "master", + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "installed_by": ["modules"] + }, + "snpeff/snpeff": { + "branch": "master", + "git_sha": "4d584d5cf6ed5f7363a51cdb4b3eb25398e9e537", + "installed_by": ["modules"] + }, + "star/align": { + "branch": "master", + "git_sha": "cc08a888069f67cab8120259bddab8032d4c0fe3", + "installed_by": ["modules"] + }, + "star/genomegenerate": { + "branch": "master", + "git_sha": "cc08a888069f67cab8120259bddab8032d4c0fe3", + "installed_by": ["modules"] + }, + "tabix/bgziptabix": { + "branch": "master", + "git_sha": "591b71642820933dcb3c954c934b397bd00d8e5e", + "installed_by": ["modules"] + }, + "tabix/tabix": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "untar": { + "branch": "master", + "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8", + "installed_by": ["modules"] + } + } + }, + "subworkflows": { + "nf-core": {} } } } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/local/custom/dumpsoftwareversions/main.nf similarity index 79% rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf rename to modules/local/custom/dumpsoftwareversions/main.nf index 327d5100..c9d014b1 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/local/custom/dumpsoftwareversions/main.nf @@ -1,11 +1,11 @@ process 
CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' + label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/local/custom/dumpsoftwareversions/meta.yml similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml rename to modules/local/custom/dumpsoftwareversions/meta.yml diff --git a/modules/local/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/local/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py new file mode 100755 index 00000000..e55b8d43 --- /dev/null +++ b/modules/local/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python + + +"""Provide functions to merge multiple versions.yml files.""" + + +import platform +from textwrap import dedent + +import yaml + + +def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" + html = [ + dedent( + """\\ + <style> + #nf-core-versions tbody:nth-child(even) { + background-color: #f2f2f2; + } + </style> + <table class="table" style="width:100%" id="nf-core-versions"> + <thead> + <tr> + <th> Process Name </th> + <th> Software </th> + <th> Version </th> + </tr> + </thead> + """ + ) + ] + for process, tmp_versions in sorted(versions.items()): + html.append("<tbody>") + for i, (tool, version) in enumerate(sorted(tmp_versions.items())): + html.append( + dedent( + f"""\\ + <tr> + <td><samp>{process if (i == 0) else ''}</samp></td> + <td><samp>{tool}</samp></td> + <td><samp>{version}</samp></td> + </tr> + """ + ) + ) + html.append("</tbody>") + html.append("</table>") + return "\\n".join(html) + + +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/local/gtf2bed.nf b/modules/local/gtf2bed/main.nf similarity index 90% rename from modules/local/gtf2bed.nf rename to modules/local/gtf2bed/main.nf index a8baa452..b18a63d4 100755 --- a/modules/local/gtf2bed.nf +++ b/modules/local/gtf2bed/main.nf @@ -2,14 +2,15 @@ process GTF2BED { tag "$gtf" label 'process_low' - conda (params.enable_conda ? "conda-forge::r-base=3.5.0" : null) + conda "conda-forge::r-base=3.5.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-base:3.5.0' : - 'quay.io/biocontainers/r-base:3.5.0'}" + 'biocontainers/r-base:3.5.0'}" input: - path gtf + tuple val(meta), path(gtf) val feature_type + output: path '*.bed' , emit: bed path "versions.yml", emit: versions diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 7667d217..00000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,30 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/rnavar/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/bedtools/merge/main.nf b/modules/nf-core/bedtools/merge/main.nf similarity index 72% rename from modules/nf-core/modules/bedtools/merge/main.nf rename to modules/nf-core/bedtools/merge/main.nf index 06dad822..6868d39f 100644 --- a/modules/nf-core/modules/bedtools/merge/main.nf +++ b/modules/nf-core/bedtools/merge/main.nf @@ -2,10 +2,10 @@ process BEDTOOLS_MERGE { tag "$meta.id" label 'process_single' - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + conda "bioconda::bedtools=2.31.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.0--hf5e1c6e_2' : + 'biocontainers/bedtools:2.31.0--hf5e1c6e_2' }" input: tuple val(meta), path(bed) @@ -33,4 +33,15 @@ process BEDTOOLS_MERGE { bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/bedtools/merge/meta.yml b/modules/nf-core/bedtools/merge/meta.yml similarity index 97% rename from modules/nf-core/modules/bedtools/merge/meta.yml rename to modules/nf-core/bedtools/merge/meta.yml index 76743679..82248afe 100644 --- a/modules/nf-core/modules/bedtools/merge/meta.yml +++ b/modules/nf-core/bedtools/merge/meta.yml @@ -3,6 +3,8 @@ description: combines overlapping or “book-ended” features in an interval fi keywords: - bed - merge + - bedtools + - overlapped bed tools: - bedtools: description: | diff --git a/modules/nf-core/modules/bedtools/sort/main.nf b/modules/nf-core/bedtools/sort/main.nf similarity index 50% rename from modules/nf-core/modules/bedtools/sort/main.nf rename to modules/nf-core/bedtools/sort/main.nf index 331c129a..df372bc5 100644 --- a/modules/nf-core/modules/bedtools/sort/main.nf +++ b/modules/nf-core/bedtools/sort/main.nf @@ -2,14 +2,14 @@ process BEDTOOLS_SORT { tag "$meta.id" label 'process_single' - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + conda "bioconda::bedtools=2.31.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.0--hf5e1c6e_2' : + 'biocontainers/bedtools:2.31.0--hf5e1c6e_2' }" input: tuple val(meta), path(intervals) - val extension + path genome_file output: tuple val(meta), path("*.${extension}"), emit: sorted @@ -19,13 +19,18 @@ process BEDTOOLS_SORT { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if ("$intervals" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def genome_cmd = genome_file ? "-g $genome_file" : "" + extension = task.ext.suffix ?: intervals.extension + if ("$intervals" == "${prefix}.${extension}") { + error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + } """ bedtools \\ sort \\ -i $intervals \\ + $genome_cmd \\ $args \\ > ${prefix}.${extension} @@ -34,4 +39,16 @@ process BEDTOOLS_SORT { bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: intervals.extension + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/bedtools/sort/meta.yml b/modules/nf-core/bedtools/sort/meta.yml similarity index 82% rename from modules/nf-core/modules/bedtools/sort/meta.yml rename to modules/nf-core/bedtools/sort/meta.yml index 369e51ff..3a3b4e4d 100644 --- a/modules/nf-core/modules/bedtools/sort/meta.yml +++ b/modules/nf-core/bedtools/sort/meta.yml @@ -3,6 +3,8 @@ description: Sorts a feature file by chromosome and other criteria. keywords: - bed - sort + - bedtools + - chromosome tools: - bedtools: description: | @@ -19,10 +21,11 @@ input: type: file description: BED/BEDGRAPH pattern: "*.{bed|bedGraph}" - - - extension: - type: string - description: Extension of the output file (e. g., ".bg", ".bedgraph", ".txt", ".tab", etc.) It is set arbitrarily by the user and corresponds to the file format which depends on arguments. + - genome_file: + type: file + description: | + Optional reference genome 2 column file that defines the expected chromosome order. + pattern: "*.{fai,txt,chromsizes}" output: - meta: type: map @@ -44,3 +47,4 @@ authors: - "@sruthipsuresh" - "@drpatelh" - "@chris-cheshire" + - "@adamrtalbot" diff --git a/modules/nf-core/modules/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf similarity index 58% rename from modules/nf-core/modules/cat/fastq/main.nf rename to modules/nf-core/cat/fastq/main.nf index b6854895..5021e6fc 100644 --- a/modules/nf-core/modules/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -1,11 +1,11 @@ process CAT_FASTQ { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(reads, stageAs: "input*/*") @@ -20,9 +20,9 @@ process CAT_FASTQ { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def readList = reads.collect{ it.toString() } + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] if (meta.single_end) { - if (readList.size > 1) { + if (readList.size >= 1) { """ cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz @@ -33,7 +33,7 @@ process CAT_FASTQ { """ } } else { - if (readList.size > 2) { + if (readList.size >= 2) { def read1 = [] def read2 = [] readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } @@ -48,4 +48,33 @@ process CAT_FASTQ { """ } } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size > 1) { + """ + touch ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size > 2) { + """ + touch ${prefix}_1.merged.fastq.gz + touch ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + } diff --git a/modules/nf-core/modules/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml similarity index 97% rename from modules/nf-core/modules/cat/fastq/meta.yml rename to modules/nf-core/cat/fastq/meta.yml index c836598e..8a39e309 100644 --- a/modules/nf-core/modules/cat/fastq/meta.yml +++ b/modules/nf-core/cat/fastq/meta.yml @@ -1,6 +1,7 @@ name: cat_fastq description: Concatenates fastq files keywords: + - cat - fastq - concatenate tools: @@ -16,7 +17,7 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - reads: - type: list + type: file description: | List of input FastQ files to be concatenated. output: diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf new file mode 100644 index 00000000..c9d014b1 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -0,0 +1,24 @@ +process CUSTOM_DUMPSOFTWAREVERSIONS { + label 'process_single' + + // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container + conda "bioconda::multiqc=1.15" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+        'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' :
+        'biocontainers/multiqc:1.15--pyhdfd78af_0' }"
+
+    input:
+    path versions
+
+    output:
+    path "software_versions.yml"    , emit: yml
+    path "software_versions_mqc.yml", emit: mqc_yml
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    template 'dumpsoftwareversions.py'
+}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
new file mode 100644
index 00000000..c32657de
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
@@ -0,0 +1,36 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: custom_dumpsoftwareversions
+description: Custom module used to dump software versions within the nf-core pipeline template
+keywords:
+  - custom
+  - dump
+  - version
+tools:
+  - custom:
+      description: Custom module used to dump software versions within the nf-core pipeline template
+      homepage: https://github.com/nf-core/tools
+      documentation: https://github.com/nf-core/tools
+      licence: ["MIT"]
+input:
+  - versions:
+      type: file
+      description: YML file containing software versions
+      pattern: "*.yml"
+
+output:
+  - yml:
+      type: file
+      description: Standard YML file containing software versions
+      pattern: "software_versions.yml"
+  - mqc_yml:
+      type: file
+      description: MultiQC custom content YML file containing software versions
+      pattern: "software_versions_mqc.yml"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@drpatelh"
+  - "@grst"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
new file mode 100755
index 00000000..da033408
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+
+"""Provide functions to merge multiple versions.yml files."""
+
+
+import yaml
+import platform
+from textwrap import dedent
+
+
+def _make_versions_html(versions):
+    """Generate a tabular HTML output of all versions for MultiQC."""
+    html = [
+        dedent(
+            """\\
+            <style>
+            #nf-core-versions tbody:nth-child(even) {
+                background-color: #f2f2f2;
+            }
+            </style>
+            <table class="table" style="width:100%" id="nf-core-versions">
+                <thead>
+                    <tr>
+                        <th> Process Name </th>
+                        <th> Software </th>
+                        <th> Version  </th>
+                    </tr>
+                </thead>
+            """
+        )
+    ]
+    for process, tmp_versions in sorted(versions.items()):
+        html.append("<tbody>")
+        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
+            html.append(
+                dedent(
+                    f"""\\
+                    <tr>
+                        <td><samp>{process if (i == 0) else ''}</samp></td>
+                        <td><samp>{tool}</samp></td>
+                        <td><samp>{version}</samp></td>
+                    </tr>
+                    """
+                )
+            )
+        html.append("</tbody>")
+    html.append("</table>")
+    return "\\n".join(html)
+
+
+def main():
+    """Load all version files and generate merged output."""
+    versions_this_module = {}
+    versions_this_module["${task.process}"] = {
+        "python": platform.python_version(),
+        "yaml": yaml.__version__,
+    }
+
+    with open("$versions") as f:
+        versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
+
+    # aggregate versions by the module name (derived from fully-qualified process name)
+    versions_by_module = {}
+    for process, process_versions in versions_by_process.items():
+        module = process.split(":")[-1]
+        try:
+            if versions_by_module[module] != process_versions:
+                raise AssertionError(
+                    "We assume that software versions are the same between all modules. "
+                    "If you see this error-message it means you discovered an edge-case "
+                    "and should open an issue in nf-core/tools. "
+                )
+        except KeyError:
+            versions_by_module[module] = process_versions
+
+    versions_by_module["Workflow"] = {
+        "Nextflow": "$workflow.nextflow.version",
+        "$workflow.manifest.name": "$workflow.manifest.version",
+    }
+
+    versions_mqc = {
+        "id": "software_versions",
+        "section_name": "${workflow.manifest.name} Software Versions",
+        "section_href": "https://github.com/${workflow.manifest.name}",
+        "plot_type": "html",
+        "description": "are collected at run time from the software output.",
+        "data": _make_versions_html(versions_by_module),
+    }
+
+    with open("software_versions.yml", "w") as f:
+        yaml.dump(versions_by_module, f, default_flow_style=False)
+    with open("software_versions_mqc.yml", "w") as f:
+        yaml.dump(versions_mqc, f, default_flow_style=False)
+
+    with open("versions.yml", "w") as f:
+        yaml.dump(versions_this_module, f, default_flow_style=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf
new file mode 100644
index 00000000..da0e3646
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/main.nf
@@ -0,0 +1,71 @@
+process ENSEMBLVEP_VEP {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::ensembl-vep=110.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' :
+        'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }"
+
+    input:
+    tuple val(meta), path(vcf), path(custom_extra_files)
+    val   genome
+    val   species
+    val   cache_version
+    path  cache
+    tuple val(meta2), path(fasta)
+    path  extra_files
+
+    output:
+    tuple val(meta), path("*.vcf.gz")  , optional:true, emit: vcf
+    tuple val(meta), path("*.tab.gz")  , optional:true, emit: tab
+    tuple val(meta), path("*.json.gz") , optional:true, emit: json
+    path "*.summary.html"              , emit: report
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 'tab' : 'vcf'
+    def compress_cmd = args.contains("--compress_output") ? '' : '--compress_output bgzip'
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep"
+    def reference = fasta ?
"--fasta $fasta" : "" + """ + vep \\ + -i $vcf \\ + -o ${prefix}.${file_extension}.gz \\ + $args \\ + $compress_cmd \\ + $reference \\ + --assembly $genome \\ + --species $species \\ + --cache \\ + --cache_version $cache_version \\ + --dir_cache $dir_cache \\ + --fork $task.cpus \\ + --stats_file ${prefix}.summary.html \\ + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.tab.gz + touch ${prefix}.json.gz + touch ${prefix}.summary.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/ensemblvep/meta.yml b/modules/nf-core/ensemblvep/vep/meta.yml similarity index 54% rename from modules/nf-core/modules/ensemblvep/meta.yml rename to modules/nf-core/ensemblvep/vep/meta.yml index cd9c8905..7783847d 100644 --- a/modules/nf-core/modules/ensemblvep/meta.yml +++ b/modules/nf-core/ensemblvep/vep/meta.yml @@ -1,7 +1,10 @@ -name: ENSEMBLVEP -description: Ensembl Variant Effect Predictor (VEP) +name: ENSEMBLVEP_VEP +description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`. keywords: - annotation + - vcf + - json + - tab tools: - ensemblvep: description: | @@ -10,17 +13,6 @@ tools: homepage: https://www.ensembl.org/info/docs/tools/vep/index.html documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html licence: ["Apache-2.0"] -params: - - use_cache: - type: boolean - description: | - Enable the usage of containers with cache - Does not work with conda - - vep_tag: - type: value - description: | - Specify the tag for the container - https://hub.docker.com/r/nfcore/vep/tags input: - meta: type: map @@ -31,28 +23,57 @@ input: type: file description: | vcf to annotate + - custom_extra_files: + type: file + description: | + extra sample-specific files to be used with the `--custom` flag to be configured with ext.args + (optional) - genome: - type: value + type: string description: | which genome to annotate with - species: - type: value + type: string description: | which species to annotate with - cache_version: - type: value + type: integer description: | which version of the cache to annotate with - cache: type: file description: | path to VEP cache (optional) + - meta2: + type: map + description: | + Groovy Map containing fasta reference information + e.g. 
[ id:'test' ]
+  - fasta:
+      type: file
+      description: |
+        reference FASTA file (optional)
+      pattern: "*.{fasta,fa}"
+  - extra_files:
+      type: file
+      description: |
+        path to file(s) needed for plugins (optional)
 output:
   - vcf:
       type: file
       description: |
-        annotated vcf
-      pattern: "*.ann.vcf"
+        annotated vcf (optional)
+      pattern: "*.ann.vcf.gz"
+  - tab:
+      type: file
+      description: |
+        tab file with annotated variants (optional)
+      pattern: "*.ann.tab.gz"
+  - json:
+      type: file
+      description: |
+        json file with annotated variants (optional)
+      pattern: "*.ann.json.gz"
   - report:
       type: file
       description: VEP report file
@@ -63,3 +84,5 @@ output:
       pattern: "versions.yml"
 authors:
   - "@maxulysse"
+  - "@matthdsm"
+  - "@nvnieuwk"
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
new file mode 100644
index 00000000..249f9064
--- /dev/null
+++ b/modules/nf-core/fastqc/main.nf
@@ -0,0 +1,55 @@
+process FASTQC {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::fastqc=0.11.9"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' :
+        'biocontainers/fastqc:0.11.9--0' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.html"), emit: html
+    tuple val(meta), path("*.zip") , emit: zip
+    path  "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Make list of old name and new name pairs to use for renaming in the bash while loop
+    def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
+    def rename_to = old_new_pairs*.join(' ').join(' ')
+    def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ')
+    """
+    printf "%s %s\\n" $rename_to | while read old_name new_name; do
+        [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
+    done
+
+    fastqc \\
+        $args \\
+        --threads $task.cpus \\
+        $renamed_files
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.html
+    touch ${prefix}.zip
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml
similarity index 100%
rename from modules/nf-core/modules/fastqc/meta.yml
rename to modules/nf-core/fastqc/meta.yml
diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test
new file mode 100644
index 00000000..3961de60
--- /dev/null
+++ b/modules/nf-core/fastqc/tests/main.nf.test
@@ -0,0 +1,32 @@
+nextflow_process {
+
+    name "Test Process FASTQC"
+    script "modules/nf-core/fastqc/main.nf"
+    process "FASTQC"
+    tag "fastqc"
+
+    test("Single-Read") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test', single_end:true ],
+                    [
+                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html"
+            assert path(process.out.html.get(0).get(1)).getText().contains("<tr><td>File type</td><td>Conventional base calls</td></tr>")
+            assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip"
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/modules/gatk4/applybqsr/main.nf b/modules/nf-core/gatk4/applybqsr/main.nf
similarity index 80%
rename from modules/nf-core/modules/gatk4/applybqsr/main.nf
rename to modules/nf-core/gatk4/applybqsr/main.nf
index a0e2c45c..b515f1c5 100644
--- a/modules/nf-core/modules/gatk4/applybqsr/main.nf
+++ b/modules/nf-core/gatk4/applybqsr/main.nf
@@ -2,10 +2,10 @@ process GATK4_APPLYBQSR {
     tag "$meta.id"
     label 'process_low'
 
-    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
+    conda "bioconda::gatk4=4.4.0.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
-        'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0':
+        'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }"
 
     input:
     tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals)
@@ -26,14 +26,14 @@
     def prefix = task.ext.prefix ?: "${meta.id}"
     def interval_command = intervals ? "--intervals $intervals" : ""
 
-    def avail_mem = 3
+    def avail_mem = 3072
     if (!task.memory) {
         log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
     } else {
-        avail_mem = task.memory.giga
+        avail_mem = (task.memory.mega*0.8).intValue()
     }
     """
-    gatk --java-options "-Xmx${avail_mem}g" ApplyBQSR \\
+    gatk --java-options "-Xmx${avail_mem}M" ApplyBQSR \\
         --input $input \\
         --output ${prefix}.${input.getExtension()} \\
         --reference $fasta \\
diff --git a/modules/nf-core/modules/gatk4/applybqsr/meta.yml b/modules/nf-core/gatk4/applybqsr/meta.yml
similarity index 72%
rename from modules/nf-core/modules/gatk4/applybqsr/meta.yml
rename to modules/nf-core/gatk4/applybqsr/meta.yml
index 3fc93f10..3002ab60 100644
--- a/modules/nf-core/modules/gatk4/applybqsr/meta.yml
+++ b/modules/nf-core/gatk4/applybqsr/meta.yml
@@ -3,16 +3,17 @@ description: Apply base quality score recalibration (BQSR) to a bam file
 keywords:
   - bqsr
   - bam
+  - base quality score recalibration
 tools:
   - gatk4:
-      description: |
-        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
-        with a primary focus on variant discovery and genotyping. Its powerful processing engine
-        and high-performance computing features make it capable of taking on projects of any size.
-      homepage: https://gatk.broadinstitute.org/hc/en-us
-      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
-      doi: 10.1158/1538-7445.AM2017-3590
-      licence: ["Apache-2.0"]
+      description: |
+        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping. Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] input: - meta: diff --git a/modules/nf-core/modules/gatk4/baserecalibrator/main.nf b/modules/nf-core/gatk4/baserecalibrator/main.nf similarity index 79% rename from modules/nf-core/modules/gatk4/baserecalibrator/main.nf rename to modules/nf-core/gatk4/baserecalibrator/main.nf index fb26d3da..318703a4 100644 --- a/modules/nf-core/modules/gatk4/baserecalibrator/main.nf +++ b/modules/nf-core/gatk4/baserecalibrator/main.nf @@ -2,10 +2,10 @@ process GATK4_BASERECALIBRATOR { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(input), path(input_index), path(intervals) @@ -28,14 +28,14 @@ process GATK4_BASERECALIBRATOR { def interval_command = intervals ? "--intervals $intervals" : "" def sites_command = known_sites.collect{"--known-sites $it"}.join(' ') - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \\ + gatk --java-options "-Xmx${avail_mem}M" BaseRecalibrator \\ --input $input \\ --output ${prefix}.table \\ --reference $fasta \\ diff --git a/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml b/modules/nf-core/gatk4/baserecalibrator/meta.yml similarity index 75% rename from modules/nf-core/modules/gatk4/baserecalibrator/meta.yml rename to modules/nf-core/gatk4/baserecalibrator/meta.yml index 08c1ebbf..a6b06c73 100644 --- a/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml +++ b/modules/nf-core/gatk4/baserecalibrator/meta.yml @@ -4,14 +4,14 @@ keywords: - sort tools: - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["Apache-2.0"] + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] input: - meta: diff --git a/modules/nf-core/modules/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf similarity index 78% rename from modules/nf-core/modules/gatk4/bedtointervallist/main.nf rename to modules/nf-core/gatk4/bedtointervallist/main.nf index 6224d9b3..a23abd06 100644 --- a/modules/nf-core/modules/gatk4/bedtointervallist/main.nf +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -2,14 +2,14 @@ process GATK4_BEDTOINTERVALLIST { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(bed) - path dict + tuple val(meta2), path(dict) output: tuple val(meta), path('*.interval_list'), emit: interval_list @@ -22,14 +22,14 @@ process GATK4_BEDTOINTERVALLIST { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" BedToIntervalList \\ + gatk --java-options "-Xmx${avail_mem}M" BedToIntervalList \\ --INPUT $bed \\ --OUTPUT ${prefix}.interval_list \\ --SEQUENCE_DICTIONARY $dict \\ diff --git a/modules/nf-core/modules/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml similarity index 88% rename from modules/nf-core/modules/gatk4/bedtointervallist/meta.yml rename to modules/nf-core/gatk4/bedtointervallist/meta.yml index 986f1592..40daf752 100644 --- a/modules/nf-core/modules/gatk4/bedtointervallist/meta.yml +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -3,6 +3,7 @@ description: Creates an interval list from a bed file and a reference dict keywords: - bed - interval list + - bedtointervallist tools: - gatk4: description: | @@ -23,6 +24,11 @@ input: type: file description: Input bed file pattern: "*.bed" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: Sequence dictionary @@ -38,3 +44,4 @@ output: pattern: "versions.yml" authors: - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/modules/gatk4/combinegvcfs/main.nf b/modules/nf-core/gatk4/combinegvcfs/main.nf old mode 100755 new mode 100644 similarity index 76% rename from modules/nf-core/modules/gatk4/combinegvcfs/main.nf rename to modules/nf-core/gatk4/combinegvcfs/main.nf index 488666b3..152fef42 --- a/modules/nf-core/modules/gatk4/combinegvcfs/main.nf +++ b/modules/nf-core/gatk4/combinegvcfs/main.nf @@ -2,14 +2,13 @@ process GATK4_COMBINEGVCFS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 
"bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: - tuple val(meta), path(vcf) - tuple val(meta), path(vcf_idx) + tuple val(meta), path(vcf), path(vcf_idx) path fasta path fai path dict @@ -26,14 +25,14 @@ process GATK4_COMBINEGVCFS { def prefix = task.ext.prefix ?: "${meta.id}" def input_list = vcf.collect{"--variant $it"}.join(' ') - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK COMBINEGVCFS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" CombineGVCFs \\ + gatk --java-options "-Xmx${avail_mem}M" CombineGVCFs \\ $input_list \\ --output ${prefix}.combined.g.vcf.gz \\ --reference ${fasta} \\ diff --git a/modules/nf-core/modules/gatk4/combinegvcfs/meta.yml b/modules/nf-core/gatk4/combinegvcfs/meta.yml old mode 100755 new mode 100644 similarity index 100% rename from modules/nf-core/modules/gatk4/combinegvcfs/meta.yml rename to modules/nf-core/gatk4/combinegvcfs/meta.yml diff --git a/modules/nf-core/modules/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf similarity index 66% rename from modules/nf-core/modules/gatk4/createsequencedictionary/main.nf rename to modules/nf-core/gatk4/createsequencedictionary/main.nf index 13fa9e81..15a86bea 100644 --- a/modules/nf-core/modules/gatk4/createsequencedictionary/main.nf +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -2,17 +2,17 @@ process GATK4_CREATESEQUENCEDICTIONARY { tag "$fasta" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: - path fasta + tuple val(meta), path(fasta) output: - path "*.dict" , emit: dict - path "versions.yml" , emit: versions + tuple val(meta), path('*.dict') , emit: dict + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,14 +20,14 @@ process GATK4_CREATESEQUENCEDICTIONARY { script: def args = task.ext.args ?: '' - def avail_mem = 6 + def avail_mem = 6144 if (!task.memory) { log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" CreateSequenceDictionary \\ + gatk --java-options "-Xmx${avail_mem}M" CreateSequenceDictionary \\ --REFERENCE $fasta \\ --URI $fasta \\ --TMP_DIR . 
\\ @@ -41,7 +41,7 @@ process GATK4_CREATESEQUENCEDICTIONARY { stub: """ - touch test.dict + touch ${fasta.baseName}.dict cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml new file mode 100644 index 00000000..a421e681 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -0,0 +1,39 @@ +name: gatk4_createsequencedictionary +description: Creates a sequence dictionary for a reference sequence +keywords: + - dictionary + - fasta + - createsequencedictionary +tools: + - gatk: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" +output: + - dict: + type: file + description: gatk dictionary file + pattern: "*.{dict}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/haplotypecaller/main.nf b/modules/nf-core/gatk4/haplotypecaller/main.nf new file mode 100644 index 00000000..478681bd --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/main.nf @@ -0,0 +1,75 @@ +process GATK4_HAPLOTYPECALLER { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals), path(dragstr_model) + path fasta + path fai + path dict + path dbsnp + path dbsnp_tbi + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.tbi") , optional:true, emit: tbi + tuple val(meta), path("*.realigned.bam"), optional:true, emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" + def interval_command = intervals ? "--intervals $intervals" : "" + def dragstr_command = dragstr_model ? "--dragstr-params-path $dragstr_model" : "" + def bamout_command = args.contains("--bam-writer-type") ? "--bam-output ${prefix.replaceAll('.g\\s*$', '')}.realigned.bam" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" HaplotypeCaller \\ + --input $input \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $dbsnp_command \\ + $interval_command \\ + $dragstr_command \\ + $bamout_command \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bamout_command = args.contains("--bam-writer-type") ? "--bam-output ${prefix.replaceAll('.g\\s*$', '')}.realigned.bam" : "" + + def stub_realigned_bam = bamout_command ? "touch ${prefix.replaceAll('.g\\s*$', '')}.realigned.bam" : "" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + ${stub_realigned_bam} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml b/modules/nf-core/gatk4/haplotypecaller/meta.yml similarity index 66% rename from modules/nf-core/modules/gatk4/haplotypecaller/meta.yml rename to modules/nf-core/gatk4/haplotypecaller/meta.yml index 81851a96..27633cca 100644 --- a/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml +++ b/modules/nf-core/gatk4/haplotypecaller/meta.yml @@ -6,14 +6,14 @@ keywords: - haplotype tools: - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["Apache-2.0"] + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] input: - meta: @@ -32,6 +32,10 @@ input: - intervals: type: file description: Bed file with the genomic regions included in the library (optional) + - dragstr_model: + type: file + description: Text file containing the DragSTR model of the used BAM/CRAM file (optional) + pattern: "*.txt" - fasta: type: file description: The reference fasta file @@ -69,6 +73,10 @@ output: type: file description: Index of VCF file pattern: "*.vcf.gz.tbi" + - bam: + type: file + description: Assembled haplotypes and locally realigned reads + pattern: "*.realigned.bam" authors: - "@suzannejin" diff --git a/modules/nf-core/modules/gatk4/indexfeaturefile/main.nf b/modules/nf-core/gatk4/indexfeaturefile/main.nf similarity index 73% rename from modules/nf-core/modules/gatk4/indexfeaturefile/main.nf rename to modules/nf-core/gatk4/indexfeaturefile/main.nf index 264f71ef..d3bb04a7 100644 --- a/modules/nf-core/modules/gatk4/indexfeaturefile/main.nf +++ b/modules/nf-core/gatk4/indexfeaturefile/main.nf @@ -2,10 +2,10 @@ process GATK4_INDEXFEATUREFILE { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(feature_file) @@ -20,14 +20,14 @@ process GATK4_INDEXFEATUREFILE { script: def args = task.ext.args ?: '' - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK IndexFeatureFile] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" IndexFeatureFile \\ + gatk --java-options "-Xmx${avail_mem}M" IndexFeatureFile \\ --input $feature_file \\ --tmp-dir . \\ $args diff --git a/modules/nf-core/modules/gatk4/indexfeaturefile/meta.yml b/modules/nf-core/gatk4/indexfeaturefile/meta.yml similarity index 100% rename from modules/nf-core/modules/gatk4/indexfeaturefile/meta.yml rename to modules/nf-core/gatk4/indexfeaturefile/meta.yml diff --git a/modules/nf-core/modules/gatk4/intervallisttools/main.nf b/modules/nf-core/gatk4/intervallisttools/main.nf similarity index 86% rename from modules/nf-core/modules/gatk4/intervallisttools/main.nf rename to modules/nf-core/gatk4/intervallisttools/main.nf index 7ab26c15..0054659a 100644 --- a/modules/nf-core/modules/gatk4/intervallisttools/main.nf +++ b/modules/nf-core/gatk4/intervallisttools/main.nf @@ -2,10 +2,10 @@ process GATK4_INTERVALLISTTOOLS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(intervals) @@ -21,17 +21,17 @@ process GATK4_INTERVALLISTTOOLS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK IntervalListTools] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ mkdir ${prefix}_split - gatk --java-options "-Xmx${avail_mem}g" IntervalListTools \\ + gatk --java-options "-Xmx${avail_mem}M" IntervalListTools \\ --INPUT $intervals \\ --OUTPUT ${prefix}_split \\ --TMP_DIR . \\ diff --git a/modules/nf-core/modules/gatk4/intervallisttools/meta.yml b/modules/nf-core/gatk4/intervallisttools/meta.yml similarity index 100% rename from modules/nf-core/modules/gatk4/intervallisttools/meta.yml rename to modules/nf-core/gatk4/intervallisttools/meta.yml diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf new file mode 100644 index 00000000..f4b3f6dc --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/main.nf @@ -0,0 +1,67 @@ +process GATK4_MARKDUPLICATES { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0 bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0': + 'biocontainers/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0' }" + + input: + tuple val(meta), path(bam) + path fasta + path fasta_fai + + output: + tuple val(meta), path("*cram"), emit: cram, optional: true + tuple val(meta), path("*bam"), emit: bam, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.metrics"), emit: metrics + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.bam" + + // If the extension is CRAM, then change it to BAM + prefix_bam = prefix.tokenize('.')[-1] == 'cram' ? "${prefix.substring(0, prefix.lastIndexOf('.'))}.bam" : prefix + + def input_list = bam.collect{"--INPUT $it"}.join(' ') + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + // Using samtools and not Markduplicates to compress to CRAM speeds up computation: + // https://medium.com/@acarroll.dna/looking-at-trade-offs-in-compression-levels-for-genomics-tools-eec2834e8b94 + """ + gatk --java-options "-Xmx${avail_mem}M" MarkDuplicates \\ + $input_list \\ + --OUTPUT ${prefix_bam} \\ + --METRICS_FILE ${prefix}.metrics \\ + --TMP_DIR . \\ + ${reference} \\ + $args + + # If cram files are wished as output, the run samtools for conversion + if [[ ${prefix} == *.cram ]]; then + samtools view -Ch -T ${fasta} -o ${prefix} ${prefix_bam} + rm ${prefix_bam} + samtools index ${prefix} + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gatk4/markduplicates/meta.yml b/modules/nf-core/gatk4/markduplicates/meta.yml similarity index 78% rename from modules/nf-core/modules/gatk4/markduplicates/meta.yml rename to modules/nf-core/gatk4/markduplicates/meta.yml index 93877f47..ddf98d2f 100644 --- a/modules/nf-core/modules/gatk4/markduplicates/meta.yml +++ b/modules/nf-core/gatk4/markduplicates/meta.yml @@ -26,6 +26,14 @@ input: type: file description: Sorted BAM file pattern: "*.{bam}" + - fasta: + type: file + description: Fasta file + pattern: "*.{fasta}" + - fasta_fai: + type: file + description: Fasta index file + pattern: "*.{fai}" output: - meta: @@ -41,6 +49,18 @@ output: type: file description: Marked duplicates BAM file pattern: "*.{bam}" + - cram: + type: file + description: Marked duplicates CRAM file + pattern: "*.{cram}" + - bai: + type: file + description: BAM index file + pattern: "*.{bam.bai}" + - crai: + type: file + description: CRAM index file + pattern: "*.{cram.crai}" - metrics: type: file description: Duplicate metrics file generated by GATK diff --git a/modules/nf-core/modules/gatk4/mergevcfs/main.nf b/modules/nf-core/gatk4/mergevcfs/main.nf similarity index 61% rename from modules/nf-core/modules/gatk4/mergevcfs/main.nf rename to modules/nf-core/gatk4/mergevcfs/main.nf index 964c1a3b..dfb5b33a 
100644 --- a/modules/nf-core/modules/gatk4/mergevcfs/main.nf +++ b/modules/nf-core/gatk4/mergevcfs/main.nf @@ -2,17 +2,18 @@ process GATK4_MERGEVCFS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf) - path dict + tuple val(meta2), path(dict) output: tuple val(meta), path('*.vcf.gz'), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi path "versions.yml" , emit: versions when: @@ -24,14 +25,14 @@ process GATK4_MERGEVCFS { def input_list = vcf.collect{ "--INPUT $it"}.join(' ') def reference_command = dict ? "--SEQUENCE_DICTIONARY $dict" : "" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK MergeVcfs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" MergeVcfs \\ + gatk --java-options "-Xmx${avail_mem}M" MergeVcfs \\ $input_list \\ --OUTPUT ${prefix}.vcf.gz \\ $reference_command \\ @@ -43,4 +44,16 @@ process GATK4_MERGEVCFS { gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/gatk4/mergevcfs/meta.yml b/modules/nf-core/gatk4/mergevcfs/meta.yml similarity index 83% rename from modules/nf-core/modules/gatk4/mergevcfs/meta.yml rename to modules/nf-core/gatk4/mergevcfs/meta.yml index 8d4123d9..db8c4cb0 100644 --- a/modules/nf-core/modules/gatk4/mergevcfs/meta.yml +++ b/modules/nf-core/gatk4/mergevcfs/meta.yml @@ -23,18 +23,26 @@ input: type: list description: Two or more VCF files pattern: "*.{vcf,vcf.gz}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] - ref_dict: type: file description: Optional Sequence Dictionary as input pattern: "*.dict" - - use_ref_dict: - type: boolean - description: Specify whether or not to use a given reference dictionary + output: - vcf: type: file description: merged vcf file pattern: "*.vcf.gz" + - tbi: + type: file + description: index files for the merged vcf files + pattern: "*.tbi" + - versions: type: file description: File containing software versions diff --git a/modules/nf-core/modules/gatk4/splitncigarreads/main.nf b/modules/nf-core/gatk4/splitncigarreads/main.nf similarity index 77% rename from modules/nf-core/modules/gatk4/splitncigarreads/main.nf rename to modules/nf-core/gatk4/splitncigarreads/main.nf index 456ec055..0178976c 100644 --- a/modules/nf-core/modules/gatk4/splitncigarreads/main.nf +++ b/modules/nf-core/gatk4/splitncigarreads/main.nf @@ -2,10 +2,10 @@ process GATK4_SPLITNCIGARREADS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 
"bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(bam), path(bai), path(intervals) @@ -25,14 +25,14 @@ process GATK4_SPLITNCIGARREADS { def prefix = task.ext.prefix ?: "${meta.id}" def interval_command = intervals ? "--intervals $intervals" : "" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK SplitNCigarReads] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" SplitNCigarReads \\ + gatk --java-options "-Xmx${avail_mem}M" SplitNCigarReads \\ --input $bam \\ --output ${prefix}.bam \\ --reference $fasta \\ diff --git a/modules/nf-core/modules/gatk4/splitncigarreads/meta.yml b/modules/nf-core/gatk4/splitncigarreads/meta.yml similarity index 100% rename from modules/nf-core/modules/gatk4/splitncigarreads/meta.yml rename to modules/nf-core/gatk4/splitncigarreads/meta.yml diff --git a/modules/nf-core/modules/gatk4/variantfiltration/main.nf b/modules/nf-core/gatk4/variantfiltration/main.nf similarity index 59% rename from modules/nf-core/modules/gatk4/variantfiltration/main.nf rename to modules/nf-core/gatk4/variantfiltration/main.nf index cda06e11..387ff8ca 100644 --- a/modules/nf-core/modules/gatk4/variantfiltration/main.nf +++ b/modules/nf-core/gatk4/variantfiltration/main.nf @@ -2,16 +2,16 @@ process GATK4_VARIANTFILTRATION { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + conda "bioconda::gatk4=4.4.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(tbi) - path fasta - path fai - path dict + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -25,20 +25,31 @@ process GATK4_VARIANTFILTRATION { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.toGiga() + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}G" VariantFiltration \\ + gatk --java-options "-Xmx${avail_mem}M" VariantFiltration \\ --variant $vcf \\ --output ${prefix}.vcf.gz \\ --reference $fasta \\ --tmp-dir . 
\\ $args + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + cat <<-END_VERSIONS > versions.yml "${task.process}": gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') diff --git a/modules/nf-core/modules/gatk4/variantfiltration/meta.yml b/modules/nf-core/gatk4/variantfiltration/meta.yml similarity index 79% rename from modules/nf-core/modules/gatk4/variantfiltration/meta.yml rename to modules/nf-core/gatk4/variantfiltration/meta.yml index 04b1c086..2260f37b 100644 --- a/modules/nf-core/modules/gatk4/variantfiltration/meta.yml +++ b/modules/nf-core/gatk4/variantfiltration/meta.yml @@ -3,6 +3,7 @@ description: Filter variants keywords: - vcf - filter + - variantfiltration tools: - gatk4: description: | @@ -27,14 +28,29 @@ input: type: list description: List of VCF file indexes pattern: "*.{idx,tbi}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: Fasta file of reference genome pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Index of fasta file pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - dict: type: file description: Sequence dictionary of fastea file @@ -54,3 +70,4 @@ output: pattern: "versions.yml" authors: - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/modules/gffread/main.nf b/modules/nf-core/gffread/main.nf similarity index 85% rename from modules/nf-core/modules/gffread/main.nf rename to modules/nf-core/gffread/main.nf index 7c575c97..f4472b0e 100644 --- a/modules/nf-core/modules/gffread/main.nf +++ b/modules/nf-core/gffread/main.nf @@ -2,10 +2,10 @@ process GFFREAD { tag "$gff" label 'process_low' - conda (params.enable_conda ? "bioconda::gffread=0.12.1" : null) + conda "bioconda::gffread=0.12.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : - 'quay.io/biocontainers/gffread:0.12.1--h8b12597_0' }" + 'biocontainers/gffread:0.12.1--h8b12597_0' }" input: path gff diff --git a/modules/nf-core/modules/gffread/meta.yml b/modules/nf-core/gffread/meta.yml similarity index 100% rename from modules/nf-core/modules/gffread/meta.yml rename to modules/nf-core/gffread/meta.yml diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/gunzip/main.nf similarity index 55% rename from modules/nf-core/modules/gunzip/main.nf rename to modules/nf-core/gunzip/main.nf index 61bf1afa..73bf08cd 100644 --- a/modules/nf-core/modules/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -1,11 +1,11 @@ process GUNZIP { tag "$archive" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
-        'ubuntu:20.04' }"
+        'nf-core/ubuntu:20.04' }"
 
     input:
     tuple val(meta), path(archive)
@@ -21,14 +21,28 @@
     def args = task.ext.args ?: ''
     gunzip = archive.toString() - '.gz'
     """
-    gunzip \\
-        -f \\
+    # Not calling gunzip itself because it creates files
+    # with the original group ownership rather than the
+    # default one for that user / the work directory
+    gzip \\
+        -cd \\
         $args \\
-        $archive
+        $archive \\
+        > $gunzip
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
     END_VERSIONS
     """
+
+    stub:
+    gunzip = archive.toString() - '.gz'
+    """
+    touch $gunzip
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
 }
diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml
new file mode 100644
index 00000000..4cdcdf4c
--- /dev/null
+++ b/modules/nf-core/gunzip/meta.yml
@@ -0,0 +1,35 @@
+name: gunzip
+description: Compresses and decompresses files.
+keywords:
+  - gunzip
+  - compression
+  - decompression
+tools:
+  - gunzip:
+      description: |
+        gzip is a file format and a software application used for file compression and decompression.
+      documentation: https://www.gnu.org/software/gzip/manual/gzip.html
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Optional groovy Map containing meta information
+        e.g. [ id:'test', single_end:false ]
+  - archive:
+      type: file
+      description: File to be compressed/uncompressed
+      pattern: "*.*"
+output:
+  - gunzip:
+      type: file
+      description: Compressed/uncompressed file
+      pattern: "*.*"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@jfy133"
diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
deleted file mode 100644
index d1390392..00000000
--- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/usr/bin/env python
-
-import yaml
-import platform
-from textwrap import dedent
-
-
-def _make_versions_html(versions):
-    html = [
-        dedent(
-            """\\
-            <style>
-            #nf-core-versions tbody:nth-child(even) {
-                background-color: #f2f2f2;
-            }
-            </style>
-            <table class="table" style="width:100%" id="nf-core-versions">
-                <thead>
-                    <tr>
-                        <th> Process Name </th>
-                        <th> Software </th>
-                        <th> Version  </th>
-                    </tr>
-                </thead>
-            """
-        )
-    ]
-    for process, tmp_versions in sorted(versions.items()):
-        html.append("<tbody>")
-        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
-            html.append(
-                dedent(
-                    f"""\\
-                    <tr>
-                        <td><samp>{process if (i == 0) else ''}</samp></td>
-                        <td><samp>{tool}</samp></td>
-                        <td><samp>{version}</samp></td>
-                    </tr>
-                    """
-                )
-            )
-        html.append("</tbody>")
-    html.append("</table>")
-    return "\\n".join(html)
-
-
-versions_this_module = {}
-versions_this_module["${task.process}"] = {
-    "python": platform.python_version(),
-    "yaml": yaml.__version__,
-}
-
-with open("$versions") as f:
-    versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
-
-# aggregate versions by the module name (derived from fully-qualified process name)
-versions_by_module = {}
-for process, process_versions in versions_by_process.items():
-    module = process.split(":")[-1]
-    try:
-        assert versions_by_module[module] == process_versions, (
-            "We assume that software versions are the same between all modules. "
-            "If you see this error-message it means you discovered an edge-case "
-            "and should open an issue in nf-core/tools. "
-        )
-    except KeyError:
-        versions_by_module[module] = process_versions
-
-versions_by_module["Workflow"] = {
-    "Nextflow": "$workflow.nextflow.version",
-    "$workflow.manifest.name": "$workflow.manifest.version",
-}
-
-versions_mqc = {
-    "id": "software_versions",
-    "section_name": "${workflow.manifest.name} Software Versions",
-    "section_href": "https://github.com/${workflow.manifest.name}",
-    "plot_type": "html",
-    "description": "are collected at run time from the software output.",
-    "data": _make_versions_html(versions_by_module),
-}
-
-with open("software_versions.yml", "w") as f:
-    yaml.dump(versions_by_module, f, default_flow_style=False)
-with open("software_versions_mqc.yml", "w") as f:
-    yaml.dump(versions_mqc, f, default_flow_style=False)
-
-with open("versions.yml", "w") as f:
-    yaml.dump(versions_this_module, f, default_flow_style=False)
diff --git a/modules/nf-core/modules/ensemblvep/Dockerfile b/modules/nf-core/modules/ensemblvep/Dockerfile
deleted file mode 100644
index ac1b4691..00000000
--- a/modules/nf-core/modules/ensemblvep/Dockerfile
+++ /dev/null
@@ -1,30 +0,0 @@
-FROM nfcore/base:1.14
-LABEL \
-    author="Maxime Garcia" \
-    description="VEP image for nf-core pipelines" \
-    maintainer="maxime.garcia@scilifelab.se"
-
-# Install the conda environment
-COPY environment.yml /
-RUN conda env create -f /environment.yml && conda clean -a
-
-# Add conda installation dir to PATH (instead of doing 'conda activate')
-ENV PATH /opt/conda/envs/nf-core-vep-104.3/bin:$PATH
-
-# Setup default ARG variables
-ARG GENOME=GRCh38
-ARG SPECIES=homo_sapiens
-ARG VEP_VERSION=99
-
-# Download Genome
-RUN vep_install \
-    -a c \
-    -c .vep \
-    -s ${SPECIES} \
-    -y ${GENOME} \
-    --CACHE_VERSION ${VEP_VERSION} \
-    --CONVERT \
-    --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE
-
-# Dump the details of the installed packages to a file for posterity
-RUN conda env export --name nf-core-vep-104.3 > nf-core-vep-104.3.yml
diff --git a/modules/nf-core/modules/ensemblvep/build.sh b/modules/nf-core/modules/ensemblvep/build.sh
deleted file mode 100644
index 5fcb91df..00000000
--- a/modules/nf-core/modules/ensemblvep/build.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-# Build and push all containers
-
-build_push() {
-    GENOME=$1
-    SPECIES=$2
-    VEP_VERSION=$3
-    VEP_TAG=$4
-
-    docker build \
-        -t nfcore/vep:${VEP_TAG}.${GENOME} \
-        software/vep/.
\ - --build-arg GENOME=${GENOME} \ - --build-arg SPECIES=${SPECIES} \ - --build-arg VEP_VERSION=${VEP_VERSION} - - docker push nfcore/vep:${VEP_TAG}.${GENOME} -} - -build_push "GRCh37" "homo_sapiens" "104" "104.3" -build_push "GRCh38" "homo_sapiens" "104" "104.3" -build_push "GRCm38" "mus_musculus" "102" "104.3" -build_push "GRCm39" "mus_musculus" "104" "104.3" -build_push "CanFam3.1" "canis_lupus_familiaris" "104" "104.3" -build_push "WBcel235" "caenorhabditis_elegans" "104" "104.3" diff --git a/modules/nf-core/modules/ensemblvep/environment.yml b/modules/nf-core/modules/ensemblvep/environment.yml deleted file mode 100644 index c0731c26..00000000 --- a/modules/nf-core/modules/ensemblvep/environment.yml +++ /dev/null @@ -1,10 +0,0 @@ -# You can use this file to create a conda environment for this module: -# conda env create -f environment.yml -name: nf-core-vep-104.3 -channels: - - conda-forge - - bioconda - - defaults - -dependencies: - - bioconda::ensembl-vep=104.3 diff --git a/modules/nf-core/modules/ensemblvep/main.nf b/modules/nf-core/modules/ensemblvep/main.nf deleted file mode 100644 index c2bd055f..00000000 --- a/modules/nf-core/modules/ensemblvep/main.nf +++ /dev/null @@ -1,52 +0,0 @@ -process ENSEMBLVEP { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::ensembl-vep=104.3" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ensembl-vep:104.3--pl5262h4a94de4_0' : - 'quay.io/biocontainers/ensembl-vep:104.3--pl5262h4a94de4_0' }" - - input: - tuple val(meta), path(vcf) - val genome - val species - val cache_version - path cache - - output: - tuple val(meta), path("*.ann.vcf"), emit: vcf - path "*.summary.html" , emit: report - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" - """ - mkdir $prefix - - vep \\ - -i $vcf \\ - -o ${prefix}.ann.vcf \\ - $args \\ - --assembly $genome \\ - --species $species \\ - --cache \\ - --cache_version $cache_version \\ - --dir_cache $dir_cache \\ - --fork $task.cpus \\ - --vcf \\ - --stats_file ${prefix}.summary.html - - rm -rf $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf deleted file mode 100644 index 05730368..00000000 --- a/modules/nf-core/modules/fastqc/main.nf +++ /dev/null @@ -1,59 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! 
-f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/modules/gatk4/createsequencedictionary/meta.yml deleted file mode 100644 index bd247888..00000000 --- a/modules/nf-core/modules/gatk4/createsequencedictionary/meta.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: gatk4_createsequencedictionary -description: Creates a sequence dictionary for a reference sequence -keywords: - - dictionary - - fasta -tools: - - gatk: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["Apache-2.0"] - -input: - - fasta: - type: file - description: Input fasta file - pattern: "*.{fasta,fa}" -output: - - dict: - type: file - description: gatk dictionary file - pattern: "*.{dict}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" diff --git a/modules/nf-core/modules/gatk4/haplotypecaller/main.nf b/modules/nf-core/modules/gatk4/haplotypecaller/main.nf deleted file mode 100644 index 6dd3f69e..00000000 --- a/modules/nf-core/modules/gatk4/haplotypecaller/main.nf +++ /dev/null @@ -1,53 +0,0 @@ -process GATK4_HAPLOTYPECALLER { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" - - input: - tuple val(meta), path(input), path(input_index), path(intervals) - path fasta - path fai - path dict - path dbsnp - path dbsnp_tbi - - output: - tuple val(meta), path("*.vcf.gz"), emit: vcf - tuple val(meta), path("*.tbi") , optional:true, emit: tbi - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" - def interval_command = intervals ? "--intervals $intervals" : "" - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" HaplotypeCaller \\ - --input $input \\ - --output ${prefix}.vcf.gz \\ - --reference $fasta \\ - $dbsnp_command \\ - $interval_command \\ - --tmp-dir . \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/markduplicates/main.nf b/modules/nf-core/modules/gatk4/markduplicates/main.nf deleted file mode 100644 index 68e4a21a..00000000 --- a/modules/nf-core/modules/gatk4/markduplicates/main.nf +++ /dev/null @@ -1,46 +0,0 @@ -process GATK4_MARKDUPLICATES { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*.bai") , optional:true, emit: bai - tuple val(meta), path("*.metrics"), emit: metrics - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def input_list = bam.collect{"--INPUT $it"}.join(' ') - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\ - $input_list \\ - --OUTPUT ${prefix}.bam \\ - --METRICS_FILE ${prefix}.metrics \\ - --TMP_DIR . \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gunzip/meta.yml b/modules/nf-core/modules/gunzip/meta.yml deleted file mode 100644 index 4d2ebc84..00000000 --- a/modules/nf-core/modules/gunzip/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: gunzip -description: Compresses and decompresses files. -keywords: - - gunzip - - compression -tools: - - gunzip: - description: | - gzip is a file format and a software application used for file compression and decompression. - documentation: https://www.gnu.org/software/gzip/manual/gzip.html - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. 
[ id:'test', single_end:false ] - - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" -output: - - gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@jfy133" diff --git a/modules/nf-core/modules/snpeff/Dockerfile b/modules/nf-core/modules/snpeff/Dockerfile deleted file mode 100644 index d0e34757..00000000 --- a/modules/nf-core/modules/snpeff/Dockerfile +++ /dev/null @@ -1,23 +0,0 @@ -FROM nfcore/base:1.14 -LABEL \ - author="Maxime Garcia" \ - description="snpEff image for nf-core pipelines" \ - maintainer="maxime.garcia@scilifelab.se" - -# Install the conda environment -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a - -# Setup default ARG variables -ARG GENOME=GRCh38 -ARG SNPEFF_CACHE_VERSION=99 -ARG SNPEFF_TAG=99 - -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-snpeff-${SNPEFF_TAG}/bin:$PATH - -# Download Genome -RUN snpEff download -v ${GENOME}.${SNPEFF_CACHE_VERSION} - -# Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-snpeff-${SNPEFF_TAG} > nf-core-snpeff-${SNPEFF_TAG}.yml diff --git a/modules/nf-core/modules/snpeff/build.sh b/modules/nf-core/modules/snpeff/build.sh deleted file mode 100644 index 2fccf9a8..00000000 --- a/modules/nf-core/modules/snpeff/build.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Build and push all containers - -build_push() { - GENOME=$1 - SNPEFF_CACHE_VERSION=$2 - SNPEFF_TAG=$3 - - docker build \ - . \ - -t nfcore/snpeff:${SNPEFF_TAG}.${GENOME} \ - --build-arg GENOME=${GENOME} \ - --build-arg SNPEFF_CACHE_VERSION=${SNPEFF_CACHE_VERSION} \ - --build-arg SNPEFF_TAG=${SNPEFF_TAG} - - docker push nfcore/snpeff:${SNPEFF_TAG}.${GENOME} -} - -build_push "GRCh37" "75" "5.0" -build_push "GRCh38" "99" "5.0" -build_push "GRCm38" "99" "5.0" -build_push "CanFam3.1" "99" "5.0" -build_push "WBcel235" "99" "5.0" diff --git a/modules/nf-core/modules/snpeff/environment.yml b/modules/nf-core/modules/snpeff/environment.yml deleted file mode 100644 index ad0523fb..00000000 --- a/modules/nf-core/modules/snpeff/environment.yml +++ /dev/null @@ -1,10 +0,0 @@ -# You can use this file to create a conda environment for this module: -# conda env create -f environment.yml -name: nf-core-snpeff-5.0 -channels: - - conda-forge - - bioconda - - defaults - -dependencies: - - bioconda::snpeff=5.0 diff --git a/modules/nf-core/modules/star/align/main.nf b/modules/nf-core/modules/star/align/main.nf deleted file mode 100644 index 762b84f6..00000000 --- a/modules/nf-core/modules/star/align/main.nf +++ /dev/null @@ -1,72 +0,0 @@ -process STAR_ALIGN { - tag "$meta.id" - label 'process_high' - - // Note: 2.7X indices incompatible with AWS iGenomes. - conda (params.enable_conda ? 'bioconda::star=2.7.9a' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/star:2.7.9a--h9ee0642_0' : - 'quay.io/biocontainers/star:2.7.9a--h9ee0642_0' }" - - input: - tuple val(meta), path(reads) - path index - path gtf - val star_ignore_sjdbgtf - val seq_platform - val seq_center - - output: - tuple val(meta), path('*d.out.bam') , emit: bam - tuple val(meta), path('*Log.final.out') , emit: log_final - tuple val(meta), path('*Log.out') , emit: log_out - tuple val(meta), path('*Log.progress.out'), emit: log_progress - path "versions.yml" , emit: versions - - tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted - tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript - tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted - tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq - tuple val(meta), path('*.tab') , optional:true, emit: tab - tuple val(meta), path('*.out.junction') , optional:true, emit: junction - tuple val(meta), path('*.out.sam') , optional:true, emit: sam - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" - def seq_platform = seq_platform ? "'PL:$seq_platform'" : "" - def seq_center = seq_center ? "--outSAMattrRGline ID:$prefix 'CN:$seq_center' 'SM:$prefix' $seq_platform " : "--outSAMattrRGline ID:$prefix 'SM:$prefix' $seq_platform " - def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' - def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' - """ - STAR \\ - --genomeDir $index \\ - --readFilesIn $reads \\ - --runThreadN $task.cpus \\ - --outFileNamePrefix $prefix. \\ - $out_sam_type \\ - $ignore_gtf \\ - $seq_center \\ - $args - - $mv_unsorted_bam - - if [ -f ${prefix}.Unmapped.out.mate1 ]; then - mv ${prefix}.Unmapped.out.mate1 ${prefix}.unmapped_1.fastq - gzip ${prefix}.unmapped_1.fastq - fi - if [ -f ${prefix}.Unmapped.out.mate2 ]; then - mv ${prefix}.Unmapped.out.mate2 ${prefix}.unmapped_2.fastq - gzip ${prefix}.unmapped_2.fastq - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/modules/untar/main.nf deleted file mode 100644 index 058d1764..00000000 --- a/modules/nf-core/modules/untar/main.nf +++ /dev/null @@ -1,47 +0,0 @@ -process UNTAR { - tag "$archive" - label 'process_low' - - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" - - input: - tuple val(meta), path(archive) - - output: - tuple val(meta), path("$untar"), emit: untar - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - untar = archive.toString() - '.tar.gz' - """ - tar \\ - -xzvf \\ - $args \\ - $archive \\ - $args2 \\ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS - """ - - stub: - untar = archive.toString() - '.tar.gz' - """ - touch $untar - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf new file mode 100644 index 00000000..74db3a27 --- /dev/null +++ b/modules/nf-core/mosdepth/main.nf @@ -0,0 +1,80 @@ +process MOSDEPTH { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::mosdepth=0.3.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mosdepth:0.3.3--hdfd78af_1' : + 'biocontainers/mosdepth:0.3.3--hdfd78af_1'}" + + input: + tuple val(meta), path(bam), path(bai), path(bed) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path('*.global.dist.txt') , emit: global_txt + tuple val(meta), path('*.summary.txt') , emit: summary_txt + tuple val(meta), path('*.region.dist.txt') , optional:true, emit: regions_txt + tuple val(meta), path('*.per-base.d4') , optional:true, emit: per_base_d4 + tuple val(meta), path('*.per-base.bed.gz') , optional:true, emit: per_base_bed + tuple val(meta), path('*.per-base.bed.gz.csi') , optional:true, emit: per_base_csi + tuple val(meta), path('*.regions.bed.gz') , optional:true, emit: regions_bed + tuple val(meta), path('*.regions.bed.gz.csi') , optional:true, emit: regions_csi + tuple val(meta), path('*.quantized.bed.gz') , optional:true, emit: quantized_bed + tuple val(meta), path('*.quantized.bed.gz.csi') , optional:true, emit: quantized_csi + tuple val(meta), path('*.thresholds.bed.gz') , optional:true, emit: thresholds_bed + tuple val(meta), path('*.thresholds.bed.gz.csi'), optional:true, emit: thresholds_csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--fasta ${fasta}" : "" + def interval = bed ? "--by ${bed}" : "" + if (bed && args.contains("--by")) { + error "'--by' can only be specified once when running mosdepth! 
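+    // NB: the region/quantized/thresholds outputs above are optional because
+    // mosdepth only writes them when the matching options are in effect
+    // ('--by' via the bed input, '--quantize' or '--thresholds' via ext.args).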
+    """
+    mosdepth \\
+        --threads $task.cpus \\
+        $interval \\
+        $reference \\
+        $args \\
+        $prefix \\
+        $bam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.global.dist.txt
+    touch ${prefix}.region.dist.txt
+    touch ${prefix}.summary.txt
+    touch ${prefix}.per-base.d4
+    touch ${prefix}.per-base.bed.gz
+    touch ${prefix}.per-base.bed.gz.csi
+    touch ${prefix}.regions.bed.gz
+    touch ${prefix}.regions.bed.gz.csi
+    touch ${prefix}.quantized.bed.gz
+    touch ${prefix}.quantized.bed.gz.csi
+    touch ${prefix}.thresholds.bed.gz
+    touch ${prefix}.thresholds.bed.gz.csi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/mosdepth/meta.yml b/modules/nf-core/mosdepth/meta.yml
new file mode 100644
index 00000000..adf3893f
--- /dev/null
+++ b/modules/nf-core/mosdepth/meta.yml
@@ -0,0 +1,109 @@
+name: mosdepth
+description: Calculates genome-wide sequencing coverage.
+keywords:
+  - mosdepth
+  - bam
+  - cram
+  - coverage
+tools:
+  - mosdepth:
+      description: |
+        Fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing.
+      documentation: https://github.com/brentp/mosdepth
+      doi: 10.1093/bioinformatics/btx699
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: Input BAM/CRAM file
+      pattern: "*.{bam,cram}"
+  - bai:
+      type: file
+      description: Index for BAM/CRAM file
+      pattern: "*.{bai,crai}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing bed information
+        e.g. [ id:'test' ]
+  - bed:
+      type: file
+      description: BED file with intersected intervals
+      pattern: "*.{bed}"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
+  - fasta:
+      type: file
+      description: Reference genome FASTA file
+      pattern: "*.{fa,fasta}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - global_txt:
+      type: file
+      description: Text file with global cumulative coverage distribution
+      pattern: "*.{global.dist.txt}"
+  - regions_txt:
+      type: file
+      description: Text file with region cumulative coverage distribution
+      pattern: "*.{region.dist.txt}"
+  - summary_txt:
+      type: file
+      description: Text file with summary mean depths per chromosome and regions
+      pattern: "*.{summary.txt}"
+  - per_base_bed:
+      type: file
+      description: BED file with per-base coverage
+      pattern: "*.{per-base.bed.gz}"
+  - per_base_csi:
+      type: file
+      description: Index file for BED file with per-base coverage
+      pattern: "*.{per-base.bed.gz.csi}"
+  - per_base_d4:
+      type: file
+      description: D4 file with per-base coverage
+      pattern: "*.{per-base.d4}"
+  - regions_bed:
+      type: file
+      description: BED file with per-region coverage
+      pattern: "*.{regions.bed.gz}"
+  - regions_csi:
+      type: file
+      description: Index file for BED file with per-region coverage
+      pattern: "*.{regions.bed.gz.csi}"
+  - quantized_bed:
+      type: file
+      description: BED file with binned coverage
+      pattern: "*.{quantized.bed.gz}"
+  - quantized_csi:
+      type: file
+      description: Index file for BED file with binned coverage
+      pattern: "*.{quantized.bed.gz.csi}"
+  - thresholds_bed:
+      type: file
+      description: BED file with the number of bases in each region that are covered at or above each threshold
+      pattern: "*.{thresholds.bed.gz}"
+  - thresholds_csi:
+      type: file
+      description: Index file for BED file with threshold coverage
+      pattern: "*.{thresholds.bed.gz.csi}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
authors:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@ramprasadn"
+  - "@matthdsm"
diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
similarity index 65%
rename from modules/nf-core/modules/multiqc/main.nf
rename to modules/nf-core/multiqc/main.nf
index ae019dbf..65d7dd0d 100644
--- a/modules/nf-core/modules/multiqc/main.nf
+++ b/modules/nf-core/multiqc/main.nf
@@ -1,13 +1,16 @@
 process MULTIQC {
-    label 'process_medium'
+    label 'process_single'
 
-    conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null)
+    conda "bioconda::multiqc=1.15"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' :
-        'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' :
+        'biocontainers/multiqc:1.15--pyhdfd78af_0' }"
 
     input:
-    path multiqc_files
+    path multiqc_files, stageAs: "?/*"
+    path(multiqc_config)
+    path(extra_multiqc_config)
+    path(multiqc_logo)
 
     output:
     path "*multiqc_report.html", emit: report
@@ -20,8 +23,15 @@ process MULTIQC {
     script:
     def args = task.ext.args ?: ''
+    def config = multiqc_config ? "--config $multiqc_config" : ''
+    def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : ''
     """
-    multiqc -f $args .
+    multiqc \\
+        --force \\
+        $args \\
+        $config \\
+        $extra_config \\
+        .
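+
+    # NB: when both config YMLs are supplied, MultiQC reads them in the order
+    # given above, so extra_multiqc_config overrides any sections it shares
+    # with multiqc_config (see this module's meta.yml).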
cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml similarity index 67% rename from modules/nf-core/modules/multiqc/meta.yml rename to modules/nf-core/multiqc/meta.yml index 6fa891ef..f93b5ee5 100644 --- a/modules/nf-core/modules/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: MultiQC description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: @@ -12,18 +13,32 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + input: - multiqc_files: type: file description: | List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + output: - report: type: file description: MultiQC report file pattern: "multiqc_report.html" - data: - type: dir + type: directory description: MultiQC data dir pattern: "multiqc_data" - plots: @@ -38,3 +53,4 @@ authors: - "@abhi18av" - "@bunop" - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/modules/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf similarity index 57% rename from modules/nf-core/modules/samtools/faidx/main.nf rename to modules/nf-core/samtools/faidx/main.nf index fdce7d9b..59ed3088 100644 --- a/modules/nf-core/modules/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -1,18 +1,21 @@ process SAMTOOLS_FAIDX { tag "$fasta" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(fasta) + tuple val(meta2), path(fai) output: - tuple val(meta), path ("*.fai"), emit: fai - path "versions.yml" , emit: versions + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -22,7 +25,8 @@ process SAMTOOLS_FAIDX { """ samtools \\ faidx \\ - $fasta + $fasta \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -31,8 +35,12 @@ process SAMTOOLS_FAIDX { """ stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? 
"touch ${match[0][1]}" : '' """ + ${fastacmd} touch ${fasta}.fai + cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml similarity index 72% rename from modules/nf-core/modules/samtools/faidx/meta.yml rename to modules/nf-core/samtools/faidx/meta.yml index e9767764..957b25e5 100644 --- a/modules/nf-core/modules/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -3,6 +3,7 @@ description: Index FASTA file keywords: - index - fasta + - faidx tools: - samtools: description: | @@ -17,12 +18,21 @@ input: - meta: type: map description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta: type: file description: FASTA file pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" output: - meta: type: map @@ -33,6 +43,10 @@ output: type: file description: FASTA index file pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf similarity index 58% rename from modules/nf-core/modules/samtools/flagstat/main.nf rename to modules/nf-core/samtools/flagstat/main.nf index b87b2108..b75707ec 100644 --- a/modules/nf-core/modules/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -1,11 +1,11 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam), path(bai) @@ -19,12 +19,24 @@ process SAMTOOLS_FLAGSTAT { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ samtools \\ flagstat \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ $bam \\ - > ${bam}.flagstat + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml similarity index 95% rename from modules/nf-core/modules/samtools/flagstat/meta.yml rename to modules/nf-core/samtools/flagstat/meta.yml index 95269063..954225df 100644 --- a/modules/nf-core/modules/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -14,7 +14,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. 
homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/modules/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf similarity index 58% rename from modules/nf-core/modules/samtools/idxstats/main.nf rename to modules/nf-core/samtools/idxstats/main.nf index a49ff35f..83c7c34b 100644 --- a/modules/nf-core/modules/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -1,11 +1,11 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam), path(bai) @@ -19,11 +19,26 @@ process SAMTOOLS_IDXSTATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ samtools \\ idxstats \\ + --threads ${task.cpus-1} \\ $bam \\ - > ${bam}.idxstats + > ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml similarity index 95% rename from modules/nf-core/modules/samtools/idxstats/meta.yml rename to modules/nf-core/samtools/idxstats/meta.yml index 3710ab88..dda87e1e 100644 --- a/modules/nf-core/modules/samtools/idxstats/meta.yml +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -15,7 +15,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/modules/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf similarity index 88% rename from modules/nf-core/modules/samtools/index/main.nf rename to modules/nf-core/samtools/index/main.nf index e04e63e8..0b20aa4b 100644 --- a/modules/nf-core/modules/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/modules/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml similarity index 95% rename from modules/nf-core/modules/samtools/index/meta.yml rename to modules/nf-core/samtools/index/meta.yml index e5cadbc2..8bd2fa6f 100644 --- a/modules/nf-core/modules/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/modules/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf similarity index 72% rename from modules/nf-core/modules/samtools/merge/main.nf rename to modules/nf-core/samtools/merge/main.nf index bbf7e8fb..b73b7cb2 100644 --- a/modules/nf-core/modules/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -2,27 +2,30 @@ process SAMTOOLS_MERGE { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: - tuple val(meta), path(input_files) - path fasta + tuple val(meta), path(input_files, stageAs: "?/*") + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram + tuple val(meta), path("*.csi") , optional:true, emit: csi path "versions.yml" , emit: versions + when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def file_type = input_files[0].getExtension() + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() def reference = fasta ? "--reference ${fasta}" : "" """ samtools \\ @@ -41,7 +44,7 @@ process SAMTOOLS_MERGE { stub: prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def file_type = input_files[0].getExtension() + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() """ touch ${prefix}.${file_type} diff --git a/modules/nf-core/modules/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml similarity index 66% rename from modules/nf-core/modules/samtools/merge/meta.yml rename to modules/nf-core/samtools/merge/meta.yml index fb78e55c..3a815f74 100644 --- a/modules/nf-core/modules/samtools/merge/meta.yml +++ b/modules/nf-core/samtools/merge/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. 
These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -25,10 +25,24 @@ input: type: file description: BAM/CRAM file pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: - type: optional file - description: Reference file the CRAM was created with + type: file + description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of the reference file the CRAM was created with (optional) + pattern: "*.fai" output: - meta: type: map @@ -47,8 +61,13 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" authors: - "@drpatelh" - "@yuukiiwa " - "@maxulysse" - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf similarity index 79% rename from modules/nf-core/modules/samtools/sort/main.nf rename to modules/nf-core/samtools/sort/main.nf index b4fc1cbe..2b7753fd 100644 --- a/modules/nf-core/modules/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,16 +2,17 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam) output: tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.csi"), emit: csi, optional: true path "versions.yml" , emit: versions when: @@ -22,7 +23,13 @@ process SAMTOOLS_SORT { def prefix = task.ext.prefix ?: "${meta.id}" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ - samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam + samtools sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/nf-core/modules/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml similarity index 88% rename from modules/nf-core/modules/samtools/sort/meta.yml rename to modules/nf-core/samtools/sort/meta.yml index a820c55a..07328431 100644 --- a/modules/nf-core/modules/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. 
homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -39,6 +39,10 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/modules/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf similarity index 78% rename from modules/nf-core/modules/samtools/stats/main.nf rename to modules/nf-core/samtools/stats/main.nf index bbdc3240..4a2607de 100644 --- a/modules/nf-core/modules/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -1,15 +1,15 @@ process SAMTOOLS_STATS { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input), path(input_index) - path fasta + tuple val(meta2), path(fasta) output: tuple val(meta), path("*.stats"), emit: stats @@ -20,14 +20,15 @@ process SAMTOOLS_STATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" """ samtools \\ stats \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ ${reference} \\ ${input} \\ - > ${input}.stats + > ${prefix}.stats cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -38,7 +39,7 @@ process SAMTOOLS_STATS { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${input}.stats + touch ${prefix}.stats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml similarity index 71% rename from modules/nf-core/modules/samtools/stats/meta.yml rename to modules/nf-core/samtools/stats/meta.yml index cac50b1c..90e6345f 100644 --- a/modules/nf-core/modules/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -13,7 +13,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -23,16 +23,21 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - input: - type: file - description: BAM/CRAM file from alignment - pattern: "*.{bam,cram}" + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" - input_index: - type: file - description: BAI/CRAI file from alignment - pattern: "*.{bai,crai}" + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] - fasta: - type: optional file - description: Reference file the CRAM was created with + type: file + description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" output: - meta: @@ -51,3 +56,4 @@ output: authors: - "@drpatelh" - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/modules/snpeff/main.nf b/modules/nf-core/snpeff/snpeff/main.nf similarity index 60% rename from modules/nf-core/modules/snpeff/main.nf rename to modules/nf-core/snpeff/snpeff/main.nf index 1b4d5f43..e92c1597 100644 --- a/modules/nf-core/modules/snpeff/main.nf +++ b/modules/nf-core/snpeff/snpeff/main.nf @@ -1,20 +1,22 @@ -process SNPEFF { +process SNPEFF_SNPEFF { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::snpeff=5.0" : null) + conda "bioconda::snpeff=5.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' : - 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' : + 'biocontainers/snpeff:5.1--hdfd78af_2' }" input: tuple val(meta), path(vcf) val db - path cache + tuple val(meta2), path(cache) output: tuple val(meta), path("*.ann.vcf"), emit: vcf path "*.csv" , emit: report + path "*.html" , emit: summary_html + path "*.genes.txt" , emit: genes_txt path "versions.yml" , emit: versions when: @@ -22,17 +24,17 @@ process SNPEFF { script: def args = task.ext.args ?: '' - def avail_mem = 6 + def avail_mem = 6144 if (!task.memory) { log.info '[snpEff] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } def prefix = task.ext.prefix ?: "${meta.id}" def cache_command = cache ? 
"-dataDir \${PWD}/${cache}" : "" """ snpEff \\ - -Xmx${avail_mem}g \\ + -Xmx${avail_mem}M \\ $db \\ $args \\ -csvStats ${prefix}.csv \\ @@ -45,4 +47,16 @@ process SNPEFF { snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.ann.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ + } diff --git a/modules/nf-core/modules/snpeff/meta.yml b/modules/nf-core/snpeff/snpeff/meta.yml similarity index 72% rename from modules/nf-core/modules/snpeff/meta.yml rename to modules/nf-core/snpeff/snpeff/meta.yml index 2f0d866e..44bada23 100644 --- a/modules/nf-core/modules/snpeff/meta.yml +++ b/modules/nf-core/snpeff/snpeff/meta.yml @@ -1,7 +1,11 @@ -name: snpEff +name: SNPEFF_SNPEFF description: Genetic variant annotation and functional effect prediction toolbox keywords: - annotation + - effect prediction + - snpeff + - variant + - vcf tools: - snpeff: description: | @@ -21,7 +25,7 @@ input: description: | vcf to annotate - db: - type: value + type: string description: | which db to annotate with - cache: @@ -36,8 +40,16 @@ output: pattern: "*.ann.vcf" - report: type: file - description: snpEff report file + description: snpEff report csv file + pattern: "*.csv" + - summary_html: + type: file + description: snpEff summary statistics in html file pattern: "*.html" + - genes_txt: + type: file + description: txt (tab separated) file having counts of the number of variants affecting each transcript and gene + pattern: "*.genes.txt" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf new file mode 100644 index 00000000..d0e20384 --- /dev/null +++ b/modules/nf-core/star/align/main.nf @@ -0,0 +1,109 @@ +process STAR_ALIGN { + tag "$meta.id" + label 'process_high' + + conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+    meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v }
+    def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf"
+    def seq_platform = seq_platform ? "'PL:$seq_platform'" : ""
+    def seq_center = seq_center ? "'CN:$seq_center'" : ""
+    def attrRG = args.contains("--outSAMattrRGline") ? "" : "--outSAMattrRGline 'ID:$prefix' $seq_center 'SM:$prefix' $seq_platform"
+    def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted'
+    def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : ''
+    """
+    STAR \\
+        --genomeDir $index \\
+        --readFilesIn ${reads1.join(",")} ${reads2.join(",")} \\
+        --runThreadN $task.cpus \\
+        --outFileNamePrefix $prefix. \\
+        $out_sam_type \\
+        $ignore_gtf \\
+        $attrRG \\
+        $args
+
+    $mv_unsorted_bam
+
+    if [ -f ${prefix}.Unmapped.out.mate1 ]; then
+        mv ${prefix}.Unmapped.out.mate1 ${prefix}.unmapped_1.fastq
+        gzip ${prefix}.unmapped_1.fastq
+    fi
+    if [ -f ${prefix}.Unmapped.out.mate2 ]; then
+        mv ${prefix}.Unmapped.out.mate2 ${prefix}.unmapped_2.fastq
+        gzip ${prefix}.unmapped_2.fastq
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        star: \$(STAR --version | sed -e "s/STAR_//g")
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+        gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}Xd.out.bam
+    touch ${prefix}.Log.final.out
+    touch ${prefix}.Log.out
+    touch ${prefix}.Log.progress.out
+    touch ${prefix}.sortedByCoord.out.bam
+    touch ${prefix}.toTranscriptome.out.bam
+    touch ${prefix}.Aligned.unsort.out.bam
+    touch ${prefix}.Aligned.sortedByCoord.out.bam
+    touch ${prefix}.unmapped_1.fastq.gz
+    touch ${prefix}.unmapped_2.fastq.gz
+    touch ${prefix}.tab
+    touch ${prefix}.SJ.out.tab
+    touch ${prefix}.ReadsPerGene.out.tab
+    touch ${prefix}.Chimeric.out.junction
+    touch ${prefix}.out.sam
+    touch ${prefix}.Signal.UniqueMultiple.str1.out.wig
+    touch ${prefix}.Signal.UniqueMultiple.str1.out.bg
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        star: \$(STAR --version | sed -e "s/STAR_//g")
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+        gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/star/align/meta.yml b/modules/nf-core/star/align/meta.yml
similarity index 73%
rename from modules/nf-core/modules/star/align/meta.yml
rename to modules/nf-core/star/align/meta.yml
index 7ee10f1c..3d8fed0c 100644
--- a/modules/nf-core/modules/star/align/meta.yml
+++ b/modules/nf-core/star/align/meta.yml
@@ -25,10 +25,34 @@ input:
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
   - index:
      type: directory
      description: STAR genome index
      pattern: "star"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
+  - gtf:
+      type: file
+      description: Annotation GTF file
+      pattern: "*.{gtf}"
+  - star_ignore_sjdbgtf:
+      type: boolean
+      description: Ignore annotation GTF file
+  - seq_platform:
+      type: string
+      description: Sequencing platform
+  - seq_center:
+      type: string
+      description: Sequencing center
+
 output:
   - bam:
      type: file
@@ -74,6 +98,14 @@ output:
      type: file
      description: STAR chimeric junction output file (optional)
      pattern: "*.out.junction"
+  - wig:
+      type: file
+      description: STAR output wiggle format file(s) (optional)
+      pattern: "*.wig"
+  - bedgraph:
+      type: file
+      description: STAR output bedGraph format file(s) (optional)
+      pattern: "*.bg"
 authors:
   - "@kevinmenden"
diff --git a/modules/nf-core/modules/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf
similarity index 59%
rename from modules/nf-core/modules/star/genomegenerate/main.nf
rename to modules/nf-core/star/genomegenerate/main.nf
index e5568f1d..43424042 100644
--- a/modules/nf-core/modules/star/genomegenerate/main.nf
+++ b/modules/nf-core/star/genomegenerate/main.nf
@@ -2,19 +2,18 @@ process STAR_GENOMEGENERATE {
     tag "$fasta"
     label 'process_high'
 
-    // Note: 2.7X indices incompatible with AWS iGenomes.
-    conda (params.enable_conda ? "bioconda::star=2.7.9a bioconda::samtools=1.15.1 conda-forge::gawk=5.1.0" : null)
+    conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1c4c32d87798d425c970ececfbadd155e7560277-0' :
-        'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1c4c32d87798d425c970ececfbadd155e7560277-0' }"
+        'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' :
+        'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }"
 
     input:
-    path fasta
-    path gtf
+    tuple val(meta), path(fasta)
+    tuple val(meta2), path(gtf)
 
     output:
-    path "star" , emit: index
-    path "versions.yml" , emit: versions
+    tuple val(meta), path("star") , emit: index
+    path "versions.yml" , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -22,7 +21,7 @@ process STAR_GENOMEGENERATE {
     script:
     def args = task.ext.args ?: ''
     def args_list = args.tokenize()
-    def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
+    def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
     if (args_list.contains('--genomeSAindexNbases')) {
        """
        mkdir star
@@ -66,4 +65,32 @@ process STAR_GENOMEGENERATE {
        END_VERSIONS
        """
     }
+
+    stub:
+    """
+    mkdir star
+    touch star/Genome
+    touch star/Log.out
+    touch star/SA
+    touch star/SAindex
+    touch star/chrLength.txt
+    touch star/chrName.txt
+    touch star/chrNameLength.txt
+    touch star/chrStart.txt
+    touch star/exonGeTrInfo.tab
+    touch star/exonInfo.tab
+    touch star/geneInfo.tab
+    touch star/genomeParameters.txt
+    touch star/sjdbInfo.txt
+    touch star/sjdbList.fromGTF.out.tab
+    touch star/sjdbList.out.tab
+    touch star/transcriptInfo.tab
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        star: \$(STAR --version | sed -e "s/STAR_//g")
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+        gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+    END_VERSIONS
+    """
 }
diff --git a/modules/nf-core/modules/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml
similarity index 69%
rename from modules/nf-core/modules/star/genomegenerate/meta.yml
rename to modules/nf-core/star/genomegenerate/meta.yml
index 8181157a..eba2d9cf 100644
--- a/modules/nf-core/modules/star/genomegenerate/meta.yml
+++ b/modules/nf-core/star/genomegenerate/meta.yml
@@ -15,14 +15,29 @@ tools:
      doi: 10.1093/bioinformatics/bts635
      licence: ["MIT"]
 input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
   - fasta:
      type: file
      description: Fasta file of the reference genome
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test' ]
   - gtf:
      type: file
      description: GTF file of the reference genome
 output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
   - index:
      type: directory
      description: Folder containing the star index files
diff --git a/modules/nf-core/modules/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf
similarity index 63%
rename from modules/nf-core/modules/tabix/bgziptabix/main.nf
rename to modules/nf-core/tabix/bgziptabix/main.nf
index 77fd91a5..d6c5a760 100644
--- a/modules/nf-core/modules/tabix/bgziptabix/main.nf
+++ b/modules/nf-core/tabix/bgziptabix/main.nf
@@ -1,17 +1,18 @@
 process TABIX_BGZIPTABIX {
     tag "$meta.id"
-    label 'process_medium'
+    label 'process_single'
 
-    conda (params.enable_conda ? 'bioconda::tabix=1.11' : null)
+    conda "bioconda::tabix=1.11"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' :
-        'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }"
+        'biocontainers/tabix:1.11--hdfd78af_0' }"
 
     input:
     tuple val(meta), path(input)
 
     output:
-    tuple val(meta), path("*.gz"), path("*.tbi"), emit: gz_tbi
+    tuple val(meta), path("*.gz"), path("*.tbi"), optional: true, emit: gz_tbi
+    tuple val(meta), path("*.gz"), path("*.csi"), optional: true, emit: gz_csi
     path "versions.yml" , emit: versions
 
     when:
@@ -22,8 +23,8 @@ process TABIX_BGZIPTABIX {
     def args2 = task.ext.args2 ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    bgzip --threads ${task.cpus} -c $args $input > ${prefix}.gz
-    tabix $args2 ${prefix}.gz
+    bgzip --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz
+    tabix $args2 ${prefix}.${input.getExtension()}.gz
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -34,8 +35,9 @@ process TABIX_BGZIPTABIX {
     stub:
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    touch ${prefix}.gz
-    touch ${prefix}.gz.tbi
+    touch ${prefix}.${input.getExtension()}.gz
+    touch ${prefix}.${input.getExtension()}.gz.tbi
+    touch ${prefix}.${input.getExtension()}.gz.csi
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/nf-core/modules/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml
similarity index 90%
rename from modules/nf-core/modules/tabix/bgziptabix/meta.yml
rename to modules/nf-core/tabix/bgziptabix/meta.yml
index 49c03289..2761e271 100644
--- a/modules/nf-core/modules/tabix/bgziptabix/meta.yml
+++ b/modules/nf-core/tabix/bgziptabix/meta.yml
@@ -37,9 +37,14 @@ output:
      type: file
      description: tabix index file
      pattern: "*.{gz.tbi}"
+  - csi:
+      type: file
+      description: tabix alternate index file
+      pattern: "*.{gz.csi}"
   - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
 authors:
   - "@maxulysse"
+  - "@DLBPointon"
diff --git a/modules/nf-core/modules/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf
similarity index 87%
rename from modules/nf-core/modules/tabix/tabix/main.nf
rename to modules/nf-core/tabix/tabix/main.nf
index e155e468..5bf332ef 100644
--- a/modules/nf-core/modules/tabix/tabix/main.nf
+++ b/modules/nf-core/tabix/tabix/main.nf
@@ -1,11 +1,11 @@
 process TABIX_TABIX {
     tag "$meta.id"
-    label 'process_medium'
+    label 'process_single'
 
-    conda (params.enable_conda ? 'bioconda::tabix=1.11' : null)
+    conda "bioconda::tabix=1.11"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' :
-        'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }"
+        'biocontainers/tabix:1.11--hdfd78af_0' }"
 
     input:
     tuple val(meta), path(tab)
diff --git a/modules/nf-core/modules/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml
similarity index 100%
rename from modules/nf-core/modules/tabix/tabix/meta.yml
rename to modules/nf-core/tabix/tabix/meta.yml
diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf
new file mode 100644
index 00000000..61461c39
--- /dev/null
+++ b/modules/nf-core/untar/main.nf
@@ -0,0 +1,63 @@
+process UNTAR {
+    tag "$archive"
+    label 'process_single'
+
+    conda "conda-forge::sed=4.7 conda-forge::grep=3.11 conda-forge::tar=1.34"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+        'nf-core/ubuntu:20.04' }"
+
+    input:
+    tuple val(meta), path(archive)
+
+    output:
+    tuple val(meta), path("$prefix"), emit: untar
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, ""))
+
+    """
+    mkdir $prefix
+
+    ## Ensures --strip-components only applied when top level of tar contents is a directory
+    ## If just files or multiple directories, place all in prefix
+    if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then
+        tar \\
+            -C $prefix --strip-components 1 \\
+            -xavf \\
+            $args \\
+            $archive \\
+            $args2
+    else
+        tar \\
+            -C $prefix \\
+            -xavf \\
+            $args \\
+            $archive \\
+            $args2
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, ""))
+    """
+    mkdir $prefix
+    touch ${prefix}/file.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/untar/meta.yml b/modules/nf-core/untar/meta.yml
similarity index 84%
rename from modules/nf-core/modules/untar/meta.yml
rename to modules/nf-core/untar/meta.yml
index d426919b..db241a6e 100644
--- a/modules/nf-core/modules/untar/meta.yml
+++ b/modules/nf-core/untar/meta.yml
@@ -3,6 +3,7 @@ description: Extract files.
 keywords:
   - untar
   - uncompress
+  - extract
 tools:
   - untar:
      description: |
@@ -26,9 +27,9 @@ output:
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
   - untar:
-      type: file
-      description:
-      pattern: "*.*"
+      type: directory
+      description: Directory containing contents of archive
+      pattern: "*/"
   - versions:
      type: file
      description: File containing software versions
@@ -36,3 +37,5 @@ authors:
   - "@joseespinosa"
   - "@drpatelh"
+  - "@matthdsm"
+  - "@jfy133"
diff --git a/nextflow.config b/nextflow.config
index c4b1dc8b..5383a3e3 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -43,7 +43,7 @@ params {
     // Variant calling
     no_intervals    = false
-
+
     // Variant annotation
     annotate_tools      = null  // List of annotation tools to run - snpeff or vep or merge
     annotation_cache    = false // Annotation cache disabled
@@ -84,29 +84,29 @@ params {
     // MultiQC options
     multiqc_config             = null
     multiqc_title              = null
+    multiqc_logo               = null
     max_multiqc_email_size     = '25.MB'
+    multiqc_methods_description = null
 
     // Boilerplate options
-    outdir                     = 'results'
-    tracedir                   = "${params.outdir}/pipeline_info"
+    outdir                     = null
     publish_dir_mode           = 'copy'
     email                      = null
     email_on_fail              = null
     plaintext_email            = false
     monochrome_logs            = false
+    hook_url                   = null
     help                       = false
-    validate_params            = true
-    show_hidden_params         = false
-    schema_ignore_params       = 'genomes'
-    enable_conda               = false
+    version                    = false
 
     // Config options
+    config_profile_name        = null
+    config_profile_description = null
     custom_config_version      = 'master'
     custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
-    config_profile_description = null
     config_profile_contact     = null
     config_profile_url         = null
-    config_profile_name        = null
+
     // Max resource options
     // Defaults only, expecting to be overwritten
@@ -114,6 +114,13 @@ params {
     max_cpus                   = 16
     max_time                   = '240.h'
 
+    // Schema validation default options
+    validationFailUnrecognisedParams = false
+    validationLenientMode            = false
+    validationSchemaIgnoreParams     = 'genomes'
+    validationShowHiddenParams       = false
+    validate_params                  = true
+
 }
 
 // Load base.config by default for all pipelines
@@ -139,66 +146,118 @@ try {
 // } catch (Exception e) {
 //   System.err.println("WARNING: Could not load nf-core/config/rnavar profiles: ${params.custom_config_base}/pipeline/rnavar.config")
 // }
-
-
 profiles {
-    debug { process.beforeScript = 'echo $HOSTNAME' }
+    debug {
+        dumpHashes             = true
+        process.beforeScript   = 'echo $HOSTNAME'
+        cleanup                = false
+    }
     conda {
-        params.enable_conda    = true
+        conda.enabled          = true
+        docker.enabled         = false
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
+        apptainer.enabled      = false
+    }
+    mamba {
+        conda.enabled          = true
+        conda.useMamba         = true
+        docker.enabled         = false
+        singularity.enabled    = false
+        podman.enabled         = false
+        shifter.enabled        = false
+        charliecloud.enabled   = false
+        apptainer.enabled      = false
     }
     docker {
        docker.enabled         = true
        docker.userEmulation   = true
+        conda.enabled          = false
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
+        apptainer.enabled      = false
+    }
+    arm {
+        docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
     }
     singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
+        conda.enabled          = false
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
+        apptainer.enabled      = false
     }
     podman {
        podman.enabled         = true
+        conda.enabled          = false
        docker.enabled         = false
        singularity.enabled    = false
        shifter.enabled        = false
        charliecloud.enabled   = false
+        apptainer.enabled      = false
     }
     shifter {
        shifter.enabled        = true
+
conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false charliecloud.enabled = false + apptainer.enabled = false } charliecloud { charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false + charliecloud.enabled = false + } + gitpod { + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + // Load igenomes.config if required if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' } else { params.genomes = [:] } - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
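// A note on the registry defaults above: with docker.registry = 'quay.io', a short
// container name such as 'biocontainers/tabix:1.11--hdfd78af_0' is pulled as
// 'quay.io/biocontainers/tabix:1.11--hdfd78af_0', which is why module containers no
// longer carry an explicit 'quay.io/' prefix. A minimal sketch of pointing all four
// engines at a local mirror instead (the hostname below is hypothetical):
//
//     apptainer.registry   = 'mirror.example.org'
//     docker.registry      = 'mirror.example.org'
//     podman.registry      = 'mirror.example.org'
//     singularity.registry = 'mirror.example.org'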
@@ -216,29 +275,30 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']
 def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
 timeline {
     enabled = true
-    file    = "${params.tracedir}/execution_timeline_${trace_timestamp}.html"
+    file    = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html"
 }
 report {
     enabled = true
-    file    = "${params.tracedir}/execution_report_${trace_timestamp}.html"
+    file    = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html"
 }
 trace {
     enabled = true
-    file    = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
+    file    = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt"
 }
 dag {
     enabled = true
-    file    = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html"
+    file    = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html"
 }
 
 manifest {
     name            = 'nf-core/rnavar'
-    author          = '@praveenraj2018'
+    author          = """Praveen Raj, Maxime U Garcia"""
     homePage        = 'https://github.com/nf-core/rnavar'
-    description     = 'GATK4 RNA variant calling pipeline'
+    description     = """GATK4 RNA variant calling pipeline"""
     mainScript      = 'main.nf'
-    nextflowVersion = '>=21.10.3'
-    version = '1.1.0dev'
+    nextflowVersion = '!>=23.04.0'
+    version         = '1.1.0dev'
+    doi             = '10.5281/zenodo.6669637'
 }
 
 // Load modules.config for DSL2 module specific options
diff --git a/nextflow_schema.json b/nextflow_schema.json
index e0ec0fc1..36d0136e 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -13,14 +13,15 @@
             "required": ["input", "outdir"],
             "properties": {
                 "input": {
+                    "description": "Path to comma-separated file containing information about the samples in the experiment.",
+                    "help_text": "A design file with information about the samples in your experiment. Use this parameter to specify the location of the input files. It has to be a comma-separated file with a header row. See [usage docs](https://nf-co.re/rnavar/usage#input).",
+                    "fa_icon": "fas fa-file-csv",
+                    "schema": "assets/schema_input.json",
                     "type": "string",
                     "format": "file-path",
+                    "exists": true,
                     "mimetype": "text/csv",
-                    "pattern": "^\\S+\\.csv$",
-                    "schema": "assets/schema_input.json",
-                    "description": "Path to comma-separated file containing information about the samples in the experiment.",
-                    "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/rnavar/usage#samplesheet-input).",
-                    "fa_icon": "fas fa-file-csv"
+                    "pattern": "^\\S+\\.csv$"
                 },
                 "outdir": {
                     "type": "string",
@@ -61,6 +62,7 @@
                 "fasta": {
                     "type": "string",
                     "format": "file-path",
+                    "exists": true,
                     "mimetype": "text/plain",
                     "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
                     "description": "Path to FASTA genome file.",
@@ -500,7 +502,7 @@
                     "description": "Maximum amount of time that can be requested for any single job.",
                     "default": "240.h",
                     "fa_icon": "far fa-clock",
-                    "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$",
+                    "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$",
                    "hidden": true,
                    "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
                }
@@ -517,6 +519,14 @@
                    "type": "boolean",
                    "description": "Display help text.",
                    "fa_icon": "fas fa-question-circle",
+                    "default": false,
+                    "hidden": true
+                },
+                "version": {
+                    "type": "boolean",
+                    "description": "Display version and exit.",
+                    "fa_icon": "fas fa-question-circle",
+                    "default": false,
                    "hidden": true
                },
                "publish_dir_mode": {
@@ -540,6 +550,7 @@
                    "type": "boolean",
                    "description": "Send plain-text email instead of HTML.",
                    "fa_icon": "fas fa-remove-format",
+                    "default": false,
                    "hidden": true
                },
                "max_multiqc_email_size": {
@@ -554,21 +565,34 @@
                    "type": "boolean",
                    "description": "Do not use coloured log outputs.",
                    "fa_icon": "fas fa-palette",
+                    "default": false,
+                    "hidden": true
+                },
+                "hook_url": {
+                    "type": "string",
+                    "description": "Incoming hook URL for messaging service",
+                    "fa_icon": "fas fa-people-group",
+                    "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
                    "hidden": true
                },
                "multiqc_config": {
                    "type": "string",
+                    "format": "file-path",
                    "description": "Custom config file to supply to MultiQC.",
                    "fa_icon": "fas fa-cog",
                    "hidden": true
                },
-                "tracedir": {
+                "multiqc_logo": {
                    "type": "string",
-                    "description": "Directory to keep pipeline Nextflow logs and reports.",
-                    "default": "${params.outdir}/pipeline_info",
-                    "fa_icon": "fas fa-cogs",
+                    "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file",
+                    "fa_icon": "fas fa-image",
                    "hidden": true
                },
+                "multiqc_methods_description": {
+                    "type": "string",
+                    "description": "Custom MultiQC yaml file containing HTML including a methods description.",
+                    "fa_icon": "fas fa-cog"
+                },
                "validate_params": {
                    "type": "boolean",
                    "description": "Boolean whether to validate parameters against the schema at runtime",
@@ -576,18 +600,29 @@
                    "fa_icon": "fas fa-check-square",
                    "hidden": true
                },
-                "show_hidden_params": {
+                "validationShowHiddenParams": {
                    "type": "boolean",
                    "fa_icon": "far fa-eye-slash",
                    "description": "Show all params when using `--help`",
+                    "default": false,
                    "hidden": true,
                    "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
                },
-                "enable_conda": {
+                "validationFailUnrecognisedParams": {
+                    "type": "boolean",
+                    "fa_icon": "far fa-check-circle",
+                    "description": "Validation of parameters fails when an unrecognised parameter is found.",
+                    "default": false,
+                    "hidden": true,
+                    "help_text": "By default, when an unrecognised parameter is found, it returns a warning."
+                },
+                "validationLenientMode": {
                    "type": "boolean",
-                    "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.",
+                    "fa_icon": "far fa-check-circle",
+                    "description": "Validation of parameters in lenient mode.",
+                    "default": false,
                    "hidden": true,
-                    "fa_icon": "fas fa-bacon"
+                    "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
                }
            }
        }
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..0d62beb6
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,10 @@
+# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black.
+# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
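+#
+# As a quick local check (a sketch, assuming Black and isort are installed),
+# running `black .` and `isort .` from the repository root should pick up the
+# settings below automatically.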
+[tool.black]
+line-length = 120
+target_version = ["py37", "py38", "py39", "py310"]
+
+[tool.isort]
+profile = "black"
+known_first_party = ["nf_core"]
+multi_line_output = 3
diff --git a/subworkflows/nf-core/align_star.nf b/subworkflows/local/align_star/main.nf
similarity index 83%
rename from subworkflows/nf-core/align_star.nf
rename to subworkflows/local/align_star/main.nf
index 2a4069fd..4949686f 100644
--- a/subworkflows/nf-core/align_star.nf
+++ b/subworkflows/local/align_star/main.nf
@@ -2,8 +2,8 @@
 // Alignment with STAR
 //
-include { STAR_ALIGN        } from '../../modules/nf-core/modules/star/align/main'
-include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools'
+include { STAR_ALIGN        } from '../../../modules/nf-core/star/align/main'
+include { BAM_SORT_SAMTOOLS } from '../bam_sort_samtools/main'
 
 workflow ALIGN_STAR {
     take:
@@ -52,10 +52,7 @@ workflow ALIGN_STAR {
     bam      = BAM_SORT_SAMTOOLS.out.bam      // channel: [ val(meta), [ bam ] ]
     bai      = BAM_SORT_SAMTOOLS.out.bai      // channel: [ val(meta), [ bai ] ]
     csi      = BAM_SORT_SAMTOOLS.out.csi      // channel: [ val(meta), [ csi ] ]
-    stats    = BAM_SORT_SAMTOOLS.out.stats    // channel: [ val(meta), [ stats ] ]
-    flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ]
-    idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ]
-
+    reports  = BAM_SORT_SAMTOOLS.out.reports
     versions = ch_versions // channel: [ versions.yml ]
 }
diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate/main.nf
similarity index 87%
rename from subworkflows/local/annotate.nf
rename to subworkflows/local/annotate/main.nf
index 9555dddb..8f2b5104 100644
--- a/subworkflows/local/annotate.nf
+++ b/subworkflows/local/annotate/main.nf
@@ -2,9 +2,9 @@
 // ANNOTATION
 //
-include { SNPEFF_ANNOTATE                       } from '../nf-core/snpeff_annotate'
-include { ENSEMBLVEP_ANNOTATE as MERGE_ANNOTATE } from '../nf-core/ensemblvep_annotate'
-include { ENSEMBLVEP_ANNOTATE                   } from '../nf-core/ensemblvep_annotate'
+include { SNPEFF_ANNOTATE                       } from '../snpeff_annotate/main'
+include { ENSEMBLVEP_ANNOTATE as MERGE_ANNOTATE } from '../ensemblvep_annotate/main'
+include { ENSEMBLVEP_ANNOTATE                   } from '../ensemblvep_annotate/main'
 
 workflow ANNOTATE {
     take:
diff --git a/subworkflows/local/bam_markduplicates/main.nf b/subworkflows/local/bam_markduplicates/main.nf
new file mode 100644
index 00000000..37d901b9
--- /dev/null
+++ b/subworkflows/local/bam_markduplicates/main.nf
@@ -0,0 +1,51 @@
+//
+// MARKDUPLICATES AND QC after mapping
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
+include { BAM_STATS_SAMTOOLS   } from '../bam_stats_samtools/main'
+include { GATK4_MARKDUPLICATES } from '../../../modules/nf-core/gatk4/markduplicates/main'
+include { SAMTOOLS_INDEX       } from '../../../modules/nf-core/samtools/index/main'
+
+workflow BAM_MARKDUPLICATES {
+    take:
+    bam                    // channel: [mandatory] [ meta, bam ]
+    fasta                  // channel: [mandatory] [ fasta ]
+    fasta_fai              // channel: [mandatory] [ fasta_fai ]
+    intervals_bed_combined // channel: [optional]  [ intervals_bed ]
+
+    main:
+    ch_versions = Channel.empty()
+    ch_reports  = Channel.empty()
+
+    // RUN MARKDUPLICATES
+    GATK4_MARKDUPLICATES(bam, fasta, fasta_fai)
+
+    SAMTOOLS_INDEX(GATK4_MARKDUPLICATES.out.bam)
+
+    ch_bam_index = GATK4_MARKDUPLICATES.out.bam
+        .join(SAMTOOLS_INDEX.out.bai, remainder: true)
+        .join(SAMTOOLS_INDEX.out.csi, remainder: true)
+        .map{meta, bam, bai, csi ->
+            if (bai) [meta, bam, bai]
+            else [meta, bam, csi]
+        }
+
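+    // SAMTOOLS_INDEX emits either a .bai or a .csi index for each BAM (depending on
+    // its args), so after the two remainder:true joins one of bai/csi is null and
+    // the map above keeps whichever index file is actually present.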
BAM_STATS_SAMTOOLS(ch_bam_index) + + // Gather all reports generated + ch_reports = ch_reports.mix(GATK4_MARKDUPLICATES.out.metrics) + ch_reports = ch_reports.mix(BAM_STATS_SAMTOOLS.out.reports) + + // Gather versions of all tools used + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_MARKDUPLICATES.out.versions) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = ch_bam_index + reports = ch_reports + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/local/bam_sort_samtools/main.nf similarity index 69% rename from subworkflows/nf-core/bam_sort_samtools.nf rename to subworkflows/local/bam_sort_samtools/main.nf index f473b759..feabd8c9 100644 --- a/subworkflows/nf-core/bam_sort_samtools.nf +++ b/subworkflows/local/bam_sort_samtools/main.nf @@ -2,9 +2,9 @@ // Sort, index BAM file and run samtools stats, flagstat and idxstats // -include { SAMTOOLS_SORT } from '../../modules/nf-core/modules/samtools/sort/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' workflow BAM_SORT_SAMTOOLS { take: @@ -37,9 +37,10 @@ workflow BAM_SORT_SAMTOOLS { } .set { ch_bam_bai } - BAM_STATS_SAMTOOLS ( - ch_bam_bai - ) + BAM_STATS_SAMTOOLS(ch_bam_bai) + + reports = BAM_STATS_SAMTOOLS.out.reports.map{meta, logs -> [logs] } + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions.first()) emit: @@ -47,8 +48,6 @@ workflow BAM_SORT_SAMTOOLS { bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + reports versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/bam_stats_samtools/main.nf b/subworkflows/local/bam_stats_samtools/main.nf new file mode 100644 index 00000000..09fdd265 --- /dev/null +++ b/subworkflows/local/bam_stats_samtools/main.nf @@ -0,0 +1,32 @@ +// +// Run SAMtools stats, flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), [ bam ], [bai/csi] ] + + main: + ch_versions = Channel.empty() + ch_reports = Channel.empty() + + SAMTOOLS_FLAGSTAT(ch_bam_bai) + SAMTOOLS_IDXSTATS(ch_bam_bai) + SAMTOOLS_STATS(ch_bam_bai, [[],[]]) + + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) + + ch_reports = ch_reports.mix(SAMTOOLS_STATS.out.stats) + ch_reports = ch_reports.mix(SAMTOOLS_FLAGSTAT.out.flagstat) + ch_reports = ch_reports.mix(SAMTOOLS_IDXSTATS.out.idxstats) + + emit: + reports = ch_reports + versions = ch_versions // 
channel: [ versions.yml ] +} diff --git a/subworkflows/local/cram_qc_mosdepth_samtools/main.nf b/subworkflows/local/cram_qc_mosdepth_samtools/main.nf new file mode 100644 index 00000000..fd070a68 --- /dev/null +++ b/subworkflows/local/cram_qc_mosdepth_samtools/main.nf @@ -0,0 +1,38 @@ +// +// QC on CRAM +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { MOSDEPTH } from '../../../modules/nf-core/mosdepth/main' + +workflow CRAM_QC_MOSDEPTH_SAMTOOLS { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + fasta // channel: [mandatory] [ fasta ] + intervals + + main: + versions = Channel.empty() + reports = Channel.empty() + + // Reports run on cram + SAMTOOLS_STATS(cram, fasta.map{ it -> [ [ id:'fasta' ], it ] }) + + MOSDEPTH(cram.combine(intervals.map{ meta, bed -> [ bed?:[] ] }), fasta.map{ it -> [ [ id:'fasta' ], it ] }) + + // Gather all reports generated + reports = reports.mix(SAMTOOLS_STATS.out.stats) + reports = reports.mix(MOSDEPTH.out.global_txt) + reports = reports.mix(MOSDEPTH.out.regions_txt) + + // Gather versions of all tools used + versions = versions.mix(MOSDEPTH.out.versions) + versions = versions.mix(SAMTOOLS_STATS.out.versions.first()) + + emit: + reports + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/ensemblvep_annotate.nf b/subworkflows/local/ensemblvep_annotate/main.nf similarity index 85% rename from subworkflows/nf-core/ensemblvep_annotate.nf rename to subworkflows/local/ensemblvep_annotate/main.nf index 13dffee4..83187150 100644 --- a/subworkflows/nf-core/ensemblvep_annotate.nf +++ b/subworkflows/local/ensemblvep_annotate/main.nf @@ -2,8 +2,8 @@ // Run VEP to annotate VCF files // -include { ENSEMBLVEP } from '../../modules/nf-core/modules/ensemblvep/main' -include { TABIX_BGZIPTABIX } from '../../modules/nf-core/modules/tabix/bgziptabix/main' +include { ENSEMBLVEP_VEP } from '../../../modules/nf-core/ensemblvep/vep/main' +include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix/main' workflow ENSEMBLVEP_ANNOTATE { take: diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 95fc458e..00000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,46 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( - samplesheet - ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not 
exist!\n${row.fastq_2}" - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return fastq_meta -} diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf deleted file mode 100755 index 74acecc8..00000000 --- a/subworkflows/local/prepare_genome.nf +++ /dev/null @@ -1,175 +0,0 @@ -// -// Uncompress and prepare reference genome files -// - -include { GATK4_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/modules/gatk4/createsequencedictionary/main' //addParams(options: params.genome_options) -include { GFFREAD } from '../../modules/nf-core/modules/gffread/main' //addParams(options: params.gffread_options) -include { GTF2BED } from '../../modules/local/gtf2bed' -include { BEDTOOLS_SORT } from '../../modules/nf-core/modules/bedtools/sort/main' -include { BEDTOOLS_MERGE } from '../../modules/nf-core/modules/bedtools/merge/main' -include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/modules/gunzip/main' //addParams(options: params.genome_options) -include { GUNZIP as GUNZIP_GENE_BED } from '../../modules/nf-core/modules/gunzip/main' //addParams(options: params.genome_options) -include { GUNZIP as GUNZIP_GFF } from '../../modules/nf-core/modules/gunzip/main' //addParams(options: params.genome_options) -include { GUNZIP as GUNZIP_GTF } from '../../modules/nf-core/modules/gunzip/main' //addParams(options: params.genome_options) -include { SAMTOOLS_FAIDX } from '../../modules/nf-core/modules/samtools/faidx/main' //addParams(options: params.genome_options) -include { STAR_GENOMEGENERATE } from '../../modules/nf-core/modules/star/genomegenerate/main' //addParams(options: params.star_index_options) -include { UNTAR as UNTAR_STAR_INDEX } from '../../modules/nf-core/modules/untar/main' //addParams(options: params.star_untar_options) - - -workflow PREPARE_GENOME { - take: - prepare_tool_indices - feature_type - - main: - - ch_versions = Channel.empty() - - // - // Uncompress genome fasta file if required - // - if (params.fasta.endsWith('.gz')) { - GUNZIP_FASTA ( - Channel.fromPath(params.fasta).map{ it -> [[id:it[0].baseName], it] } - ) - ch_fasta = GUNZIP_FASTA.out.gunzip.map{ meta, fasta -> [fasta] }.collect() - ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) - } else { - ch_fasta = Channel.fromPath(params.fasta).collect() - } - - // - // Uncompress GTF annotation file or create from GFF3 if required - // - ch_gffread_version = Channel.empty() - if (params.gtf) { - if (params.gtf.endsWith('.gz')) { - GUNZIP_GTF ( - Channel.fromPath(params.gtf).map{ it -> [[id:it[0].baseName], it] } - ) - ch_gtf = GUNZIP_GTF.out.gunzip.map{ meta, gtf -> [gtf] }.collect() - ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) - } else { - ch_gtf = Channel.fromPath(params.gtf).collect() - } - } else if (params.gff) { - if (params.gff.endsWith('.gz')) { - GUNZIP_GFF ( - Channel.fromPath(params.gff).map{ it -> [[id:it[0].baseName], it] } - ) - ch_gff = GUNZIP_GFF.out.gunzip.map{ meta, gff -> [gff] }.collect() - ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) - } else { - ch_gff = Channel.fromPath(params.gff).collect() - } - - GFFREAD ( - ch_gff - ) - .gtf - .set { ch_gtf } - - ch_versions = ch_versions.mix(GFFREAD.out.versions) - } - - // - // Uncompress exon BED annotation file or create from GTF if required - // - if (params.exon_bed) { - if (params.exon_bed.endsWith('.gz')) { - GUNZIP_GENE_BED ( - Channel.fromPath(params.exon_bed).map{ it -> [[id:it[0].baseName], it] } - ) - ch_gene_bed = GUNZIP_GENE_BED.out.gunzip.map{ meta, bed -> 
[bed] }.collect() - ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions) - } else { - ch_gene_bed = Channel.fromPath(params.exon_bed).collect() - } - } else { - ch_exon_bed = GTF2BED ( ch_gtf , feature_type).bed.collect() - ch_versions = ch_versions.mix(GTF2BED.out.versions) - } - - //ch_exon_bed.view() - //ch_exon_bed.map{ it -> [[id:'exome'], it] } - //ch_exon_bed.view() - // Bedtools sort - ch_bedtools_sort = BEDTOOLS_SORT(ch_exon_bed.map{ it -> [[id:'exome'], it] }, 'sorted').sorted.collect() - ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) - - - // Bedtools merge - ch_bedtools_merge = BEDTOOLS_MERGE(ch_bedtools_sort).bed - ch_versions = ch_versions.mix(BEDTOOLS_MERGE.out.versions) - - - // Index the genome fasta - ch_fasta_fai = Channel.empty() - if (params.fasta_fai) ch_fasta_fai = Channel.fromPath(params.fasta_fai).collect() - if (!params.fasta_fai) { - SAMTOOLS_FAIDX( - ch_fasta.map{ it -> [[id:it[0].getName()], it]} - ) - ch_fasta_fai = SAMTOOLS_FAIDX.out.fai.map{ meta, fai -> [fai] }.collect() - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) - } - - // Create dictionary file for the genome fasta - ch_fasta_dict = Channel.empty() - if (params.dict) ch_fasta_dict = Channel.fromPath(params.dict).collect() - else ch_fasta_dict = GATK4_CREATESEQUENCEDICTIONARY(ch_fasta).dict - - // - // Uncompress STAR index or generate from scratch if required - // - ch_star_index = Channel.empty() - if ('star' in prepare_tool_indices) { - if (params.star_index) { - if (params.star_index.endsWith('.tar.gz')) { - UNTAR_STAR_INDEX ( - Channel.fromPath(params.star_index).map{ it -> [[id:it[0].baseName], it] } - ) - ch_star_index = UNTAR_STAR_INDEX.out.untar.map{ meta, star_index -> [star_index] }.collect() - ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) - } else { - ch_star_index = Channel.fromPath(params.star_index).collect() - } - } - else { - STAR_GENOMEGENERATE ( - ch_fasta,ch_gtf - ) - .index - .set { ch_star_index } - ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) - } - - //if((!ch_star_index) || getIndexVersion(ch_star_index) != '2.7.4a'){ - // ch_star_index = STAR_GENOMEGENERATE(ch_fasta,ch_gtf).index - // ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) - //} - } - - - emit: - fasta = ch_fasta // path: genome.fasta - fai = ch_fasta_fai // path: genome.fasta.fai - dict = ch_fasta_dict // path: genome.fasta.dict - gtf = ch_gtf // path: genome.gtf - exon_bed = ch_exon_bed // path: exon.bed - bedtools_sort = ch_bedtools_sort // path: sort.bed - bedtools_merge = ch_bedtools_merge // path: merge.bed - star_index = ch_star_index // path: star/index/ - versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] -} - -def getIndexVersion( index_path ) { - genomeParameters = new File("$index_path/genomeParameters.txt") - if ( genomeParameters.exists() ) { - for(line: genomeParameters.readLines()){ - if(line.startsWith("versionGenome")){ - return line.split("\t")[1].trim() - } - } - } -} diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf new file mode 100755 index 00000000..efb649ae --- /dev/null +++ b/subworkflows/local/prepare_genome/main.nf @@ -0,0 +1,47 @@ +// +// Prepare reference genome files +// + +include { BEDTOOLS_MERGE } from '../../../modules/nf-core/bedtools/merge/main' +include { BEDTOOLS_SORT } from '../../../modules/nf-core/bedtools/sort/main' +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main' 
+include { GFFREAD } from '../../../modules/nf-core/gffread/main' +include { GTF2BED } from '../../../modules/local/gtf2bed' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' +include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate/main' + +workflow PREPARE_GENOME { + take: + ch_fasta // file: /path/to/genome.fasta + ch_gff // file: /path/to/genome.gff + ch_gtf // file: /path/to/genome.gtf + feature_type + + main: + ch_versions = Channel.empty() + + GATK4_CREATESEQUENCEDICTIONARY(ch_fasta) + GFFREAD(ch_gff) + SAMTOOLS_FAIDX(ch_fasta, [['id':null], []]) + + ch_gtf = ch_gtf.mix(GFFREAD.out.gtf) + + GTF2BED(ch_gtf, feature_type) + STAR_GENOMEGENERATE(ch_fasta, ch_gtf) + + ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) + ch_versions = ch_versions.mix(GFFREAD.out.versions) + ch_versions = ch_versions.mix(GTF2BED.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) + + emit: + dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict // path: genome.fasta.dict + exon_bed = GTF2BED.out.bed.map{ bed -> [ [ id:bed.baseName ], bed ] }.collect() // path: exon.bed + fasta_fai = SAMTOOLS_FAIDX.out.fai.map{ meta, fai -> [fai] } // path: genome.fasta.fai + gtf = ch_gtf // path: genome.gtf + star_index = STAR_GENOMEGENERATE.out.index // path: star/index/ + versions = ch_versions // channel: [ versions.yml ] + // bedtools_sort = ch_bedtools_sort // path: sort.bed + // bedtools_merge = ch_bedtools_merge // path: merge.bed +} diff --git a/subworkflows/nf-core/recalibrate.nf b/subworkflows/local/recalibrate/main.nf similarity index 79% rename from subworkflows/nf-core/recalibrate.nf rename to subworkflows/local/recalibrate/main.nf index 664cf3f5..518e9fb2 100644 --- a/subworkflows/nf-core/recalibrate.nf +++ b/subworkflows/local/recalibrate/main.nf @@ -4,9 +4,9 @@ ======================================================================================== */ -include { GATK4_APPLYBQSR as APPLYBQSR } from '../../modules/nf-core/modules/gatk4/applybqsr/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' -include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' +include { GATK4_APPLYBQSR as APPLYBQSR } from '../../../modules/nf-core/gatk4/applybqsr/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' workflow RECALIBRATE { take: @@ -33,9 +33,8 @@ workflow RECALIBRATE { bam_recalibrated = APPLYBQSR.out.bam ch_versions = ch_versions.mix(APPLYBQSR.out.versions.first()) - SAMTOOLS_INDEX ( - bam_recalibrated - ) + SAMTOOLS_INDEX(bam_recalibrated) + bam_recalibrated_index = bam_recalibrated .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) @@ -49,7 +48,7 @@ workflow RECALIBRATE { samtools_stats = Channel.empty() if (!skip_samtools) { - SAMTOOLS_STATS(bam_recalibrated_index, []) + SAMTOOLS_STATS(bam_recalibrated_index, [[], []]) samtools_stats = SAMTOOLS_STATS.out.stats ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) } diff --git a/subworkflows/nf-core/snpeff_annotate.nf b/subworkflows/local/snpeff_annotate/main.nf similarity index 82% rename from subworkflows/nf-core/snpeff_annotate.nf rename to subworkflows/local/snpeff_annotate/main.nf index 9a8b65bc..241cb190 100644 --- 
a/subworkflows/nf-core/snpeff_annotate.nf
+++ b/subworkflows/local/snpeff_annotate/main.nf
@@ -2,8 +2,8 @@
 // Run snpEff to annotate VCF files
 //
-include { SNPEFF           } from '../../modules/nf-core/modules/snpeff/main'
-include { TABIX_BGZIPTABIX } from '../../modules/nf-core/modules/tabix/bgziptabix/main'
+include { SNPEFF_SNPEFF    } from '../../../modules/nf-core/snpeff/snpeff/main'
+include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix/main'
 
 workflow SNPEFF_ANNOTATE {
     take:
diff --git a/subworkflows/nf-core/splitncigar.nf b/subworkflows/local/splitncigar/main.nf
similarity index 55%
rename from subworkflows/nf-core/splitncigar.nf
rename to subworkflows/local/splitncigar/main.nf
index 1b1895f7..cbc30618 100644
--- a/subworkflows/nf-core/splitncigar.nf
+++ b/subworkflows/local/splitncigar/main.nf
@@ -2,59 +2,56 @@
 // Subworkflow: Run GATK4 SplitNCigarReads with intervals, merge and index BAM file.
 //
-include { GATK4_SPLITNCIGARREADS } from '../../modules/nf-core/modules/gatk4/splitncigarreads/main'
-include { SAMTOOLS_MERGE         } from '../../modules/nf-core/modules/samtools/merge/main'
-include { SAMTOOLS_INDEX         } from '../../modules/nf-core/modules/samtools/index/main'
+include { GATK4_SPLITNCIGARREADS } from '../../../modules/nf-core/gatk4/splitncigarreads/main'
+include { SAMTOOLS_MERGE         } from '../../../modules/nf-core/samtools/merge/main'
+include { SAMTOOLS_INDEX         } from '../../../modules/nf-core/samtools/index/main'
 
 workflow SPLITNCIGAR {
     take:
     bam        // channel: [ val(meta), [ bam ], [bai] ]
-    fasta      // channel: [ fasta ]
-    fasta_fai  // channel: [ fai ]
-    fasta_dict // channel: [ dict ]
+    ch_fasta   // channel: [ fasta ]
+    ch_fai     // channel: [ fai ]
+    ch_dict    // channel: [ dict ]
     intervals  // channel: [ interval_list]
 
     main:
    ch_versions = Channel.empty()
 
-    bam.combine(intervals)
-        .map{ meta, bam, bai, intervals ->
+    bam_interval = bam.combine(intervals).map{ meta, bam, bai, intervals ->
        new_meta = meta.clone()
        new_meta.id = meta.id + "_" + intervals.baseName
        new_meta.sample = meta.id
        [new_meta, bam, bai, intervals]
-    }.set{bam_interval}
+    }
 
-    GATK4_SPLITNCIGARREADS (
+    GATK4_SPLITNCIGARREADS(
        bam_interval,
-        fasta,
-        fasta_fai,
-        fasta_dict
+        ch_fasta.map{ meta, fasta -> [fasta] },
+        ch_fai,
+        ch_dict.map{ meta, dict -> [dict] },
    )
    bam_splitncigar = GATK4_SPLITNCIGARREADS.out.bam
    ch_versions = ch_versions.mix(GATK4_SPLITNCIGARREADS.out.versions.first())
 
-    bam_splitncigar
+    bam_splitncigar_interval = bam_splitncigar
        .map{ meta, bam ->
            new_meta = meta.clone()
            new_meta.id = meta.sample
            [new_meta, bam]
-        }.groupTuple().set{bam_splitncigar_interval}
+        }.groupTuple()
+
+    SAMTOOLS_MERGE(bam_splitncigar_interval,
+        ch_fasta,
+        ch_fai.map{ fai -> [[id:fai.baseName], fai] })
 
-    SAMTOOLS_MERGE (
-        bam_splitncigar_interval,
-        fasta
-    )
    splitncigar_bam = SAMTOOLS_MERGE.out.bam
    ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions.first())
 
-    SAMTOOLS_INDEX (
-        splitncigar_bam
-    )
+    SAMTOOLS_INDEX(splitncigar_bam)
+
    splitncigar_bam_bai = splitncigar_bam
-        .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true)
-        .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true)
+        .join(SAMTOOLS_INDEX.out.bai, remainder: true)
+        .join(SAMTOOLS_INDEX.out.csi, remainder: true)
        .map{meta, bam, bai, csi ->
            if (bai) [meta, bam, bai]
            else [meta, bam, csi]
diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf
deleted file mode 100644
index 68d632c3..00000000
--- a/subworkflows/nf-core/bam_stats_samtools.nf
+++ /dev/null
@@ -1,38 +0,0 @@
-//
-// Run SAMtools
stats, flagstat and idxstats -// - -include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' -include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/modules/samtools/idxstats/main' -include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/modules/samtools/flagstat/main' - -workflow BAM_STATS_SAMTOOLS { - take: - ch_bam_bai // channel: [ val(meta), [ bam ], [bai/csi] ] - - main: - ch_versions = Channel.empty() - - SAMTOOLS_STATS ( - ch_bam_bai, - [] - ) - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) - - SAMTOOLS_FLAGSTAT ( - ch_bam_bai - ) - ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) - - SAMTOOLS_IDXSTATS ( - ch_bam_bai - ) - ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first()) - - emit: - stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/markduplicates.nf b/subworkflows/nf-core/markduplicates.nf deleted file mode 100644 index 85b724dd..00000000 --- a/subworkflows/nf-core/markduplicates.nf +++ /dev/null @@ -1,54 +0,0 @@ -// -// GATK4 MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats -// - -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' -include { GATK4_MARKDUPLICATES } from '../../modules/nf-core/modules/gatk4/markduplicates/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' - -workflow MARKDUPLICATES { - take: - bam // channel: [ val(meta), [ bam ] ] - - main: - - ch_versions = Channel.empty() - - GATK4_MARKDUPLICATES ( - bam - ) - ch_versions = ch_versions.mix(GATK4_MARKDUPLICATES.out.versions.first()) - - // - // Index BAM file and run samtools stats, flagstat and idxstats - // - SAMTOOLS_INDEX ( - GATK4_MARKDUPLICATES.out.bam - ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - - GATK4_MARKDUPLICATES.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) - .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) - .map{meta, bam, bai, csi -> - if (bai) [meta, bam, bai] - else [meta, bam, csi]} - .set{ch_bam_bai} - - BAM_STATS_SAMTOOLS ( - ch_bam_bai - ) - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions.first()) - - emit: - bam = GATK4_MARKDUPLICATES.out.bam // channel: [ val(meta), [ bam ] ] - bam_bai = ch_bam_bai // channel: [ val(meta), [ bam ], [bai or csi] ] - metrics = GATK4_MARKDUPLICATES.out.metrics // channel: [ val(meta), [ metrics ] ] - - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - versions = ch_versions // channel: [versions.yml] -} diff --git a/tests/csv/1.0/fastq_single.csv b/tests/csv/1.0/fastq_single.csv new file mode 100644 index 00000000..125f9bfd --- /dev/null +++ b/tests/csv/1.0/fastq_single.csv @@ -0,0 +1,2 @@ +sample,fastq_1,fastq_2,strandedness 
+GM12878,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz,reverse diff --git a/tests/test_annotation.yml b/tests/test_annotation.yml index b6fb529d..28ef4322 100644 --- a/tests/test_annotation.yml +++ b/tests/test_annotation.yml @@ -1,5 +1,5 @@ - name: Run snpEff - command: nextflow run main.nf -profile test,docker --annotate_tools snpeff + command: nextflow run main.nf -profile test,docker --annotate_tools snpeff --outdir results tags: - annotation - snpeff @@ -8,7 +8,7 @@ - path: results/variant_annotation/GM12878/GM12878_snpEff.ann.vcf.gz.tbi - path: results/reports/multiqc_report.html - name: Run VEP - command: nextflow run main.nf -profile test,docker --annotate_tools vep --skip_multiqc + command: nextflow run main.nf -profile test,docker --annotate_tools vep --skip_multiqc --outdir results tags: - annotation - vep @@ -16,7 +16,7 @@ - path: results/variant_annotation/GM12878/GM12878_VEP.ann.vcf.gz - path: results/variant_annotation/GM12878/GM12878_VEP.ann.vcf.gz.tbi - name: Run snpEff followed by VEP - command: nextflow run main.nf -profile test,docker --annotate_tools merge --skip_multiqc + command: nextflow run main.nf -profile test,docker --annotate_tools merge --skip_multiqc --outdir results tags: - annotation - merge diff --git a/tests/test_bamcsiindex.yml b/tests/test_bamcsiindex.yml index 06fb817f..7235d9f9 100644 --- a/tests/test_bamcsiindex.yml +++ b/tests/test_bamcsiindex.yml @@ -1,10 +1,10 @@ - name: Run pipeline with bam_csi_index for large genomes - command: nextflow run main.nf -profile test,docker --bam_csi_index + command: nextflow run main.nf -profile test,docker --bam_csi_index --outdir results tags: - bamcsiindex files: - path: results/reports/multiqc_report.html - - path: results/preprocessing/GM12878/GM12878.markdup.sorted.bam - - path: results/preprocessing/GM12878/GM12878.markdup.sorted.bam.csi + - path: results/preprocessing/GM12878/GM12878.md.bam + - path: results/preprocessing/GM12878/GM12878.md.bam.csi - path: results/variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz - path: results/variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz.csi diff --git a/tests/test_default.yml b/tests/test_default.yml index 36cb35fa..776ec6f3 100644 --- a/tests/test_default.yml +++ b/tests/test_default.yml @@ -1,5 +1,5 @@ - name: Run default pipeline - command: nextflow run main.nf -profile test,docker + command: nextflow run main.nf -profile test,docker --outdir results tags: - default - preprocessing diff --git a/tests/test_removeduplicates.yml b/tests/test_removeduplicates.yml index 8889ad6a..d0650728 100644 --- a/tests/test_removeduplicates.yml +++ b/tests/test_removeduplicates.yml @@ -1,5 +1,5 @@ - name: Run pipeline with duplicate reads removed - command: nextflow run main.nf -profile test,docker --remove_duplicates true + command: nextflow run main.nf -profile test,docker --remove_duplicates true --outdir results tags: - removeduplicates - preprocessing diff --git a/tests/test_skipbasecalib.yml b/tests/test_skipbasecalib.yml index 7b5b2132..67f1c463 100644 --- a/tests/test_skipbasecalib.yml +++ b/tests/test_skipbasecalib.yml @@ -1,5 +1,5 @@ - name: Run pipeline without base calibration step - command: nextflow run main.nf -profile test,docker --skip_baserecalibration true + command: nextflow run main.nf -profile test,docker --skip_baserecalibration true --outdir results tags: - 
skipbasecalib - preprocessing diff --git a/tower.yml b/tower.yml new file mode 100644 index 00000000..787aedfe --- /dev/null +++ b/tower.yml @@ -0,0 +1,5 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" diff --git a/workflows/rnavar.nf b/workflows/rnavar.nf index 94d202dd..698e0c25 100755 --- a/workflows/rnavar.nf +++ b/workflows/rnavar.nf @@ -1,12 +1,18 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + PRINT PARAMS SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) +include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' + +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) + +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation -// Validate input parameters WorkflowRnavar.initialise(params, log) // Check input path parameters to see if they exist @@ -26,11 +32,11 @@ def checkPathParamList = [ params.star_index, ] -for (param in checkPathParamList) {if (param) file(param, checkIfExists: true)} +for(param in checkPathParamList) {if (param) file(param, checkIfExists: true)} // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } -if(!params.star_index && !params.gtf && !params.gff){ exit 1, "GTF|GFF3 file is required to build a STAR reference index! Use option --gtf|--gff to provide a GTF|GFF file." } +if (!params.star_index && !params.gtf && !params.gff){ exit 1, "GTF|GFF3 file is required to build a STAR reference index! Use option --gtf|--gff to provide a GTF|GFF file." } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -38,9 +44,10 @@ if(!params.star_index && !params.gtf && !params.gff){ exit 1, "GTF|GFF3 file is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = Channel.fromPath(file("$projectDir/assets/multiqc_config.yml", checkIfExists: true)) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() -ch_rnavar_logo = Channel.fromPath(file("$projectDir/assets/nf-core-rnavar_logo_light.png", checkIfExists: true)) +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -48,51 +55,37 @@ ch_rnavar_logo = Channel.fromPath(file("$projectDir/assets/nf-core-rna ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { INPUT_CHECK } from '../subworkflows/local/input_check' // Validate the input samplesheet.csv and prepare input channels -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' // Build the genome index and other reference files -include { ANNOTATE } from '../subworkflows/local/annotate' // Annotate variants using snpEff or VEP or both - -/* -======================================================================================== - IMPORT NF-CORE MODULES -======================================================================================== -*/ - -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' -include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main' -include { GATK4_BASERECALIBRATOR } from '../modules/nf-core/modules/gatk4/baserecalibrator/main' -include { GATK4_BEDTOINTERVALLIST } from '../modules/nf-core/modules/gatk4/bedtointervallist/main' -include { GATK4_INTERVALLISTTOOLS } from '../modules/nf-core/modules/gatk4/intervallisttools/main' -include { GATK4_HAPLOTYPECALLER } from '../modules/nf-core/modules/gatk4/haplotypecaller/main' -include { GATK4_HAPLOTYPECALLER as GATK4_HAPLOTYPECALLERGVCF } from '../modules/nf-core/modules/gatk4/haplotypecaller/main' -include { GATK4_MERGEVCFS } from '../modules/nf-core/modules/gatk4/mergevcfs/main' -include { GATK4_COMBINEGVCFS } from '../modules/nf-core/modules/gatk4/combinegvcfs/main' -include { GATK4_INDEXFEATUREFILE } from '../modules/nf-core/modules/gatk4/indexfeaturefile/main' -include { GATK4_VARIANTFILTRATION } from '../modules/nf-core/modules/gatk4/variantfiltration/main' -include { SAMTOOLS_INDEX } from '../modules/nf-core/modules/samtools/index/main' -include { TABIX_TABIX as TABIX } from '../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIXGVCF } from '../modules/nf-core/modules/tabix/tabix/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' - -/* -======================================================================================== - IMPORT LOCAL MODULES -======================================================================================== -*/ +include { GTF2BED } from '../modules/local/gtf2bed/main' -include { GTF2BED } from '../modules/local/gtf2bed' +include { ALIGN_STAR } from '../subworkflows/local/align_star/main' // Align reads to genome and sort and index the alignment file +include { ANNOTATE } from '../subworkflows/local/annotate/main' // Annotate variants using snpEff or VEP or both +include { BAM_MARKDUPLICATES } from '../subworkflows/local/bam_markduplicates/main' // Mark duplicates in the BAM file +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome/main' // Build the genome index and other reference files +include { RECALIBRATE } from '../subworkflows/local/recalibrate/main' // Estimate and correct systematic bias +include { SPLITNCIGAR } from '../subworkflows/local/splitncigar/main' // Splits reads that contain Ns in their cigar string /* 
======================================================================================== - IMPORT NF-CORE SUBWORKFLOWS + IMPORT NF-CORE MODULES ======================================================================================== */ -include { ALIGN_STAR } from '../subworkflows/nf-core/align_star' // Align reads to genome and sort and index the alignment file -include { MARKDUPLICATES } from '../subworkflows/nf-core/markduplicates' // Mark duplicates in the BAM file -include { SPLITNCIGAR } from '../subworkflows/nf-core/splitncigar' // Splits reads that contain Ns in their cigar string -include { RECALIBRATE } from '../subworkflows/nf-core/recalibrate' // Estimate and correct systematic bias +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { GATK4_BASERECALIBRATOR } from '../modules/nf-core/gatk4/baserecalibrator/main' +include { GATK4_BEDTOINTERVALLIST } from '../modules/nf-core/gatk4/bedtointervallist/main' +include { GATK4_INTERVALLISTTOOLS } from '../modules/nf-core/gatk4/intervallisttools/main' +include { GATK4_HAPLOTYPECALLER } from '../modules/nf-core/gatk4/haplotypecaller/main' +include { GATK4_HAPLOTYPECALLER as GATK4_HAPLOTYPECALLERGVCF } from '../modules/nf-core/gatk4/haplotypecaller/main' +include { GATK4_MERGEVCFS } from '../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_COMBINEGVCFS } from '../modules/nf-core/gatk4/combinegvcfs/main' +include { GATK4_INDEXFEATUREFILE } from '../modules/nf-core/gatk4/indexfeaturefile/main' +include { GATK4_VARIANTFILTRATION } from '../modules/nf-core/gatk4/variantfiltration/main' +include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' +include { TABIX_TABIX as TABIX } from '../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIXGVCF } from '../modules/nf-core/tabix/tabix/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ======================================================================================== @@ -100,29 +93,36 @@ include { RECALIBRATE } from '../subworkflows/nf-core/recalibr ======================================================================================== */ -// Check STAR alignment parameters -def prepareToolIndices = params.aligner -def seq_platform = params.seq_platform ? params.seq_platform : [] -def seq_center = params.seq_center ? params.seq_center : [] +// // Check STAR alignment parameters +// def prepareToolIndices = params.aligner +def seq_platform = params.seq_platform ?: [] +def seq_center = params.seq_center ?: [] -// Initialize file channels based on params +// // Initialize file channels based on params ch_dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.empty() ch_dbsnp_tbi = params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi).collect() : Channel.empty() ch_known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.empty() ch_known_indels_tbi = params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() : Channel.empty() -// Initialize variant annotation associated channels -ch_snpeff_db = params.snpeff_db ?: Channel.empty() -ch_vep_cache_version = params.vep_cache_version ?: Channel.empty() -ch_vep_genome = params.vep_genome ?: Channel.empty() -ch_vep_species = params.vep_species ?: Channel.empty() -ch_snpeff_cache = params.snpeff_cache ? 
Channel.fromPath(params.snpeff_cache).collect() : []
-ch_vep_cache           = params.vep_cache        ? Channel.fromPath(params.vep_cache).collect()          : []
+// // Initialize variant annotation associated channels
+// ch_snpeff_db         = params.snpeff_db ?: Channel.empty()
+// ch_vep_cache_version = params.vep_cache_version ?: Channel.empty()
+// ch_vep_genome        = params.vep_genome ?: Channel.empty()
+// ch_vep_species       = params.vep_species ?: Channel.empty()
+// ch_snpeff_cache      = params.snpeff_cache ? Channel.fromPath(params.snpeff_cache).collect() : []
+// ch_vep_cache         = params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : []
 
 // MultiQC reporting
-def multiqc_report = []
+// def multiqc_report = []
+
+// Initialize file channels based on params, defined in the params.genomes[params.genome] scope
+ch_exon_bed = params.exon_bed ? Channel.fromPath(params.exon_bed) : Channel.empty()
+ch_fasta    = params.fasta    ? Channel.fromPath(params.fasta).map{ fasta -> [ [ id:fasta.baseName ], fasta ] }.first() : Channel.empty()
+ch_gff      = params.gff      ? Channel.fromPath(params.gff).first() : Channel.empty()
+ch_gtf      = params.gtf      ? Channel.fromPath(params.gtf).map{ gtf -> [ [ id:gtf.baseName ], gtf ] }.first() : Channel.empty()
+
 /*
 ========================================================================================
    RUN MAIN WORKFLOW RNAVAR
@@ -130,75 +130,62 @@
 */
 workflow RNAVAR {
-
    // To gather all QC reports for MultiQC
    ch_reports  = Channel.empty()
+
    // To gather used softwares versions for MultiQC
    ch_versions = Channel.empty()
 
-    //
-    // SUBWORKFLOW: Uncompress and prepare reference genome files
-    //
-
-    PREPARE_GENOME (
-        prepareToolIndices,
+    ch_from_samplesheet = Channel.empty()
+
+    if (params.input) ch_from_samplesheet = Channel.fromSamplesheet("input")
+
+    ch_fastq = ch_from_samplesheet.map{ meta, fastq_1, fastq_2 ->
+        if (fastq_2) return [ meta + [id: meta.sample], [ fastq_1, fastq_2 ] ]
+        else return [ meta + [id: meta.sample], [ fastq_1 ] ]
+    }.groupTuple()
+    .branch { meta, fastq ->
+        single  : fastq.size() == 1
+            return [ meta, fastq.flatten() ]
+        multiple: fastq.size() > 1
+            return [ meta, fastq.flatten() ]
+    }
+
+    // Prepare reference genome files
+
+    PREPARE_GENOME(
+        ch_fasta,
+        ch_gff,
+        ch_gtf,
        params.feature_type
    )
-    ch_genome_bed = Channel.from([id:'genome.bed']).combine(PREPARE_GENOME.out.exon_bed)
    ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)
 
-    //
-    // SUBWORKFLOW: Read in samplesheet, validate and stage input files
-    //
-    INPUT_CHECK (
-        ch_input
-    )
-    .reads
-    .map {
-        meta, fastq ->
-            def meta_clone = meta.clone()
-            meta_clone.id = meta_clone.id.split('_')[0..-2].join('_')
-            [ meta_clone, fastq ]
-    }
-    .groupTuple(by: [0])
-    .branch {
-        meta, fastq ->
-            single  : fastq.size() == 1
-                return [ meta, fastq.flatten() ]
-            multiple: fastq.size() > 1
-                return [ meta, fastq.flatten() ]
-    }
-    .set { ch_fastq }
-    ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
+    ch_genome_bed = params.exon_bed  ? Channel.fromPath(params.exon_bed).map{ it -> [ [id:'exon_bed'], it ] }.collect()
+                                     : PREPARE_GENOME.out.exon_bed
+    ch_dict       = params.dict      ? Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect()
+                                     : PREPARE_GENOME.out.dict
+    ch_fasta_fai  = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect()

-    //
     // MODULE: Concatenate FastQ files from same sample if required
-    //
-    CAT_FASTQ (
-        ch_fastq.multiple
-    )
-    .reads
-    .mix(ch_fastq.single)
-    .set { ch_cat_fastq }
+
+    CAT_FASTQ(ch_fastq.multiple)
+
+    ch_cat_fastq = CAT_FASTQ.out.reads.mix(ch_fastq.single)
+
     ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null))

-    //
     // MODULE: Generate QC summary using FastQC
-    //
-    FASTQC (
-        ch_cat_fastq
-    )
-    ch_reports  = ch_reports.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
+    FASTQC(ch_cat_fastq)
+    ch_reports  = ch_reports.mix(FASTQC.out.zip.collect{ meta, logs -> logs })
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())

     //
     // MODULE: Prepare the interval list from the GTF file using GATK4 BedToIntervalList
     //
-    ch_interval_list = Channel.empty()
-    GATK4_BEDTOINTERVALLIST(
-        ch_genome_bed,
-        PREPARE_GENOME.out.dict
-    )
+
+    GATK4_BEDTOINTERVALLIST(ch_genome_bed, ch_dict)
     ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list
     ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST.out.versions.first().ifEmpty(null))

@@ -207,9 +194,7 @@ workflow RNAVAR {
     //
     ch_interval_list_split = Channel.empty()
     if (!params.skip_intervallisttools) {
-        GATK4_INTERVALLISTTOOLS(
-            ch_interval_list
-        )
+        GATK4_INTERVALLISTTOOLS(ch_interval_list)
         ch_interval_list_split = GATK4_INTERVALLISTTOOLS.out.interval_list.map{ meta, bed -> [bed] }.flatten()
     }
     else ch_interval_list_split = ch_interval_list
@@ -227,7 +212,7 @@ workflow RNAVAR {
     ch_aligner_clustering_multiqc = Channel.empty()

     if (params.aligner == 'star') {
-        ALIGN_STAR (
+        ALIGN_STAR(
             ch_cat_fastq,
             PREPARE_GENOME.out.star_index,
             PREPARE_GENOME.out.gtf,
@@ -240,33 +225,35 @@ workflow RNAVAR {
         ch_transcriptome_bam = ALIGN_STAR.out.bam_transcript

         // Gather QC reports
-        ch_reports  = ch_reports.mix(ALIGN_STAR.out.stats.collect{it[1]}.ifEmpty([]))
+        ch_reports  = ch_reports.mix(ALIGN_STAR.out.reports)
         ch_reports  = ch_reports.mix(ALIGN_STAR.out.log_final.collect{it[1]}.ifEmpty([]))
         ch_versions = ch_versions.mix(ALIGN_STAR.out.versions.first().ifEmpty(null))

         //
         // SUBWORKFLOW: Mark duplicates with GATK4
         //
-        MARKDUPLICATES (
-            ch_genome_bam
-        )
-        ch_genome_bam = MARKDUPLICATES.out.bam_bai
+        BAM_MARKDUPLICATES(
+            ch_genome_bam,
+            ch_fasta.map{ meta, fasta -> [fasta] },
+            ch_fasta_fai,
+            [])
+
+        ch_genome_bam = BAM_MARKDUPLICATES.out.bam

         //Gather QC reports
-        ch_reports  = ch_reports.mix(MARKDUPLICATES.out.stats.collect{it[1]}.ifEmpty([]))
-        ch_reports  = ch_reports.mix(MARKDUPLICATES.out.metrics.collect{it[1]}.ifEmpty([]))
-        ch_versions = ch_versions.mix(MARKDUPLICATES.out.versions.first().ifEmpty(null))
+        ch_reports  = ch_reports.mix(BAM_MARKDUPLICATES.out.reports.collect{it[1]}.ifEmpty([]))
+        ch_versions = ch_versions.mix(BAM_MARKDUPLICATES.out.versions.first().ifEmpty(null))
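The `collect{ meta, logs -> logs }` idiom introduced for FASTQC gathers every QC file into a single list while stripping the meta maps that MultiQC does not need. The same pattern in isolation (channel contents invented for illustration):

    workflow {
        ch_reports = Channel.empty()

        // Stand-in for FASTQC.out.zip: [ meta, file ] pairs
        ch_zip = Channel.of( [ [id:'sample1'], file('sample1_fastqc.zip') ],
                             [ [id:'sample2'], file('sample2_fastqc.zip') ] )

        // collect with a closure transforms each item (dropping the meta)
        // and emits one combined list once the source channel completes
        ch_reports = ch_reports.mix(ch_zip.collect { meta, logs -> logs })

        ch_reports.view()
    }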

         //
         // SUBWORKFLOW: SplitNCigarReads from GATK4 over the intervals
-        // Splits reads that contain Ns in their cigar string (e.g. spanning splicing events in RNAseq data).
+        // Splits reads that contain Ns in their cigar string (e.g. spanning splicing events in RNAseq data).
         //
         ch_splitncigar_bam_bai = Channel.empty()
-        SPLITNCIGAR (
+        SPLITNCIGAR(
             ch_genome_bam,
-            PREPARE_GENOME.out.fasta,
-            PREPARE_GENOME.out.fai,
-            PREPARE_GENOME.out.dict,
+            ch_fasta,
+            ch_fasta_fai,
+            ch_dict,
             ch_interval_list_split
         )
         ch_splitncigar_bam_bai = SPLITNCIGAR.out.bam_bai
@@ -277,7 +264,7 @@ workflow RNAVAR {
         // Generates a recalibration table based on various co-variates
         //
         ch_bam_variant_calling = Channel.empty()
-        if(!params.skip_baserecalibration) {
+        if (!params.skip_baserecalibration) {
             ch_bqsr_table = Channel.empty()
             // known_sites is made by grouping both the dbsnp and the known indels ressources
             // they can either or both be optional
@@ -285,15 +272,14 @@ workflow RNAVAR {
             ch_known_sites_tbi = ch_dbsnp_tbi.concat(ch_known_indels_tbi).collect()

             ch_interval_list_recalib = ch_interval_list.map{ meta, bed -> [bed] }.flatten()
-            ch_splitncigar_bam_bai.combine(ch_interval_list_recalib)
-                .map{ meta, bam, bai, interval -> [ meta, bam, bai, interval]
-                }.set{ch_splitncigar_bam_bai_interval}
+            ch_splitncigar_bam_bai_interval = ch_splitncigar_bam_bai.combine(ch_interval_list_recalib)
+                .map{ meta, bam, bai, interval -> [ meta, bam, bai, interval] }

             GATK4_BASERECALIBRATOR(
                 ch_splitncigar_bam_bai_interval,
-                PREPARE_GENOME.out.fasta,
-                PREPARE_GENOME.out.fai,
-                PREPARE_GENOME.out.dict,
+                ch_fasta.map{ meta, fasta -> [fasta] },
+                ch_fasta_fai,
+                ch_dict.map{ meta, dict -> [dict] },
                 ch_known_sites,
                 ch_known_sites_tbi
             )
@@ -303,13 +289,13 @@ workflow RNAVAR {
             ch_reports  = ch_reports.mix(ch_bqsr_table.map{ meta, table -> table})
             ch_versions = ch_versions.mix(GATK4_BASERECALIBRATOR.out.versions.first().ifEmpty(null))

-            ch_bam_applybqsr = ch_splitncigar_bam_bai.join(ch_bqsr_table, by: [0])
+            ch_bam_applybqsr = ch_splitncigar_bam_bai.join(ch_bqsr_table)
             ch_bam_recalibrated_qc = Channel.empty()

             ch_interval_list_applybqsr = ch_interval_list.map{ meta, bed -> [bed] }.flatten()
             ch_bam_applybqsr.combine(ch_interval_list_applybqsr)
-                .map{ meta, bam, bai, table, interval -> [ meta, bam, bai, table, interval]
-                }.set{ch_applybqsr_bam_bai_interval}
+                .map{ meta, bam, bai, table, interval -> [ meta, bam, bai, table, interval]}
+                .set{ch_applybqsr_bam_bai_interval}

             //
             // MODULE: ApplyBaseRecalibrator from GATK4
@@ -318,9 +304,9 @@ workflow RNAVAR {
             RECALIBRATE(
                 params.skip_multiqc,
                 ch_applybqsr_bam_bai_interval,
-                PREPARE_GENOME.out.dict,
-                PREPARE_GENOME.out.fai,
-                PREPARE_GENOME.out.fasta
+                ch_dict.map{ meta, dict -> [dict] },
+                ch_fasta_fai,
+                ch_fasta.map{ meta, fasta -> [fasta] }
             )

             ch_bam_variant_calling = RECALIBRATE.out.bam
@@ -343,131 +329,111 @@ workflow RNAVAR {
         ch_haplotypecaller_vcf = Channel.empty()
         ch_haplotypecaller_interval_bam = ch_bam_variant_calling.combine(ch_interval_list_split)
             .map{ meta, bam, bai, interval_list ->
-                new_meta = meta.clone()
-                new_meta.id = meta.id + "_" + interval_list.baseName
-                new_meta.sample = meta.id
-                [new_meta, bam, bai, interval_list]
+                [meta + [id:meta.id + "_" + interval_list.baseName], bam, bai, interval_list, []]
             }
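Replacing the `meta.clone()` dance with `meta + [id: ...]` is more than style: `+` builds a fresh map, so the meta object still referenced by other channel items is never mutated. A runnable sketch of the interval scatter above (BAM and interval names invented):

    workflow {
        ch_bam = Channel.of( [ [id:'sample1', sample:'sample1'], file('sample1.bam'), file('sample1.bam.bai') ] )
        ch_intervals = Channel.of( file('chr1.interval_list'), file('chr2.interval_list') )

        // combine = Cartesian product: every BAM is paired with every interval list
        ch_scatter = ch_bam.combine(ch_intervals)
            .map { meta, bam, bai, interval ->
                // meta + [...] returns a NEW map; the original meta stays untouched
                [ meta + [id: "${meta.id}_${interval.baseName}"], bam, bai, interval ]
            }

        ch_scatter.view()
    }

The trailing `[]` appended in the real hunk appears to fill an optional extra slot of the updated gatk4/haplotypecaller module input tuple; that reading is an assumption, not stated in the diff itself.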

         //
         // MODULE: HaplotypeCaller from GATK4
         // Calls germline SNPs and indels via local re-assembly of haplotypes.
         //
-
+
         GATK4_HAPLOTYPECALLER(
             ch_haplotypecaller_interval_bam,
-            PREPARE_GENOME.out.fasta,
-            PREPARE_GENOME.out.fai,
-            PREPARE_GENOME.out.dict,
+            ch_fasta.map{ meta, fasta -> [fasta] },
+            ch_fasta_fai,
+            ch_dict.map{ meta, dict -> [dict] },
             ch_dbsnp,
             ch_dbsnp_tbi
         )
-
-
+
+
         ch_haplotypecaller_raw = GATK4_HAPLOTYPECALLER.out.vcf
-                .map{ meta, vcf ->
-                meta.id = meta.sample
-                [meta, vcf]}
-                .groupTuple()
+            .map{ meta, vcf ->
+                meta.id = meta.sample
+                [meta, vcf]}
+            .groupTuple()

         ch_versions = ch_versions.mix(GATK4_HAPLOTYPECALLER.out.versions.first().ifEmpty(null))
-
+
         //
         // MODULE: MergeVCFS from GATK4
         // Merge multiple VCF files into one VCF
         //
-        GATK4_MERGEVCFS(
-            ch_haplotypecaller_raw,
-            PREPARE_GENOME.out.dict
-        )
+        GATK4_MERGEVCFS(ch_haplotypecaller_raw, ch_dict)
         ch_haplotypecaller_vcf = GATK4_MERGEVCFS.out.vcf
         ch_versions = ch_versions.mix(GATK4_MERGEVCFS.out.versions.first().ifEmpty(null))

-        if (params.generate_gvcf){
+        if (params.generate_gvcf) {
             GATK4_HAPLOTYPECALLERGVCF(
                 ch_haplotypecaller_interval_bam,
-                PREPARE_GENOME.out.fasta,
-                PREPARE_GENOME.out.fai,
-                PREPARE_GENOME.out.dict,
+                ch_fasta.map{ meta, fasta -> [fasta] },
+                ch_fasta_fai,
+                ch_dict.map{ meta, dict -> [dict] },
                 ch_dbsnp,
                 ch_dbsnp_tbi
             )
-
+
             ch_haplotypecallergvcf_raw = GATK4_HAPLOTYPECALLERGVCF.out.vcf
                 .map{ meta, vcf ->
                     meta.id = meta.sample
-                    [meta, vcf]}
-                .groupTuple()
+                    [meta, vcf]
+                }.groupTuple()

             ch_versions = ch_versions.mix(GATK4_HAPLOTYPECALLERGVCF.out.versions.first().ifEmpty(null))

             //
             // MODULE: IndexFeatureFile from GATK4
             // Index the gVCF files
             //
-            GATK4_INDEXFEATUREFILE(
-                GATK4_HAPLOTYPECALLERGVCF.out.vcf
-            )
-
+            GATK4_INDEXFEATUREFILE(GATK4_HAPLOTYPECALLERGVCF.out.vcf)
+
             ch_haplotypecallergvcf_raw_index = GATK4_INDEXFEATUREFILE.out.index
                 .map{ meta, idx ->
                     meta.id = meta.sample
-                    [meta, idx]}
-                .groupTuple()
+                    [meta, idx]
+                }.groupTuple()

             ch_versions = ch_versions.mix(GATK4_INDEXFEATUREFILE.out.versions.first().ifEmpty(null))
-
-            //
+
+            //
             // MODULE: CombineGVCFS from GATK4
             // Merge multiple GVCF files into one GVCF
-            //
-
-            //ch_haplotypecallergvcf_raw_tbi = ch_haplotypecallergvcf_raw
-            //    .join(ch_haplotypecallergvcf_raw_index, by: [0], remainder: true)
-            //    .map{meta, vcf, tbi ->
-            //        [meta, vcf, tbi]
-            //    }
-
-
-
+            //
+
+            ch_haplotypecallergvcf_raw_tbi = ch_haplotypecallergvcf_raw
+                .join(ch_haplotypecallergvcf_raw_index, remainder: true)
+
             GATK4_COMBINEGVCFS(
-                ch_haplotypecallergvcf_raw,
-                ch_haplotypecallergvcf_raw_index,
-                PREPARE_GENOME.out.fasta,
-                PREPARE_GENOME.out.fai,
-                PREPARE_GENOME.out.dict
+                ch_haplotypecallergvcf_raw_tbi,
+                ch_fasta.map{ meta, fasta -> [fasta] },
+                ch_fasta_fai,
+                ch_dict.map{ meta, dict -> [dict] }
             )
             ch_haplotypecaller_gvcf = GATK4_COMBINEGVCFS.out.combined_gvcf
             ch_versions = ch_versions.mix(GATK4_COMBINEGVCFS.out.versions.first().ifEmpty(null))
-
+
             //
             // MODULE: Index the VCF using TABIX
             //
-            TABIXGVCF(
-                ch_haplotypecaller_gvcf
-            )
+            TABIXGVCF(ch_haplotypecaller_gvcf)

             ch_haplotypecaller_gvcf_tbi = ch_haplotypecaller_gvcf
-                .join(TABIXGVCF.out.tbi, by: [0], remainder: true)
-                .join(TABIXGVCF.out.csi, by: [0], remainder: true)
+                .join(TABIXGVCF.out.tbi, remainder: true)
+                .join(TABIXGVCF.out.csi, remainder: true)
                 .map{meta, vcf, tbi, csi ->
                     if (tbi) [meta, vcf, tbi]
                     else [meta, vcf, csi]
                 }

             ch_versions = ch_versions.mix(TABIXGVCF.out.versions.first().ifEmpty(null))
-
+
         }
-
+
         //
         // MODULE: Index the VCF using TABIX
         //
-        TABIX(
-            ch_haplotypecaller_vcf
-        )
+        TABIX(ch_haplotypecaller_vcf)
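The chained `join(..., remainder: true)` calls used for the gVCF (and, just below, the VCF) implement a tbi-else-csi fallback: unmatched keys are kept and padded with `null`, so the final `map` can test which index file actually exists. The same logic in isolation (sample IDs and file names invented):

    workflow {
        ch_vcf = Channel.of( [ [id:'sample1'], file('sample1.vcf.gz') ],
                             [ [id:'sample2'], file('sample2.vcf.gz') ] )
        ch_tbi = Channel.of( [ [id:'sample1'], file('sample1.vcf.gz.tbi') ] )  // sample2 only has a .csi
        ch_csi = Channel.of( [ [id:'sample2'], file('sample2.vcf.gz.csi') ] )

        ch_vcf
            .join(ch_tbi, remainder: true)   // [meta, vcf, tbi-or-null]
            .join(ch_csi, remainder: true)   // [meta, vcf, tbi-or-null, csi-or-null]
            .map { meta, vcf, tbi, csi ->
                if (tbi) [ meta, vcf, tbi ]  // prefer the .tbi index when present
                else     [ meta, vcf, csi ]
            }
            .view()
    }

Without `remainder: true` the first join would silently drop every VCF that lacks a .tbi, which is exactly the case the .csi branch is meant to cover.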

         ch_haplotypecaller_vcf_tbi = ch_haplotypecaller_vcf
-            .join(TABIX.out.tbi, by: [0], remainder: true)
-            .join(TABIX.out.csi, by: [0], remainder: true)
+            .join(TABIX.out.tbi, remainder: true)
+            .join(TABIX.out.csi, remainder: true)
             .map{meta, vcf, tbi, csi ->
                 if (tbi) [meta, vcf, tbi]
                 else [meta, vcf, csi]
@@ -484,9 +450,9 @@ workflow RNAVAR {

             GATK4_VARIANTFILTRATION(
                 ch_haplotypecaller_vcf_tbi,
-                PREPARE_GENOME.out.fasta,
-                PREPARE_GENOME.out.fai,
-                PREPARE_GENOME.out.dict
+                ch_fasta,
+                ch_fasta_fai.map{ it -> [ [id:'fai'], it ] },
+                ch_dict
             )

             ch_filtered_vcf = GATK4_VARIANTFILTRATION.out.vcf
@@ -494,29 +460,29 @@ workflow RNAVAR {
             ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION.out.versions.first().ifEmpty(null))
         }

-        //
-        // SUBWORKFLOW: Annotate variants using snpEff and Ensembl VEP if enabled.
-        //
-        if((!params.skip_variantannotation) && (params.annotate_tools) && (params.annotate_tools.contains('merge') || params.annotate_tools.contains('snpeff') || params.annotate_tools.contains('vep'))) {
-            ANNOTATE(
-                ch_final_vcf,
-                params.annotate_tools,
-                ch_snpeff_db,
-                ch_snpeff_cache,
-                ch_vep_genome,
-                ch_vep_species,
-                ch_vep_cache_version,
-                ch_vep_cache)
-
-            // Gather QC reports
-            ch_reports  = ch_reports.mix(ANNOTATE.out.reports)
-            ch_versions = ch_versions.mix(ANNOTATE.out.versions.first().ifEmpty(null))
-        }
+        // //
+        // // SUBWORKFLOW: Annotate variants using snpEff and Ensembl VEP if enabled.
+        // //
+        // if ((!params.skip_variantannotation) && (params.annotate_tools) && (params.annotate_tools.contains('merge') || params.annotate_tools.contains('snpeff') || params.annotate_tools.contains('vep'))) {
+        //     ANNOTATE(
+        //         ch_final_vcf,
+        //         params.annotate_tools,
+        //         ch_snpeff_db,
+        //         ch_snpeff_cache,
+        //         ch_vep_genome,
+        //         ch_vep_species,
+        //         ch_vep_cache_version,
+        //         ch_vep_cache)
+
+        //     // Gather QC reports
+        //     ch_reports  = ch_reports.mix(ANNOTATE.out.reports)
+        //     ch_versions = ch_versions.mix(ANNOTATE.out.versions.first().ifEmpty(null))
+        // }
     }

     ch_version_yaml = Channel.empty()
-    CUSTOM_DUMPSOFTWAREVERSIONS (ch_versions.unique().collectFile(name: 'collated_versions.yml'))
+    CUSTOM_DUMPSOFTWAREVERSIONS(ch_versions.unique().collectFile(name: 'collated_versions.yml'))
     ch_version_yaml = CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()

     //
@@ -526,17 +492,21 @@ workflow RNAVAR {
     if (!params.skip_multiqc){
         workflow_summary    = WorkflowRnavar.paramsSummaryMultiqc(workflow, summary_params)
         ch_workflow_summary = Channel.value(workflow_summary)
-        ch_multiqc_files = Channel.empty().mix(ch_version_yaml,
-                                               ch_multiqc_custom_config.collect().ifEmpty([]),
-                                               ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'),
-                                               ch_reports.collect(),
-                                               ch_multiqc_config,
-                                               ch_rnavar_logo)
-
-        MULTIQC (ch_multiqc_files.collect())
+
+        methods_description    = WorkflowRnavar.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
+        ch_methods_description = Channel.value(methods_description)
+
+        multiqc_files = Channel.empty()
+        multiqc_files = multiqc_files.mix(ch_version_yaml)
+        multiqc_files = multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+        multiqc_files = multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
+        multiqc_files = multiqc_files.mix(ch_reports.collect().ifEmpty([]))
+
+        MULTIQC(multiqc_files.collect(), ch_multiqc_config.collect().ifEmpty([]), ch_multiqc_custom_config.collect().ifEmpty([]), ch_multiqc_logo.collect().ifEmpty([]))
+
+        multiqc_report = MULTIQC.out.report.toList()
+        ch_versions    = ch_versions.mix(MULTIQC.out.versions)
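`unique().collectFile(...)` is what reduces the per-module version fragments to the single YAML handed to CUSTOM_DUMPSOFTWAREVERSIONS above. A toy version of that reduction (the version strings are invented for illustration):

    workflow {
        ch_versions = Channel.of(
            "FASTQC:\n    fastqc: 0.11.9\n",
            "FASTQC:\n    fastqc: 0.11.9\n",          // duplicate from a second task
            "GATK4_MERGEVCFS:\n    gatk4: 4.4.0.0\n" )

        ch_versions
            .unique()                                  // drop repeated fragments
            .collectFile(name: 'collated_versions.yml')
            .view { yml -> yml.text }
    }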
     }
-
 }

 /*
@@ -550,6 +520,9 @@ workflow.onComplete {
         NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
     }
     NfcoreTemplate.summary(workflow, params, log)
+    if (params.hook_url) {
+        NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
+    }
 }

 /*