diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..781b9cf --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,30 @@ +# Contributing to LOGAN + +TODO -- describe gitflow, require PRs... + +## Use pre-commit hooks + +Pre-commit can automatically format your code, check for spelling errors, etc. every time you commit. + +Install [pre-commit](https://pre-commit.com/#installation) if you haven't already, +then run `pre-commit install` to install the hooks specified in `.pre-commit-config.yaml`. +Pre-commit will run the hooks every time you commit. + +If you re-clone the repo, you will likely need to re-run `pre-commit install` +as the hooks are installed to the local repo's `.git/hooks` directory. + +## Versions + +Increment the version number following semantic versioning[^1] in the `VERSION` file. + +[^1]: semantic versioning guidelines https://semver.org/ + +## Changelog + +Keep the changelog up to date with all notable changes in `CHANGELOG.md`[^2]. + +[^2]: changelog guidelines: https://keepachangelog.com/en/1.1.0/ + +## VS code extensions + +If you use VS code, installing [nf-core extension pack](https://marketplace.visualstudio.com/items?itemName=nf-core.nf-core-extensionpack) is recommended. diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..5959b05 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,50 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [pipeline documentation: troubleshooting](https://ccbr.github.io/LOGAN/user-guide/troubleshooting/) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. 
+ validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop any relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + If the bug is related to a GUI, add screenshots to help explain your problem. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 21.10.3)_ + * Hardware _(eg. HPC, Desktop)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity)_ + * OS _(eg. Ubuntu Linux, macOS)_ + * Version of CCBR/TOOL_NAME _(eg. 1.0, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..8cba0ff --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,4 @@ +contact_links: + - name: Discussions + url: https://github.com/CCBR/LOGAN/discussions + about: Please ask and answer questions here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..342090f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. 
It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..2e8ffc2 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,27 @@ +## Changes + + + +## Issues + + + + + +## PR Checklist + +(~Strikethrough~ any points that are not applicable.) + +- [ ] This comment contains a description of changes with justifications, with any relevant issues linked. +- ~[ ] Write unit tests for any new features, bug fixes, or other code changes.~ _testing framework not yet implemented_ +- [ ] Update docs if there are any API changes. +- [ ] If a new nextflow process is implemented, define the process `container` and `stub`. +- [ ] Update `CHANGELOG.md` with a short description of any user-facing changes and reference the PR number. Guidelines: https://keepachangelog.com/en/1.1.0/ diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..e91bde8 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,39 @@ +name: build + +on: + push: + branches: + - main + - develop + pull_request: + branches: + - main + - develop + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + - name: Install nextflow + uses: nf-core/setup-nextflow@v1 + - name: Install dependencies + run: | + python -m pip install --upgrade pip setuptools + pip install .[dev,test] + python -c 'from logan.src.util import chmod_bins_exec; chmod_bins_exec()' + - name: Test stub run + run: | + mkdir tmp && cd tmp + which logan + logan init + logan run -profile ci_stub,docker -stub diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml 
deleted file mode 100644 index 839d1bd..0000000 --- a/.github/workflows/docs.yaml +++ /dev/null @@ -1,30 +0,0 @@ -name: Docs - -# Controls when the workflow will run -on: - # Triggers the workflow on push or pull request events but only for the master branch - push: - branches: - - main - paths: - - 'docs/**' - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel -jobs: - # This workflow contains a single job called "build" - build: - # The type of runner that the job will run on - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - with: - python-version: 3.x - - run: pip install mkdocs - - name: Build site - run: mkdocs build - - name: Deploy - run: mkdocs gh-deploy --force --clean --verbose diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..18228e9 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,27 @@ +name: mkdocs_build +on: + workflow_dispatch: + push: + branches: + - main + paths: + - "docs/**" + +env: + actor: "41898282+github-actions[bot]" + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.9 + - name: git config + run: | + git config --local user.email "${actor}@users.noreply.github.com" + git config --local user.name "$actor" + - run: pip install --upgrade pip + - run: pip install -r docs/requirements.txt + - run: mkdocs gh-deploy --force diff --git a/.github/workflows/projects.yml b/.github/workflows/projects.yml new file mode 100644 index 0000000..61a2816 --- /dev/null +++ b/.github/workflows/projects.yml @@ -0,0 +1,14 @@ +name: Add issues/PRs to user projects + +on: + issues: + types: + - assigned + pull_request: + types: + - assigned + +jobs: + add-to-project: + uses: 
CCBR/.github/.github/workflows/auto-add-user-project.yml@v0.1.0 + secrets: inherit diff --git a/.gitignore b/.gitignore index 4a37a5b..55f7c49 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,55 @@ docs/user-guide/site/ + +# Compiled class file +*.class + +# Log file +*.log + +# BlueJ files +*.ctxt + +# Mobile Tools for Java (J2ME) +.mtj.tmp/ + +# Package Files # +*.jar +*.war +*.nar +*.ear +*.zip +*.tar.gz +*.rar + +# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml +hs_err_pid* +replay_pid* + +# nextflow +.nextflow* +/work*/ +/data/ +/results/ +/output/ +/params.yaml + +# python packaging +*.pyc +*.egg-info +/build + +# macOS +.DS_Store + +# code editors +.vscode/ +*.code-workspace + +# user-specific dev files +*KLS* +*.Rproj +.Rproj.user +.Rhistory + +/submit_slurm.sh +/tmp/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f8ecdbb --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,3 @@ +# LOGAN development version + +- Python-based CLI \ No newline at end of file diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..49ffebc --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.1.0-dev \ No newline at end of file diff --git a/assets/slurm_header_biowulf.sh b/assets/slurm_header_biowulf.sh new file mode 100644 index 0000000..65b61ab --- /dev/null +++ b/assets/slurm_header_biowulf.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +#SBATCH --cpus-per-task=1 +#SBATCH --mem=1g +#SBATCH --time=1-00:00:00 +#SBATCH --parsable +#SBATCH -J "tool_name" +#SBATCH --mail-type=BEGIN,END,FAIL +#SBATCH --output "log/slurm_%j.log" +#SBATCH --error "log/slurm_%j.log" + +module load ccbrpipeliner +module load nextflow +NXF_SINGULARITY_CACHEDIR=/data/CCBR_Pipeliner/SIFS diff --git a/assets/slurm_header_frce.sh b/assets/slurm_header_frce.sh new file mode 100644 index 0000000..665274e --- /dev/null +++ b/assets/slurm_header_frce.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +#SBATCH --cpus-per-task=1 +#SBATCH --mem=1g +#SBATCH 
--time=1-00:00:00 +#SBATCH --parsable +#SBATCH -J "tool_name" +#SBATCH --mail-type=BEGIN,END,FAIL +#SBATCH --output "log/slurm_%j.log" +#SBATCH --output "log/slurm_%j.log" + +module load nextflow +NXF_SINGULARITY_CACHEDIR=/mnt/projects/CCBR-Pipelines/SIFs diff --git a/conf/base.config b/conf/base.config new file mode 100644 index 0000000..c525125 --- /dev/null +++ b/conf/base.config @@ -0,0 +1,61 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- +*/ + +process { + + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 2 + maxErrors = '-1' + + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // TODO nf-core: Customise requirements for specific processes. 
+ // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_low { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 32 * task.attempt, 'cpus' ) } + memory = { check_max( 120.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } +} diff --git a/conf/biowulf.config b/conf/biowulf.config new file mode 100644 index 0000000..78b3f05 --- /dev/null +++ b/conf/biowulf.config @@ -0,0 +1,34 @@ + +params { + config_profile_description = 'Biowulf nf-core config' + config_profile_contact = 'staff@hpc.nih.gov' + config_profile_url = 'https://hpc.nih.gov/apps/nextflow.html' + max_memory = '224 GB' + max_cpus = 32 + max_time = '72 h' + + igenomes_base = '/fdb/igenomes/' + + // CCBR shared resource paths + index_dir = '/data/CCBR_Pipeliner/db/PipeDB/Indices' + fastq_screen { + conf = "assets/fastq_screen_biowulf.conf" + db_dir = '/data/CCBR_Pipeliner/db/PipeDB/lib/fastq_screen_db/' + } +} + +singularity { + enabled = true + autoMounts = true + cacheDir = "/data/CCBR_Pipeliner/SIFS" + 
envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' +} + +env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS" + +process.clusterOptions = ' --gres=lscratch:200 ' +process.scratch = '/lscratch/$SLURM_JOBID' +process.stageInMode = 'symlink' +process.stageOutMode = 'rsync' +// for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps +process.cache = 'lenient' diff --git a/conf/ci_stub.config b/conf/ci_stub.config new file mode 100644 index 0000000..6273277 --- /dev/null +++ b/conf/ci_stub.config @@ -0,0 +1,18 @@ +params { + config_profile_name = 'Test stubs' + config_profile_description = 'Minimal test configuration to run stubs with continuous integration' + + input = 'hello world' + outdir = 'results/test' + + max_cpus = 2 // for GitHub Actions https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources + max_memory = '6.GB' + max_time = '6.h' + + publish_dir_mode = "symlink" +} + +process { + cpus = 1 + memory = '1.GB' +} diff --git a/conf/containers.config b/conf/containers.config new file mode 100644 index 0000000..504d3b8 --- /dev/null +++ b/conf/containers.config @@ -0,0 +1,7 @@ +// containers accessed in process definitions with $params.containers.NAME +params { + containers { + base = 'nciccbr/ccbr_ubuntu_base_20.04:v6.1' + logan = 'docker://dnousome/ccbr_logan_base:v0.3.0' + } +} diff --git a/conf/frce.config b/conf/frce.config new file mode 100644 index 0000000..4f132a8 --- /dev/null +++ b/conf/frce.config @@ -0,0 +1,29 @@ +params { + config_profile_description = 'FRCE config' + max_memory = '224 GB' + max_cpus = 32 + max_time = '72 h' + + // CCBR shared resource paths + index_dir = null // TODO + fastq_screen { + conf = "assets/fastq_screen_frce.conf" // TODO + db_dir = null // TODO + } +} + +singularity { + enabled = true + autoMounts = true + cacheDir = "/mnt/projects/CCBR-Pipelines/SIFs" + 
envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' +} + +process.scratch = null // TODO + +process.stageInMode = 'symlink' +process.stageOutMode = 'rsync' + +// for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps +process.cache = 'lenient' + diff --git a/conf/genomes.config b/conf/genomes.config new file mode 100644 index 0000000..e69de29 diff --git a/conf/interactive.config b/conf/interactive.config new file mode 100644 index 0000000..3f11c04 --- /dev/null +++ b/conf/interactive.config @@ -0,0 +1,7 @@ +params { + config_profile_name = 'Run tasks locally on an interactive node' + max_memory = '220 GB' + max_cpus = 56 + max_time = '12 h' +} +process.scratch = false diff --git a/conf/modules.config b/conf/modules.config new file mode 100644 index 0000000..ff3e484 --- /dev/null +++ b/conf/modules.config @@ -0,0 +1,11 @@ +process { + + publishDir = [ + path: { task.label ? "${params.outdir}/${task.label.findAll { !it.startsWith('process_') & !it.startsWith('error_') }.join('/')}/${task.process.tokenize(':')[-1].toLowerCase()}" : "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + + errorStrategy = 'finish' + +} diff --git a/conf/slurm.config b/conf/slurm.config new file mode 100644 index 0000000..b63e01f --- /dev/null +++ b/conf/slurm.config @@ -0,0 +1,25 @@ + +executor { + + $slurm { + queue = 'norm' + queueSize = 200 + pollInterval = '2 min' + queueStatInterval = '5 min' + submitRateLimit = '6/1min' + retry.maxAttempts = 1 + } +} + +process { + executor = 'slurm' + maxRetries = 1 + + clusterOptions = ' --gres=lscratch:200 ' + + stageInMode = 'symlink' + stageOutMode = 'rsync' + + // for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps + cache = 'lenient' +} diff --git a/nextflow.config b/nextflow.config index b58d214..d076bf4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,13 +1,5 @@ -manifest { - name = "CCBR/LOGAN" - author = "CCR Collaborative Bioinformatics Resource" - homePage = "https://github.com/CCBR/LOGAN" - description = "one-line description of LOGAN goes here" - mainScript = "main.nf" -} - -params { +params { // TODO create a separate genome config, with genome index dir that can change depending on platform. see https://github.com/CCBR/CHAMPAGNE/blob/main/conf/genomes.config genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" // file(params.genome) genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" // @@ -63,8 +55,35 @@ params { file_input=null } +includeConfig 'conf/base.config' + profiles { - local { + + docker { + docker.enabled = true + // Avoid this error: + // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. 
+ // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 + // once this is established and works well, nextflow might implement this behavior as new default. + docker.runOptions = '-u \$(id -u):\$(id -g)' + } + biowulf { + includeConfig 'conf/biowulf.config' + } + frce { + includeConfig 'conf/frce.config' + } + interactive { + includeConfig 'conf/interactive.config' + } + slurm { + includeConfig 'conf/slurm.config' + } + ci_stub { + includeConfig 'conf/ci_stub.config' + } + + local { // TODO move all containers to conf/containers.config process { executor = 'local' withName:fc_lane { @@ -113,7 +132,8 @@ profiles { autoMounts = true cacheDir = "$PWD/singularity" envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID' - runOptions = '-B /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/,/data/nousomedr/,/data/CCBR/projects/,/vf/users/,/gpfs/,/fdb' + // TODO refactor to no longer need bind mounts. These paths also only work on biowulf + runOptions = '-B /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/,/data/nousomedr/,/data/CCBR/projects/,/vf/users/,/gpfs/,/fdb' } } @@ -127,7 +147,7 @@ profiles { } } - biowulf { + biowulf_DEPRECATED { // TODO switch all cpus/time/memory definitions to use labels in conf/base.config process { executor = 'slurm' queue = 'norm' @@ -308,16 +328,77 @@ profiles { } } - //Container options - singularity { - enabled = true - autoMounts = true - cacheDir = "$PWD/singularity" - envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID' - runOptions = '-B /gs10,/gs11,/gs12,/gs8,/gs9,/spin1,/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' - } } +} + +includeConfig 'conf/genomes.config' +includeConfig 'conf/containers.config' + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` 
that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" +} + +// Capture exit codes from upstream processes when piping +process.shell = ['/bin/bash', '-euo', 'pipefail'] + +def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') +dag { + enabled = true + overwrite = true + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" +} +report { + enabled = true + overwrite = true + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" +} + +includeConfig 'conf/modules.config' +manifest { + name = "CCBR/LOGAN" + author = "CCR Collaborative Bioinformatics Resource" + homePage = "https://github.com/CCBR/LOGAN" + description = "whoLe genOme-sequencinG Analysis pipeliNe" + mainScript = "main.nf" } +// Function to ensure that resource requirements don't go beyond +// a maximum limit +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } + } +} diff --git a/pyproject.toml b/pyproject.toml index 9e7117c..8f9c593 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = [ - "setuptools >= 48", + "setuptools >= 62.3.0", "wheel >= 0.29.0", ] build-backend = 'setuptools.build_meta' @@ -8,9 +8,10 @@ build-backend = 'setuptools.build_meta' [project] name = 'LOGAN' dynamic = ['version','readme'] -description = "TODO insert description of LOGAN here" +description = "whoLe genOme-sequencinG Analysis pipeliNe" keywords = ["bioinformatics", "nextflow"] authors = [ + {name = 'Darryl Nousome', email = 'darryl.nousome@nih.gov'}, {name = "Kelly Sovacool", email = "kelly.sovacool@nih.gov"}, {name = "Vishal Koparde", email = "vishal.koparde@nih.gov"}, ] @@ -31,18 +32,17 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Topic :: Scientific/Engineering :: Bio-Informatics", ] -requires-python = ">=3.7" +requires-python = ">=3.8" dependencies = [ - "nextflow >= 0.4.0", "pyyaml >= 6.0", "Click >= 8.1.3", + "cffconvert" ] [project.optional-dependencies] dev = [ "black >= 22.0.0", - "pre-commit", - "cffconvert" + "pre-commit" ] test = [ "pytest" @@ -55,14 +55,14 @@ logan = "logan.src.__main__:main" Homepage = "https://ccbr.github.io/LOGAN" Documentation = "https://ccbr.github.io/LOGAN" Repository = "https://github.com/CCBR/LOGAN" -Changelog = "https://github.com/CCBR/LOGAN/blob/main/docs/CHANGELOG.md" +Changelog = "https://github.com/CCBR/LOGAN/blob/main/CHANGELOG.md" [tool.setuptools.package-dir] logan = "." 
[tool.setuptools.package-data] -"*" = ["CITATION.cff", "LICENSE", "VERSION", "main.nf", "nextflow.conf", "bin/", "conf/", "modules/*/*", "submodules/*/*"] +"*" = ["CITATION.cff", "LICENSE", "VERSION", "main.nf", "nextflow.config", "assets/**", "bin/**", "conf/**", "lib/**", "modules/**", "subworkflows/**", "workflows/**", "tests/**"] [tool.setuptools.dynamic] -version = {file = "src/VERSION"} -readme = {file = "README.md"} \ No newline at end of file +version = {file = "VERSION"} +readme = {file = "README.md"} diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/__main__.py b/src/__main__.py index 3110ac6..0a70824 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -1,5 +1,5 @@ """ -Entrypoint for pipeline CLI +Entrypoint for LOGAN CLI Check out the wiki for a detailed look at customizing this file: https://github.com/beardymcjohnface/Snaketool/wiki/Customising-your-Snaketool @@ -20,21 +20,6 @@ def common_options(func): """Common options decorator for use with click commands.""" options = [ - click.option( - "--configfile", - default="nextflow.config", - help="Custom config file", - show_default=True, - ), - click.option( - "--paramsfile", default=None, help="Custom params file", show_default=True - ), - click.option( # when threads=None, uses max available - "--threads", - help="Number of threads to use", - default=None, - show_default=True, - ), click.argument("nextflow_args", nargs=-1), ] for option in reversed(options): @@ -46,8 +31,9 @@ def common_options(func): cls=OrderedCommands, context_settings=dict(help_option_names=["-h", "--help"]) ) @click.version_option(get_version(), "-v", "--version", is_flag=True) +@click.option("--citation", is_flag=True, callback=print_citation, expose_value=False, help="Print the citation in bibtex format and exit.") def cli(): - """LOGAN description TODO + """whoLe genOme-sequencinG Analysis pipeliNe For more options, run: logan [command] --help""" @@ -56,15 +42,17 @@ 
def cli(): help_msg_extra = """ \b -CLUSTER EXECUTION: -logan run ... -profile [profile],[profile],... -For information on Nextflow config and profiles see: -https://www.nextflow.io/docs/latest/config.html#config-profiles -\b -RUN EXAMPLES: -Use singularity: logan run ... -profile singularity -Specify threads: logan run ... --threads [threads] -Add NextFlow args: logan run ... -work-dir workDir -with-docker +EXAMPLES: +Execute with slurm: + logan run ... --mode slurm +Preview the processes that will run: + logan run ... --mode local -preview +Add nextflow args (anything supported by `nextflow run`): + logan run ... -work-dir path/to/workDir +Run with a specific installation of logan: + logan run --main path/to/logan/main.nf ... +Run with a specific tag, branch, or commit from GitHub: + logan run --main CCBR/LOGAN -r v0.1.0 ... """ @@ -74,43 +62,52 @@ def cli(): help_option_names=["-h", "--help"], ignore_unknown_options=True ), ) +@click.option( + "--main", + "main_path", + help="Path to the logan main.nf file or the GitHub repo (CCBR/LOGAN). Defaults to the version installed in the $PATH.", + type=str, + default=nek_base(os.path.join("main.nf")), + show_default=True, +) +@click.option( + "--mode", + "_mode", + help="Run mode (slurm, local)", + type=str, + default="local", + show_default=True, +) @common_options -def run(**kwargs): +def run(main_path, _mode, **kwargs): """Run the workflow""" - # optional: merge config from CLI with nf config - # run! 
+ if ( # this is the only acceptable github repo option for logan + main_path != "CCBR/LOGAN" + ): + # make sure the path exists + if not os.path.exists(main_path): + raise FileNotFoundError( + f"Path to the logan main.nf file not found: {main_path}" + ) + run_nextflow( - nextfile_path=nek_base(os.path.join("main.nf")), # Full path to Nextflow file + nextfile_path=main_path, + mode=_mode, **kwargs, ) @click.command() -@click.option( - "--configfile", - default="nextflow.config", - help="Copy template config to file", - show_default=True, -) -def config(configfile, **kwargs): - """Copy the system default config files""" - for filename in ("nextflow.config", "params.yml"): - if os.path.exists(nek_base(filename)): - copy_config( - local_config=configfile, - system_config=nek_base(filename), - ) - - -@click.command() -def citation(**kwargs): - """Print the citation""" - print_citation() +def init(**kwargs): + """Initialize the working directory by copying the system default config files""" + paths = ("nextflow.config", "conf/", "assets/") + copy_config(paths) + if not os.path.exists("log/"): + os.mkdir("log/") cli.add_command(run) -cli.add_command(config) -# cli.add_command(citation) # TODO uncomment if logan is published in a journal or Zenodo +cli.add_command(init) def main(): @@ -118,4 +115,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/util.py b/src/util.py new file mode 100644 index 0000000..5f802b1 --- /dev/null +++ b/src/util.py @@ -0,0 +1,206 @@ +from cffconvert.cli.create_citation import create_citation +from cffconvert.cli.validate_or_write_output import validate_or_write_output +from time import localtime, strftime + +import click +import collections.abc +import os +import pprint +import shutil +import stat +import subprocess +import sys +import yaml + + +def nek_base(rel_path): + basedir = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0] + return os.path.join(basedir, rel_path) + + 
+def get_version(): + with open(nek_base("VERSION"), "r") as f: + version = f.readline() + return version + + +def print_citation(context, param, value): + citation = create_citation(nek_base("CITATION.cff"), None) + # click.echo(citation._implementation.cffobj['message']) + validate_or_write_output(None, "bibtex", False, citation) + context.exit() + + +def msg(err_message): + tstamp = strftime("[%Y:%m:%d %H:%M:%S] ", localtime()) + click.echo(tstamp + err_message, err=True) + + +def msg_box(splash, errmsg=None): + msg("-" * (len(splash) + 4)) + msg(f"| {splash} |") + msg(("-" * (len(splash) + 4))) + if errmsg: + click.echo("\n" + errmsg, err=True) + + +def append_config_block(nf_config="nextflow.config", scope=None, **kwargs): + with open(nf_config, "a") as f: + f.write(scope.rstrip() + "{" + "\n") + for k in kwargs: + f.write(f"{k} = {kwargs[k]}\n") + f.write("}\n") + + +def copy_config(config_paths, overwrite=True): + msg(f"Copying default config files to current working directory") + for local_config in config_paths: + system_config = nek_base(local_config) + if os.path.isfile(system_config): + shutil.copyfile(system_config, local_config) + elif os.path.isdir(system_config): + shutil.copytree(system_config, local_config, dirs_exist_ok=overwrite) + else: + raise FileNotFoundError(f"Cannot copy {system_config} to {local_config}") + + +def read_config(file): + with open(file, "r") as stream: + _config = yaml.safe_load(stream) + return _config + + +def update_config(config, overwrite_config): + def _update(d, u): + for key, value in u.items(): + if isinstance(value, collections.abc.Mapping): + d[key] = _update(d.get(key, {}), value) + else: + d[key] = value + return d + + _update(config, overwrite_config) + + +def write_config(_config, file): + msg(f"Writing runtime config file to {file}") + with open(file, "w") as stream: + yaml.dump(_config, stream) + + +def chmod_bins_exec(): + """Ensure that all files in bin/ are executable. 
+ + It appears that setuptools strips executable permissions from package_data files, + yet post-install scripts are not possible with the pyproject.toml format. + So this function will run when `run()` is called. + Without this hack, nextflow processes that call scripts in bin/ fail. + + https://stackoverflow.com/questions/18409296/package-data-files-with-executable-permissions + https://github.com/pypa/setuptools/issues/2041 + https://stackoverflow.com/questions/76320274/post-install-script-for-pyproject-toml-projects + """ + bin_dir = nek_base("bin/") + for filename in os.listdir(bin_dir): + bin_path = os.path.join(bin_dir, filename) + if os.path.isfile(bin_path): + file_stat = os.stat(bin_path) + # below is equivalent to `chmod +x` + os.chmod( + bin_path, file_stat.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH + ) + + +class OrderedCommands(click.Group): + """Preserve the order of subcommands when printing --help""" + + def list_commands(self, ctx: click.Context): + return list(self.commands) + + +def scontrol_show(): + scontrol_dict = dict() + scontrol_out = subprocess.run( + "scontrol show config", shell=True, capture_output=True, text=True + ).stdout + if len(scontrol_out) > 0: + for line in scontrol_out.split("\n"): + line_split = line.split("=") + if len(line_split) > 1: + scontrol_dict[line_split[0].strip()] = line_split[1].strip() + return scontrol_dict + + +hpc_options = { + "biowulf": {"profile": "biowulf", "slurm": "assets/slurm_header_biowulf.sh"}, + "fnlcr": { + "profile": "frce", + "slurm": "assets/slurm_header_frce.sh", + }, +} + + +def get_hpc(): + scontrol_out = scontrol_show() + if "ClusterName" in scontrol_out.keys(): + hpc = scontrol_out["ClusterName"] + else: + hpc = None + return hpc + + +def run_nextflow( + nextfile_path=None, + merge_config=None, + threads=None, + nextflow_args=None, + mode="local", +): + """Run a Nextflow workflow""" + nextflow_command = ["nextflow", "run", nextfile_path] + + hpc = get_hpc() + if mode == "slurm" 
and not hpc: + raise ValueError("mode is 'slurm' but no HPC environment was detected") + # add any additional Nextflow commands + args_dict = dict() + prev_arg = "" + for arg in nextflow_args: + if arg.startswith("-"): + args_dict[arg] = "" + elif prev_arg.startswith("-"): + args_dict[prev_arg] = arg + prev_arg = arg + # make sure profile matches biowulf or frce + profiles = ( + set(args_dict["-profile"].split(",")) + if "-profile" in args_dict.keys() + else set() + ) + if mode == "slurm": + profiles.add("slurm") + if hpc: + profiles.add(hpc_options[hpc]["profile"]) + args_dict["-profile"] = ",".join(sorted(profiles)) + nextflow_command += list(f"{k} {v}" for k, v in args_dict.items()) + + # Print nextflow command + nextflow_command = " ".join(str(nf) for nf in nextflow_command) + msg_box("Nextflow command", errmsg=nextflow_command) + + if mode == "slurm": + slurm_filename = "submit_slurm.sh" + with open(slurm_filename, "w") as sbatch_file: + with open(nek_base(hpc_options[hpc]["slurm"]), "r") as template: + sbatch_file.writelines(template.readlines()) + sbatch_file.write(nextflow_command) + run_command = f"sbatch {slurm_filename}" + msg_box("Slurm batch job", errmsg=run_command) + elif mode == "local": + if hpc: + nextflow_command = f'bash -c "module load nextflow && {nextflow_command}"' + run_command = nextflow_command + else: + raise ValueError(f"mode {mode} not recognized") + # Run Nextflow!!! + subprocess.run(run_command, shell=True, check=True)