diff --git a/.gitignore b/.gitignore index e921edd..4c6d76b 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,23 @@ _build # VSCode .vscode + +# Cromwell +cromwell-executions +cromwell-workflow-logs + +# miniwdl +*_*_hello_world +_LAST + +# nextflow +work +.nextflow +.nextflow.log* + +# Snakemake +.snakemake +*.snakemake.log + +# workflows +hello_world.txt diff --git a/nextflow/main.nf b/nextflow/main.nf new file mode 100644 index 0000000..ac89e5b --- /dev/null +++ b/nextflow/main.nf @@ -0,0 +1,25 @@ +#!/usr/bin/env nextflow +params.input_file = '../scripts/hello_world.py' +params.outdir = '.' + +process HelloWorld { + publishDir "$params.outdir" + + container 'ghcr.io/precimed/container_template' + + input: + path input_file + + output: + path "hello_world.txt" + + script: + """ + python3 ${input_file} > hello_world.txt + """ +} + +// Define workflow +workflow { + HelloWorld(file(params.input_file)) +} \ No newline at end of file diff --git a/nextflow/nextflow b/nextflow/nextflow new file mode 100755 index 0000000..42429af --- /dev/null +++ b/nextflow/nextflow @@ -0,0 +1,464 @@ +#!/usr/bin/env bash +# +# Copyright 2013-2024, Seqera Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +[[ "$NXF_DEBUG" == 'x' ]] && set -x +NXF_VER=${NXF_VER:-'24.04.4'} +NXF_ORG=${NXF_ORG:-'nextflow-io'} +NXF_HOME=${NXF_HOME:-$HOME/.nextflow} +NXF_PROT=${NXF_PROT:-'https'} +NXF_BASE=${NXF_BASE:-$NXF_PROT://www.nextflow.io/releases} +NXF_TEMP=${NXF_TEMP:-$TMPDIR} +NXF_DIST=${NXF_DIST:-$NXF_HOME/framework} +NXF_CLI="$0 $@" +NXF_CLI_OPTS=${NXF_CLI_OPTS:-} +NXF_REMOTE_DEBUG_PORT=${NXF_REMOTE_DEBUG_PORT:-5005} + +export NXF_CLI +export NXF_ORG +export NXF_HOME + +if [[ $TERM && $TERM != 'dumb' ]]; then +if command -v tput &>/dev/null; then +GREEN=$(tput setaf 2; tput bold) +YELLOW=$(tput setaf 3) +RED=$(tput setaf 1) +NORMAL=$(tput sgr0) +fi +fi + +function echo_red() { + >&2 echo -e "$RED$*$NORMAL" +} + +function echo_green() { + echo -e "$GREEN$*$NORMAL" +} + +function echo_yellow() { + >&2 echo -e "$YELLOW$*$NORMAL" +} + +function die() { + echo_red "$*" + exit 1 +} + +function get_abs_filename() { + echo "$(cd "$(dirname "$1")" && pwd)/$(basename "$1")" +} + +function get() { + if command -v curl &>/dev/null; then + GET="curl -fsSL '$1' -o '$2'" + elif command -v wget &>/dev/null; then + GET="wget '$1' -O '$2' >/dev/null 2>&1" + else + echo_red "ERROR: Cannot find 'curl' nor 'wget' utility -- please install one of them" + exit 1 + fi + + printf "Downloading nextflow dependencies. It may require a few seconds, please wait .. " + eval $GET; status=$? 
+ printf "\r\033[K" + if [ $status -ne 0 ]; then + echo_red "ERROR: Cannot download nextflow required file -- make sure you can connect to the internet" + echo "" + echo "Alternatively you can try to download this file:" + echo " $1" + echo "" + echo "and save it as:" + echo " ${3:-$2}" + echo "" + exit 1 + fi +} + +function get_ver() { + if command -v curl &>/dev/null; then + curl -fsSL "$1" + elif command -v wget &>/dev/null; then + wget "$1" >/dev/null 2>&1 + else + echo_red "ERROR: Cannot find 'curl' nor 'wget' utility -- please install one of them" + exit 1 + fi +} + +function make_temp() { + local base=${NXF_TEMP:=$PWD} + if [ "$(uname)" = 'Darwin' ]; then mktemp "${base}/nxf-tmp.XXXXXX" || exit $? + else mktemp -t nxf-tmp.XXXXXX -p "${base}" || exit $? + fi +} + +function resolve_link() { + [[ ! -f $1 ]] && exit 1 + if command -v realpath &>/dev/null; then + realpath "$1" + elif command -v readlink &>/dev/null; then + local target="$1" + cd "$(dirname "$target")"; target="$(basename "$target")" + while [ -L "$target" ]; do + target="$(readlink "$target")" + cd "$(dirname "$target")"; target="$(basename "$target")" + done + echo "$(cd "$(dirname "$target")"; pwd -P)/$target" + else + echo_yellow "WARN: Neither \`realpath\` nor \`readlink\` command can be found" + exit 1 + fi +} + +function current_ver() { + [[ $NXF_EDGE == 1 || $NXF_VER == *"-edge" ]] && printf 'edge' || printf 'latest' +} + +function install() { + local tmpfile=$(make_temp) + local version=$(set +u; [[ $NXF_VER ]] && printf "v$NXF_VER" || current_ver) + local action="a=${2:-default}" + get "$NXF_BASE/$version/nextflow?$action" "$tmpfile" "$1" || exit $? + mv "$tmpfile" "$1" || exit $? + chmod +x "$1" || exit $? + bash "$1" -download || exit $? + echo '' + echo -e $'Nextflow installation completed. Please note:' + echo -e $'- the executable file `nextflow` has been created in the folder:' $(dirname $1) + if [[ ! 
"$PATH" =~ (^|:)"$(dirname $1)"(:|$) ]]; then + echo -e $'- you may complete the installation by moving it to a directory in your $PATH' + fi + echo '' +} + +function check_latest() { + [[ $cmd != run ]] && return 0 + [[ $NXF_OFFLINE == true || $NXF_DISABLE_CHECK_LATEST == true ]] && return 0 + local latest=$(get_ver "$NXF_BASE/$(current_ver)/version?current=$NXF_VER") + if [[ -n "$latest" && "$latest" != $NXF_VER ]]; then + echo_yellow "Nextflow $latest is available - Please consider updating your version to it" + fi +} + +function launch_nextflow() { + # the launch command line + local cmdline=() + # remove leading and trailing double-quotes + for x in "${launcher[@]}"; do + x="${x%\"}" + x="${x#\"}" + cmdline+=("$x") + done + + if [[ "$bg" ]]; then + local pid_file="${NXF_PID_FILE:-.nextflow.pid}" + cmdline+=("${args[@]}") + exec "${cmdline[@]}" & + disown + echo $! > "$pid_file" + exit 0 + fi + + cmdline+=("${args[@]}") + exec "${cmdline[@]}" + exit 1 +} + +# check self-install +if [ "$0" = "bash" ] || [[ "$0" =~ .*/bash ]]; then + if [ -d nextflow ]; then + echo 'Please note:' + echo "- The install procedure needs to create a file named 'nextflow' in this folder, but a directory with this name already exists." + echo "- Please renamed/delete that directory, or execute the Nextflow install procedure in another folder." + echo '' + exit 1 + fi + install "$PWD/nextflow" install + exit 0 +fi + +# clean up env +# see https://github.com/nextflow-io/nextflow/issues/1716 +unset JAVA_TOOL_OPTIONS + +# parse the command line +bg='' +declare -a jvmopts=() +declare -a args=("$@") +declare -a commands=(clone config drop help history info ls pull run view node console kuberun) +# $NXF_CLI_OPTS allow to pass arbitrary cli opts via the environment +# note: do not wrap with quotes because the variable can be used to specify more than on option separate by blanks +[ "$NXF_CLI_OPTS" ] && args+=($NXF_CLI_OPTS) + +cmd='' +while [[ $# != 0 ]]; do + case $1 in + -D*) + if [[ ! 
"$cmd" ]]; then + jvmopts+=("$1") + fi + ;; + -bg) + bg=1 + ;; + -remote-debug) + echo_yellow "Enabling script debugging - continue the execution launching the remote VM debugger in your favourite IDE using port $NXF_REMOTE_DEBUG_PORT" + remote_debug=1 + ;; + -download) + if [[ ! "$cmd" ]]; then + rm -rf "$NXF_DIST/$NXF_VER" || exit $? + bash "$0" -version || exit $? + exit 0 + fi + ;; + -self-update|self-update) + if [[ ! "$cmd" ]]; then + [[ -z $NXF_EDGE && $NXF_VER = *-edge ]] && NXF_EDGE=1 + unset NXF_VER + install "$0" update + exit 0 + fi + ;; + *) + [[ $1 && $1 != -* && ! "$cmd" && ${commands[*]} =~ $1 ]] && cmd=$1 + ;; + esac + shift +done + +CAPSULE_LOG=${CAPSULE_LOG:=''} +CAPSULE_RESET=${CAPSULE_RESET:=''} +CAPSULE_CACHE_DIR=${CAPSULE_CACHE_DIR:="$NXF_HOME/capsule"} + +NXF_PACK=one +NXF_MODE=${NXF_MODE:-''} +NXF_JAR=${NXF_JAR:-nextflow-$NXF_VER-$NXF_PACK.jar} +NXF_BIN=${NXF_BIN:-$NXF_DIST/$NXF_VER/$NXF_JAR} +NXF_PATH=$(dirname "$NXF_BIN") +NXF_URL=${NXF_URL:-$NXF_BASE/v$NXF_VER/$NXF_JAR} +NXF_GRAB=${NXF_GRAB:-''} +NXF_CLASSPATH=${NXF_CLASSPATH:-''} +NXF_HOST=${HOSTNAME:-localhost} +[[ $NXF_LAUNCHER ]] || NXF_LAUNCHER=${NXF_HOME}/tmp/launcher/nextflow-${NXF_PACK}_${NXF_VER}/${NXF_HOST} + +# Determine the path to this file +if [[ $NXF_PACK = all ]]; then + NXF_BIN=$(which "$0" 2>/dev/null) + [ $? -gt 0 -a -f "$0" ] && NXF_BIN="./$0" +fi + +# use nextflow custom java home path +if [[ "$NXF_JAVA_HOME" ]]; then + JAVA_HOME="$NXF_JAVA_HOME" + unset JAVA_CMD +fi +# Determine the Java command to use to start the JVM. +if [ ! 
-x "$JAVA_CMD" ] ; then + if [ -d "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVA_CMD="$JAVA_HOME/jre/sh/java" + else + JAVA_CMD="$JAVA_HOME/bin/java" + fi + elif [ -x /usr/libexec/java_home ]; then + JAVA_CMD="$(/usr/libexec/java_home -v 11+ 2>/dev/null)/bin/java" || JAVA_CMD=java + else + JAVA_CMD="$(which java)" || JAVA_CMD=java + fi +fi + +# Retrieve the java version from a NF local file +JAVA_KEY="$NXF_HOME/tmp/ver/$(resolve_link "$JAVA_CMD" | sed 's@/@.@g')" +if [ -f "$JAVA_KEY" ]; then + JAVA_VER="$(cat "$JAVA_KEY")" +else + JAVA_VER="$("$JAVA_CMD" $NXF_OPTS -version 2>&1)" + if [ $? -ne 0 ]; then + getstarted_web="https://www.nextflow.io/docs/latest/getstarted.html" + echo_red "${JAVA_VER:-Failed to launch the Java virtual machine}" + echo_red "NOTE: Nextflow needs a Java virtual machine to run. To this end: + - make sure a \`java\` command can be found; or + - manually define the variables JAVA_HOME to point to an existing installation; or + - install a Java virtual machine, for instance through https://sdkman.io (read the docs); + - for more details please refer to the Nextflow Get Started page at http://docs.nextflow.io." + echo_yellow "NOTE: Nextflow is trying to use the Java VM defined by the following environment variables:\n JAVA_CMD: $JAVA_CMD\n NXF_OPTS: $NXF_OPTS\n" + exit 1 + fi + JAVA_VER=$(echo "$JAVA_VER" | awk '/version/ {gsub(/"/, "", $3); print $3}') + # check NF version + if [[ ! $NXF_VER =~ ([0-9]+)\.([0-9]+)\.([0-9].*) ]]; then + echo_red "Not a valid Nextflow version: $NXF_VER" + exit 1 + fi + major=${BASH_REMATCH[1]} + minor=${BASH_REMATCH[2]} + # legacy version - Java 7/8 only + if [ $major -eq 0 ] && [ $minor -lt 26 ]; then + version_check="^(1.7|1.8)" + version_message="Java 7 or 8" + else + version_check="^(1.8|9|10|11|12|13|14|15|16|17|18|19|20|21|22)" + version_message="Java 8 or later (up to 21)" + fi + if [[ ! 
$JAVA_VER =~ $version_check ]]; then + echo_red "ERROR: Cannot find Java or it's a wrong version -- please make sure that $version_message is installed" + if [[ "$NXF_JAVA_HOME" ]]; then + echo_yellow "NOTE: Nextflow is trying to use the Java VM defined by the following environment variables:\n JAVA_CMD: $JAVA_CMD\n NXF_JAVA_HOME: $NXF_JAVA_HOME\n" + else + echo_yellow "NOTE: Nextflow is trying to use the Java VM defined by the following environment variables:\n JAVA_CMD: $JAVA_CMD\n JAVA_HOME: $JAVA_HOME\n" + fi + exit 1 + fi + if [[ ! $JAVA_VER =~ ^(11|12|13|14|15|16|17|18|19|20|21|22) ]]; then + echo_yellow "NOTE: Nextflow is not tested with Java $JAVA_VER -- It's recommended the use of version 11 up to 22\n" + fi + mkdir -p "$(dirname "$JAVA_KEY")" + [[ -f $JAVA_VER ]] && echo $JAVA_VER > "$JAVA_KEY" +fi + +# Verify nextflow jar is available +if [ ! -f "$NXF_BIN" ]; then + [ -f "$NXF_PATH" ] && rm "$NXF_PATH" + mkdir -p "$NXF_PATH" || exit $? + tmpfile=$(make_temp) + get "$NXF_URL" "$tmpfile" "$NXF_BIN" + mv "$tmpfile" "$NXF_BIN" +fi + +COLUMNS=${COLUMNS:-`tty -s && tput cols 2>/dev/null || true`} +declare -a JAVA_OPTS=() +JAVA_OPTS+=(-Dfile.encoding=UTF-8 -Dcapsule.trampoline -Dcapsule.java.cmd="$JAVA_CMD" -Dcom.sun.security.enableAIAcaIssuers=true) +if [[ $cmd == console ]]; then bg=1; +else JAVA_OPTS+=(-Djava.awt.headless=true) +fi + +[[ "$JAVA_VER" =~ ^(21|22) ]] && [[ ! 
"$NXF_ENABLE_VIRTUAL_THREADS" ]] && NXF_ENABLE_VIRTUAL_THREADS=true +[[ "$JAVA_HOME" ]] && JAVA_OPTS+=(-Dcapsule.java.home="$JAVA_HOME") +[[ "$CAPSULE_LOG" ]] && JAVA_OPTS+=(-Dcapsule.log=$CAPSULE_LOG) +[[ "$CAPSULE_RESET" ]] && JAVA_OPTS+=(-Dcapsule.reset=true) +[[ "$cmd" != "run" && "$cmd" != "node" ]] && JAVA_OPTS+=(-XX:+TieredCompilation -XX:TieredStopAtLevel=1) +[[ "$NXF_OPTS" ]] && JAVA_OPTS+=($NXF_OPTS) +[[ "$NXF_CLASSPATH" ]] && export NXF_CLASSPATH +[[ "$NXF_GRAB" ]] && export NXF_GRAB +[[ "$COLUMNS" ]] && export COLUMNS +[[ "$NXF_TEMP" ]] && JAVA_OPTS+=(-Djava.io.tmpdir="$NXF_TEMP") +[[ "${jvmopts[@]}" ]] && JAVA_OPTS+=("${jvmopts[@]}") +export JAVA_CMD +export CAPSULE_CACHE_DIR +export NXF_PLUGINS_DIR +export NXF_PLUGINS_MODE +export NXF_PLUGINS_DEFAULT +export NXF_PACK +export NXF_ENABLE_VIRTUAL_THREADS + +# lookup the a `md5` command +if hash md5sum 2>/dev/null; then MD5=md5sum; +elif hash gmd5sum 2>/dev/null; then MD5=gmd5sum; +elif hash md5 2>/dev/null; then MD5=md5; +else MD5='' +fi + +# when no md5 command is available fallback on default execution +if [ ! "$MD5" ] || [ "$CAPSULE_RESET" ]; then + launcher=($("$JAVA_CMD" "${JAVA_OPTS[@]}" -jar "$NXF_BIN")) + launch_nextflow + exit 1 +fi + +# creates a md5 unique for the given variables +env_md5() { +cat </dev/null; then + STR='' + for x in "${launcher[@]}"; do + [[ "$x" != "\"-Duser.dir=$PWD\"" ]] && [[ ! "$x" == *"-agentlib:jdwp"* ]] && STR+=$(printf '%q ' "$x") + done + printf "$STR">"$LAUNCH_FILE" + else + echo_yellow "Warning: Couldn't create cached classpath folder: $NXF_LAUNCHER -- Maybe NXF_HOME is not writable?" 
+ fi + +fi + +# check for latest version +check_latest +# finally run it +launch_nextflow diff --git a/nextflow/nextflow.config b/nextflow/nextflow.config new file mode 100644 index 0000000..e69de29 diff --git a/scripts/PROJECT_README.md b/scripts/PROJECT_README.md index 5fb0a58..6bf14a4 100644 --- a/scripts/PROJECT_README.md +++ b/scripts/PROJECT_README.md @@ -183,6 +183,79 @@ sbatch singularity_slurm_job.sh # submit job ``` The output of the job will be written to the text files `container_template.out` (output) and `container_template.err` (errors). + +## Workflows + +The following sections describe how to run the provided workflows using different workflow management systems. +The Docker container is assumed to be available and tagged as `ghcr.io/precimed/container_template:latest`: + +```bash +# (optional) pull the container and tag it as "latest" +docker pull --platform=linux/amd64 ghcr.io/precimed/container_template:0.1.0rc8 +docker image tag ghcr.io/precimed/container_template:0.1.0rc8 ghcr.io/precimed/container_template:latest +``` + +### WDL + +The [Workflow Description Language](https://openwdl.org) (WDL) is a language for describing workflows in a portable and reproducible way. + +We have included a basic WDL file and a JSON file defining its inputs in the `/wdl` directory. 
+To run the pipeline, you will need to install [miniwdl](https://github.com/chanzuckerberg/miniwdl?tab=readme-ov-file#miniwdl) or some other execution engine like [Cromwell](https://cromwell.readthedocs.io/en/stable/), and run the following commands: + +```bash +cd /wdl +# (optional) create a new conda environment and install miniwdl +conda create -n miniwdl -c conda-forge pip -y +conda activate miniwdl +pip install miniwdl + +# run the WDL file +miniwdl run hello_world.wdl -i inputs.json +``` + +To check the output, see the file + +``` +/wdl/_LAST/out/output_file/hello_world.txt +``` +### Nextflow + +[Nextflow](https://www.nextflow.io) is a workflow manager that enables the development of portable and reproducible workflows. + +We have provided a basic Nextflow script in the `/nextflow` directory. +To execute the workflow, you will need to (optionally) install Nextflow and run the following commands: + +```bash +cd /nextflow +# (optional) download and install nextflow executable in the current directory +curl -s https://get.nextflow.io | bash + +# run the Nextflow workflow +./nextflow run main.nf +``` + +The output will be written to `hello_world.txt` in the directory set by `params.outdir` in the Nextflow script (the current directory by default). + +### Snakemake + +[Snakemake](https://snakemake.github.io) is a workflow management system that aims to reduce the complexity of creating workflows by providing a fast and comfortable way to define them. + +We have provided a basic Snakefile in the `/snakemake` directory. +To run the pipeline, you will need to install Snakemake and run the following commands: + +```bash +# change to the Snakemake workflow directory +cd /snakemake +# (optional) create a new conda environment and install Snakemake +conda create -c conda-forge -c bioconda -n snakemake snakemake -y +conda activate snakemake + +# run Snakemake +snakemake +``` + +The output will be written to the `output_file` defined in the Snakefile. 
+ ## Feedback If you face any issues, or if you need additional software, please let us know by creating a new [issue](https://github.com/precimed/container_template/issues/new). diff --git a/snakemake/Snakefile b/snakemake/Snakefile new file mode 100644 index 0000000..75fe8bc --- /dev/null +++ b/snakemake/Snakefile @@ -0,0 +1,11 @@ +rule hello_world: + input: + input_file="../scripts/hello_world.py" + output: + output_file="hello_world.txt" + singularity: + "docker://ghcr.io/precimed/container_template" + shell: + """ + python3 {input.input_file} > {output.output_file} + """ diff --git a/version/version.py b/version/version.py index 4d2bcfb..b49f44b 100644 --- a/version/version.py +++ b/version/version.py @@ -5,7 +5,7 @@ _PATCH = "0" # This is mainly for nightly builds which have the suffix ".dev$DATE". See # https://semver.org/#is-v123-a-semantic-version for the semantics. -_SUFFIX = "rc6" +_SUFFIX = "" VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR) VERSION = "{0}.{1}.{2}{3}".format(_MAJOR, _MINOR, _PATCH, _SUFFIX) \ No newline at end of file diff --git a/wdl/hello_world.wdl b/wdl/hello_world.wdl new file mode 100644 index 0000000..f348740 --- /dev/null +++ b/wdl/hello_world.wdl @@ -0,0 +1,20 @@ +version 1.0 + +task hello_world { + input { + File input_file + String output_file_name + } + + command { + python3 ~{input_file} > ~{output_file_name} + } + + output { + File output_file = output_file_name + } + + runtime { + docker: "ghcr.io/precimed/container_template" + } +} \ No newline at end of file diff --git a/wdl/inputs.json b/wdl/inputs.json new file mode 100644 index 0000000..870de6c --- /dev/null +++ b/wdl/inputs.json @@ -0,0 +1,5 @@ +{ + "input_file": "../scripts/hello_world.py", + "output_file_name": "hello_world.txt" +} +