diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index f224cb6..3717137 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,7 +7,7 @@ - Check the correct box. Does this PR contain: - [ ] Breaking changes - - [ ] New functionality (new method, new metric, ...) + - [ ] New functionality - [ ] Major changes - [ ] Minor changes - [ ] Bug fixes diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 9096371..0000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,6 +0,0 @@ -version: 2 -updates: - - package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: "daily" \ No newline at end of file diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml deleted file mode 100644 index 58a6b45..0000000 --- a/.github/workflows/build.yaml +++ /dev/null @@ -1,108 +0,0 @@ -name: build - -on: - push: - branches: [ 'main' ] - workflow_dispatch: - inputs: - version: - description: 'Version name to use for the build. If not specified, `${BRANCH_NAME}_build` will be used.' 
- required: false - -jobs: - # phase 1 - list: - runs-on: ubuntu-latest - - outputs: - version: ${{ steps.version.outputs.version }} - component_matrix: ${{ steps.set_matrix.outputs.matrix }} - - steps: - - uses: actions/checkout@v4 - - - uses: viash-io/viash-actions/setup@v5 - - - name: Determine version tag from branch name - id: version - run: | - if [ -z "$INPUT_VERSION" ]; then - echo "version=$(echo $GITHUB_REF | sed 's/refs\/heads\/\(.*\)/\1_build/')" >> $GITHUB_OUTPUT - else - echo "version=$INPUT_VERSION" >> $GITHUB_OUTPUT - fi - - - name: Remove target folder from .gitignore - run: | - # allow publishing the target folder - sed -i '/^target.*/d' .gitignore - - - uses: viash-io/viash-actions/ns-build@v5 - with: - config_mod: .functionality.version := '${{ steps.version.outputs.version }}' - parallel: true - - - name: Deploy to target branch - uses: peaceiris/actions-gh-pages@v4 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: . - publish_branch: ${{ steps.version.outputs.version }} - - - id: ns_list - uses: viash-io/viash-actions/ns-list@v5 - with: - platform: docker - src: src - format: json - - - id: set_matrix - run: | - echo "matrix=$(jq -c '[ .[] | - { - "name": (.functionality.namespace + "/" + .functionality.name), - "dir": .info.config | capture("^(?.*\/)").dir - } - ]' ${{ steps.ns_list.outputs.output_file }} )" >> $GITHUB_OUTPUT - - # phase 2 - build: - needs: list - - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - component: ${{ fromJson(needs.list.outputs.component_matrix) }} - - steps: - # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' 
- - uses: data-intuitive/reclaim-the-bytes@v2 - - - uses: actions/checkout@v4 - - - uses: viash-io/viash-actions/setup@v5 - - - name: Build container - uses: viash-io/viash-actions/ns-build@v5 - with: - config_mod: .functionality.version := '${{ needs.list.outputs.version }}' - platform: docker - src: ${{ matrix.component.dir }} - setup: build - - - name: Login to container registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ secrets.GTHB_USER }} - password: ${{ secrets.GTHB_PAT }} - - - name: Push container - uses: viash-io/viash-actions/ns-build@v5 - with: - config_mod: .functionality.version := '${{ needs.list.outputs.version }}' - platform: docker - src: ${{ matrix.component.dir }} - setup: push \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml deleted file mode 100644 index 7bddd7e..0000000 --- a/.github/workflows/test.yaml +++ /dev/null @@ -1,106 +0,0 @@ -name: test - -on: - pull_request: - push: - branches: [ '**' ] - -jobs: - run_ci_check_job: - runs-on: ubuntu-latest - outputs: - run_ci: ${{ steps.github_cli.outputs.check }} - steps: - - name: 'Check if branch has an existing pull request and the trigger was a push' - id: github_cli - run: | - pull_request=$(gh pr list -R ${{ github.repository }} -H ${{ github.ref_name }} --json url --state open --limit 1 | jq '.[0].url') - # If the branch has a PR and this run was triggered by a push event, do not run - if [[ "$pull_request" != "null" && "$GITHUB_REF_NAME" != "main" && "${{ github.event_name == 'push' }}" == "true" && "${{ !contains(github.event.head_commit.message, 'ci force') }}" == "true" ]]; then - echo "check=false" >> $GITHUB_OUTPUT - else - echo "check=true" >> $GITHUB_OUTPUT - fi - env: - GITHUB_TOKEN: ${{ secrets.GTHB_PAT }} - - # phase 1 - list: - needs: run_ci_check_job - env: - s3_bucket: s3://openproblems-bio/public/neurips-2023-competition/workflow-resources/ - runs-on: ubuntu-latest - if: ${{ 
needs.run_ci_check_job.outputs.run_ci == 'true' }} - - outputs: - matrix: ${{ steps.set_matrix.outputs.matrix }} - cache_key: ${{ steps.cache.outputs.cache_key }} - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - uses: viash-io/viash-actions/setup@v5 - - - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5 - id: cache - with: - s3_bucket: $s3_bucket - dest_path: resources - cache_key_prefix: resources__ - - - id: ns_list - uses: viash-io/viash-actions/ns-list@v5 - with: - platform: docker - format: json - - - id: ns_list_filtered - uses: viash-io/viash-actions/project/detect-changed-components@v5 - with: - input_file: "${{ steps.ns_list.outputs.output_file }}" - - - id: set_matrix - run: | - echo "matrix=$(jq -c '[ .[] | - { - "name": (.functionality.namespace + "/" + .functionality.name), - "config": .info.config - } - ]' ${{ steps.ns_list_filtered.outputs.output_file }} )" >> $GITHUB_OUTPUT - - # phase 2 - viash_test: - needs: list - if: ${{ needs.list.outputs.matrix != '[]' && needs.list.outputs.matrix != '' }} - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - component: ${{ fromJson(needs.list.outputs.matrix) }} - - steps: - # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' 
- - uses: data-intuitive/reclaim-the-bytes@v2 - - - uses: actions/checkout@v4 - - - uses: viash-io/viash-actions/setup@v5 - - # use cache - - name: Cache resources data - uses: actions/cache@v4 - timeout-minutes: 10 - with: - path: resources - key: ${{ needs.list.outputs.cache_key }} - - - name: Run test - timeout-minutes: 30 - run: | - VIASH_TEMP=$RUNNER_TEMP/viash viash test \ - "${{ matrix.component.config }}" \ - --cpus 2 \ - --memory "5gb" diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 55db009..0000000 --- a/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -resources -work -.nextflow* -target -.idea -.vscode -.DS_Store -output -trace-* -.ipynb_checkpoints \ No newline at end of file diff --git a/_viash.yaml b/_viash.yaml deleted file mode 100644 index b2d5e50..0000000 --- a/_viash.yaml +++ /dev/null @@ -1,15 +0,0 @@ -viash_version: 0.8.6 - -source: src -target: target - -config_mods: | - .functionality.version := 'dev' - .functionality.arguments[.multiple == true].multiple_sep := ';' - .platforms[.type == 'docker'].target_registry := 'ghcr.io' - .platforms[.type == 'docker'].target_organization := 'openproblems-bio' - .platforms[.type == 'docker'].target_image_source := 'https://github.com/openproblems-bio/common-resources' - .platforms[.type == "nextflow"].directives.tag := "$id" - .platforms[.type == "nextflow"].auto.simplifyOutput := false - .platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h" } - .platforms[.type == "nextflow"].config.script := "process.errorStrategy = 'ignore'" \ No newline at end of file diff --git a/component_tests/check_method_config.py b/component_tests/check_method_config.py new file mode 100644 index 0000000..04f3962 --- /dev/null +++ b/component_tests/check_method_config.py @@ -0,0 +1,132 @@ +import 
yaml
+
+## VIASH START
+meta = {
+    "config" : "foo"
+}
+## VIASH END
+
+
+NAME_MAXLEN = 50
+
+SUMMARY_MAXLEN = 400
+
+DESCRIPTION_MAXLEN = 5000
+
+# citation keys for which search_ref_bib skips the DOI lookup
+_MISSING_DOIS = ["vandermaaten2008visualizing", "hosmer2013applied"]
+
+# nextflow resource labels; the platform check expects one label from each list
+TIME_LABELS = ["lowtime", "midtime", "hightime"]
+MEM_LABELS = ["lowmem", "midmem", "highmem"]
+CPU_LABELS = ["lowcpu", "midcpu", "highcpu"]
+
+def _load_bib():
+    """Return the text of `library.bib` found in `meta['resources_dir']`."""
+    with open(f"{meta['resources_dir']}/library.bib", "r") as file:
+        return file.read()
+
+def check_url(url):
+    """Return True when a HEAD request to `url` succeeds or is answered with HTTP 429."""
+    import requests
+    from urllib3.util.retry import Retry
+    from requests.adapters import HTTPAdapter
+
+    # configure retry strategy
+    session = requests.Session()
+    retry = Retry(connect=3, backoff_factor=0.5)
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount('http://', adapter)
+    session.mount('https://', adapter)
+
+    get = session.head(url)
+
+    # 429 rejected, too many requests: the server answered, so the URL is treated as reachable
+    return get.ok or get.status_code == 429
+
+def search_ref_bib(reference):
+    """Look up `reference` in library.bib and verify that its DOI resolves.
+
+    Returns False when no bibtex entry matches the citation key, True otherwise.
+    Entries of type `misc` and keys listed in `_MISSING_DOIS` skip the DOI check.
+    Raises AssertionError when a DOI is required but missing or not reachable.
+    """
+    import re
+    bib = _load_bib()
+
+    # escape the key so regex metacharacters in it are matched literally
+    ref_pattern = re.escape(reference)
+    # an entry runs up to the next '@' or, for the last entry in the file, to the end of the text
+    entry_pattern = r"(@\w+{[^}]*" + ref_pattern + r"[^}]*}(.|\n)*?)(?=@|\Z)"
+
+    bib_entry = re.search(entry_pattern, bib)
+    if not bib_entry:
+        return False
+
+    type_pattern = r"@(.*){" + ref_pattern
+    doi_pattern = r"(?=[Dd][Oo][Ii]\s*=\s*{([^,}]+)})"
+
+    entry_type = re.search(type_pattern, bib_entry.group(1))
+    # re.search returns None on no match; an undetermined type is handled like a non-misc entry
+    is_misc = entry_type is not None and entry_type.group(1) == "misc"
+
+    if not (is_misc or reference in _MISSING_DOIS):
+        entry_doi = re.search(doi_pattern, bib_entry.group(1))
+        # assert on the match object itself so a missing doi field reports this message
+        assert entry_doi, "doi not found in bibtex reference"
+        url = f"https://doi.org/{entry_doi.group(1)}"
+        assert check_url(url), f"{url} is not reachable, ref= {reference}."
+
+    return True
+
+print("Load config data", flush=True)
+with open(meta["config"], "r") as file:
+    config = yaml.safe_load(file)
+
+print("Check general fields", flush=True)
+assert len(config["functionality"]["name"]) <= NAME_MAXLEN, f"Component id (.functionality.name) should not exceed {NAME_MAXLEN} characters."
+# `"key" in mapping is not None` is a chained comparison that only tests membership, so a
+# null or empty value passed; `.get(key)` rejects missing, null and empty values alike
+assert config["functionality"].get("namespace"), "namespace not a field or is empty"
+
+print("Check info fields", flush=True)
+info = config['functionality']['info']
+assert "type" in info, "type not an info field"
+info_types = ["method", "control_method"]
+assert info["type"] in info_types , f"got {info['type']} expected one of {info_types}"
+assert info.get("label"), "label not an info field or is empty"
+# checked before the text is inspected so a null summary fails here with a clear message
+assert info.get("summary"), "summary not an info field or is empty"
+assert "FILL IN:" not in info["summary"], "Summary not filled in"
+assert len(info["summary"]) <= SUMMARY_MAXLEN, f"Component id (.functionality.info.summary) should not exceed {SUMMARY_MAXLEN} characters."
+assert info.get("description"), "description not an info field or is empty"
+assert "FILL IN:" not in info["description"], "description not filled in"
+assert len(info["description"]) <= DESCRIPTION_MAXLEN, f"Component id (.functionality.info.description) should not exceed {DESCRIPTION_MAXLEN} characters."
+# references and URLs are only checked for components of type "method"
+if info["type"] == "method":
+    assert "reference" in info, "reference not an info field"
+    # search_ref_bib reads library.bib itself, so nothing is loaded here
+    if info["reference"]:
+        reference = info["reference"]
+        # a single citation key is handled like a one-element list
+        if not isinstance(reference, list):
+            reference = [reference]
+        for ref in reference:
+            assert search_ref_bib(ref), f"reference {ref} not added to library.bib"
+    # `.get()` rejects missing, null and empty URLs; the chained `in ... is not None`
+    # form only tested membership
+    assert info.get("documentation_url"), "documentation_url not an info field or is empty"
+    assert info.get("repository_url"), "repository_url not an info field or is empty"
+    assert check_url(info["documentation_url"]), f"{info['documentation_url']} is not reachable"
+    assert check_url(info["repository_url"]), f"{info['repository_url']} is not reachable"
+
+if "variants" in info:
+    # argument names without the leading dashes, plus the one info field a variant may override
+    arg_names = [arg["name"].replace("--", "") for arg in config["functionality"]["arguments"]] + ["preferred_normalization"]
+
+    for paramset_id, paramset in info["variants"].items():
+        # a variant without overrides is allowed
+        if paramset:
+            for arg_id in paramset:
+                assert arg_id in arg_names, f"Argument '{arg_id}' in `.functionality.info.variants['{paramset_id}']` is not an argument in `.functionality.arguments`."
+
+assert "preferred_normalization" in info, "preferred_normalization not an info field"
+norm_methods = ["log_cpm", "log_cp10k", "counts", "log_scran_pooling", "sqrt_cpm", "sqrt_cp10k", "l1_sqrt"]
+assert info["preferred_normalization"] in norm_methods, "info['preferred_normalization'] not one of '" + "', '".join(norm_methods) + "'."
+
+print("Check platform fields", flush=True)
+platforms = config['platforms']
+# start from None so a config without a nextflow platform fails the assert below with its
+# message instead of raising NameError
+nextflow = None
+for platform in platforms:
+    if not platform["type"] == "nextflow":
+        continue
+    nextflow= platform
+
+assert nextflow, "nextflow not a platform"
+assert nextflow.get("directives"), "directives not a field in nextflow platform"
+assert nextflow["directives"].get("label"), "label not a field in nextflow platform directives"
+
+assert [i for i in nextflow["directives"]["label"] if i in TIME_LABELS], "time label not filled in"
+assert [i for i in nextflow["directives"]["label"] if i in MEM_LABELS], "mem label not filled in"
+assert [i for i in nextflow["directives"]["label"] if i in CPU_LABELS], "cpu label not filled in"
+
+print("All checks succeeded!", flush=True)
diff --git a/component_tests/check_metric_config.py b/component_tests/check_metric_config.py
new file mode 100644
index 0000000..45fa1ef
--- /dev/null
+++ b/component_tests/check_metric_config.py
@@ -0,0 +1,139 @@
+import yaml
+from typing import Any, Dict
+
+## VIASH START
+
+meta = {
+    "config" : "foo"
+}
+
+## VIASH END
+
+NAME_MAXLEN = 50
+
+SUMMARY_MAXLEN = 400
+
+DESCRIPTION_MAXLEN = 5000
+
+# citation keys for which search_ref_bib skips the DOI lookup
+_MISSING_DOIS = ["vandermaaten2008visualizing", "hosmer2013applied"]
+
+# nextflow resource labels; the platform check expects one label from each list
+TIME_LABELS = ["lowtime", "midtime", "hightime"]
+MEM_LABELS = ["lowmem", "midmem", "highmem"]
+CPU_LABELS = ["lowcpu", "midcpu", "highcpu"]
+
+
+def _load_bib():
+    """Return the text of `library.bib` found in `meta['resources_dir']`."""
+    bib_path = meta["resources_dir"]+"/library.bib"
+    with open(bib_path, "r") as file:
+        return file.read()
+
+def check_url(url):
+    """Return True when a HEAD request to `url` succeeds or is answered with HTTP 429."""
+    import requests
+    from urllib3.util.retry import Retry
+    from requests.adapters import HTTPAdapter
+
+    # configure retry strategy
+    session = requests.Session()
+    retry = Retry(connect=3, backoff_factor=0.5)
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount('http://', adapter)
+    session.mount('https://', adapter)
+
+    get = session.head(url)
+
+    # 429 rejected, too many requests: the server answered, so the URL is treated as reachable
+    return get.ok or get.status_code == 429
+
+def search_ref_bib(reference):
+    """Look up `reference` in library.bib and verify that its DOI resolves.
+
+    Returns False when no bibtex entry matches the citation key, True otherwise.
+    Entries of type `misc` and keys listed in `_MISSING_DOIS` skip the DOI check.
+    Raises AssertionError when a DOI is required but missing or not reachable.
+    """
+    import re
+    bib = _load_bib()
+
+    # escape the key so regex metacharacters in it are matched literally
+    ref_pattern = re.escape(reference)
+    # an entry runs up to the next '@' or, for the last entry in the file, to the end of the text
+    entry_pattern = r"(@\w+{[^}]*" + ref_pattern + r"[^}]*}(.|\n)*?)(?=@|\Z)"
+
+    bib_entry = re.search(entry_pattern, bib)
+    if not bib_entry:
+        return False
+
+    type_pattern = r"@(.*){" + ref_pattern
+    doi_pattern = r"(?=[Dd][Oo][Ii]\s*=\s*{([^,}]+)})"
+
+    entry_type = re.search(type_pattern, bib_entry.group(1))
+    # re.search returns None on no match; an undetermined type is handled like a non-misc entry
+    is_misc = entry_type is not None and entry_type.group(1) == "misc"
+
+    if not (is_misc or reference in _MISSING_DOIS):
+        entry_doi = re.search(doi_pattern, bib_entry.group(1))
+        # assert on the match object itself so a missing doi field reports this message
+        assert entry_doi, "doi not found in bibtex reference"
+        url = f"https://doi.org/{entry_doi.group(1)}"
+        assert check_url(url), f"{url} is not reachable, ref= {reference}."
+
+    return True
+
+def check_metric(metric: Dict[str, Any]) -> None:
+    """Validate one entry of `.functionality.info.metrics`.
+
+    Raises AssertionError on the first check that fails; returns nothing.
+    """
+    # `"key" in metric is not None` is a chained comparison that only tests membership;
+    # `.get(key)` rejects missing, null and empty values alike
+    assert metric.get("name"), "name not a field or is empty"
+    assert len(metric["name"]) <= NAME_MAXLEN, f"Component id (.functionality.info.metrics.metric.name) should not exceed {NAME_MAXLEN} characters."
+    assert metric.get("label"), "label not a field in metric or is empty"
+    assert metric.get("summary"), "summary not a field in metric or is empty"
+    assert "FILL IN:" not in metric["summary"], "Summary not filled in"
+    assert len(metric["summary"]) <= SUMMARY_MAXLEN, f"Component id (.functionality.info.metrics.metric.summary) should not exceed {SUMMARY_MAXLEN} characters."
+    assert metric.get("description"), "description not a field in metric or is empty"
+    assert len(metric["description"]) <= DESCRIPTION_MAXLEN, f"Component id (.functionality.info.metrics.metric.description) should not exceed {DESCRIPTION_MAXLEN} characters."
+    assert "FILL IN:" not in metric["description"], "description not filled in"
+    # assert "reference" in metric, "reference not a field in metric"
+    # a missing, null or empty reference is skipped, as in the method config check
+    reference = metric.get("reference")
+    if reference:
+        if not isinstance(reference, list):
+            reference = [reference]
+        for ref in reference:
+            assert search_ref_bib(ref), f"reference {ref} not added to library.bib"
+    # assert "documentation_url" in metric , "documentation_url not a field in metric"
+    # assert "repository_url" in metric , "repository_url not a metric field"
+    if "documentation_url" in metric:
+        assert check_url(metric["documentation_url"]), f"{metric['documentation_url']} is not reachable"
+    if "repository_url" in metric:
+        assert check_url(metric["repository_url"]), f"{metric['repository_url']} is not reachable"
+    # compared against None rather than tested for truthiness: 0 and False are valid values
+    assert metric.get("min") is not None, "min not a field in metric or is empty"
+    assert metric.get("max") is not None, "max not a field in metric or is empty"
+    assert metric.get("maximize") is not None, "maximize not a field in metric or is empty"
+    # floats are accepted as well so fractional bounds pass
+    assert isinstance(metric['min'], (int, float, str)), "min is not a number or string (-.inf)"
+    assert isinstance(metric['max'], (int, float, str)), "max is not a number or string (+.inf)"
+    # the former `or metric["maximize"] not in [...]` clause made this pass for nearly any value
+    assert isinstance(metric['maximize'], bool), "maximize is not a bool"
+
+
+print("Load config data", flush=True)
+with open(meta["config"], "r") as file:
+    config = yaml.safe_load(file)
+
+print("check general fields", flush=True)
+assert config["functionality"].get("name"), "Name not a field or is empty"
+assert len(config["functionality"]["name"]) <= NAME_MAXLEN, f"Component id (.functionality.name) should not exceed {NAME_MAXLEN} characters."
+# `.get()` rejects a missing, null or empty namespace; the chained `in ... is not None`
+# form only tested membership
+assert config["functionality"].get("namespace"), "namespace not a field or is empty"
+
+
+print("Check info fields", flush=True)
+info = config['functionality']['info']
+assert "type" in info, "type not an info field"
+assert info["type"] == "metric" , f"got {info['type']} expected 'metric'"
+assert "metrics" in info, "metrics not an info field"
+for metric in info["metrics"]:
+    check_metric(metric)
+
+print("Check platform fields", flush=True)
+platforms = config['platforms']
+# start from None so a config without a nextflow platform fails the assert below with its
+# message instead of raising NameError
+nextflow = None
+for platform in platforms:
+    if not platform["type"] == "nextflow":
+        continue
+    nextflow= platform
+
+assert nextflow, "nextflow not a platform"
+assert nextflow.get("directives"), "directives not a field in nextflow platform"
+assert nextflow["directives"].get("label"), "label not a field in nextflow platform directives"
+
+assert [i for i in nextflow["directives"]["label"] if i in TIME_LABELS], "time label not filled in"
+assert [i for i in nextflow["directives"]["label"] if i in MEM_LABELS], "mem label not filled in"
+assert [i for i in nextflow["directives"]["label"] if i in CPU_LABELS], "cpu label not filled in"
+
+print("All checks succeeded!", flush=True)
diff --git a/src/component_tests/run_and_check_output.py b/component_tests/run_and_check_output.py
similarity index 100%
rename from src/component_tests/run_and_check_output.py
rename to component_tests/run_and_check_output.py
diff --git a/create_component/create_component b/create_component/create_component
new file mode 100755
index 0000000..6ccf2c3
--- /dev/null
+++ b/create_component/create_component
@@ -0,0 +1,1340 @@
+#!/usr/bin/env bash
+
+# create_component dev
+#
+# This wrapper script is auto-generated by viash 0.8.0 and is thus a derivative
+# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
+# Intuitive.
+#
+# The component may contain files which fall under a different license.
The
+# authors of this component should specify the license in the header of such
+# files, or include a separate license file detailing the licenses of all included
+# files.
+
+set -e
+
+# when VIASH_TEMP is unset, take the first alternative temp variable that has a value,
+# ending with /tmp
+if [ -z "$VIASH_TEMP" ]; then
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put quotes around non flag values
+# $1 : unquoted string
+# return : possibly quoted string
+# examples:
+# ViashQuote --foo # returns --foo
+# ViashQuote bar # returns 'bar'
+# ViashQuote --foo=bar # returns --foo='bar'
+function ViashQuote {
+  # flag=value: quote only the value part
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  # bare flag: printed unchanged
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    echo "$1"
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag
+# $1 : string with a possible leading flag
+# return : string without possible leading flag
+# examples:
+# ViashRemoveFlags --foo=bar # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the path of a bash file, following symlinks
+# usage : ViashSourceDir ${BASH_SOURCE[0]}
+# $1 : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the bash file
+function ViashSourceDir {
+  SOURCE="$1"
+  # keep resolving while SOURCE is itself a symlink
+  while [ -h "$SOURCE" ]; do
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    # a relative link target is resolved against the directory that holds the link
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks
+# usage : ViashFindTargetDir 'ScriptPath'
+# $1 : The location from where to start the upward search
+# returns : The absolute path of the '.build.yaml' file
+function
ViashFindTargetDir {
+  SOURCE="$1"
+  # drop one trailing path component per iteration until a directory containing
+  # .build.yaml is found or the path is empty
+  while [[ "$SOURCE" != "" && ! -e "$SOURCE/.build.yaml" ]]; do
+    SOURCE=${SOURCE%/*}
+  done
+  echo $SOURCE
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+# default verbosity: messages up to and including 'notice' are shown
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  # print only when the configured verbosity is at least the level of this message
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo {
+  ViashLog $VIASH_LOGCODE_INFO info "$@"
+}
+
+# ViashDebug: log all events, for debugging purposes
+# usage: ViashDebug This just happened.
+# stderr: Your input, prepended by '[debug] '.
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component
+VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}`
+
+# find the root of the built components & dependencies
+VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR`
+
+# define meta fields
+VIASH_META_FUNCTIONALITY_NAME="create_component"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+
+# ViashHelp: Display helpful explanation about this executable
+# stdout: the component name, description, usage examples and argument list
+function ViashHelp {
+  echo "create_component dev"
+  echo ""
+  echo "Create a component Viash component."
+  echo ""
+  echo "Usage:"
+  echo "\`\`\`"
+  echo "bin/create_component --language r --name foo"
+  echo "bin/create_component --language python --name bar"
+  echo "\`\`\`"
+  echo ""
+  echo "Arguments:"
+  echo " --language"
+  echo " type: string"
+  echo " default: python"
+  echo " choices: [ python, r ]"
+  echo " Which scripting language to use. Options are 'python', 'r'."
+  echo ""
+  echo " --name"
+  echo " type: string"
+  echo " example: new_comp"
+  echo " Name of the new method, formatted in snake case."
+  echo ""
+  echo " --output"
+  echo " type: file, output, file must exist"
+  echo " default: src/methods/\${VIASH_PAR_NAME}"
+  echo " Path to the component directory. Suggested location is"
+  echo " \`src/s/\`."
+  echo ""
+  echo " --api_file"
+  echo " type: file"
+  echo " default: src/api/comp_method.yaml"
+  echo " Which API file to use. Defaults to \`src/api/comp_.yaml\`."
+  echo " In tasks with different subtypes of method, this location might not"
+  echo " exist and you might need"
+  echo " to manually specify a different API file to inherit from."
+  echo ""
+  echo " --viash_yaml"
+  echo " type: file, file must exist"
+  echo " default: _viash.yaml"
+  echo " Path to the project config file. Needed for knowing the relative"
+  echo " location of a file to the project root."
+}
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: check whether Docker is installed correctly
+#
+# Exits with status 1 when the docker executable cannot be found or when
+# `docker version` fails.
+#
+# examples:
+# ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  # run `command -v` directly: inside `[ ... ]` the words are handed to `test` as
+  # arguments and never executed, so a missing docker was not detected
+  if ! command -v docker &> /dev/null; then
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  # switch errexit off around the probe so its exit code can be inspected,
+  # then restore it if it was on
+  save=$-; set +e
+  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
+    ViashCritical "- Try running 'dockerd' in the command line"
+    ViashCritical "- See https://docs.docker.com/config/daemon/"
+    exit 1
+  fi
+}
+
+# ViashDockerRemoteTagCheck: check whether a Docker image is available
+# on a remote. Assumes `docker login` has been performed, if relevant.
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $?
: whether or not the image was found
+# examples:
+# ViashDockerRemoteTagCheck python:latest
+# echo $? # returns '0'
+# ViashDockerRemoteTagCheck sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerRemoteTagCheck {
+  # all output is discarded; only the exit code is used
+  docker manifest inspect $1 > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+# docker pull python:latest
+# ViashDockerLocalTagCheck python:latest
+# echo $? # returns '0'
+# ViashDockerLocalTagCheck sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerLocalTagCheck {
+  # succeeds when `docker images -q` prints anything for the identifier
+  [ -n "$(docker images -q $1)" ]
+}
+
+# ViashDockerPull: pull a Docker image
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+# ViashDockerPull python:latest
+# echo $? # returns '0'
+# ViashDockerPull sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerPull {
+  ViashNotice "Checking if Docker image is available at '$1'"
+  # at info verbosity or higher the output of docker is shown; otherwise it is discarded
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker pull $1 && return 0 || return 1
+  else
+    save=$-; set +e
+    docker pull $1 2> /dev/null > /dev/null
+    out=$?
+    [[ $save =~ e ]] && set -e
+    if [ $out -ne 0 ]; then
+      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
+    fi
+    return $out
+  fi
+}
+
+# ViashDockerPush: push a Docker image
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : the exit code of `docker push`
+# examples:
+# ViashDockerPush python:latest
+# echo $? # returns '0'
+# ViashDockerPush sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerPush {
+  ViashNotice "Pushing image to '$1'"
+  save=$-; set +e
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker push $1
+    out=$?
+  else
+    docker push $1 2> /dev/null > /dev/null
+    out=$?
+  fi
+  # restore errexit if it was enabled before the push
+  [[ $save =~ e ]] && set -e
+  # NOTE: these messages print the global VSHD_ID assigned in ViashDockerSetup, not $1
+  if [ $out -eq 0 ]; then
+    ViashNotice "Container '$VSHD_ID' push succeeded."
+  else
+    ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+# ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  save=$-; set +e
+  ViashDockerPull $1
+  out=$?
+  [[ $save =~ e ]] && set -e
+  # a failed pull falls back to a build; all arguments are forwarded to ViashDockerBuild
+  if [ $out -ne 0 ]; then
+    ViashDockerBuild $@
+  fi
+}
+
+# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $2 : docker setup strategy, see DockerSetupStrategy.scala
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+# ViashDockerSetup mynewcomponent alwaysbuild
+function ViashDockerSetup {
+  VSHD_ID="$1"
+  VSHD_STRAT="$2"
+  if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then
+    ViashDockerBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then
+    ViashDockerPull $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then
+    ViashDockerBuild $VSHD_ID
+  # every strategy starting with 'ifneedbe' first checks for a local image
+  elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then
+    save=$-; set +e
+    ViashDockerLocalTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    # nothing is done when the image already exists locally
+    if [ $outCheck -eq 0 ]; then
+      ViashInfo "Image $VSHD_ID already exists"
+    elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then
+      ViashDockerBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then
+      ViashDockerBuild $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then
+      ViashDockerPull $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID
+    else
+      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+      exit 1
+    fi
+  elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then
+    ViashDockerPush "$VSHD_ID"
+  # push only when the remote does not have the image yet
+  elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then
+    save=$-; set +e
+    ViashDockerRemoteTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashNotice "Container '$VSHD_ID' exists, doing nothing."
+    else
+      ViashNotice "Container '$VSHD_ID' does not yet exist."
+      ViashDockerPush "$VSHD_ID"
+    fi
+  elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then
+    ViashNotice "Skipping setup."
+  else
+    ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+    exit 1
+  fi
+}
+
+# ViashDockerCheckCommands: Check whether a docker container has the required commands
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $@ : commands to verify being present
+# examples:
+# ViashDockerCheckCommands bash:4.0 bash ps foo
+function ViashDockerCheckCommands {
+  tag=$1
+  shift 1
+  commands="$@"
+  save=$-; set +e
+  # the loop run inside the container prints the first command it cannot find and exits with 1
+  missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
+  outCheck=$?
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "ruamel.yaml" + +LABEL org.opencontainers.image.description="Companion container for running component common create_component" +LABEL org.opencontainers.image.created="2024-05-31T16:54:06+02:00" +LABEL org.opencontainers.image.source="https://github.com/openproblems-bio/openproblems-v2" +LABEL org.opencontainers.image.revision="907447bc4c8073182f35850747298d05aad536d7" +LABEL org.opencontainers.image.version="dev" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-create_component-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname 
"$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "create_component dev" + exit + ;; + --language) + [ -n "$VIASH_PAR_LANGUAGE" ] && ViashError Bad arguments for option \'--language\': \'$VIASH_PAR_LANGUAGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LANGUAGE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --language. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --language=*) + [ -n "$VIASH_PAR_LANGUAGE" ] && ViashError Bad arguments for option \'--language=*\': \'$VIASH_PAR_LANGUAGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LANGUAGE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --name) + [ -n "$VIASH_PAR_NAME" ] && ViashError Bad arguments for option \'--name\': \'$VIASH_PAR_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NAME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --name. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --name=*) + [ -n "$VIASH_PAR_NAME" ] && ViashError Bad arguments for option \'--name=*\': \'$VIASH_PAR_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NAME=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --api_file) + [ -n "$VIASH_PAR_API_FILE" ] && ViashError Bad arguments for option \'--api_file\': \'$VIASH_PAR_API_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_API_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --api_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --api_file=*) + [ -n "$VIASH_PAR_API_FILE" ] && ViashError Bad arguments for option \'--api_file=*\': \'$VIASH_PAR_API_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_API_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --viash_yaml) + [ -n "$VIASH_PAR_VIASH_YAML" ] && ViashError Bad arguments for option \'--viash_yaml\': \'$VIASH_PAR_VIASH_YAML\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VIASH_YAML="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --viash_yaml. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --viash_yaml=*) + [ -n "$VIASH_PAR_VIASH_YAML" ] && ViashError Bad arguments for option \'--viash_yaml=*\': \'$VIASH_PAR_VIASH_YAML\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VIASH_YAML=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + # strategy is passed as a separate word: consume the flag and its value + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---setup=*) + # strategy is embedded in "$1": consume that single word only + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + # NOTE: '---v' is already matched by the earlier '---v|---verbose' pattern, so only '---volume' reaches this branch + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + # the mount specification is embedded in "$1", not in the following argument + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openproblems-bio/common/create_component:dev' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openproblems-bio/common/create_component:dev' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openproblems-bio/common/create_component:dev'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openproblems-bio/common/create_component:dev' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + # unset takes the variable name; expanding the empty value would run a bare 'unset' and leave the variable defined + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + # pass the name, not the (empty) value, so later set/unset tests see the variable as unset + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_LANGUAGE+x} ]; then + VIASH_PAR_LANGUAGE="python" +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="src/methods/${VIASH_PAR_NAME}" +fi +if [ -z ${VIASH_PAR_API_FILE+x} ]; then + VIASH_PAR_API_FILE="src/api/comp_method.yaml" +fi +if [ -z ${VIASH_PAR_VIASH_YAML+x} ]; then + VIASH_PAR_VIASH_YAML="_viash.yaml" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_VIASH_YAML" ] && [ ! -e "$VIASH_PAR_VIASH_YAML" ]; then + ViashError "Input file '$VIASH_PAR_VIASH_YAML' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_LANGUAGE" ]; then + VIASH_PAR_LANGUAGE_CHOICES=("python:r") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_LANGUAGE_CHOICES[*]}:" =~ ":$VIASH_PAR_LANGUAGE:" ]]; then + ViashError '--language' specified value of \'$VIASH_PAR_LANGUAGE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_API_FILE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_API_FILE")" ) + VIASH_PAR_API_FILE=$(ViashAutodetectMount "$VIASH_PAR_API_FILE") +fi +if [ ! -z "$VIASH_PAR_VIASH_YAML" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_VIASH_YAML")" ) + VIASH_PAR_VIASH_YAML=$(ViashAutodetectMount "$VIASH_PAR_VIASH_YAML") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openproblems-bio/common/create_component:dev "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openproblems-bio/common/create_component:dev)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openproblems-bio/common/create_component:dev +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-create_component-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from typing import Any +from pathlib import Path +import sys +import os +import re + +## VIASH START +# The following code has been auto-generated by Viash. 
+par = { + 'language': $( if [ ! -z ${VIASH_PAR_LANGUAGE+x} ]; then echo "r'${VIASH_PAR_LANGUAGE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'name': $( if [ ! -z ${VIASH_PAR_NAME+x} ]; then echo "r'${VIASH_PAR_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'api_file': $( if [ ! -z ${VIASH_PAR_API_FILE+x} ]; then echo "r'${VIASH_PAR_API_FILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'viash_yaml': $( if [ ! -z ${VIASH_PAR_VIASH_YAML+x} ]; then echo "r'${VIASH_PAR_VIASH_YAML//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +dep = { + +} + +## VIASH END + +# import helper function +sys.path.append(meta["resources_dir"]) +from read_and_merge_yaml import read_and_merge_yaml + +def strip_margin(text: str) -> str: + return re.sub("(^|\\n)[ \\t]*\\|", "\\\\1", text) + +def create_config(par, component_type, pretty_name, script_path) -> str: + info_str = generate_info(par, component_type, pretty_name) + resources_str = generate_resources(par, script_path) + docker_platform = generate_docker_platform(par) + + return strip_margin(f'''\\ + |# The API specifies which type of component this is. + |# It contains specifications for: + |# - The input/output files + |# - Common parameters + |# - A unit test + |__merge__: {os.path.relpath(par["api_file"], par["output"])} + | + |functionality: + | # A unique identifier for your component (required). + | # Can contain only lowercase letters or underscores. + | name: {par["name"]} + | + | # Metadata for your component + | info: + |{info_str} + | # Component-specific parameters (optional) + | # arguments: + | # - name: "--n_neighbors" + | # type: "integer" + | # default: 5 + | # description: Number of neighbors to use. + | + | # Resources required to run the component + | resources: + |{resources_str} + |platforms: + | # Specifications for the Docker image for this component. + |{docker_platform} + | # This platform allows running the component natively + | - type: native + | # Allows turning the component into a Nextflow module / pipeline. 
+ | - type: nextflow + | directives: + | label: [midtime,midmem,midcpu] + |''' + ) + +def generate_info(par, component_type, pretty_name) -> str: + """Generate the functionality info for a component.""" + if component_type in ["method", "control_method"]: + str = strip_margin(f'''\\ + | # A relatively short label, used when rendering visualisations (required) + | label: {pretty_name} + | # A one sentence summary of how this method works (required). Used when + | # rendering summary tables. + | summary: "FILL IN: A one sentence summary of this method." + | # A multi-line description of how this component works (required). Used + | # when rendering reference documentation. + | description: | + | FILL IN: A (multi-line) description of how this method works. + | # Which normalisation method this component prefers to use (required). + | preferred_normalization: log_cp10k + |''') + if component_type == "method": + str += strip_margin(f'''\\ + | # A reference key from the bibtex library at src/common/library.bib (required). + | reference: bibtex_reference_key + | # URL to the documentation for this method (required). + | documentation_url: https://url.to/the/documentation + | # URL to the code repository for this method (required). + | repository_url: https://github.com/organisation/repository + |''') + return str + elif component_type == "metric": + return strip_margin(f'''\\ + | metrics: + | # A unique identifier for your metric (required). + | # Can contain only lowercase letters or underscores. + | name: {par["name"]} + | # A relatively short label, used when rendering visualisarions (required) + | label: {pretty_name} + | # A one sentence summary of how this metric works (required). Used when + | # rendering summary tables. + | summary: "FILL IN: A one sentence summary of this metric." + | # A multi-line description of how this component works (required). Used + | # when rendering reference documentation. 
+ | description: | + | FILL IN: A (multi-line) description of how this metric works. + | # A reference key from the bibtex library at src/common/library.bib (required). + | reference: bibtex_reference_key + | # URL to the documentation for this metric (required). + | documentation_url: https://url.to/the/documentation + | # URL to the code repository for this metric (required). + | repository_url: https://github.com/organisation/repository + | # The minimum possible value for this metric (required) + | min: 0 + | # The maximum possible value for this metric (required) + | max: 1 + | # Whether a higher value represents a 'better' solution (required) + | maximize: true + |''') + + +def generate_resources(par, script_path) -> str: + """Add the script to the functionality resources.""" + if par["language"] == "python": + type_str = "python_script" + elif par["language"] == "r": + type_str = "r_script" + + return strip_margin(f'''\\ + | # The script of your component (required) + | - type: {type_str} + | path: {script_path} + | # Additional resources your script needs (optional) + | # - type: file + | # path: weights.pt + |''') + +def generate_docker_platform(par) -> str: + """Set up the docker platform for Python.""" + if par["language"] == "python": + image_str = "ghcr.io/openproblems-bio/base_python:1.0.4" + extra = "" + elif par["language"] == "r": + image_str = "ghcr.io/openproblems-bio/base_r:1.0.4" + extra = strip_margin(f'''\\ + | - type: r + | packages: [ arrow, readr ] + |''') + return strip_margin(f'''\\ + | - type: docker + | image: {image_str} + | # Add custom dependencies here (optional). For more information, see + | # https://viash.io/reference/config/platforms/docker/#setup . 
+ | setup: + | - type: python + | packages: [ fastparquet ] + |{extra}''') + +def set_par_values(config) -> None: + """Adds values to each of the arguments in a config file.""" + args = config['functionality']['arguments'] + for argi, arg in enumerate(args): + key = re.sub("^-*", "", arg['name']) + + # find value + if arg["type"] != "file": + value = arg.get("default", arg.get("example", "...")) + elif key == "de_train": + value = "resources/neurips-2023-kaggle/de_train.parquet" + elif key == "de_train_h5ad": + value = "resources/neurips-2023-kaggle/2023-09-12_de_by_cell_type_train.h5ad" + elif key == "id_map": + value = "resources/neurips-2023-kaggle/id_map.csv" + else: + key_strip = key.replace("output_", "") + value = f'{key_strip}.h5ad' + + # store key and value + config['functionality']['arguments'][argi]["key"] = key + config['functionality']['arguments'][argi]["value"] = value + + +def create_python_script(par, config, type): + script = strip_margin('''\\ + |import pandas as pd + | + |## VIASH START + |par = { + | "de_train": "resources/neurips-2023-kaggle/de_train.parquet", + | "de_test": "resources/neurips-2023-kaggle/de_test.parquet", + | "id_map": "resources/neurips-2023-kaggle/id_map.csv", + | "output": "output.parquet", + |} + |## VIASH END + | + |print('Reading input files', flush=True) + |de_train = pd.read_parquet(par["de_train"]) + |id_map = pd.read_csv(par["id_map"]) + |gene_names = [col for col in de_train.columns if col not in {"cell_type", "sm_name", "sm_lincs_id", "SMILES", "split", "control", "index"}] + | + |print('Preprocess data', flush=True) + |# ... preprocessing ... + | + |print('Train model', flush=True) + |# ... train model ... + | + |print('Generate predictions', flush=True) + |# ... generate predictions ... + | + |print('Write output to file', flush=True) + |output = pd.DataFrame( + | # ... TODO: fill in data ... 
+ | index=id_map["id"], + | columns=gene_names + |).reset_index() + |output.to_parquet(par["output"]) + |''') + + return script + +def create_r_script(par, api_spec, type): + script = strip_margin(f'''\\ + |requireNamespace("arrow", quietly = TRUE) + |requireNamespace("readr", quietly = TRUE) + | + |## VIASH START + |par <- list( + | de_train = "resources/neurips-2023-kaggle/de_train.parquet", + | id_map = "resources/neurips-2023-kaggle/id_map.csv", + | output = "output.parquet" + |) + |## VIASH END + | + |cat("Reading input files\\\\n") + |de_train <- arrow::read_parquet(par\$de_train) + |id_map <- readr::read_csv(par\$id_map) + | + |cat("Preprocess data\\\\n") + |# ... preprocessing ... + | + |cat("Train model\\\\n") + |# ... train model ... + | + |cat("Generate predictions\\\\n") + |# ... generate predictions ... + | + |cat("Write output to file\\\\n") + |output <- data.frame( + | id = id_map\$id, + | # ... more columns ... + | check.names = FALSE + |) + |arrow::write_parquet(output, par\$output) + |''') + + return script + + + +def main(par): + ####### CHECK INPUTS ####### + print("Check inputs", flush=True) + assert re.match("[a-z][a-z0-9_]*", par["name"]), "Name should match the regular expression '[a-z][a-z0-9_]*'. Example: 'my_component'." + assert len(par['name']) <= 50, "Method name should be at most 50 characters." 
+ + pretty_name = re.sub("_", " ", par['name']).title() + + ####### CHECK LANGUAGE ####### + print("Check language", flush=True) + # check language and determine script path + if par["language"] == "python": + script_path = "script.py" + elif par["language"] == "r": + script_path = "script.R" + else: + sys.exit(f"Unrecognized language parameter '{par['language']}'.") + + ## CHECK API FILE + print("Check API file", flush=True) + api_file = Path(par["api_file"]) + viash_yaml = Path(par["viash_yaml"]) + project_dir = viash_yaml.parent + if not api_file.exists(): + comp_types = [x.with_suffix("").name.removeprefix("comp_") for x in api_file.parent.glob("**/comp_*.y*ml")] + list.sort(comp_types) + sys.exit(strip_margin(f"""\\ + |Error: Invalid --type argument. + | Reason: Could not find API file at '{api_file.relative_to(project_dir)}'. + | Possible values for --type: {', '.join(comp_types)}.""")) + + ## READ API FILE + print("Read API file", flush=True) + api = read_and_merge_yaml(api_file) + comp_type = api.get("functionality", {}).get("info", {}).get("type", {}) + if not comp_type: + sys.exit(strip_margin(f"""\\ + |Error: API file is incorrectly formatted. + | Reason: Could not find component type at \`.functionality.info.type\`.' 
+ | Please fix the formatting of the API file.""")) + + ####### CREATE OUTPUT DIR ####### + print("Create output dir", flush=True) + out_dir = Path(par["output"]) + out_dir.mkdir(exist_ok=True) + + ####### CREATE CONFIG ####### + print("Create config", flush=True) + config_file = out_dir / "config.vsh.yaml" + + # get config template + config_str = create_config(par, comp_type, pretty_name, script_path) + + with open(config_file, "w") as f: + f.write(config_str) + + ####### CREATE SCRIPT ####### + print("Create script", flush=True) + script_file = out_dir / script_path + + # set reasonable values + set_par_values(api) + + if par["language"] == "python": + script_out = create_python_script(par, api, comp_type) + + if par["language"] == "r": + script_out = create_r_script(par, api, comp_type) + + # write script + with open(script_file, "w") as f: + f.write(script_out) + + print("Done!", flush=True) + + +if __name__ == "__main__": + main(par) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_PAR_API_FILE" ]; then + VIASH_PAR_API_FILE=$(ViashStripAutomount "$VIASH_PAR_API_FILE") +fi +if [ ! -z "$VIASH_PAR_VIASH_YAML" ]; then + VIASH_PAR_VIASH_YAML=$(ViashStripAutomount "$VIASH_PAR_VIASH_YAML") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/src/helper_functions/read_and_merge_yaml.py b/create_component/read_and_merge_yaml.py similarity index 100% rename from src/helper_functions/read_and_merge_yaml.py rename to create_component/read_and_merge_yaml.py diff --git a/create_task_readme/create_task_readme b/create_task_readme/create_task_readme new file mode 100755 index 0000000..1acc220 --- /dev/null +++ b/create_task_readme/create_task_readme @@ -0,0 +1,1132 @@ +#!/usr/bin/env bash + +# create_task_readme dev +# +# This wrapper script is auto-generated by viash 0.8.0 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
set -e

# Resolve the temporary directory used by this wrapper: keep VIASH_TEMP when
# it is already set, otherwise take the first non-empty candidate below and
# fall back to /tmp.
if [ -z "$VIASH_TEMP" ]; then
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$TMP}
  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
  VIASH_TEMP=${VIASH_TEMP:-/tmp}
fi

# define helper functions
# ViashQuote: put quotes around non flag values
# $1     : unquoted string
# return : possibly quoted string
# examples:
#   ViashQuote --foo      # returns --foo
#   ViashQuote bar        # returns 'bar'
#   ViashQuote --foo=bar  # returns --foo='bar'
function ViashQuote {
  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
    # flag with a value: quote only the part after the first '='
    echo "$1" | sed "s#=\(.*\)#='\1'#"
  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
    # bare flag: leave untouched
    echo "$1"
  else
    echo "'$1'"
  fi
}
# ViashRemoveFlags: Remove leading flag
# $1     : string with a possible leading flag
# return : string without possible leading flag
# examples:
#   ViashRemoveFlags --foo=bar  # returns bar
function ViashRemoveFlags {
  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
}
# ViashSourceDir: return the path of a bash file, following symlinks
# usage   : ViashSourceDir ${BASH_SOURCE[0]}
# $1      : Should always be set to ${BASH_SOURCE[0]}
# returns : The absolute path of the directory containing the bash file
# note    : changes the working directory of the calling shell, so call it
#           inside a command substitution. SOURCE and DIR are left global,
#           as in the generated original.
function ViashSourceDir {
  SOURCE="$1"
  while [ -h "$SOURCE" ]; do
    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
    SOURCE="$(readlink "$SOURCE")"
    # a relative symlink target is resolved against the link's directory
    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
  done
  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
}
# ViashFindTargetDir: return the directory containing the '.build.yaml' file
# usage   : ViashFindTargetDir 'ScriptPath'
# $1      : The location from where to start the upward search
# returns : The closest ancestor (or $1 itself) that contains '.build.yaml',
#           or an empty string when no such directory exists
function ViashFindTargetDir {
  local parent
  SOURCE="$1"
  while [[ "$SOURCE" != "" && ! -e "$SOURCE/.build.yaml" ]]; do
    parent=${SOURCE%/*}
    # A path without any '/' cannot be shortened by the expansion above;
    # treat it as "not found" instead of looping forever.
    if [[ "$parent" == "$SOURCE" ]]; then
      parent=""
    fi
    SOURCE=$parent
  done
  # quoted so that whitespace in the path survives unchanged
  echo "$SOURCE"
}
# see https://en.wikipedia.org/wiki/Syslog#Severity_level
VIASH_LOGCODE_EMERGENCY=0
VIASH_LOGCODE_ALERT=1
VIASH_LOGCODE_CRITICAL=2
VIASH_LOGCODE_ERROR=3
VIASH_LOGCODE_WARNING=4
VIASH_LOGCODE_NOTICE=5
VIASH_LOGCODE_INFO=6
VIASH_LOGCODE_DEBUG=7
VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE

# ViashLog: Log events depending on the verbosity level
# usage: ViashLog 1 alert Oh no something went wrong!
# $1: required verbosity level
# $2: display tag
# $3+: messages to display
# stderr: Your input, prepended by '[$2] ', when VIASH_VERBOSITY >= $1.
function ViashLog {
  local required_level="$1"
  local display_tag="$2"
  shift 2
  if [ $VIASH_VERBOSITY -ge $required_level ]; then
    >&2 echo "[$display_tag]" "$@"
  fi
}

# ViashEmergency: log events when the system is unstable
# usage: ViashEmergency Oh no something went wrong.
# stderr: Your input, prepended by '[emergency] '.
function ViashEmergency {
  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
}

# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
# usage: ViashAlert Oh no something went wrong.
# stderr: Your input, prepended by '[alert] '.
function ViashAlert {
  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
}

# ViashCritical: log events when a critical condition occurs
# usage: ViashCritical Oh no something went wrong.
# stderr: Your input, prepended by '[critical] '.
function ViashCritical {
  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
}

# ViashError: log events when an error condition occurs
# usage: ViashError Oh no something went wrong.
# stderr: Your input, prepended by '[error] '.
function ViashError {
  ViashLog $VIASH_LOGCODE_ERROR error "$@"
}

# ViashWarning: log potentially abnormal events
# usage: ViashWarning Something may have gone wrong.
# stderr: Your input, prepended by '[warning] '.
function ViashWarning {
  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
}

# ViashNotice: log significant but normal events
# usage: ViashNotice This just happened.
# output: Your input, prepended by '[notice] ', passed on to ViashLog.
function ViashNotice {
  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
}

# ViashInfo: log normal events
# usage: ViashInfo This just happened.
# output: Your input, prepended by '[info] ', passed on to ViashLog.
function ViashInfo {
  ViashLog $VIASH_LOGCODE_INFO info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# output: Your input, prepended by '[debug] ', passed on to ViashLog.
function ViashDebug {
  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
}

# find source folder of this component
# (quoted so that a checkout path containing whitespace is handled correctly)
VIASH_META_RESOURCES_DIR="$(ViashSourceDir "${BASH_SOURCE[0]}")"

# find the root of the built components & dependencies
VIASH_TARGET_DIR="$(ViashFindTargetDir "$VIASH_META_RESOURCES_DIR")"

# define meta fields
VIASH_META_FUNCTIONALITY_NAME="create_task_readme"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"


# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  echo "create_task_readme dev"
  echo ""
  echo "Create a README for the task."
  echo ""
  echo "Inputs:"
  echo "    --task"
  echo "        type: string"
  echo "        example: denoising"
  echo "        Which task the component will be added to."
  echo ""
  echo "    --task_dir"
  echo "        type: file, file must exist"
  echo "        default: src/tasks/\${VIASH_PAR_TASK}"
  echo "        Path to the task directory."
  echo ""
  echo "    --viash_yaml"
  echo "        type: file, file must exist"
  echo "        default: _viash.yaml"
  echo "        Path to the project config file. Needed for knowing the relative"
  echo "        location of a file to the project root."
  echo ""
  echo "    --github_url"
  echo "        type: string"
  echo "        default: https://github.com/openproblems-bio/openproblems-v2/tree/main/"
  echo "        URL to the GitHub repository. Needed for linking to the source code."
  echo ""
  echo "Outputs:"
  echo "    --output"
  echo "        type: file, output, file must exist"
  echo "        default: src/tasks/\${VIASH_PAR_TASK}/README.md"
  echo "        Path to the component directory. Suggested location is"
  echo "        \`src/tasks//README.md\`."
}

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits the script with status 1 when the `docker` command cannot be found or
# when `docker version` fails (taken to mean the daemon is not running).
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  local save out

  ViashDebug "Checking whether Docker is installed"
  # `command -v` has to be run as a command; wrapping it in `[ ... ]` turns it
  # into an invalid test expression that never reports a missing binary.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # temporarily disable errexit so the exit status can be inspected
  save=$-; set +e
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $?
#   # returns '1'  (result of the preceding 'echo $?' example)
function ViashDockerRemoteTagCheck {
  docker manifest inspect "$1" > /dev/null 2> /dev/null
}

# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   docker pull python:latest
#   ViashDockerLocalTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerLocalTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerLocalTagCheck {
  [ -n "$(docker images -q "$1")" ]
}

# ViashDockerPull: pull a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerPull python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPull sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPull {
  # `save`/`out` are local so a caller that also saves its shell options
  # (e.g. ViashDockerPullElseBuild) does not get them overwritten.
  local save out
  ViashNotice "Checking if Docker image is available at '$1'"
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker pull "$1" && return 0 || return 1
  else
    save=$-; set +e
    docker pull "$1" 2> /dev/null > /dev/null
    out=$?
    [[ $save =~ e ]] && set -e
    if [ $out -ne 0 ]; then
      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
    fi
    return $out
  fi
}

# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the push succeeded
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  local save out
  ViashNotice "Pushing image to '$1'"
  save=$-; set +e
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker push "$1"
    out=$?
  else
    docker push "$1" 2> /dev/null > /dev/null
    out=$?
  fi
  [[ $save =~ e ]] && set -e
  # report the image that was actually pushed ($1) rather than the global
  # VSHD_ID, which is only set when called through ViashDockerSetup
  if [ $out -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $out
}

# ViashDockerPullElseBuild: pull a Docker image, else build it
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2+                 : extra arguments passed on to ViashDockerBuild
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerPullElseBuild mynewcomponent
function ViashDockerPullElseBuild {
  # local: ViashDockerPull saves the shell options as well, and a shared
  # global would leave errexit switched off after this function returns
  local save out
  save=$-; set +e
  ViashDockerPull "$1"
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashDockerBuild "$@"
  fi
}

# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerSetup mynewcomponent alwaysbuild
function ViashDockerSetup {
  local save outCheck
  # VSHD_ID / VSHD_STRAT stay global, as in the generated original
  VSHD_ID="$1"
  VSHD_STRAT="$2"
  case "$VSHD_STRAT" in
    alwaysbuild|build|b)
      ViashDockerBuild "$VSHD_ID" --no-cache
      ;;
    alwayspull|pull|p)
      ViashDockerPull "$VSHD_ID"
      ;;
    alwayspullelsebuild|pullelsebuild)
      ViashDockerPullElseBuild "$VSHD_ID" --no-cache
      ;;
    alwayspullelsecachedbuild|pullelsecachedbuild)
      ViashDockerPullElseBuild "$VSHD_ID"
      ;;
    alwayscachedbuild|cachedbuild|cb)
      ViashDockerBuild "$VSHD_ID"
      ;;
    ifneedbe*)
      # only act when the image is not available locally yet
      save=$-; set +e
      ViashDockerLocalTagCheck "$VSHD_ID"
      outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashInfo "Image $VSHD_ID already exists"
      else
        case "$VSHD_STRAT" in
          ifneedbebuild)
            ViashDockerBuild "$VSHD_ID" --no-cache
            ;;
          ifneedbecachedbuild)
            ViashDockerBuild "$VSHD_ID"
            ;;
          ifneedbepull)
            ViashDockerPull "$VSHD_ID"
            ;;
          ifneedbepullelsebuild)
            ViashDockerPullElseBuild "$VSHD_ID" --no-cache
            ;;
          ifneedbepullelsecachedbuild)
            ViashDockerPullElseBuild "$VSHD_ID"
            ;;
          *)
            ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
            exit 1
            ;;
        esac
      fi
      ;;
    push|forcepush|alwayspush)
      ViashDockerPush "$VSHD_ID"
      ;;
    pushifnotpresent|gentlepush|maybepush)
      save=$-; set +e
      ViashDockerRemoteTagCheck "$VSHD_ID"
      outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashNotice "Container '$VSHD_ID' exists, doing nothing."
      else
        ViashNotice "Container '$VSHD_ID' does not yet exist."
        ViashDockerPush "$VSHD_ID"
      fi
      ;;
    donothing|meh)
      ViashNotice "Skipping setup."
      ;;
    *)
      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
      exit 1
      ;;
  esac
}

# ViashDockerCheckCommands: Check whether a docker container has the required commands
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $@                  : commands to verify being present
# examples:
#   ViashDockerCheckCommands bash:4.0 bash ps foo
function ViashDockerCheckCommands {
  local save outCheck
  tag=$1
  shift 1
  commands="$@"
  save=$-; set +e
  # the loop runs inside the container and prints the first missing command
  missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
  outCheck=$?
  [[ $save =~ e ]] && set -e
  if [ $outCheck -ne 0 ]; then
    ViashError "Docker container '$tag' does not contain command '$missing'."
    exit 1
  fi
}


######## End of helper functions for setting up Docker images for viash ########

# ViashDockerfile: print the dockerfile to stdout
# return : dockerfile required to run this component
# examples:
#   ViashDockerfile
function ViashDockerfile {
  cat << 'VIASHDOCKER'
FROM ghcr.io/openproblems-bio/base_r:1.0.4

ENTRYPOINT []


RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
  Rscript -e 'remotes::install_cran(c("dplyr", "purrr", "rlang", "glue", "yaml", "fs", "cli", "igraph", "rmarkdown", "processx"), repos = "https://cran.rstudio.com")'

RUN apt-get update && \
  DEBIAN_FRONTEND=noninteractive apt-get install -y jq curl && \
  rm -rf /var/lib/apt/lists/*

RUN release_info=$(curl -s https://api.github.com/repos/quarto-dev/quarto-cli/releases/latest) && \
  download_url=$(printf "%s" "$release_info" | jq -r '.assets[] | select(.name | test("quarto-.*-linux-amd64.deb")) | .browser_download_url') && \
  curl -sL "$download_url" -o /opt/quarto.deb && \
  dpkg -i /opt/quarto.deb && \
  rm /opt/quarto.deb

LABEL org.opencontainers.image.description="Companion container for running component common create_task_readme"
LABEL org.opencontainers.image.created="2024-06-06T22:44:20+02:00"
LABEL org.opencontainers.image.source="https://github.com/openproblems-bio/openproblems-v2"
LABEL org.opencontainers.image.revision="2a8390cc524bcb2eba803536b02f6f083d388b9a"
LABEL org.opencontainers.image.version="dev"

VIASHDOCKER
}

# ViashDockerBuild: build a docker container
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-create_task_readme-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "create_task_readme dev" + exit + ;; + --task) + [ -n "$VIASH_PAR_TASK" ] && ViashError Bad arguments for option \'--task\': \'$VIASH_PAR_TASK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TASK="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --task. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --task=*) + [ -n "$VIASH_PAR_TASK" ] && ViashError Bad arguments for option \'--task=*\': \'$VIASH_PAR_TASK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TASK=$(ViashRemoveFlags "$1") + shift 1 + ;; + --task_dir) + [ -n "$VIASH_PAR_TASK_DIR" ] && ViashError Bad arguments for option \'--task_dir\': \'$VIASH_PAR_TASK_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TASK_DIR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --task_dir. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --task_dir=*) + [ -n "$VIASH_PAR_TASK_DIR" ] && ViashError Bad arguments for option \'--task_dir=*\': \'$VIASH_PAR_TASK_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TASK_DIR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --viash_yaml) + [ -n "$VIASH_PAR_VIASH_YAML" ] && ViashError Bad arguments for option \'--viash_yaml\': \'$VIASH_PAR_VIASH_YAML\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VIASH_YAML="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --viash_yaml. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --viash_yaml=*) + [ -n "$VIASH_PAR_VIASH_YAML" ] && ViashError Bad arguments for option \'--viash_yaml=*\': \'$VIASH_PAR_VIASH_YAML\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_VIASH_YAML=$(ViashRemoveFlags "$1") + shift 1 + ;; + --github_url) + [ -n "$VIASH_PAR_GITHUB_URL" ] && ViashError Bad arguments for option \'--github_url\': \'$VIASH_PAR_GITHUB_URL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GITHUB_URL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --github_url. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --github_url=*) + [ -n "$VIASH_PAR_GITHUB_URL" ] && ViashError Bad arguments for option \'--github_url=*\': \'$VIASH_PAR_GITHUB_URL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GITHUB_URL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openproblems-bio/common/create_task_readme:dev' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openproblems-bio/common/create_task_readme:dev' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openproblems-bio/common/create_task_readme:dev'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openproblems-bio/common/create_task_readme:dev' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_TASK_DIR+x} ]; then + VIASH_PAR_TASK_DIR="src/tasks/${VIASH_PAR_TASK}" +fi +if [ -z ${VIASH_PAR_VIASH_YAML+x} ]; then + VIASH_PAR_VIASH_YAML="_viash.yaml" +fi +if [ -z ${VIASH_PAR_GITHUB_URL+x} ]; then + VIASH_PAR_GITHUB_URL="https://github.com/openproblems-bio/openproblems-v2/tree/main/" +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="src/tasks/${VIASH_PAR_TASK}/README.md" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_TASK_DIR" ] && [ ! -e "$VIASH_PAR_TASK_DIR" ]; then + ViashError "Input file '$VIASH_PAR_TASK_DIR' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_VIASH_YAML" ] && [ ! -e "$VIASH_PAR_VIASH_YAML" ]; then + ViashError "Input file '$VIASH_PAR_VIASH_YAML' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_TASK_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TASK_DIR")" ) + VIASH_PAR_TASK_DIR=$(ViashAutodetectMount "$VIASH_PAR_TASK_DIR") +fi +if [ ! -z "$VIASH_PAR_VIASH_YAML" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_VIASH_YAML")" ) + VIASH_PAR_VIASH_YAML=$(ViashAutodetectMount "$VIASH_PAR_VIASH_YAML") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openproblems-bio/common/create_task_readme:dev "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openproblems-bio/common/create_task_readme:dev)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openproblems-bio/common/create_task_readme:dev +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-create_task_readme-XXXXXX").R +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +library(rlang, quietly = TRUE, warn.conflicts = FALSE) +library(purrr, quietly = TRUE, warn.conflicts = FALSE) +library(dplyr, quietly = TRUE, warn.conflicts = FALSE) + +## VIASH START +# The following code has been auto-generated by Viash. +# treat warnings as errors +.viash_orig_warn <- options(warn = 2) + +par <- list( + "task" = $( if [ ! 
-z ${VIASH_PAR_TASK+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_TASK" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "task_dir" = $( if [ ! -z ${VIASH_PAR_TASK_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_TASK_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "viash_yaml" = $( if [ ! -z ${VIASH_PAR_VIASH_YAML+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_VIASH_YAML" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "github_url" = $( if [ ! -z ${VIASH_PAR_GITHUB_URL+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_GITHUB_URL" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ) +) +meta <- list( + "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_b" = $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kb" = $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pb" = $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ) +) +dep <- list( + +) + + +# restore original warn setting +options(.viash_orig_warn) +rm(.viash_orig_warn) + +## VIASH END + +if (is.null(par\$task) && is.null(par\$task_dir)) { + stop("Either 'task' or 'task_dir' must be provided") +} +if (is.null(par\$viash_yaml)) { + stop("Argument 'viash_yaml' must be provided") +} +if (is.null(par\$output)) { + stop("Argument 'output' must be provided") +} + +# import helper function +source(paste0(meta["resources_dir"], "/read_and_merge_yaml.R")) +source(paste0(meta["resources_dir"], "/strip_margin.R")) +source(paste0(meta["resources_dir"], "/read_api_files.R")) + +cat("Read task info\\n") +task_api <- read_task_api(par[["task_dir"]]) + +# determine ordering +root <- .task_graph_get_root(task_api) + +r_graph <- render_task_graph(task_api, root) + +cat("Render API details\\n") +order <- names(igraph::bfs(task_api\$task_graph, root)\$order) +r_details <- 
map_chr( + order, + function(file_name) { + if (file_name %in% names(task_api\$comp_specs)) { + render_component(task_api\$comp_specs[[file_name]]) + } else { + render_file(task_api\$file_specs[[file_name]]) + } + } +) + +cat("Render authors\\n") +authors_str <- + if (nrow(task_api\$authors) > 0) { + paste0( + "\\n## Authors & contributors\\n\\n", + task_api\$authors %>% knitr::kable() %>% paste(collapse = "\\n"), + "\\n" + ) + } else { + "" + } +readme_str <- + if (is.null(task_api\$task_info\$readme) || is.na(task_api\$task_info\$readme)) { + "" + } else { + paste0( + "\\n## README\\n\\n", + task_api\$task_info\$readme, + "\\n" + ) + } + +cat("Generate qmd content\\n") +relative_path <- par[["task_dir"]] %>% + gsub(paste0(dirname(par[["viash_yaml"]]), "/*"), "", .) %>% + gsub("/*\$", "", .) +source_url <- paste0(par[["github_url"]], relative_path) +qmd_content <- strip_margin(glue::glue(" + §--- + §title: \\"{task_api\$task_info\$label}\\" + §format: gfm + §--- + § + § + § + §{task_api\$task_info\$summary} + § + §Path to source: [\`{relative_path}\`]({source_url}) + § + §{readme_str} + § + §## Motivation + § + §{task_api\$task_info\$motivation} + § + §## Description + § + §{task_api\$task_info\$description} + §{authors_str} + §## API + § + §{r_graph} + § + §{paste(r_details, collapse = '\\n\\n')} + § + §"), symbol = "§") + +cat("Write README.qmd to file\\n") +qmd_file <- tempfile( + pattern = "README_", + fileext = ".qmd", + tmpdir = meta\$temp_dir +) + +if (!dir.exists(meta\$temp_dir)) { + dir.create(meta\$temp_dir, recursive = TRUE) +} +writeLines(qmd_content, qmd_file) + +cat("Render README.qmd to README.md\\n") +out <- processx::run( + command = "quarto", + args = c("render", qmd_file, "--output", "-"), + echo = TRUE +) + +writeLines(out\$stdout, par\$output) +VIASHMAIN +Rscript "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! 
-z "$VIASH_PAR_TASK_DIR" ]; then + VIASH_PAR_TASK_DIR=$(ViashStripAutomount "$VIASH_PAR_TASK_DIR") +fi +if [ ! -z "$VIASH_PAR_VIASH_YAML" ]; then + VIASH_PAR_VIASH_YAML=$(ViashStripAutomount "$VIASH_PAR_VIASH_YAML") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/src/helper_functions/read_and_merge_yaml.R b/create_task_readme/read_and_merge_yaml.R similarity index 100% rename from src/helper_functions/read_and_merge_yaml.R rename to create_task_readme/read_and_merge_yaml.R diff --git a/src/helper_functions/read_api_files.R b/create_task_readme/read_api_files.R similarity index 100% rename from src/helper_functions/read_api_files.R rename to create_task_readme/read_api_files.R diff --git a/src/helper_functions/strip_margin.R b/create_task_readme/strip_margin.R similarity index 100% rename from src/helper_functions/strip_margin.R rename to create_task_readme/strip_margin.R diff --git a/helper_functions/subset_anndata.py b/helper_functions/subset_anndata.py new file mode 100644 index 0000000..80bd160 --- /dev/null +++ b/helper_functions/subset_anndata.py @@ -0,0 +1,83 @@ +"""Helper functions related to subsetting AnnData objects based on the file format +specifications in the .config.vsh.yaml and slot mapping overrides.""" + +def 
read_config_slots_info(config_file, slot_mapping = {}): + """Read the .config.vsh.yaml to find out which output slots need to be copied to which output file. + + Arguments: + config_file -- Path to the .config.vsh.yaml file (required). + slot_mapping -- Which slots to retain. Must be a dictionary whose keys are the names + of the AnnData structs, and values is another dictionary with destination value + names as keys and source value names as values. + Example of slot_mapping: + ``` + slot_mapping = { + "layers": { + "counts": par["layer_counts"], + }, + "obs": { + "cell_type": par["obs_cell_type"], + "batch": par["obs_batch"], + } + } + ``` + """ + import yaml + import re + + # read output spec from yaml + with open(config_file, "r") as object_name: + config = yaml.safe_load(object_name) + + output_struct_slots = {} + + # fetch info on which slots should be copied to which file + for arg in config["functionality"]["arguments"]: + # argument is an output file with a slot specification + if arg["direction"] == "output" and arg.get("info", {}).get("slots"): + object_name = re.sub("--", "", arg["name"]) + + struct_slots = arg['info']['slots'] + out = {} + for (struct, slots) in struct_slots.items(): + out_struct = {} + for slot in slots: + # if slot_mapping[struct][slot['name']] exists, use that as the source slot name + # otherwise use slot['name'] + source_slot = slot_mapping.get(struct, {}).get(slot["name"], slot["name"]) + out_struct[slot["name"]] = source_slot + out[struct] = out_struct + + output_struct_slots[object_name] = out + + return output_struct_slots + +# create new anndata objects according to api spec +def subset_anndata(adata, slot_info): + """Create new anndata object according to slot info specifications. + + Arguments: + adata -- An AnnData object to subset (required) + slot_info -- Which slots to retain, typically one of the items in the output of read_config_slots_info. 
+ Must be a dictionary whose keys are the names of the AnnData structs, and values is another + dictionary with destination value names as keys and source value names as values. + """ + import pandas as pd + import anndata as ad + + structs = ["layers", "obs", "var", "uns", "obsp", "obsm", "varp", "varm"] + kwargs = {} + + for struct in structs: + slot_mapping = slot_info.get(struct, {}) + data = {dest : getattr(adata, struct)[src] for (dest, src) in slot_mapping.items()} + if len(data) > 0: + if struct in ['obs', 'var']: + data = pd.concat(data, axis=1) + kwargs[struct] = data + elif struct in ['obs', 'var']: + # if no columns need to be copied, we still need an 'obs' and a 'var' + # to help determine the shape of the adata + kwargs[struct] = getattr(adata, struct).iloc[:,[]] + + return ad.AnnData(**kwargs) \ No newline at end of file diff --git a/library.bib b/library.bib new file mode 100644 index 0000000..313bfff --- /dev/null +++ b/library.bib @@ -0,0 +1,1722 @@ +@misc{10x2018pbmc, + title = {1k PBMCs from a Healthy Donor (v3 chemistry)}, + author = {{10x Genomics}}, + year = {2018}, + url = {https://www.10xgenomics.com/resources/datasets/1-k-pbm-cs-from-a-healthy-donor-v-3-chemistry-3-standard-3-0-0} +} + + +@misc{10x2019pbmc, + title = {5k Peripheral Blood Mononuclear Cells (PBMCs) from a Healthy Donor with a Panel of TotalSeq-B Antibodies (v3 chemistry)}, + author = {{10x Genomics}}, + year = {2019}, + url = {https://www.10xgenomics.com/resources/datasets/5-k-peripheral-blood-mononuclear-cells-pbm-cs-from-a-healthy-donor-with-cell-surface-proteins-v-3-chemistry-3-1-standard-3-1-0} +} + + +@article{agostinis2022newwave, + doi = {10.1093/bioinformatics/btac149}, + url = {https://doi.org/10.1093/bioinformatics/btac149}, + year = {2022}, + month = {Mar.}, + publisher = {Oxford University Press ({OUP})}, + volume = {38}, + number = {9}, + pages = {2648--2650}, + author = {Federico Agostinis and Chiara Romualdi and Gabriele Sales and Davide Risso}, + editor = 
{Yann Ponty}, + title = {NewWave: a scalable R/Bioconductor package for the dimensionality reduction and batch effect removal of single-cell {RNA}-seq data}, + journal = {Bioinformatics} +} + + +@article{agrawal2021mde, + title = {Minimum-Distortion Embedding}, + author = {Akshay Agrawal and Alnur Ali and Stephen Boyd}, + year = {2021}, + journal = {Foundations and Trends{\textregistered} in Machine Learning}, + publisher = {Now Publishers}, + volume = {14}, + number = {3}, + pages = {211--378}, + doi = {10.1561/2200000090}, + url = {https://doi.org/10.1561/2200000090} +} + + +@article{aliee2021autogenes, + title = {{AutoGeneS}: Automatic gene selection using multi-objective optimization for {RNA}-seq deconvolution}, + author = {Hananeh Aliee and Fabian J. Theis}, + year = {2021}, + month = {Jul.}, + journal = {Cell Systems}, + publisher = {Elsevier {BV}}, + volume = {12}, + number = {7}, + pages = {706--715.e4}, + doi = {10.1016/j.cels.2021.05.006}, + url = {https://doi.org/10.1016/j.cels.2021.05.006} +} + + +@inproceedings{amelio2015normalized, + doi = {10.1145/2808797.2809344}, + url = {https://doi.org/10.1145/2808797.2809344}, + year = {2015}, + month = {Aug.}, + publisher = {{ACM}}, + author = {Alessia Amelio and Clara Pizzuti}, + title = {Is Normalized Mutual Information a Fair Measure for Comparing Community Detection Methods?}, + booktitle = {Proceedings of the 2015 {IEEE}/{ACM} International Conference on Advances in Social Networks Analysis and Mining 2015} +} + + +@article{andersson2020single, + title = {Single-cell and spatial transcriptomics enables probabilistic inference of cell type topography}, + author = {Alma Andersson and Joseph Bergenstr{\aa}hle and Michaela Asp and Ludvig Bergenstr{\aa}hle and Aleksandra Jurek and Jos{\'{e}} Fern{\'{a}}ndez Navarro and Joakim Lundeberg}, + year = {2020}, + month = {Oct.}, + journal = {Communications Biology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {3}, + number = {1}, + doi = 
{10.1038/s42003-020-01247-y}, + url = {https://doi.org/10.1038/s42003-020-01247-y} +} + + +@string{apr = {Apr.}} + + +@string{aug = {Aug.}} + + +@article{batson2019molecular, + title = {Molecular Cross-Validation for Single-Cell RNA-seq}, + author = {Batson, Joshua and Royer, Lo{\"\i}c and Webber, James}, + year = {2019}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/786269}, + url = {https://www.biorxiv.org/content/early/2019/09/30/786269}, + elocation-id = {786269}, + eprint = {https://www.biorxiv.org/content/early/2019/09/30/786269.full.pdf} +} + + +@article{biancalani2021deep, + title = {Deep learning and alignment of spatially resolved single-cell transcriptomes with Tangram}, + author = {Tommaso Biancalani and Gabriele Scalia and Lorenzo Buffoni and Raghav Avasthi and Ziqing Lu and Aman Sanger and Neriman Tokcan and Charles R. Vanderburg and {\AA}sa Segerstolpe and Meng Zhang and Inbal Avraham-Davidi and Sanja Vickovic and Mor Nitzan and Sai Ma and Ayshwarya Subramanian and Michal Lipinski and Jason Buenrostro and Nik Bear Brown and Duccio Fanelli and Xiaowei Zhuang and Evan Z. Macosko and Aviv Regev}, + year = {2021}, + month = {Oct.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {18}, + number = {11}, + pages = {1352--1362}, + doi = {10.1038/s41592-021-01264-7}, + url = {https://doi.org/10.1038/s41592-021-01264-7} +} + + +@article{bland2000odds, + title = {Statistics Notes: The odds ratio}, + author = {J. M. 
Bland}, + year = {2000}, + month = {May}, + journal = {{BMJ}}, + publisher = {{BMJ}}, + volume = {320}, + number = {7247}, + pages = {1468--1468}, + doi = {10.1136/bmj.320.7247.1468}, + url = {https://doi.org/10.1136/bmj.320.7247.1468} +} + + +@article{breiman2001random, + doi = {10.1023/a:1010933404324}, + url = {https://doi.org/10.1023/a:1010933404324}, + year = {2001}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {45}, + number = {1}, + pages = {5--32}, + author = {Leo Breiman}, + journal = {Machine Learning} +} + + +@article{bttner2018test, + title = {A test metric for assessing single-cell {RNA}-seq batch correction}, + author = {Maren B\"{u}ttner and Zhichao Miao and F. Alexander Wolf and Sarah A. Teichmann and Fabian J. Theis}, + year = {2018}, + month = {Dec.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {16}, + number = {1}, + pages = {43--49}, + doi = {10.1038/s41592-018-0254-1}, + url = {https://doi.org/10.1038/s41592-018-0254-1} +} + + +@article{cabello2020singlecellsignalr, + title = {{SingleCellSignalR}: inference of intercellular networks from single-cell transcriptomics}, + author = {Simon Cabello-Aguilar and M{\'{e}}lissa Alame and Fabien Kon-Sun-Tack and Caroline Fau and Matthieu Lacroix and Jacques Colinge}, + year = {2020}, + month = {Mar.}, + journal = {Nucleic Acids Research}, + publisher = {Oxford University Press ({OUP})}, + volume = {48}, + number = {10}, + pages = {e55--e55}, + doi = {10.1093/nar/gkaa183}, + url = {https://doi.org/10.1093/nar/gkaa183} +} + + +@article{cable2021robust, + title = {Robust decomposition of cell type mixtures in spatial transcriptomics}, + author = {Dylan M. Cable and Evan Murray and Luli S. Zou and Aleksandrina Goeva and Evan Z. Macosko and Fei Chen and Rafael A. 
Irizarry}, + year = {2021}, + month = {Feb.}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {40}, + number = {4}, + pages = {517--526}, + doi = {10.1038/s41587-021-00830-w}, + url = {https://doi.org/10.1038/s41587-021-00830-w} +} + + +@misc{cannoodt2021viashfromscripts, + doi = {10.48550/ARXIV.2110.11494}, + url = {https://arxiv.org/abs/2110.11494}, + author = {Cannoodt, Robrecht and Cannoodt, Hendrik and Van de Kerckhove, Eric and Boschmans, Andy and De Maeyer, Dries and Verbeiren, Toni}, + keywords = {Software Engineering (cs.SE), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {Viash: from scripts to pipelines}, + publisher = {arXiv}, + year = {2021}, + copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International} +} + + +@article{cao2018joint, + title = {Joint profiling of chromatin accessibility and gene expression in thousands of single cells}, + author = {Junyue Cao and Darren A. Cusanovich and Vijay Ramani and Delasa Aghamirzaie and Hannah A. Pliner and Andrew J. Hill and Riza M. Daza and Jose L. McFaline-Figueroa and Jonathan S. Packer and Lena Christiansen and Frank J. Steemers and Andrew C. Adey and Cole Trapnell and Jay Shendure}, + year = {2018}, + month = {Sept.}, + journal = {Science}, + publisher = {American Association for the Advancement of Science ({AAAS})}, + volume = {361}, + number = {6409}, + pages = {1380--1385}, + doi = {10.1126/science.aau0730}, + url = {https://doi.org/10.1126/science.aau0730} +} + + +@article{cao2020human, + title = {A human cell atlas of fetal gene expression}, + author = {Junyue Cao and Diana R. O'Day and Hannah A. Pliner and Paul D. Kingsley and Mei Deng and Riza M. Daza and Michael A. Zager and Kimberly A. Aldinger and Ronnie Blecher-Gonen and Fan Zhang and Malte Spielmann and James Palis and Dan Doherty and Frank J. Steemers and Ian A. 
Glass and Cole Trapnell and Jay Shendure}, + year = {2020}, + month = {Nov.}, + journal = {Science}, + publisher = {American Association for the Advancement of Science ({AAAS})}, + volume = {370}, + number = {6518}, + doi = {10.1126/science.aba7721}, + url = {https://doi.org/10.1126/science.aba7721} +} + + +@article{chai2014root, + doi = {10.5194/gmdd-7-1525-2014}, + url = {https://doi.org/10.5194/gmdd-7-1525-2014}, + year = {2014}, + month = {Feb.}, + publisher = {Copernicus {GmbH}}, + author = {T. Chai and R. R. Draxler}, + title = {Root mean square error ({RMSE}) or mean absolute error ({MAE})?} +} + + +@article{chazarragil2021flexible, + doi = {10.1093/nar/gkab004}, + url = {https://doi.org/10.1093/nar/gkab004}, + year = {2021}, + month = {Feb.}, + publisher = {Oxford University Press ({OUP})}, + volume = {49}, + number = {7}, + pages = {e42--e42}, + author = {Ruben Chazarra-Gil and Stijn van~Dongen and Vladimir~Yu Kiselev and Martin Hemberg}, + title = {Flexible comparison of batch correction methods for single-cell {RNA}-seq using {BatchBench}}, + journal = {Nucleic Acids Research} +} + + +@article{chen2009local, + title = {Local Multidimensional Scaling for Nonlinear Dimension Reduction, Graph Drawing, and Proximity Analysis}, + author = {Lisha Chen and Andreas Buja}, + year = {2009}, + month = {Mar.}, + journal = {Journal of the American Statistical Association}, + publisher = {Informa {UK} Limited}, + volume = {104}, + number = {485}, + pages = {209--219}, + doi = {10.1198/jasa.2009.0111}, + url = {https://doi.org/10.1198/jasa.2009.0111} +} + + +@inproceedings{chen2016xgboost, + title = {{XGBoost}}, + author = {Tianqi Chen and Carlos Guestrin}, + year = {2016}, + month = {Aug.}, + booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining}, + publisher = {{ACM}}, + doi = {10.1145/2939672.2939785}, + url = {https://doi.org/10.1145/2939672.2939785} +} + + +@article{cichocki2009fast, + title = {Fast 
Local Algorithms for Large Scale Nonnegative Matrix and Tensor Factorizations}, + author = {Andrzej Cichocki and Anh-Huy Phan}, + year = {2009}, + journal = {{IEICE} Transactions on Fundamentals of Electronics, Communications and Computer Sciences}, + publisher = {Institute of Electronics, Information and Communications Engineers ({IEICE})}, + volume = {E92-A}, + number = {3}, + pages = {708--721}, + doi = {10.1587/transfun.e92.a.708}, + url = {https://doi.org/10.1587/transfun.e92.a.708} +} + + +@article{coifman2006diffusion, + title = {Diffusion maps}, + author = {Ronald R. Coifman and St{\'{e}}phane Lafon}, + year = {2006}, + month = {Jul.}, + journal = {Applied and Computational Harmonic Analysis}, + publisher = {Elsevier {BV}}, + volume = {21}, + number = {1}, + pages = {5--30}, + doi = {10.1016/j.acha.2006.04.006}, + url = {https://doi.org/10.1016/j.acha.2006.04.006} +} + + +@article{cover1967nearest, + title = {Nearest neighbor pattern classification}, + author = {T. Cover and P. Hart}, + year = {1967}, + month = {Jan}, + journal = {{IEEE} Transactions on Information Theory}, + publisher = {Institute of Electrical and Electronics Engineers ({IEEE})}, + volume = {13}, + number = {1}, + pages = {21--27}, + doi = {10.1109/tit.1967.1053964}, + url = {https://doi.org/10.1109/tit.1967.1053964} +} + + +@inproceedings{davis2006prauc, + title = {The relationship between Precision-Recall and {ROC} curves}, + author = {Jesse Davis and Mark Goadrich}, + year = {2006}, + booktitle = {Proceedings of the 23rd international conference on Machine learning - {ICML} {\textquotesingle}06}, + publisher = {{ACM} Press}, + doi = {10.1145/1143844.1143874}, + url = {https://doi.org/10.1145/1143844.1143874} +} + + +@string{dec = {Dec.}} + +@article{Demetci2020scot, + author = {Pinar Demetci and Rebecca Santorella and Bj{\"o}rn Sandstede and William Stafford Noble and Ritambhara Singh}, + title = {Gromov-Wasserstein optimal transport to align single-cell multi-omics data}, + 
elocation-id = {2020.04.28.066787}, + year = {2020}, + doi = {10.1101/2020.04.28.066787}, + publisher = {Cold Spring Harbor Laboratory}, + URL = {https://www.biorxiv.org/content/early/2020/11/11/2020.04.28.066787}, + eprint = {https://www.biorxiv.org/content/early/2020/11/11/2020.04.28.066787.full.pdf}, + journal = {bioRxiv} +} + + +@article{dimitrov2022comparison, + title = {Comparison of methods and resources for cell-cell communication inference from single-cell {RNA}-Seq data}, + author = {Daniel Dimitrov and D{\'{e}}nes T\"{u}rei and Martin Garrido-Rodriguez and Paul L. Burmedi and James S. Nagai and Charlotte Boys and Ricardo O. Ramirez Flores and Hyojin Kim and Bence Szalai and Ivan G. Costa and Alberto Valdeolivas and Aur{\'{e}}lien Dugourd and Julio Saez-Rodriguez}, + year = {2022}, + month = {Jun.}, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {13}, + number = {1}, + doi = {10.1038/s41467-022-30755-0}, + url = {https://doi.org/10.1038/s41467-022-30755-0} +} + + +@article{donoho2017yearsdatascience, + doi = {10.1080/10618600.2017.1384734}, + url = {https://doi.org/10.1080/10618600.2017.1384734}, + year = {2017}, + month = {Oct.}, + publisher = {Informa {UK} Limited}, + volume = {26}, + number = {4}, + pages = {745--766}, + author = {David Donoho}, + title = {50 Years of Data Science}, + journal = {Journal of Computational and Graphical Statistics} +} + + +@article{efremova2020cellphonedb, + title = {{CellPhoneDB}: inferring cell{\textendash}cell communication from combined expression of multi-subunit ligand{\textendash}receptor complexes}, + author = {Mirjana Efremova and Miquel Vento-Tormo and Sarah A. 
Teichmann and Roser Vento-Tormo}, + year = {2020}, + month = {Feb.}, + journal = {Nature Protocols}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {15}, + number = {4}, + pages = {1484--1506}, + doi = {10.1038/s41596-020-0292-x}, + url = {https://doi.org/10.1038/s41596-020-0292-x} +} + + +@article{emmons2016analysis, + title = {Analysis of Network Clustering Algorithms and Cluster Quality Metrics at Scale}, + volume = {11}, + ISSN = {1932-6203}, + url = {http://dx.doi.org/10.1371/journal.pone.0159161}, + doi = {10.1371/journal.pone.0159161}, + number = {7}, + journal = {PLOS ONE}, + publisher = {Public Library of Science (PLoS)}, + author = {Emmons, Scott and Kobourov, Stephen and Gallant, Mike and B\"{o}rner, Katy}, + editor = {Dovrolis, Constantine}, + year = {2016}, + month = {Jul.}, + pages = {e0159161} +} + + +@article{eraslan2019single, + title = {Single-cell {RNA}-seq denoising using a deep count autoencoder}, + author = {G\"{o}kcen Eraslan and Lukas M. Simon and Maria Mircea and Nikola S. Mueller and Fabian J. Theis}, + year = {2019}, + month = {Jan}, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {10}, + number = {1}, + doi = {10.1038/s41467-018-07931-2}, + url = {https://doi.org/10.1038/s41467-018-07931-2} +} + + +@string{feb = {Feb.}} + + +@article{fix1989discriminatory, + doi = {10.2307/1403797}, + url = {https://doi.org/10.2307/1403797}, + year = {1989}, + month = {Dec.}, + publisher = {{JSTOR}}, + volume = {57}, + number = {3}, + pages = {238}, + author = {Evelyn Fix and J. L. Hodges}, + title = {Discriminatory Analysis. Nonparametric Discrimination: Consistency Properties}, + journal = {International Statistical Review / Revue Internationale de Statistique} +} + + +@article{gower1975generalized, + title = {Generalized procrustes analysis}, + author = {J. C. 
Gower}, + year = {1975}, + month = {Mar.}, + journal = {Psychometrika}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {40}, + number = {1}, + pages = {33--51}, + doi = {10.1007/bf02291478}, + url = {https://doi.org/10.1007/bf02291478} +} + + +@article{grandini2020metrics, + title = {Metrics for Multi-Class Classification: an Overview}, + author = {Grandini, Margherita and Bagli, Enrico and Visani, Giorgio}, + year = {2020}, + journal = {arXiv}, + publisher = {Cornell University}, + doi = {10.48550/arxiv.2008.05756}, + url = {https://arxiv.org/abs/2008.05756}, + copyright = {arXiv.org perpetual, non-exclusive license}, + keywords = {Machine Learning (stat.ML), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences} +} + + +@article{granja2021archr, + title = {{ArchR} is a scalable software package for integrative single-cell chromatin accessibility analysis}, + author = {Jeffrey M. Granja and M. Ryan Corces and Sarah E. Pierce and S. Tansu Bagdatli and Hani Choudhry and Howard Y. Chang and William J. 
Greenleaf}, + year = {2021}, + month = {Feb.}, + journal = {Nature Genetics}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {53}, + number = {3}, + pages = {403--411}, + doi = {10.1038/s41588-021-00790-6}, + url = {https://doi.org/10.1038/s41588-021-00790-6} +} + + +@article{grn2014validation, + title = {Validation of noise models for single-cell transcriptomics}, + author = {Dominic Gr\"{u}n and Lennart Kester and Alexander van Oudenaarden}, + year = {2014}, + month = {Apr.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {11}, + number = {6}, + pages = {637--640}, + doi = {10.1038/nmeth.2930}, + url = {https://doi.org/10.1038/nmeth.2930} +} + + +@article{haghverdi2018batch, + title = {Batch effects in single-cell {RNA}-sequencing data are corrected by matching mutual nearest neighbors}, + author = {Laleh Haghverdi and Aaron T L Lun and Michael D Morgan and John C Marioni}, + year = {2018}, + month = {Apr.}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {36}, + number = {5}, + pages = {421--427}, + doi = {10.1038/nbt.4091}, + url = {https://doi.org/10.1038/nbt.4091} +} + + +@article{hammarlund2018cengen, + title = {The {CeNGEN} Project: The Complete Gene Expression Map of an Entire Nervous System}, + author = {Marc Hammarlund and Oliver Hobert and David M. Miller and Nenad Sestan}, + year = {2018}, + month = {Aug.}, + journal = {Neuron}, + publisher = {Elsevier {BV}}, + volume = {99}, + number = {3}, + pages = {430--433}, + doi = {10.1016/j.neuron.2018.07.042}, + url = {https://doi.org/10.1016/j.neuron.2018.07.042} +} + + +@article{hansen2012removing, + title = {Adjusting batch effects in microarray expression data using empirical Bayes methods}, + author = {W. 
Evan Johnson and Cheng Li and Ariel Rabinovic}, + year = {2006}, + month = {Apr.}, + journal = {Biostatistics}, + publisher = {Oxford University Press ({OUP})}, + volume = {8}, + number = {1}, + pages = {118--127}, + doi = {10.1093/biostatistics/kxj037}, + url = {https://doi.org/10.1093/biostatistics/kxj037} +} + + +@article{hao2021integrated, + title = {Integrated analysis of multimodal single-cell data}, + author = {Yuhan Hao and Stephanie Hao and Erica Andersen-Nissen and William M. Mauck and Shiwei Zheng and Andrew Butler and Maddie J. Lee and Aaron J. Wilk and Charlotte Darby and Michael Zager and Paul Hoffman and Marlon Stoeckius and Efthymia Papalexi and Eleni P. Mimitou and Jaison Jain and Avi Srivastava and Tim Stuart and Lamar M. Fleming and Bertrand Yeung and Angela J. Rogers and Juliana M. McElrath and Catherine A. Blish and Raphael Gottardo and Peter Smibert and Rahul Satija}, + year = {2021}, + month = {Jun.}, + journal = {Cell}, + publisher = {Elsevier {BV}}, + volume = {184}, + number = {13}, + pages = {3573--3587.e29}, + doi = {10.1016/j.cell.2021.04.048}, + url = {https://doi.org/10.1016/j.cell.2021.04.048} +} + + +@article{hie2019efficient, + title = {Efficient integration of heterogeneous single-cell transcriptomes using Scanorama}, + author = {Brian Hie and Bryan Bryson and Bonnie Berger}, + year = {2019}, + month = {May}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {37}, + number = {6}, + pages = {685--691}, + doi = {10.1038/s41587-019-0113-3}, + url = {https://doi.org/10.1038/s41587-019-0113-3} +} + + +@article{hinton1989connectionist, + title = {Connectionist learning procedures}, + author = {Geoffrey E. 
Hinton}, + year = {1989}, + month = {Sept.}, + journal = {Artificial Intelligence}, + publisher = {Elsevier {BV}}, + volume = {40}, + number = {1-3}, + pages = {185--234}, + doi = {10.1016/0004-3702(89)90049-0}, + url = {https://doi.org/10.1016/0004-3702(89)90049-0} +} + + +@book{hosmer2013applied, + title = {Applied logistic regression}, + author = {Hosmer Jr, D.W. and Lemeshow, S. and Sturdivant, R.X.}, + year = {2013}, + publisher = {John Wiley \& Sons}, + volume = {398} +} + + +@article{hou2019scmatch, + title = {{scMatch}: a single-cell gene expression profile annotation tool using reference datasets}, + author = {Rui Hou and Elena Denisenko and Alistair R R Forrest}, + year = {2019}, + month = {Apr.}, + journal = {Bioinformatics}, + publisher = {Oxford University Press ({OUP})}, + volume = {35}, + number = {22}, + pages = {4688--4695}, + doi = {10.1093/bioinformatics/btz292}, + url = {https://doi.org/10.1093/bioinformatics/btz292}, + editor = {Janet Kelso} +} + + +@article{hou2020predicting, + title = {Predicting cell-to-cell communication networks using {NATMI}}, + author = {Rui Hou and Elena Denisenko and Huan Ting Ong and Jordan A. Ramilowski and Alistair R. R. Forrest}, + year = {2020}, + month = {Oct.}, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {11}, + number = {1}, + doi = {10.1038/s41467-020-18873-z}, + url = {https://doi.org/10.1038/s41467-020-18873-z} +} + + +@article{hou2020systematic, + title = {A systematic evaluation of single-cell {RNA}-sequencing imputation methods}, + author = {Wenpin Hou and Zhicheng Ji and Hongkai Ji and Stephanie C. 
Hicks}, + year = {2020}, + month = {Aug.}, + journal = {Genome Biology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {21}, + number = {1}, + doi = {10.1186/s13059-020-02132-x}, + url = {https://doi.org/10.1186/s13059-020-02132-x} +} + + +@article{hubert1985comparing, + doi = {10.1007/bf01908075}, + url = {https://doi.org/10.1007/bf01908075}, + year = {1985}, + month = {Dec.}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {2}, + number = {1}, + pages = {193--218}, + author = {Lawrence Hubert and Phipps Arabie}, + title = {Comparing partitions}, + journal = {Journal of Classification} +} + + +@string{jan = {Jan}} + + +@string{jul = {Jul.}} + + +@string{jun = {Jun.}} + + +@article{kendall1938new, + doi = {10.1093/biomet/30.1-2.81}, + url = {https://doi.org/10.1093/biomet/30.1-2.81}, + year = {1938}, + month = {Jun.}, + publisher = {Oxford University Press ({OUP})}, + volume = {30}, + number = {1-2}, + pages = {81--93}, + author = {M. G. KENDALL}, + title = {A new measure of rank correlation}, + journal = {Biometrika} +} + + +@article{kiselev2019challenges, + title = {Challenges in unsupervised clustering of single-cell {RNA}-seq data}, + author = {Vladimir Yu Kiselev and Tallulah S. Andrews and Martin Hemberg}, + year = {2019}, + month = {Jan}, + journal = {Nature Reviews Genetics}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {20}, + number = {5}, + pages = {273--282}, + doi = {10.1038/s41576-018-0088-9}, + url = {https://doi.org/10.1038/s41576-018-0088-9} +} + + +@article{kleshchevnikov2022cell2location, + title = {Cell2location maps fine-grained cell types in spatial transcriptomics}, + author = {Vitalii Kleshchevnikov and Artem Shmatko and Emma Dann and Alexander Aivazidis and Hamish W. 
King and Tong Li and Rasa Elmentaite and Artem Lomakin and Veronika Kedlian and Adam Gayoso and Mika Sarkin Jain and Jun Sung Park and Lauma Ramona and Elizabeth Tuck and Anna Arutyunyan and Roser Vento-Tormo and Moritz Gerstung and Louisa James and Oliver Stegle and Omer Ali Bayraktar}, + year = {2022}, + month = {Jan}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {40}, + number = {5}, + pages = {661--671}, + doi = {10.1038/s41587-021-01139-4}, + url = {https://doi.org/10.1038/s41587-021-01139-4} +} + + +@article{korsunsky2019fast, + title = {Fast, sensitive and accurate integration of single-cell data with Harmony}, + author = {Ilya Korsunsky and Nghia Millard and Jean Fan and Kamil Slowikowski and Fan Zhang and Kevin Wei and Yuriy Baglaenko and Michael Brenner and Po-ru Loh and Soumya Raychaudhuri}, + year = {2019}, + month = {Nov.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {16}, + number = {12}, + pages = {1289--1296}, + doi = {10.1038/s41592-019-0619-0}, + url = {https://doi.org/10.1038/s41592-019-0619-0} +} + + +@article{kraemer2018dimred, + title = {{dimRed} and {coRanking} - Unifying Dimensionality Reduction in R}, + author = {Guido Kraemer and Markus Reichstein and Miguel D. Mahecha}, + year = {2018}, + journal = {The R Journal}, + publisher = {The R Foundation}, + volume = {10}, + number = {1}, + pages = {342}, + doi = {10.32614/rj-2018-039}, + url = {https://doi.org/10.32614/rj-2018-039} +} + + +@article{kruskal1964mds, + title = {Multidimensional scaling by optimizing goodness of fit to a nonmetric hypothesis}, + author = {J. B.
Kruskal}, + year = {1964}, + month = {Mar.}, + journal = {Psychometrika}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {29}, + number = {1}, + pages = {1--27}, + doi = {10.1007/bf02289565}, + url = {https://doi.org/10.1007/bf02289565} +} + + +@article{lance2022multimodal, + title = {Multimodal single cell data integration challenge: results and lessons learned}, + author = {Lance, Christopher and Luecken, Malte D. and Burkhardt, Daniel B. and Cannoodt, Robrecht and Rautenstrauch, Pia and Laddach, Anna and Ubingazhibov, Aidyn and Cao, Zhi-Jie and Deng, Kaiwen and Khan, Sumeer and Liu, Qiao and Russkikh, Nikolay and Ryazantsev, Gleb and Ohler, Uwe and Pisco, Angela Oliveira and Bloom, Jonathan and Krishnaswamy, Smita and Theis, Fabian J.}, + year = {2022}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/2022.04.11.487796}, + url = {https://www.biorxiv.org/content/early/2022/04/12/2022.04.11.487796}, + elocation-id = {2022.04.11.487796}, + eprint = {https://www.biorxiv.org/content/early/2022/04/12/2022.04.11.487796.full.pdf} +} + + +@article{lance2024predicting, + title = {Predicting cellular profiles across modalities in longitudinal single-cell data: An Open Problems competition}, + author = {...}, + year = {2024}, + journal = {In preparation}, +} + + +@book{lawson1995solving, + title = {Solving Least Squares Problems}, + author = {Charles L. Lawson and Richard J. Hanson}, + year = {1995}, + month = {Jan}, + publisher = {Society for Industrial and Applied Mathematics}, + doi = {10.1137/1.9781611971217}, + url = {https://doi.org/10.1137/1.9781611971217} +} + + +@article{lee2009quality, + title = {Quality assessment of dimensionality reduction: Rank-based criteria}, + author = {John A.
Lee and Michel Verleysen}, + year = {2009}, + month = {Mar.}, + journal = {Neurocomputing}, + publisher = {Elsevier {BV}}, + volume = {72}, + number = {7-9}, + pages = {1431--1443}, + doi = {10.1016/j.neucom.2008.12.017}, + url = {https://doi.org/10.1016/j.neucom.2008.12.017} +} + + +@article{linderman2018zero, + title = {Zero-preserving imputation of scRNA-seq data using low-rank approximation}, + author = {Linderman, George C. and Zhao, Jun and Kluger, Yuval}, + year = {2018}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/397588}, + url = {https://www.biorxiv.org/content/early/2018/08/22/397588}, + elocation-id = {397588}, + eprint = {https://www.biorxiv.org/content/early/2018/08/22/397588.full.pdf} +} + + +@article{lopez2018deep, + title = {Deep generative modeling for single-cell transcriptomics}, + author = {Romain Lopez and Jeffrey Regier and Michael B. Cole and Michael I. Jordan and Nir Yosef}, + year = {2018}, + month = {Nov.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {15}, + number = {12}, + pages = {1053--1058}, + doi = {10.1038/s41592-018-0229-2}, + url = {https://doi.org/10.1038/s41592-018-0229-2} +} + + +@article{lopez2022destvi, + title = {{DestVI} identifies continuums of cell types in spatial transcriptomics data}, + author = {Romain Lopez and Baoguo Li and Hadas Keren-Shaul and Pierre Boyeau and Merav Kedmi and David Pilzer and Adam Jelinski and Ido Yofe and Eyal David and Allon Wagner and Can Ergen and Yoseph Addadi and Ofra Golani and Franca Ronchese and Michael I. 
Jordan and Ido Amit and Nir Yosef}, + year = {2022}, + month = {Apr.}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {40}, + number = {9}, + pages = {1360--1369}, + doi = {10.1038/s41587-022-01272-8}, + url = {https://doi.org/10.1038/s41587-022-01272-8} +} + + +@article{lotfollahi2020query, + title = {Query to reference single-cell integration with transfer learning}, + author = {Lotfollahi, Mohammad and Naghipourfar, Mohsen and Luecken, Malte D. and Khajavi, Matin and B{\"u}ttner, Maren and Avsec, Ziga and Misharin, Alexander V. and Theis, Fabian J.}, + year = {2020}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/2020.07.16.205997}, + url = {https://doi.org/10.1101/2020.07.16.205997}, + elocation-id = {2020.07.16.205997}, + eprint = {https://www.biorxiv.org/content/early/2020/07/16/2020.07.16.205997.full.pdf} +} + + +@article{luecken2022benchmarking, + title = {Benchmarking atlas-level data integration in single-cell genomics}, + author = {Malte D. Luecken and M. B\"{u}ttner and K. Chaichoompu and A. Danese and M. Interlandi and M. F. Mueller and D. C. Strobl and L. Zappia and M. Dugas and M. Colom{\'{e}}-Tatch{\'{e}} and Fabian J. Theis}, + year = {2021}, + month = {Dec.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {19}, + number = {1}, + pages = {41--50}, + doi = {10.1038/s41592-021-01336-8}, + url = {https://doi.org/10.1038/s41592-021-01336-8} +} + + +@article{lueks2011evaluate, + title = {How to Evaluate Dimensionality Reduction? 
- Improving the Co-ranking Matrix}, + author = {Lueks, Wouter and Mokbel, Bassam and Biehl, Michael and Hammer, Barbara}, + year = {2011}, + journal = {arXiv}, + doi = {10.48550/ARXIV.1110.3917}, + url = {https://arxiv.org/abs/1110.3917}, + copyright = {arXiv.org perpetual, non-exclusive license}, + keywords = {Machine Learning (cs.LG), Information Retrieval (cs.IR), FOS: Computer and information sciences, FOS: Computer and information sciences} +} + + +@misc{lun2019fastmnn, + title = {A description of the theory behind the fastMNN algorithm}, + author = {Lun, Aaron}, + year = {2019}, + url = {https://marionilab.github.io/FurtherMNN2018/theory/description.html} +} + + +@string{mar = {Mar.}} + + +@string{may = {May}} + + +@article{mcinnes2018umap, + title = {UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction}, + author = {McInnes, Leland and Healy, John and Melville, James}, + year = {2018}, + journal = {arXiv}, + publisher = {Cornell University}, + doi = {10.48550/arxiv.1802.03426}, + url = {https://arxiv.org/abs/1802.03426}, + copyright = {arXiv.org perpetual, non-exclusive license}, + keywords = {Machine Learning (stat.ML), Computational Geometry (cs.CG), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences} +} + + +@article{mereu2020benchmarking, + doi = {10.1038/s41587-020-0469-4}, + author = {Mereu, Elisabetta and Lafzi, Atefeh and Moutinho, Catia and Ziegenhain, Christoph and McCarthy, Davis J and Alvarez-Varela, Adrian and Batlle, Eduard and Sagar and Gruen, Dominic and Lau, Julia K and others}, + journal = {Nature biotechnology}, + number = {6}, + pages = {747--755}, + publisher = {Nature Publishing Group US New York}, + title = {Benchmarking single-cell {RNA}-sequencing protocols for cell atlas projects}, + volume = {38}, + year = {2020} +} + + +@inbook{miles2005rsquared, + title = {Encyclopedia of Statistics in Behavioral Science}, + author = {Jeremy Miles}, + year = {2005}, + 
month = {Oct.}, + publisher = {John Wiley {\&} Sons, Ltd}, + doi = {10.1002/0470013192.bsa526}, + url = {https://doi.org/10.1002/0470013192.bsa526}, + chapter = {{R-Squared}, Adjusted {R-Squared}} +} + + +@article{moon2019visualizing, + title = {Visualizing structure and transitions in high-dimensional biological data}, + author = {Kevin R. Moon and David van Dijk and Zheng Wang and Scott Gigante and Daniel B. Burkhardt and William S. Chen and Kristina Yim and Antonia van den Elzen and Matthew J. Hirn and Ronald R. Coifman and Natalia B. Ivanova and Guy Wolf and Smita Krishnaswamy}, + year = {2019}, + month = {Dec.}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {37}, + number = {12}, + pages = {1482--1492}, + doi = {10.1038/s41587-019-0336-3}, + url = {https://doi.org/10.1038/s41587-019-0336-3} +} + + +@article{narayan2021assessing, + title = {Assessing single-cell transcriptomic variability through density-preserving data visualization}, + author = {Ashwin Narayan and Bonnie Berger and Hyunghoon Cho}, + year = {2021}, + month = {Jan}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {39}, + number = {6}, + pages = {765--774}, + doi = {10.1038/s41587-020-00801-7}, + url = {https://doi.org/10.1038/s41587-020-00801-7} +} + + +@article{nestorowa2016single, + title = {A single-cell resolution map of mouse hematopoietic stem and progenitor cell differentiation}, + author = {Sonia Nestorowa and Fiona K. Hamey and Blanca Pijuan Sala and Evangelia Diamanti and Mairi Shepherd and Elisa Laurenti and Nicola K. Wilson and David G. 
Kent and Berthold G\"{o}ttgens}, + year = {2016}, + month = {Aug.}, + journal = {Blood}, + publisher = {American Society of Hematology}, + volume = {128}, + number = {8}, + pages = {e20--e31}, + doi = {10.1182/blood-2016-05-716480}, + url = {https://doi.org/10.1182/blood-2016-05-716480} +} + +@inproceedings{luecken2021neurips, + author = {Luecken, Malte and Burkhardt, Daniel and Cannoodt, Robrecht and Lance, Christopher and Agrawal, Aditi and Aliee, Hananeh and Chen, Ann and Deconinck, Louise and Detweiler, Angela and Granados, Alejandro and Huynh, Shelly and Isacco, Laura and Kim, Yang and Klein, Dominik and DE KUMAR, BONY and Kuppasani, Sunil and Lickert, Heiko and McGeever, Aaron and Melgarejo, Joaquin and Mekonen, Honey and Morri, Maurizio and M\"{u}ller, Michaela and Neff, Norma and Paul, Sheryl and Rieck, Bastian and Schneider, Kaylie and Steelman, Scott and Sterr, Michael and Treacy, Daniel and Tong, Alexander and Villani, Alexandra-Chloe and Wang, Guilin and Yan, Jia and Zhang, Ce and Pisco, Angela and Krishnaswamy, Smita and Theis, Fabian and Bloom, Jonathan M}, + booktitle = {Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks}, + editor = {J. Vanschoren and S. Yeung}, + pages = {}, + publisher = {Curran}, + title = {A sandbox for prediction and integration of DNA, RNA, and proteins in single cells}, + url = {https://datasets-benchmarks-proceedings.neurips.cc/paper_files/paper/2021/file/158f3069a435b314a80bdcb024f8e422-Paper-round2.pdf}, + volume = {1}, + year = {2021} +} + + +@string{nov = {Nov.}} + + +@string{oct = {Oct.}} + + +@article{olsson2016single, + title = {Single-cell analysis of mixed-lineage states leading to a binary cell fate choice}, + author = {Andre Olsson and Meenakshi Venkatasubramanian and Viren K. Chaudhri and Bruce J. Aronow and Nathan Salomonis and Harinder Singh and H. 
Leighton Grimes}, + year = {2016}, + month = {Aug.}, + journal = {Nature}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {537}, + number = {7622}, + pages = {698--702}, + doi = {10.1038/nature19348}, + url = {https://doi.org/10.1038/nature19348} +} + + +@misc{openproblems, + title = {Open Problems}, + author = {{Open Problems for Single Cell Analysis Consortium}}, + year = {2022}, + url = {https://openproblems.bio} +} + + +@article{pearson1895regression, + doi = {10.1098/rspl.1895.0041}, + title = {VII. Note on regression and inheritance in the case of two parents}, + author = {Pearson, Karl}, + journal = {Proceedings of the Royal Society of London}, + volume = {58}, + number = {347-352}, + pages = {240--242}, + year = {1895}, + publisher = {The Royal Society London} +} + + +@article{pearson1901pca, + title = {On lines and planes of closest fit to systems of points in space}, + author = {Karl Pearson}, + year = {1901}, + month = {Nov.}, + journal = {The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science}, + publisher = {Informa {UK} Limited}, + volume = {2}, + number = {11}, + pages = {559--572}, + doi = {10.1080/14786440109462720}, + url = {https://doi.org/10.1080/14786440109462720} +} + + +@article{pliner2019supervised, + title = {Supervised classification enables rapid annotation of cell atlases}, + author = {Hannah A.
Pliner and Jay Shendure and Cole Trapnell}, + year = {2019}, + month = {Sept.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {16}, + number = {10}, + pages = {983--986}, + doi = {10.1038/s41592-019-0535-3}, + url = {https://doi.org/10.1038/s41592-019-0535-3} +} + + +@article{polanski2020bbknn, + title = {{BBKNN}: fast batch alignment of single cell transcriptomes}, + author = {Krzysztof Pola{\'{n}}ski and Matthew D Young and Zhichao Miao and Kerstin B Meyer and Sarah A Teichmann and Jong-Eun Park}, + year = {2019}, + month = {Aug.}, + journal = {Bioinformatics}, + publisher = {Oxford University Press ({OUP})}, + doi = {10.1093/bioinformatics/btz625}, + url = {https://doi.org/10.1093/bioinformatics/btz625}, + editor = {Bonnie Berger} +} + + +@article{raredon2022computation, + title = {Computation and visualization of cell{\textendash}cell signaling topologies in single-cell systems data using Connectome}, + author = {Micha Sam Brickman Raredon and Junchen Yang and James Garritano and Meng Wang and Dan Kushnir and Jonas Christian Schupp and Taylor S. Adams and Allison M. Greaney and Katherine L. Leiby and Naftali Kaminski and Yuval Kluger and Andre Levchenko and Laura E. Niklason}, + year = {2022}, + month = {Mar.}, + journal = {Scientific Reports}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {12}, + number = {1}, + doi = {10.1038/s41598-022-07959-x}, + url = {https://doi.org/10.1038/s41598-022-07959-x} +} + + +@article{rodriques2019slide, + title = {Slide-seq: A scalable technology for measuring genome-wide expression at high spatial resolution}, + author = {Samuel G. Rodriques and Robert R. Stickels and Aleksandrina Goeva and Carly A. Martin and Evan Murray and Charles R. Vanderburg and Joshua Welch and Linlin M. Chen and Fei Chen and Evan Z. 
Macosko}, + year = {2019}, + month = {Mar.}, + journal = {Science}, + publisher = {American Association for the Advancement of Science ({AAAS})}, + volume = {363}, + number = {6434}, + pages = {1463--1467}, + doi = {10.1126/science.aaw1219}, + url = {https://doi.org/10.1126/science.aaw1219} +} + + +@InProceedings{santos2009on, + author = {Santos, Jorge M. and Embrechts, Mark}, + editor = {Alippi, Cesare and Polycarpou, Marios and Panayiotou, Christos and Ellinas, Georgios}, + title = {On the Use of the Adjusted Rand Index as a Metric for Evaluating Supervised Classification}, + booktitle = {Artificial Neural Networks -- ICANN 2009}, + year = {2009}, + publisher = {Springer Berlin Heidelberg}, + address = {Berlin, Heidelberg}, + pages = {175--184}, + isbn = {978-3-642-04277-5}, + doi = {10.1007/978-3-642-04277-5_18}, + url = {https://doi.org/10.1007/978-3-642-04277-5_18} +} + + +@article{sarkar2021separating, + title = {Separating measurement and expression models clarifies confusion in single-cell {RNA} sequencing analysis}, + author = {Abhishek Sarkar and Matthew Stephens}, + year = {2021}, + month = {May}, + journal = {Nature Genetics}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {53}, + number = {6}, + pages = {770--777}, + doi = {10.1038/s41588-021-00873-4}, + url = {https://doi.org/10.1038/s41588-021-00873-4} +} + + +@article{schober2018correlation, + title = {Correlation Coefficients}, + author = {Patrick Schober and Christa Boer and Lothar A. Schwarte}, + year = {2018}, + month = {May}, + journal = {Anesthesia {\&} Analgesia}, + publisher = {Ovid Technologies (Wolters Kluwer Health)}, + volume = {126}, + number = {5}, + pages = {1763--1768}, + doi = {10.1213/ane.0000000000002864}, + url = {https://doi.org/10.1213/ane.0000000000002864} +} + + +@string{sep = {Sept.}} + + +@inproceedings{stanley2020harmonic, + title = {Harmonic Alignment}, + author = {Jay S.
Stanley and Scott Gigante and Guy Wolf and Smita Krishnaswamy}, + year = {2020}, + month = {Jan}, + booktitle = {Proceedings of the 2020 {SIAM} International Conference on Data Mining}, + publisher = {Society for Industrial and Applied Mathematics}, + pages = {316--324}, + doi = {10.1137/1.9781611976236.36}, + url = {https://doi.org/10.1137/1.9781611976236.36} +} + + +@article{stoeckius2017simultaneous, + title = {Simultaneous epitope and transcriptome measurement in single cells}, + author = {Marlon Stoeckius and Christoph Hafemeister and William Stephenson and Brian Houck-Loomis and Pratip K Chattopadhyay and Harold Swerdlow and Rahul Satija and Peter Smibert}, + year = {2017}, + month = {Jul.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {14}, + number = {9}, + pages = {865--868}, + doi = {10.1038/nmeth.4380}, + url = {https://doi.org/10.1038/nmeth.4380} +} + + +@article{stuart2019comprehensive, + title = {Comprehensive Integration of Single-Cell Data}, + author = {Stuart, T. and Butler, A. and Hoffman, P. and Hafemeister, C. and Papalexi, E. and Mauck, W.M. and Hao, Y. and Stoeckius, M. and Smibert, P. and Satija, R.}, + year = {2019}, + journal = {Cell}, + volume = {177}, + number = {7}, + pages = {1888--1902.e21}, + doi = {10.1016/j.cell.2019.05.031} +} + + +@article{szubert2019structurepreserving, + title = {Structure-preserving visualisation of high dimensional single-cell datasets}, + author = {Benjamin Szubert and Jennifer E. 
Cole and Claudia Monaco and Ignat Drozdov}, + year = {2019}, + month = {Jun.}, + journal = {Scientific Reports}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {9}, + number = {1}, + doi = {10.1038/s41598-019-45301-0}, + url = {https://doi.org/10.1038/s41598-019-45301-0} +} + + +@article{tabula2018single, + title = {Single-cell transcriptomics of 20 mouse organs creates a Tabula Muris}, + author = {{Tabula Muris Consortium}}, + year = {2018}, + month = {Oct.}, + journal = {Nature}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {562}, + number = {7727}, + pages = {367--372}, + doi = {10.1038/s41586-018-0590-4}, + url = {https://doi.org/10.1038/s41586-018-0590-4} +} + + +@article{tabula2020single, + title = {A single-cell transcriptomic atlas characterizes ageing tissues in the mouse}, + author = {{Tabula Muris Consortium}}, + year = {2020}, + month = {Jul.}, + journal = {Nature}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {583}, + number = {7817}, + pages = {590--595}, + doi = {10.1038/s41586-020-2496-1}, + url = {https://doi.org/10.1038/s41586-020-2496-1} +} + + +@article{tasic2016adult, + title = {Adult mouse cortical cell taxonomy revealed by single cell transcriptomics}, + author = {Bosiljka Tasic and Vilas Menon and Thuc Nghi Nguyen and Tae Kyung Kim and Tim Jarsky and Zizhen Yao and Boaz Levi and Lucas T Gray and Staci A Sorensen and Tim Dolbeare and Darren Bertagnolli and Jeff Goldy and Nadiya Shapovalova and Sheana Parry and Changkyu Lee and Kimberly Smith and Amy Bernard and Linda Madisen and Susan M Sunkin and Michael Hawrylycz and Christof Koch and Hongkui Zeng}, + year = {2016}, + month = {Jan}, + journal = {Nature Neuroscience}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {19}, + number = {2}, + pages = {335--346}, + doi = {10.1038/nn.4216}, + url = {https://doi.org/10.1038/nn.4216} +} + + +@article{tian2019benchmarking, + title = {Benchmarking single 
cell {RNA}-sequencing analysis pipelines using mixture control experiments}, + author = {Luyi Tian and Xueyi Dong and Saskia Freytag and Kim-Anh L{\^{e}} Cao and Shian Su and Abolfazl JalalAbadi and Daniela Amann-Zalcenstein and Tom S. Weber and Azadeh Seidi and Jafar S. Jabbari and Shalin H. Naik and Matthew E. Ritchie}, + year = {2019}, + month = {May}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {16}, + number = {6}, + pages = {479--487}, + doi = {10.1038/s41592-019-0425-8}, + url = {https://doi.org/10.1038/s41592-019-0425-8} +} + + +@article{tran2020benchmark, + doi = {10.1186/s13059-019-1850-9}, + url = {https://doi.org/10.1186/s13059-019-1850-9}, + year = {2020}, + month = {Jan}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {21}, + number = {1}, + author = {Hoa Thi Nhu Tran and Kok Siong Ang and Marion Chevrier and Xiaomeng Zhang and Nicole Yee Shin Lee and Michelle Goh and Jinmiao Chen}, + title = {A benchmark of batch-effect correction methods for single-cell {RNA} sequencing data}, + journal = {Genome Biology} +} + + +@article{van2018recovering, + title = {Recovering Gene Interactions from Single-Cell Data Using Data Diffusion}, + author = {David van Dijk and Roshan Sharma and Juozas Nainys and Kristina Yim and Pooja Kathail and Ambrose J. Carr and Cassandra Burdziak and Kevin R. Moon and Christine L. 
Chaffer and Diwakar Pattabiraman and Brian Bierie and Linas Mazutis and Guy Wolf and Smita Krishnaswamy and Dana Pe'er}, + year = {2018}, + month = {Jul.}, + journal = {Cell}, + publisher = {Elsevier {BV}}, + volume = {174}, + number = {3}, + pages = {716--729.e27}, + doi = {10.1016/j.cell.2018.05.061}, + url = {https://doi.org/10.1016/j.cell.2018.05.061} +} + + +@article{vandermaaten2008visualizing, + title = {Visualizing Data using t-SNE}, + author = {{van der} Maaten, Laurens and Hinton, Geoffrey}, + year = {2008}, + journal = {Journal of Machine Learning Research}, + volume = {9}, + number = {86}, + pages = {2579--2605}, + url = {http://jmlr.org/papers/v9/vandermaaten08a.html} +} + + +@inproceedings{venna2001neighborhood, + title = {Neighborhood Preservation in Nonlinear Projection Methods: An Experimental Study}, + author = {Jarkko Venna and Samuel Kaski}, + year = {2001}, + booktitle = {Artificial Neural Networks {\textemdash} {ICANN} 2001}, + publisher = {Springer Berlin Heidelberg}, + pages = {485--491}, + doi = {{10.1007/3-540-44668-0\_68}}, + url = {{https://doi.org/10.1007/3-540-44668-0\_68}} +} + + +@article{venna2006local, + title = {Local multidimensional scaling}, + author = {Jarkko Venna and Samuel Kaski}, + year = {2006}, + month = {Jul.}, + journal = {Neural Networks}, + publisher = {Elsevier {BV}}, + volume = {19}, + number = {6-7}, + pages = {889--899}, + doi = {10.1016/j.neunet.2006.05.014}, + url = {https://doi.org/10.1016/j.neunet.2006.05.014} +} + + +@article{virshup2021anndataannotateddata, + doi = {10.1101/2021.12.16.473007}, + url = {https://doi.org/10.1101/2021.12.16.473007}, + year = {2021}, + month = {Dec.}, + publisher = {Cold Spring Harbor Laboratory}, + author = {Isaac Virshup and Sergei Rybakov and Fabian J. Theis and Philipp Angerer and F. 
Alexander Wolf}, + title = {anndata: Annotated data} +} + + +@article{wagner2018knearest, + title = {K-nearest neighbor smoothing for high-throughput single-cell RNA-Seq data}, + author = {Wagner, Florian and Yan, Yun and Yanai, Itai}, + year = {2018}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/217737}, + url = {https://www.biorxiv.org/content/early/2018/04/09/217737}, + elocation-id = {217737}, + eprint = {https://www.biorxiv.org/content/early/2018/04/09/217737.full.pdf} +} + + +@article{wagner2018single, + title = {Single-cell mapping of gene expression landscapes and lineage in the zebrafish embryo}, + author = {Daniel E. Wagner and Caleb Weinreb and Zach M. Collins and James A. Briggs and Sean G. Megason and Allon M. Klein}, + year = {2018}, + month = {Jun.}, + journal = {Science}, + publisher = {American Association for the Advancement of Science ({AAAS})}, + volume = {360}, + number = {6392}, + pages = {981--987}, + doi = {10.1126/science.aar4362}, + url = {https://doi.org/10.1126/science.aar4362} +} + + +@article{wang2013target, + title = {Target analysis by integration of transcriptome and {ChIP}-seq data with {BETA}}, + author = {Su Wang and Hanfei Sun and Jian Ma and Chongzhi Zang and Chenfei Wang and Juan Wang and Qianzi Tang and Clifford A Meyer and Yong Zhang and X Shirley Liu}, + year = {2013}, + month = {Nov.}, + journal = {Nature Protocols}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {8}, + number = {12}, + pages = {2502--2515}, + doi = {10.1038/nprot.2013.150}, + url = {https://doi.org/10.1038/nprot.2013.150} +} + + +@article{wang2017visualization, + title = {Visualization and analysis of single-cell {RNA}-seq data by kernel-based similarity learning}, + volume = {14}, + copyright = {2017 Springer Nature America, Inc.}, + issn = {1548-7105}, + url = {https://www.nature.com/articles/nmeth.4207}, + doi = {10.1038/nmeth.4207}, + abstract = {The SIMLR software identifies 
similarities between cells across a range of single-cell RNA-seq data, enabling effective dimension reduction, clustering and visualization.}, + language = {en}, + number = {4}, + journal = {Nature Methods}, + author = {Wang, Bo and Zhu, Junjie and Pierson, Emma and Ramazzotti, Daniele and Batzoglou, Serafim}, + month = apr, + year = {2017}, + publisher = {Nature Publishing Group}, + keywords = {Gene expression, Genome informatics, Machine learning, Statistical methods}, + pages = {414--416}, +} + + +@article{welch2019single, + title = {Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity}, + author = {Joshua D. Welch and Velina Kozareva and Ashley Ferreira and Charles Vanderburg and Carly Martin and Evan Z. Macosko}, + year = {2019}, + month = {Jun.}, + journal = {Cell}, + publisher = {Elsevier {BV}}, + volume = {177}, + number = {7}, + pages = {1873--1887.e17}, + doi = {10.1016/j.cell.2019.05.006}, + url = {https://doi.org/10.1016/j.cell.2019.05.006} +} + + +@article{wilkinson1973symbolic, + doi = {10.2307/2346786}, + url = {https://doi.org/10.2307/2346786}, + year = {1973}, + publisher = {{JSTOR}}, + volume = {22}, + number = {3}, + pages = {392}, + author = {G. N. Wilkinson and C. E. Rogers}, + title = {Symbolic Description of Factorial Models for Analysis of Variance}, + journal = {Applied Statistics} +} + + +@article{wu2021single, + title = {A single-cell and spatially resolved atlas of human breast cancers}, + author = {Sunny Z. Wu and Ghamdan Al-Eryani and Daniel Lee Roden and Simon Junankar and Kate Harvey and Alma Andersson and Aatish Thennavan and Chenfei Wang and James R. Torpy and Nenad Bartonicek and Taopeng Wang and Ludvig Larsson and Dominik Kaczorowski and Neil I. Weisenfeld and Cedric R. Uytingco and Jennifer G. Chew and Zachary W. Bent and Chia-Ling Chan and Vikkitharan Gnanasambandapillai and Charles-Antoine Dutertre and Laurence Gluch and Mun N. 
Hui and Jane Beith and Andrew Parker and Elizabeth Robbins and Davendra Segara and Caroline Cooper and Cindy Mak and Belinda Chan and Sanjay Warrier and Florent Ginhoux and Ewan Millar and Joseph E. Powell and Stephen R. Williams and X. Shirley Liu and Sandra O'Toole and Elgene Lim and Joakim Lundeberg and Charles M. Perou and Alexander Swarbrick}, + year = {2021}, + month = {Sept.}, + journal = {Nature Genetics}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {53}, + number = {9}, + pages = {1334--1347}, + doi = {10.1038/s41588-021-00911-1}, + url = {https://doi.org/10.1038/s41588-021-00911-1} +} + + +@article{xiong2020neuralee, + title = {{NeuralEE}: A {GPU}-Accelerated Elastic Embedding Dimensionality Reduction Method for Visualizing Large-Scale {scRNA}-Seq Data}, + author = {Jiankang Xiong and Fuzhou Gong and Lin Wan and Liang Ma}, + year = {2020}, + month = {Oct.}, + journal = {Frontiers in Genetics}, + publisher = {Frontiers Media {SA}}, + volume = {11}, + doi = {10.3389/fgene.2020.00786}, + url = {https://doi.org/10.3389/fgene.2020.00786} +} + + +@article{xiong2021online, + title = {Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space}, + author = {Lei Xiong and Kang Tian and Yuzhe Li and Weixi Ning and Xin Gao and Qiangfeng Cliff Zhang}, + year = {2022}, + month = {Oct.}, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {13}, + number = {1}, + doi = {10.1038/s41467-022-33758-z}, + url = {https://doi.org/10.1038/s41467-022-33758-z} +} + + +@article{xu2021probabilistic, + title = {Probabilistic harmonization and annotation of single-cell transcriptomics data with deep generative models}, + author = {Chenling Xu and Romain Lopez and Edouard Mehlman and Jeffrey Regier and Michael I Jordan and Nir Yosef}, + year = {2021}, + month = {Jan}, + journal = {Molecular Systems Biology}, + publisher = {{Embo}}, + volume = {17}, + 
number = {1}, + doi = {10.15252/msb.20209620}, + url = {https://doi.org/10.15252/msb.20209620} +} + + +@article{zappia2018exploring, + doi = {10.1371/journal.pcbi.1006245}, + url = {https://doi.org/10.1371/journal.pcbi.1006245}, + year = {2018}, + month = {Jun.}, + publisher = {Public Library of Science ({PLoS})}, + volume = {14}, + number = {6}, + pages = {e1006245}, + author = {Luke Zappia and Belinda Phipson and Alicia Oshlack}, + editor = {Dina Schneidman}, + title = {Exploring the single-cell {RNA}-seq analysis landscape with the {scRNA}-tools database}, + journal = {{PLOS} Computational Biology} +} + + +@article{zhang2021pydrmetrics, + title = {{pyDRMetrics} - A Python toolkit for dimensionality reduction quality assessment}, + author = {Yinsheng Zhang and Qian Shang and Guoming Zhang}, + year = {2021}, + month = {Feb.}, + journal = {Heliyon}, + publisher = {Elsevier {BV}}, + volume = {7}, + number = {2}, + pages = {e06199}, + doi = {10.1016/j.heliyon.2021.e06199}, + url = {https://doi.org/10.1016/j.heliyon.2021.e06199} +} + +@article {hrovatin2023delineating, + author = {Karin Hrovatin and Aim{\'e}e Bastidas-Ponce and Mostafa Bakhti and Luke Zappia and Maren B{\"u}ttner and Ciro Sallino and Michael Sterr and Anika B{\"o}ttcher and Adriana Migliorini and Heiko Lickert and Fabian J. 
Theis}, + title = {Delineating mouse β-cell identity during lifetime and in diabetes with a single cell atlas}, + elocation-id = {2022.12.22.521557}, + year = {2023}, + doi = {10.1101/2022.12.22.521557}, + publisher = {Cold Spring Harbor Laboratory}, + URL = {https://www.biorxiv.org/content/early/2023/04/25/2022.12.22.521557}, + eprint = {https://www.biorxiv.org/content/early/2023/04/25/2022.12.22.521557.full.pdf}, + journal = {bioRxiv} +} + +@article{sikkema2023integrated, + title = {An integrated cell atlas of the lung in health and disease}, + volume = {29}, + ISSN = {1546-170X}, + url = {http://dx.doi.org/10.1038/s41591-023-02327-2}, + DOI = {10.1038/s41591-023-02327-2}, + number = {6}, + journal = {Nature Medicine}, + publisher = {Springer Science and Business Media LLC}, + author = {Sikkema, Lisa and Ramírez-Suástegui, Ciro and Strobl, Daniel C. and Gillett, Tessa E. and Zappia, Luke and Madissoon, Elo and Markov, Nikolay S. and Zaragosi, Laure-Emmanuelle and Ji, Yuge and Ansari, Meshal and Arguel, Marie-Jeanne and Apperloo, Leonie and Banchero, Martin and Bécavin, Christophe and Berg, Marijn and Chichelnitskiy, Evgeny and Chung, Mei-i and Collin, Antoine and Gay, Aurore C. A. and Gote-Schniering, Janine and Hooshiar Kashani, Baharak and Inecik, Kemal and Jain, Manu and Kapellos, Theodore S. and Kole, Tessa M. and Leroy, Sylvie and Mayr, Christoph H. and Oliver, Amanda J. and von Papen, Michael and Peter, Lance and Taylor, Chase J. and Walzthoeni, Thomas and Xu, Chuan and Bui, Linh T. and De Donno, Carlo and Dony, Leander and Faiz, Alen and Guo, Minzhe and Gutierrez, Austin J. and Heumos, Lukas and Huang, Ni and Ibarra, Ignacio L. and Jackson, Nathan D. and Kadur Lakshminarasimha Murthy, Preetish and Lotfollahi, Mohammad and Tabib, Tracy and Talavera-López, Carlos and Travaglini, Kyle J. and Wilbrey-Clark, Anna and Worlock, Kaylee B. and Yoshida, Masahiro and Chen, Yuexin and Hagood, James S. 
and Agami, Ahmed and Horvath, Peter and Lundeberg, Joakim and Marquette, Charles-Hugo and Pryhuber, Gloria and Samakovlis, Chistos and Sun, Xin and Ware, Lorraine B. and Zhang, Kun and van den Berge, Maarten and Bossé, Yohan and Desai, Tushar J. and Eickelberg, Oliver and Kaminski, Naftali and Krasnow, Mark A. and Lafyatis, Robert and Nikolic, Marko Z. and Powell, Joseph E. and Rajagopal, Jayaraj and Rojas, Mauricio and Rozenblatt-Rosen, Orit and Seibold, Max A. and Sheppard, Dean and Shepherd, Douglas P. and Sin, Don D. and Timens, Wim and Tsankov, Alexander M. and Whitsett, Jeffrey and Xu, Yan and Banovich, Nicholas E. and Barbry, Pascal and Duong, Thu Elizabeth and Falk, Christine S. and Meyer, Kerstin B. and Kropski, Jonathan A. and Pe’er, Dana and Schiller, Herbert B. and Tata, Purushothama Rao and Schultze, Joachim L. and Teichmann, Sara A. and Misharin, Alexander V. and Nawijn, Martijn C. and Luecken, Malte D. and Theis, Fabian J.}, + year = {2023}, + month = jun, + pages = {1563–1577} +} + +@article{consortium2022tabula, + title = {The Tabula Sapiens: A multiple-organ, single-cell transcriptomic atlas of humans}, + volume = {376}, + ISSN = {1095-9203}, + url = {http://dx.doi.org/10.1126/science.abl4896}, + DOI = {10.1126/science.abl4896}, + number = {6594}, + journal = {Science}, + publisher = {American Association for the Advancement of Science (AAAS)}, + author = {Jones, Robert C. and Karkanias, Jim and Krasnow, Mark A. and Pisco, Angela Oliveira and Quake, Stephen R. and Salzman, Julia and Yosef, Nir and Bulthaup, Bryan and Brown, Phillip and Harper, William and Hemenez, Marisa and Ponnusamy, Ravikumar and Salehi, Ahmad and Sanagavarapu, Bhavani A. and Spallino, Eileen and Aaron, Ksenia A. and Concepcion, Waldo and Gardner, James M. and Kelly, Burnett and Neidlinger, Nikole and Wang, Zifa and Crasta, Sheela and Kolluru, Saroja and Morri, Maurizio and Pisco, Angela Oliveira and Tan, Serena Y. and Travaglini, Kyle J. 
and Xu, Chenling and Alcántara-Hernández, Marcela and Almanzar, Nicole and Antony, Jane and Beyersdorf, Benjamin and Burhan, Deviana and Calcuttawala, Kruti and Carter, Matthew M. and Chan, Charles K. F. and Chang, Charles A. and Chang, Stephen and Colville, Alex and Crasta, Sheela and Culver, Rebecca N. and Cvijović, Ivana and D’Amato, Gaetano and Ezran, Camille and Galdos, Francisco X. and Gillich, Astrid and Goodyer, William R. and Hang, Yan and Hayashi, Alyssa and Houshdaran, Sahar and Huang, Xianxi and Irwin, Juan C. and Jang, SoRi and Juanico, Julia Vallve and Kershner, Aaron M. and Kim, Soochi and Kiss, Bernhard and Kolluru, Saroja and Kong, William and Kumar, Maya E. and Kuo, Angera H. and Leylek, Rebecca and Li, Baoxiang and Loeb, Gabriel B. and Lu, Wan-Jin and Mantri, Sruthi and Markovic, Maxim and McAlpine, Patrick L. and de Morree, Antoine and Morri, Maurizio and Mrouj, Karim and Mukherjee, Shravani and Muser, Tyler and Neuh\"{o}fer, Patrick and Nguyen, Thi D. and Perez, Kimberly and Phansalkar, Ragini and Pisco, Angela Oliveira and Puluca, Nazan and Qi, Zhen and Rao, Poorvi and Raquer-McKay, Hayley and Schaum, Nicholas and Scott, Bronwyn and Seddighzadeh, Bobak and Segal, Joe and Sen, Sushmita and Sikandar, Shaheen and Spencer, Sean P. and Steffes, Lea C. and Subramaniam, Varun R. and Swarup, Aditi and Swift, Michael and Travaglini, Kyle J. and Van Treuren, Will and Trimm, Emily and Veizades, Stefan and Vijayakumar, Sivakamasundari and Vo, Kim Chi and Vorperian, Sevahn K. and Wang, Wanxin and Weinstein, Hannah N. W. and Winkler, Juliane and Wu, Timothy T. H. and Xie, Jamie and Yung, Andrea R. and Zhang, Yue and Detweiler, Angela M. and Mekonen, Honey and Neff, Norma F. and Sit, Rene V. and Tan, Michelle and Yan, Jia and Bean, Gregory R. and Charu, Vivek and Forgó, Erna and Martin, Brock A. and Ozawa, Michael G. and Silva, Oscar and Tan, Serena Y. and Toland, Angus and Vemuri, Venkata N. P. 
and Afik, Shaked and Awayan, Kyle and Botvinnik, Olga Borisovna and Byrne, Ashley and Chen, Michelle and Dehghannasiri, Roozbeh and Detweiler, Angela M. and Gayoso, Adam and Granados, Alejandro A. and Li, Qiqing and Mahmoudabadi, Gita and McGeever, Aaron and de Morree, Antoine and Olivieri, Julia Eve and Park, Madeline and Pisco, Angela Oliveira and Ravikumar, Neha and Salzman, Julia and Stanley, Geoff and Swift, Michael and Tan, Michelle and Tan, Weilun and Tarashansky, Alexander J. and Vanheusden, Rohan and Vorperian, Sevahn K. and Wang, Peter and Wang, Sheng and Xing, Galen and Xu, Chenling and Yosef, Nir and Alcántara-Hernández, Marcela and Antony, Jane and Chan, Charles K. F. and Chang, Charles A. and Colville, Alex and Crasta, Sheela and Culver, Rebecca and Dethlefsen, Les and Ezran, Camille and Gillich, Astrid and Hang, Yan and Ho, Po-Yi and Irwin, Juan C. and Jang, SoRi and Kershner, Aaron M. and Kong, William and Kumar, Maya E. and Kuo, Angera H. and Leylek, Rebecca and Liu, Shixuan and Loeb, Gabriel B. and Lu, Wan-Jin and Maltzman, Jonathan S. and Metzger, Ross J. and de Morree, Antoine and Neuh\"{o}fer, Patrick and Perez, Kimberly and Phansalkar, Ragini and Qi, Zhen and Rao, Poorvi and Raquer-McKay, Hayley and Sasagawa, Koki and Scott, Bronwyn and Sinha, Rahul and Song, Hanbing and Spencer, Sean P. and Swarup, Aditi and Swift, Michael and Travaglini, Kyle J. and Trimm, Emily and Veizades, Stefan and Vijayakumar, Sivakamasundari and Wang, Bruce and Wang, Wanxin and Winkler, Juliane and Xie, Jamie and Yung, Andrea R. and Artandi, Steven E. and Beachy, Philip A. and Clarke, Michael F. and Giudice, Linda C. and Huang, Franklin W. and Huang, Kerwyn Casey and Idoyaga, Juliana and Kim, Seung K. and Krasnow, Mark and Kuo, Christin S. and Nguyen, Patricia and Quake, Stephen R. and Rando, Thomas A. and Red-Horse, Kristy and Reiter, Jeremy and Relman, David A. and Sonnenburg, Justin L. and Wang, Bruce and Wu, Albert and Wu, Sean M. 
and Wyss-Coray, Tony}, + year = {2022}, + month = may +} + +@article{dominguez2022crosstissue, + title = {Cross-tissue immune cell analysis reveals tissue-specific features in humans}, + volume = {376}, + ISSN = {1095-9203}, + url = {http://dx.doi.org/10.1126/science.abl5197}, + DOI = {10.1126/science.abl5197}, + number = {6594}, + journal = {Science}, + publisher = {American Association for the Advancement of Science (AAAS)}, + author = {Domínguez Conde, C. and Xu, C. and Jarvis, L. B. and Rainbow, D. B. and Wells, S. B. and Gomes, T. and Howlett, S. K. and Suchanek, O. and Polanski, K. and King, H. W. and Mamanova, L. and Huang, N. and Szabo, P. A. and Richardson, L. and Bolt, L. and Fasouli, E. S. and Mahbubani, K. T. and Prete, M. and Tuck, L. and Richoz, N. and Tuong, Z. K. and Campos, L. and Mousa, H. S. and Needham, E. J. and Pritchard, S. and Li, T. and Elmentaite, R. and Park, J. and Rahmani, E. and Chen, D. and Menon, D. K. and Bayraktar, O. A. and James, L. K. and Meyer, K. B. and Yosef, N. and Clatworthy, M. R. and Sims, P. A. and Farber, D. L. and Saeb-Parsy, K. and Jones, J. L. and Teichmann, S. A.}, + year = {2022}, + month = may +} + +@article{eraslan2022singlenucleus, + title = {Single-nucleus cross-tissue molecular reference maps toward understanding disease gene function}, + volume = {376}, + ISSN = {1095-9203}, + url = {http://dx.doi.org/10.1126/science.abl4290}, + DOI = {10.1126/science.abl4290}, + number = {6594}, + journal = {Science}, + publisher = {American Association for the Advancement of Science (AAAS)}, + author = {Eraslan, G\"{o}kcen and Drokhlyansky, Eugene and Anand, Shankara and Fiskin, Evgenij and Subramanian, Ayshwarya and Slyper, Michal and Wang, Jiali and Van Wittenberghe, Nicholas and Rouhana, John M. and Waldman, Julia and Ashenberg, Orr and Lek, Monkol and Dionne, Danielle and Win, Thet Su and Cuoco, Michael S. and Kuksenko, Olena and Tsankov, Alexander M. and Branton, Philip A. and Marshall, Jamie L. 
and Greka, Anna and Getz, Gad and Segrè, Ayellet V. and Aguet, Fran\c{c}ois and Rozenblatt-Rosen, Orit and Ardlie, Kristin G. and Regev, Aviv}, + year = {2022}, + month = may +} + +@article{li2023integrated, + title = {Integrated multi-omics single cell atlas of the human retina}, + url = {http://dx.doi.org/10.1101/2023.11.07.566105}, + DOI = {10.1101/2023.11.07.566105}, + publisher = {Cold Spring Harbor Laboratory}, + author = {Li, Jin and Wang, Jun and Ibarra, Ignacio L and Cheng, Xuesen and Luecken, Malte D and Lu, Jiaxiong and Monavarfeshani, Aboozar and Yan, Wenjun and Zheng, Yiqiao and Zuo, Zhen and Zayas Colborn, Samantha Lynn and Cortez, Berenice Sarahi and Owen, Leah A and Tran, Nicholas M and Shekhar, Karthik and Sanes, Joshua R and Stout, J Timothy and Chen, Shiming and Li, Yumei and DeAngelis, Margaret M and Theis, Fabian J and Chen, Rui}, + year = {2023}, + month = nov +} + +@article{wilson2022multimodal, + title = {Multimodal single cell sequencing implicates chromatin accessibility and genetic background in diabetic kidney disease progression}, + volume = {13}, + ISSN = {2041-1723}, + url = {http://dx.doi.org/10.1038/s41467-022-32972-z}, + DOI = {10.1038/s41467-022-32972-z}, + number = {1}, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media LLC}, + author = {Wilson, Parker C. and Muto, Yoshiharu and Wu, Haojia and Karihaloo, Anil and Waikar, Sushrut S. and Humphreys, Benjamin D.}, + year = {2022}, + month = sep +} + +@article{steuernagel2022hypomap, + title = {HypoMap—a unified single-cell gene expression atlas of the murine hypothalamus}, + volume = {4}, + ISSN = {2522-5812}, + url = {http://dx.doi.org/10.1038/s42255-022-00657-y}, + DOI = {10.1038/s42255-022-00657-y}, + number = {10}, + journal = {Nature Metabolism}, + publisher = {Springer Science and Business Media LLC}, + author = {Steuernagel, Lukas and Lam, Brian Y. H. and Klemm, Paul and Dowsett, Georgina K. C. and Bauder, Corinna A. and Tadross, John A. 
and Hitschfeld, Tamara Sotelo and del Rio Martin, Almudena and Chen, Weiyi and de Solis, Alain J. and Fenselau, Henning and Davidsen, Peter and Cimino, Irene and Kohnke, Sara N. and Rimmington, Debra and Coll, Anthony P. and Beyer, Andreas and Yeo, Giles S. H. and Br\"{u}ning, Jens C.}, + year = {2022}, + month = oct, + pages = {1402–1419} +} + +@article{tian2023singlecell, + title = {Single-cell DNA methylation and 3D genome architecture in the human brain}, + volume = {382}, + ISSN = {1095-9203}, + url = {http://dx.doi.org/10.1126/science.adf5357}, + DOI = {10.1126/science.adf5357}, + number = {6667}, + journal = {Science}, + publisher = {American Association for the Advancement of Science (AAAS)}, + author = {Tian, Wei and Zhou, Jingtian and Bartlett, Anna and Zeng, Qiurui and Liu, Hanqing and Castanon, Rosa G. and Kenworthy, Mia and Altshul, Jordan and Valadon, Cynthia and Aldridge, Andrew and Nery, Joseph R. and Chen, Huaming and Xu, Jiaying and Johnson, Nicholas D. and Lucero, Jacinta and Osteen, Julia K. and Emerson, Nora and Rink, Jon and Lee, Jasper and Li, Yang E. and Siletti, Kimberly and Liem, Michelle and Claffey, Naomi and O’Connor, Carolyn and Yanny, Anna Marie and Nyhus, Julie and Dee, Nick and Casper, Tamara and Shapovalova, Nadiya and Hirschstein, Daniel and Ding, Song-Lin and Hodge, Rebecca and Levi, Boaz P. and Keene, C. Dirk and Linnarsson, Sten and Lein, Ed and Ren, Bing and Behrens, M. 
Margarita and Ecker, Joseph R.}, + year = {2023}, + month = oct +} + + +@article{sonrel2023metaanalysis, + title = {Meta-analysis of (single-cell method) benchmarks reveals the need for extensibility and interoperability}, + volume = {24}, + ISSN = {1474-760X}, + url = {http://dx.doi.org/10.1186/s13059-023-02962-5}, + DOI = {10.1186/s13059-023-02962-5}, + number = {1}, + journal = {Genome Biology}, + publisher = {Springer Science and Business Media LLC}, + author = {Sonrel, Anthony and Luetge, Almut and Soneson, Charlotte and Mallona, Izaskun and Germain, Pierre-Luc and Knyazev, Sergey and Gilis, Jeroen and Gerber, Reto and Seurinck, Ruth and Paul, Dominique and Sonder, Emanuel and Crowell, Helena L. and Fanaswala, Imran and Al-Ajami, Ahmad and Heidari, Elyas and Schmeing, Stephan and Milosavljevic, Stefan and Saeys, Yvan and Mangul, Serghei and Robinson, Mark D.}, + year = {2023}, + month = may +} + + +@article{saelens2019comparison, + title = {A comparison of single-cell trajectory inference methods}, + volume = {37}, + ISSN = {1546-1696}, + url = {http://dx.doi.org/10.1038/s41587-019-0071-9}, + DOI = {10.1038/s41587-019-0071-9}, + number = {5}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media LLC}, + author = {Saelens, Wouter and Cannoodt, Robrecht and Todorov, Helena and Saeys, Yvan}, + year = {2019}, + month = apr, + pages = {547–554} +} + + +@article{huang2018savergene, + title = {SAVER: gene expression recovery for single-cell RNA sequencing}, + volume = {15}, + ISSN = {1548-7105}, + url = {http://dx.doi.org/10.1038/s41592-018-0033-z}, + DOI = {10.1038/s41592-018-0033-z}, + number = {7}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media LLC}, + author = {Huang, Mo and Wang, Jingshu and Torre, Eduardo and Dueck, Hannah and Shaffer, Sydney and Bonasio, Roberto and Murray, John I. 
and Raj, Arjun and Li, Mingyao and Zhang, Nancy R.}, + year = {2018}, + month = jun, + pages = {539–542} +} + + +@article{chari2023speciousart, + title = {The specious art of single-cell genomics}, + volume = {19}, + ISSN = {1553-7358}, + url = {http://dx.doi.org/10.1371/journal.pcbi.1011288}, + DOI = {10.1371/journal.pcbi.1011288}, + number = {8}, + journal = {PLOS Computational Biology}, + publisher = {Public Library of Science (PLoS)}, + author = {Chari, Tara and Pachter, Lior}, + editor = {Papin, Jason A.}, + year = {2023}, + month = aug, + pages = {e1011288} +} diff --git a/main.nf b/main.nf deleted file mode 100644 index fd40518..0000000 --- a/main.nf +++ /dev/null @@ -1,3 +0,0 @@ -workflow { - print("This is a dummy placeholder for pipeline execution. Please use the corresponding nf files for running pipelines.") -} diff --git a/nextflow.config b/nextflow.config deleted file mode 100644 index 6402ebf..0000000 --- a/nextflow.config +++ /dev/null @@ -1 +0,0 @@ -process.container = 'nextflow/bash:latest' diff --git a/src/nextflow_helpers/helper.nf b/nextflow_helpers/helper.nf similarity index 100% rename from src/nextflow_helpers/helper.nf rename to nextflow_helpers/helper.nf diff --git a/src/nextflow_helpers/labels_ci.config b/nextflow_helpers/labels_ci.config similarity index 100% rename from src/nextflow_helpers/labels_ci.config rename to nextflow_helpers/labels_ci.config diff --git a/src/nextflow_helpers/labels_tw.config b/nextflow_helpers/labels_tw.config similarity index 100% rename from src/nextflow_helpers/labels_tw.config rename to nextflow_helpers/labels_tw.config diff --git a/src/create_component/config.vsh.yaml b/src/create_component/config.vsh.yaml deleted file mode 100644 index fa6b63b..0000000 --- a/src/create_component/config.vsh.yaml +++ /dev/null @@ -1,71 +0,0 @@ -functionality: - name: create_component - namespace: common - description: | - Create a component Viash component. 
- - Usage: - ``` - bin/create_component --task denoising --type method --language r --name foo - bin/create_component --task denoising --type metric --language python --name bar - ``` - arguments: - - type: string - name: --task - description: Which task the component will be added to. - example: denoising - - type: string - name: --type - example: metric - description: The type of component to create. Typically must be one of 'method', 'control_method' or 'metric'. - - type: string - name: --language - description: Which scripting language to use. Options are 'python', 'r'. - default: python - choices: [python, r] - - type: string - name: --name - example: new_comp - description: Name of the new method, formatted in snake case. - - type: file - name: --output - direction: output - # required: true - description: Path to the component directory. Suggested location is `src//s/`. - default: src/tasks/${VIASH_PAR_TASK}/${VIASH_PAR_TYPE}s/${VIASH_PAR_NAME} - - type: file - name: --api_file - description: | - Which API file to use. Defaults to `src//api/comp_.yaml`. - In tasks with different subtypes of method, this location might not exist and you might need - to manually specify a different API file to inherit from. - must_exist: false - # required: true - default: src/tasks/${VIASH_PAR_TASK}/api/comp_${VIASH_PAR_TYPE}.yaml - - type: file - name: --viash_yaml - description: | - Path to the project config file. Needed for knowing the relative location of a file to the project root. 
- # required: true - default: "_viash.yaml" - resources: - - type: python_script - path: script.py - - path: /src/helper_functions/read_and_merge_yaml.py - test_resources: - - type: python_script - path: test.py - - path: /src - dest: openproblems-v2/src - - path: /_viash.yaml - dest: openproblems-v2/_viash.yaml -platforms: - - type: docker - image: python:3.10-slim - setup: - - type: python - pypi: ruamel.yaml - - type: native - - type: nextflow - - diff --git a/src/create_component/script.py b/src/create_component/script.py deleted file mode 100644 index 822d0ea..0000000 --- a/src/create_component/script.py +++ /dev/null @@ -1,476 +0,0 @@ -from typing import Any -from pathlib import Path -import sys -import os -import re - -## VIASH START -par = { - "task": "denoising", - "type": "method", - "language": "python", - "name": "new_comp", - "output": "src/tasks/denoising/methods/new_comp", - "api_file": "src/tasks/denoising/api/comp_method.yaml", - "viash_yaml": "_viash.yaml" -} -## VIASH END - -# import helper function -sys.path.append(meta["resources_dir"]) -from read_and_merge_yaml import read_and_merge_yaml - -def strip_margin(text: str) -> str: - return re.sub("(^|\n)[ \t]*\|", "\\1", text) - -def create_config(par, component_type, pretty_name, script_path) -> str: - info_str = generate_info(par, component_type, pretty_name) - resources_str = generate_resources(par, script_path) - docker_platform = generate_docker_platform(par) - - return strip_margin(f'''\ - |# The API specifies which type of component this is. - |# It contains specifications for: - |# - The input/output files - |# - Common parameters - |# - A unit test - |__merge__: {os.path.relpath(par["api_file"], par["output"])} - | - |functionality: - | # A unique identifier for your component (required). - | # Can contain only lowercase letters or underscores. 
- | name: {par["name"]} - | - | # Metadata for your component - | info: - |{info_str} - | # Component-specific parameters (optional) - | # arguments: - | # - name: "--n_neighbors" - | # type: "integer" - | # default: 5 - | # description: Number of neighbors to use. - | - | # Resources required to run the component - | resources: - |{resources_str} - |platforms: - | # Specifications for the Docker image for this component. - |{docker_platform} - | # This platform allows running the component natively - | - type: native - | # Allows turning the component into a Nextflow module / pipeline. - | - type: nextflow - | directives: - | label: [midtime,midmem, midcpu] - |''' - ) - -def generate_info(par, component_type, pretty_name) -> str: - """Generate the functionality info for a component.""" - if component_type in ["method", "control_method"]: - str = strip_margin(f'''\ - | # A relatively short label, used when rendering visualisarions (required) - | label: {pretty_name} - | # A one sentence summary of how this method works (required). Used when - | # rendering summary tables. - | summary: "FILL IN: A one sentence summary of this method." - | # A multi-line description of how this component works (required). Used - | # when rendering reference documentation. - | description: | - | FILL IN: A (multi-line) description of how this method works. - | # Which normalisation method this component prefers to use (required). - | preferred_normalization: log_cp10k - |''') - if component_type == "method": - str += strip_margin(f'''\ - | # A reference key from the bibtex library at src/common/library.bib (required). - | reference: bibtex_reference_key - | # URL to the documentation for this method (required). - | documentation_url: https://url.to/the/documentation - | # URL to the code repository for this method (required). 
- | repository_url: https://github.com/organisation/repository - |''') - return str - elif component_type == "metric": - return strip_margin(f'''\ - | metrics: - | # A unique identifier for your metric (required). - | # Can contain only lowercase letters or underscores. - | name: {par["name"]} - | # A relatively short label, used when rendering visualisarions (required) - | label: {pretty_name} - | # A one sentence summary of how this metric works (required). Used when - | # rendering summary tables. - | summary: "FILL IN: A one sentence summary of this metric." - | # A multi-line description of how this component works (required). Used - | # when rendering reference documentation. - | description: | - | FILL IN: A (multi-line) description of how this metric works. - | # A reference key from the bibtex library at src/common/library.bib (required). - | reference: bibtex_reference_key - | # URL to the documentation for this metric (required). - | documentation_url: https://url.to/the/documentation - | # URL to the code repository for this metric (required). 
- | repository_url: https://github.com/organisation/repository - | # The minimum possible value for this metric (required) - | min: 0 - | # The maximum possible value for this metric (required) - | max: 1 - | # Whether a higher value represents a 'better' solution (required) - | maximize: true - |''') - - -def generate_resources(par, script_path) -> str: - """Add the script to the functionality resources.""" - if par["language"] == "python": - type_str = "python_script" - elif par["language"] == "r": - type_str = "r_script" - - return strip_margin(f'''\ - | # The script of your component (required) - | - type: {type_str} - | path: {script_path} - | # Additional resources your script needs (optional) - | # - type: file - | # path: weights.pt - |''') - -def generate_docker_platform(par) -> str: - """Set up the docker platform for Python.""" - if par["language"] == "python": - image_str = "ghcr.io/openproblems-bio/base_python:1.0.4" - setup_type = "python" - package_example = "scib==1.1.5" - elif par["language"] == "r": - image_str = "ghcr.io/openproblems-bio/base_r:1.0.4" - setup_type = "r" - package_example = "tidyverse" - return strip_margin(f'''\ - | - type: docker - | image: {image_str} - | # Add custom dependencies here (optional). For more information, see - | # https://viash.io/reference/config/platforms/docker/#setup . 
- | # setup: - | # - type: {setup_type} - | # packages: {package_example} - |''') - -def set_par_values(config) -> None: - """Adds values to each of the arguments in a config file.""" - args = config['functionality']['arguments'] - for argi, arg in enumerate(args): - key = re.sub("^-*", "", arg['name']) - - # find value - if arg["type"] != "file": - value = arg.get("default", arg.get("example", "...")) - elif arg.get("direction", "input") == "input": - key_strip = key.replace("input_", "") - value = f'resources_test/{par["task"]}/pancreas/{key_strip}.h5ad' - else: - key_strip = key.replace("output_", "") - value = f'{key_strip}.h5ad' - - # store key and value - config['functionality']['arguments'][argi]["key"] = key - config['functionality']['arguments'][argi]["value"] = value - -def look_for_adata_arg(args, uns_field): - """Look for an argument that has a .uns[uns_field] in its info.slots.""" - for arg in args: - uns = arg.get("info", {}).get("slots", {}).get("uns", []) - for unval in uns: - if unval.get("name") == uns_field: - return arg["key"] - return "adata" - -def write_output_python(arg, copy_from_adata, is_metric): - """Create code for writing the output h5ad files.""" - slots = arg.get("info", {}).get("slots", {}) - outer = [] - for group_name, slots in slots.items(): - inner = [] - for slot in slots: - if group_name == "uns" and slot["name"] in ["dataset_id", "normalization_id"]: - value = f"{copy_from_adata}.uns['{slot['name']}']" - elif group_name == "uns" and slot["name"] == "method_id": - if is_metric: - value = f"{copy_from_adata}.uns['{slot['name']}']" - else: - value = "meta['functionality_name']" - else: - value = group_name + "_" + slot["name"] - inner.append(f"'{slot['name']}': {value}") - inner_values = ',\n '.join(inner) - outer.append(f"{group_name}={{\n {inner_values}\n }}") - outer_values = ',\n '.join(outer) - return strip_margin( - f'''\ - |print("Write {arg["key"]} AnnData to file", flush=True) - |{arg["key"]} = ad.AnnData( - | 
{outer_values} - |) - |{arg["key"]}.write_h5ad(par['{arg["key"]}'], compression='gzip')''' - ) - -def write_output_r(arg, copy_from_adata, is_metric): - """Create code for writing the output h5ad files.""" - slots = arg.get("info", {}).get("slots", {}) - outer = [] - for group_name, slots in slots.items(): - inner = [] - for slot in slots: - if group_name == "uns" and slot["name"] in ["dataset_id", "normalization_id"]: - value = f"{copy_from_adata}$uns[[\"{slot['name']}\"]]" - elif group_name == "uns" and slot["name"] == "method_id": - if is_metric: - value = f"{copy_from_adata}$uns[[\"{slot['name']}\"]]" - else: - value = "meta[[\"functionality_name\"]]" - else: - value = group_name + "_" + slot["name"] - inner.append(f"{slot['name']} = {value}") - inner_values = ',\n '.join(inner) - outer.append(f"{group_name} = list(\n {inner_values}\n )") - outer_values = ',\n '.join(outer) - return strip_margin( - f'''\ - |cat("Write {arg["key"]} AnnData to file\\n") - |{arg["key"]} <- anndata::AnnData( - | {outer_values} - |) - |{arg["key"]}$write_h5ad(par[["{arg["key"]}"]], compression = "gzip")''' - ) - -def create_python_script(par, config, type): - args = config['functionality']['arguments'] - - # create the arguments of the par string - par_string = ",\n ".join(f"'{arg['key']}': '{arg['value']}'" for arg in args) - - # create code for reading the input h5ad file - read_h5ad_string = "\n".join( - f"{arg['key']} = ad.read_h5ad(par['{arg['key']}'])" - for arg in args - if arg['type'] == "file" - and arg.get('direction', "input") == "input" - ) - - # determine which adata to copy from - copy_from_adata = look_for_adata_arg(args, "method_id" if type == "metric" else "dataset_id") - - # create code for writing the output h5ad files - write_h5ad_string = "\n".join( - write_output_python(arg, copy_from_adata, type == "metric") - for arg in args - if arg["type"] == "file" - and arg.get("direction", "input") == "output" - ) - - if type == 'metric': - processing_string = 
strip_margin(f'''\ - |print('Compute metrics', flush=True) - |# metric_ids and metric_values can have length > 1 - |# but should be of equal length - |uns_metric_ids = [ '{par['name']}' ] - |uns_metric_values = [ 0.5 ]''') - else: - processing_string = strip_margin(f'''\ - |print('Preprocess data', flush=True) - |# ... preprocessing ... - | - |print('Train model', flush=True) - |# ... train model ... - | - |print('Generate predictions', flush=True) - |# ... generate predictions ...''') - - script = strip_margin(f'''\ - |import anndata as ad - | - |## VIASH START - |# Note: this section is auto-generated by viash at runtime. To edit it, make changes - |# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`. - |par = {{ - | {par_string} - |}} - |meta = {{ - | 'functionality_name': '{par["name"]}' - |}} - |## VIASH END - | - |print('Reading input files', flush=True) - |{read_h5ad_string} - | - |{processing_string} - | - |{write_h5ad_string} - |''') - - return script - -def create_r_script(par, api_spec, type): - args = api_spec['functionality']['arguments'] - - # create the arguments of the par string - par_string = ",\n ".join(f'{arg["key"]} = "{arg["value"]}"' for arg in args) - - # create helpers for reading the h5ad file - read_h5ad_string = "\n".join( - f'{arg["key"]} <- anndata::read_h5ad(par[["{arg["key"]}"]])' - for arg in args - if arg['type'] == "file" - and arg.get("direction", "input") == "input" - ) - - # determine which adata to copy from - copy_from_adata = look_for_adata_arg(args, "method_id" if type == "metric" else "dataset_id") - - # create code for writing the output h5ad files - write_h5ad_string = "\n".join( - write_output_r(arg, copy_from_adata, type == "metric") - for arg in args - if arg["type"] == "file" - and arg.get("direction", "input") == "output" - ) - - if type == 'metric': - processing_string = strip_margin(f'''\ - |cat("Compute metrics\\n") - |# metric_ids and metric_values can have length > 1 - |# but should be of 
equal length - |uns_metric_ids <- c("{par['name']}") - |uns_metric_values <- c(0.5)''') - else: - processing_string = strip_margin(f'''\ - |cat("Preprocess data\\n") - |# ... preprocessing ... - | - |cat("Train model\\n") - |# ... train model ... - | - |cat("Generate predictions\\n") - |# ... generate predictions ...''') - - script = strip_margin(f'''\ - |library(anndata) - | - |## VIASH START - |par <- list( - | {par_string} - |) - |meta <- list( - | functionality_name = "{par["name"]}" - |) - |## VIASH END - | - |cat("Reading input files\\n") - |{read_h5ad_string} - | - |{processing_string} - | - |{write_h5ad_string} - |''') - - return script - -# def read_viash_config(file): -# file = file.absolute() - -# # read in config -# command = ["viash", "config", "view", str(file)] - -# # Execute the command and capture the output -# output = subprocess.check_output( -# command, -# universal_newlines=True, -# cwd=str(file.parent) -# ) - -# # Parse the output as YAML -# config = yaml.load(output) - -# return config - - -def main(par): - ####### CHECK INPUTS ####### - print("Check inputs", flush=True) - assert re.match("[a-z][a-z0-9_]*", par["name"]), "Name should match the regular expression '[a-z][a-z0-9_]*'. Example: 'my_component'." - assert len(par['name']) <= 50, "Method name should be at most 50 characters." 
- - pretty_name = re.sub("_", " ", par['name']).title() - - ####### CHECK LANGUAGE ####### - print("Check language", flush=True) - # check language and determine script path - if par["language"] == "python": - script_path = "script.py" - elif par["language"] == "r": - script_path = "script.R" - else: - sys.exit(f"Unrecognized language parameter '{par['language']}'.") - - ## CHECK API FILE - print("Check API file", flush=True) - api_file = Path(par["api_file"]) - viash_yaml = Path(par["viash_yaml"]) - project_dir = viash_yaml.parent - if not api_file.exists(): - comp_types = [x.with_suffix("").name.removeprefix("comp_") for x in api_file.parent.glob("**/comp_*.y*ml")] - list.sort(comp_types) - sys.exit(strip_margin(f"""\ - |Error: Invalid --type argument. - | Reason: Could not find API file at '{api_file.relative_to(project_dir)}'. - | Possible values for --type: {', '.join(comp_types)}.""")) - - ## READ API FILE - print("Read API file", flush=True) - api = read_and_merge_yaml(api_file) - comp_type = api.get("functionality", {}).get("info", {}).get("type", {}) - if not comp_type: - sys.exit(strip_margin(f"""\ - |Error: API file is incorrectly formatted. - | Reason: Could not find component type at `.functionality.info.type`.' 
- | Please fix the formatting of the API file.""")) - - ####### CREATE OUTPUT DIR ####### - print("Create output dir", flush=True) - out_dir = Path(par["output"]) - out_dir.mkdir(exist_ok=True) - - ####### CREATE CONFIG ####### - print("Create config", flush=True) - config_file = out_dir / "config.vsh.yaml" - - # get config template - config_str = create_config(par, comp_type, pretty_name, script_path) - - with open(config_file, "w") as f: - f.write(config_str) - - ####### CREATE SCRIPT ####### - print("Create script", flush=True) - script_file = out_dir / script_path - - # set reasonable values - set_par_values(api) - - if par["language"] == "python": - script_out = create_python_script(par, api, comp_type) - - if par["language"] == "r": - script_out = create_r_script(par, api, comp_type) - - # write script - with open(script_file, "w") as f: - f.write(script_out) - - print("Done!", flush=True) - - -if __name__ == "__main__": - main(par) diff --git a/src/create_component/test.py b/src/create_component/test.py deleted file mode 100644 index 16da1bd..0000000 --- a/src/create_component/test.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -import subprocess -from os import path -from ruamel.yaml import YAML - -## VIASH START -meta = { - 'executable': 'foo' -} -## VIASH END - -opv2 = f"{meta['resources_dir']}/openproblems-v2" -output_path = f"{opv2}/src/tasks/label_projection/methods/test_method" - -cmd = [ - meta['executable'], - '--task', 'label_projection', - '--type', 'method', - '--name', 'test_method', - '--language', 'python' -] - -print('>> Running the script as test', flush=True) -out = subprocess.run(cmd, stderr=subprocess.STDOUT, cwd=opv2) - -if out.stdout: - print(out.stdout) - -if out.returncode: - print(f"script: '{cmd}' exited with an error.") - exit(out.returncode) - -print('>> Checking whether output files exist', flush=True) -assert os.path.exists(output_path), "Output dir does not exist" - -conf_f = path.join(output_path, 'config.vsh.yaml') -assert 
os.path.exists(conf_f), "Config file does not exist" - -script_f = path.join(output_path, "script.py") -assert os.path.exists(script_f), "Script file does not exist" - -print('>> Checking file contents', flush=True) -yaml = YAML(typ='safe', pure=True) -with open(conf_f) as f: - conf_data = yaml.load(f) - -assert conf_data['functionality']['name'] == 'test_method', "Name should be equal to 'test_method'" -# assert conf_data['platforms'][0]['image'] == 'python:3.10', "Python image should be equal to python:3.10" - - -print('All checks succeeded!', flush=True) - diff --git a/src/create_task_readme/config.vsh.yaml b/src/create_task_readme/config.vsh.yaml deleted file mode 100644 index 6993981..0000000 --- a/src/create_task_readme/config.vsh.yaml +++ /dev/null @@ -1,69 +0,0 @@ -functionality: - name: create_task_readme - namespace: common - description: | - Create a README for the task. - argument_groups: - - name: Inputs - arguments: - - type: string - name: --task - description: Which task the component will be added to. - example: denoising - required: false - - type: file - name: --task_dir - description: Path to the task directory. - default: src/tasks/${VIASH_PAR_TASK} - required: false - - type: file - name: --viash_yaml - description: | - Path to the project config file. Needed for knowing the relative location of a file to the project root. - default: "_viash.yaml" - - type: string - name: --github_url - description: | - URL to the GitHub repository. Needed for linking to the source code. - default: "https://github.com/openproblems-bio/openproblems-v2/tree/main/" - - name: Outputs - arguments: - - type: file - name: --output - direction: output - description: Path to the component directory. Suggested location is `src/tasks//README.md`. 
- default: src/tasks/${VIASH_PAR_TASK}/README.md - resources: - - type: r_script - path: script.R - - path: /src/helper_functions/read_and_merge_yaml.R - - path: /src/helper_functions/read_api_files.R - - path: /src/helper_functions/strip_margin.R - test_resources: - - type: r_script - path: test.R - - path: /src - dest: openproblems-v2/src - - path: /_viash.yaml - dest: openproblems-v2/_viash.yaml -platforms: - - type: docker - image: ghcr.io/openproblems-bio/base_r:1.0.4 - setup: - - type: r - packages: [dplyr, purrr, rlang, glue, yaml, fs, cli, igraph, rmarkdown, processx] - - type: apt - packages: [jq, curl] - - type: docker - # download and install quarto-*-linux-amd64.deb from latest release - run: | - release_info=$(curl -s https://api.github.com/repos/quarto-dev/quarto-cli/releases/latest) && \ - download_url=$(printf "%s" "$release_info" | jq -r '.assets[] | select(.name | test("quarto-.*-linux-amd64.deb")) | .browser_download_url') && \ - curl -sL "$download_url" -o /opt/quarto.deb && \ - dpkg -i /opt/quarto.deb && \ - rm /opt/quarto.deb - - type: native - - type: nextflow - directives: - label: [midtime, lowmem, lowcpu] - diff --git a/src/create_task_readme/script.R b/src/create_task_readme/script.R deleted file mode 100644 index 55388ea..0000000 --- a/src/create_task_readme/script.R +++ /dev/null @@ -1,134 +0,0 @@ -library(rlang, quietly = TRUE, warn.conflicts = FALSE) -library(purrr, quietly = TRUE, warn.conflicts = FALSE) -library(dplyr, quietly = TRUE, warn.conflicts = FALSE) - -## VIASH START -par <- list( - "task" = "batch_integration", - "task_dir" = "src/tasks/batch_integration", - "output" = "src/tasks/batch_integration/README.md", - "viash_yaml" = "_viash.yaml", - "github_url" = "https://github.com/openproblems-bio/openproblems-v2/tree/main/" -) -meta <- list( - "resources_dir" = "src/common/helper_functions", - "temp_dir" = "temp/" -) -## VIASH END - -if (is.null(par$task) && is.null(par$task_dir)) { - stop("Either 'task' or 'task_dir' must be 
provided") -} -if (is.null(par$viash_yaml)) { - stop("Argument 'viash_yaml' must be provided") -} -if (is.null(par$output)) { - stop("Argument 'output' must be provided") -} - -# import helper function -source(paste0(meta["resources_dir"], "/read_and_merge_yaml.R")) -source(paste0(meta["resources_dir"], "/strip_margin.R")) -source(paste0(meta["resources_dir"], "/read_api_files.R")) - -cat("Read task info\n") -task_api <- read_task_api(par[["task_dir"]]) - -# determine ordering -root <- .task_graph_get_root(task_api) - -r_graph <- render_task_graph(task_api, root) - -cat("Render API details\n") -order <- names(igraph::bfs(task_api$task_graph, root)$order) -r_details <- map_chr( - order, - function(file_name) { - if (file_name %in% names(task_api$comp_specs)) { - render_component(task_api$comp_specs[[file_name]]) - } else { - render_file(task_api$file_specs[[file_name]]) - } - } -) - -cat("Render authors\n") -authors_str <- - if (nrow(task_api$authors) > 0) { - paste0( - "\n## Authors & contributors\n\n", - task_api$authors %>% knitr::kable() %>% paste(collapse = "\n"), - "\n" - ) - } else { - "" - } -readme_str <- - if (is.null(task_api$task_info$readme) || is.na(task_api$task_info$readme)) { - "" - } else { - paste0( - "\n## README\n\n", - task_api$task_info$readme, - "\n" - ) - } - -cat("Generate qmd content\n") -relative_path <- par[["task_dir"]] %>% - gsub(paste0(dirname(par[["viash_yaml"]]), "/*"), "", .) %>% - gsub("/*$", "", .) 
-source_url <- paste0(par[["github_url"]], relative_path) -qmd_content <- strip_margin(glue::glue(" - §--- - §title: \"{task_api$task_info$label}\" - §format: gfm - §--- - § - § - § - §{task_api$task_info$summary} - § - §Path to source: [`{relative_path}`]({source_url}) - § - §{readme_str} - § - §## Motivation - § - §{task_api$task_info$motivation} - § - §## Description - § - §{task_api$task_info$description} - §{authors_str} - §## API - § - §{r_graph} - § - §{paste(r_details, collapse = '\n\n')} - § - §"), symbol = "§") - -cat("Write README.qmd to file\n") -qmd_file <- tempfile( - pattern = "README_", - fileext = ".qmd", - tmpdir = meta$temp_dir -) - -if (!dir.exists(meta$temp_dir)) { - dir.create(meta$temp_dir, recursive = TRUE) -} -writeLines(qmd_content, qmd_file) - -cat("Render README.qmd to README.md\n") -out <- processx::run( - command = "quarto", - args = c("render", qmd_file, "--output", "-"), - echo = TRUE -) - -writeLines(out$stdout, par$output) diff --git a/src/create_task_readme/test.R b/src/create_task_readme/test.R deleted file mode 100644 index 9af1fe9..0000000 --- a/src/create_task_readme/test.R +++ /dev/null @@ -1,30 +0,0 @@ -requireNamespace("assertthat", quietly = TRUE) - -## VIASH START -## VIASH END - -opv2 <- paste0(meta$resources_dir, "/openproblems-v2") -output_path <- "output.md" - -cat(">> Running the script as test\n") -system(paste( - meta["executable"], - "--task", "label_projection", - "--output", output_path, - "--task_dir", paste0(opv2, "/src/tasks/label_projection"), - "--viash_yaml", paste0(opv2, "/_viash.yaml") -)) - -cat(">> Checking whether output files exist\n") -assertthat::assert_that(file.exists(output_path)) - -cat(">> Checking file contents\n") -lines <- readLines(output_path) -assertthat::assert_that(any(grepl("# Label projection", lines))) -assertthat::assert_that(any(grepl("# Description", lines))) -assertthat::assert_that(any(grepl("# Motivation", lines))) -assertthat::assert_that(any(grepl("# Authors", lines))) 
-assertthat::assert_that(any(grepl("flowchart LR", lines))) -assertthat::assert_that(any(grepl("# File format:", lines))) - -cat("All checks succeeded!\n") diff --git a/src/helper_functions/strip_margin.py b/src/helper_functions/strip_margin.py deleted file mode 100644 index fbfb39d..0000000 --- a/src/helper_functions/strip_margin.py +++ /dev/null @@ -1,3 +0,0 @@ -def strip_margin(text: str) -> str: - import re - return re.sub("(^|\n)[ \t]*\|", "\\1", text) \ No newline at end of file diff --git a/src/sync_resources/config.vsh.yaml b/src/sync_resources/config.vsh.yaml deleted file mode 100644 index 11c9461..0000000 --- a/src/sync_resources/config.vsh.yaml +++ /dev/null @@ -1,44 +0,0 @@ -functionality: - name: "sync_resources" - namespace: "common" - version: "dev" - description: Synchronise resources from s3 to a directory - usage: | - sync_resources - sync_resources --input s3://openproblems-data/resources_test --output resources_test - arguments: - - name: "--input" - alternatives: ["-i"] - type: string - description: "Path to the S3 bucket to sync from." - default: "s3://openproblems-data/resources_test" - - name: "--output" - alternatives: ["-o"] - type: file - default: resources_test - direction: output - description: "Path to the test resource directory." - - name: "--quiet" - type: boolean_true - description: "Displays the operations that would be performed using the specified command without actually running them." - - name: "--dryrun" - type: boolean_true - description: "Does not display the operations performed from the specified command." - - name: "--delete" - type: boolean_true - description: "Files that exist in the destination but not in the source are deleted during sync." - - name: "--exclude" - type: "string" - multiple: true - description: Exclude all files or objects from the command that matches the specified pattern. 
- resources: - - type: bash_script - path: script.sh - test_resources: - - type: bash_script - path: run_test.sh -platforms: - - type: docker - image: "amazon/aws-cli:2.7.12" - - type: native - - type: nextflow diff --git a/src/sync_resources/run_test.sh b/src/sync_resources/run_test.sh deleted file mode 100755 index 67f2504..0000000 --- a/src/sync_resources/run_test.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -## VIASH START -## VIASH END - -echo ">> Run aws s3 sync" -./$meta_functionality_name \ - --input s3://openproblems-data/resources_test/common/pancreas \ - --output foo \ - --quiet - -echo ">> Check whether the right files were copied" -[ ! -f foo/dataset.h5ad ] && echo csv should have been copied && exit 1 - -echo ">> Test succeeded!" \ No newline at end of file diff --git a/src/sync_resources/script.sh b/src/sync_resources/script.sh deleted file mode 100644 index c97b9fc..0000000 --- a/src/sync_resources/script.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -## VIASH START -par_input='s3://openproblems-data/resources_test' -par_output='resources_test' -## VIASH END - -extra_params=( ) - -if [ "$par_quiet" == "true" ]; then - extra_params+=( "--quiet" ) -fi -if [ "$par_dryrun" == "true" ]; then - extra_params+=( "--dryrun" ) -fi -if [ "$par_delete" == "true" ]; then - extra_params+=( "--delete" ) -fi - -if [ ! -z ${par_exclude+x} ]; then - IFS=":" - for var in $par_exclude; do - unset IFS - extra_params+=( "--exclude" "$var" ) - done -fi - - -# Disable the use of the Amazon EC2 instance metadata service (IMDS). 
-# see https://florian.ec/blog/github-actions-awscli-errors/ -# or https://github.com/aws/aws-cli/issues/5234#issuecomment-705831465 -export AWS_EC2_METADATA_DISABLED=true - -aws s3 sync "$par_input" "$par_output" --no-sign-request "${extra_params[@]}" diff --git a/sync_resources/sync_resources b/sync_resources/sync_resources new file mode 100755 index 0000000..6d39808 --- /dev/null +++ b/sync_resources/sync_resources @@ -0,0 +1,1029 @@ +#!/usr/bin/env bash + +# sync_resources dev +# +# This wrapper script is auto-generated by viash 0.8.0 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# 
ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + SOURCE="$1" + while [[ "$SOURCE" != "" && ! -e "$SOURCE/.build.yaml" ]]; do + SOURCE=${SOURCE%/*} + done + echo $SOURCE +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. 
+# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. 
+function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="sync_resources" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "sync_resources dev" + echo "" + echo "Synchronise the resources from s3 to resources_test" + echo "" + echo "Usage:" + echo "sync_resources" + echo "sync_resources --input s3://openproblems-data/resources_test --output" + echo "resources_test" + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: string" + echo " default: s3://openproblems-data/resources_test" + echo " Path to the S3 bucket to sync from." + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " default: resources_test" + echo " Path to the test resource directory." + echo "" + echo " --quiet" + echo " type: boolean_true" + echo " Displays the operations that would be performed using the specified" + echo " command without actually running them." + echo "" + echo " --dryrun" + echo " type: boolean_true" + echo " Does not display the operations performed from the specified command." + echo "" + echo " --delete" + echo " type: boolean_true" + echo " Files that exist in the destination but not in the source are deleted" + echo " during sync." + echo "" + echo " --exclude" + echo " type: string, multiple values allowed" + echo " Exclude all files or objects from the command that matches the specified" + echo " pattern." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM amazon/aws-cli:2.7.12 + +ENTRYPOINT [] + + +RUN : +LABEL org.opencontainers.image.description="Companion container for running component common sync_resources" +LABEL org.opencontainers.image.created="2024-05-31T22:05:54+02:00" +LABEL org.opencontainers.image.source="https://github.com/openproblems-bio/openproblems-v2" +LABEL org.opencontainers.image.revision="694e6553540d1e5ea3c0786293ad8e3da9e95b2d" +LABEL org.opencontainers.image.version="dev" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-sync_resources-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname 
"$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "sync_resources dev" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quiet) + [ -n "$VIASH_PAR_QUIET" ] && ViashError Bad arguments for option \'--quiet\': \'$VIASH_PAR_QUIET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUIET=true + shift 1 + ;; + --dryrun) + [ -n "$VIASH_PAR_DRYRUN" ] && ViashError Bad arguments for option \'--dryrun\': \'$VIASH_PAR_DRYRUN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DRYRUN=true + shift 1 + ;; + --delete) + [ -n "$VIASH_PAR_DELETE" ] && ViashError Bad arguments for option \'--delete\': \'$VIASH_PAR_DELETE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DELETE=true + shift 1 + ;; + --exclude) + if [ -z "$VIASH_PAR_EXCLUDE" ]; then + VIASH_PAR_EXCLUDE="$2" + else + VIASH_PAR_EXCLUDE="$VIASH_PAR_EXCLUDE:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --exclude. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --exclude=*) + if [ -z "$VIASH_PAR_EXCLUDE" ]; then + VIASH_PAR_EXCLUDE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_EXCLUDE="$VIASH_PAR_EXCLUDE:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openproblems-bio/common/sync_resources:dev' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openproblems-bio/common/sync_resources:dev' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openproblems-bio/common/sync_resources:dev'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openproblems-bio/common/sync_resources:dev' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_INPUT+x} ]; then + VIASH_PAR_INPUT="s3://openproblems-data/resources_test" +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="resources_test" +fi +if [ -z ${VIASH_PAR_QUIET+x} ]; then + VIASH_PAR_QUIET="false" +fi +if [ -z ${VIASH_PAR_DRYRUN+x} ]; then + VIASH_PAR_DRYRUN="false" +fi +if [ -z ${VIASH_PAR_DELETE+x} ]; then + VIASH_PAR_DELETE="false" +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_QUIET" ]]; then + if ! [[ "$VIASH_PAR_QUIET" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--quiet' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DRYRUN" ]]; then + if ! [[ "$VIASH_PAR_DRYRUN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--dryrun' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DELETE" ]]; then + if ! [[ "$VIASH_PAR_DELETE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--delete' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openproblems-bio/common/sync_resources:dev "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openproblems-bio/common/sync_resources:dev)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openproblems-bio/common/sync_resources:dev +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-sync_resources-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\"'\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "${VIASH_PAR_DRYRUN}" | sed "s#'#'\"'\"'#g;s#.*#par_dryrun='&'#" ; else echo "# par_dryrun="; fi ) +$( if [ ! -z ${VIASH_PAR_DELETE+x} ]; then echo "${VIASH_PAR_DELETE}" | sed "s#'#'\"'\"'#g;s#.*#par_delete='&'#" ; else echo "# par_delete="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\"'\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +extra_params=( ) + +if [ "\$par_quiet" == "true" ]; then + extra_params+=( "--quiet" ) +fi +if [ "\$par_dryrun" == "true" ]; then + extra_params+=( "--dryrun" ) +fi +if [ "\$par_delete" == "true" ]; then + extra_params+=( "--delete" ) +fi + +if [ ! -z \${par_exclude+x} ]; then + IFS=":" + for var in \$par_exclude; do + unset IFS + extra_params+=( "--exclude" "\$var" ) + done +fi + + +# Disable the use of the Amazon EC2 instance metadata service (IMDS). +# see https://florian.ec/blog/github-actions-awscli-errors/ +# or https://github.com/aws/aws-cli/issues/5234#issuecomment-705831465 +export AWS_EC2_METADATA_DISABLED=true + +aws s3 sync "\$par_input" "\$par_output" --no-sign-request "\${extra_params[@]}" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0