From 1880dac4bdee6a6692d966ea0cf1288349085681 Mon Sep 17 00:00:00 2001 From: mweston Date: Wed, 20 Dec 2017 13:05:16 -0500 Subject: [PATCH] initial df-helper commit --- reactors/df-helper/.dockerignore | 2 + reactors/df-helper/.gitignore | 4 + reactors/df-helper/Dockerfile | 32 ++++ reactors/df-helper/README.md | 25 ++++ reactors/df-helper/build.sh | 7 + reactors/df-helper/clean-test-data.sh | 21 +++ reactors/df-helper/deploy.sh | 90 ++++++++++++ reactors/df-helper/df-helper-0.1.0/VERSION | 1 + .../df-helper-0.1.0/_util/container_exec.sh | 119 +++++++++++++++ .../df-helper-0.1.0/app-maverick-mweston.json | 42 ++++++ .../df-helper-0.1.0/app-maverick.json | 42 ++++++ .../df-helper-0.1.0/app-wrangler.json | 42 ++++++ reactors/df-helper/df-helper-0.1.0/app.yml | 29 ++++ .../df-helper-job-public-m.json | 8 + .../df-helper-job-public-mw.json | 8 + .../df-helper-job-public-w.json | 8 + .../df-helper-0.1.0/df-helper-job.json | 8 + .../df-helper-0.1.0/runner-template.sh | 10 ++ reactors/df-helper/df-helper-0.1.0/tester.sh | 48 ++++++ reactors/df-helper/import-test-data.sh | 137 ++++++++++++++++++ reactors/df-helper/push.sh | 7 + reactors/df-helper/src/df-helper.py | 30 ++++ reactors/df-helper/stage-test-data.sh | 39 +++++ reactors/df-helper/test-data.tsv | 1 + 24 files changed, 760 insertions(+) create mode 100644 reactors/df-helper/.dockerignore create mode 100644 reactors/df-helper/.gitignore create mode 100644 reactors/df-helper/Dockerfile create mode 100644 reactors/df-helper/README.md create mode 100755 reactors/df-helper/build.sh create mode 100755 reactors/df-helper/clean-test-data.sh create mode 100755 reactors/df-helper/deploy.sh create mode 100644 reactors/df-helper/df-helper-0.1.0/VERSION create mode 100644 reactors/df-helper/df-helper-0.1.0/_util/container_exec.sh create mode 100644 reactors/df-helper/df-helper-0.1.0/app-maverick-mweston.json create mode 100644 reactors/df-helper/df-helper-0.1.0/app-maverick.json create mode 100644 
# Image for the df-helper reactor: the sd2e Ubuntu 16 base plus a Python
# runtime, with the helper scripts installed under /opt/scripts.
#
# Pin to a specific major version, not latest
# xenial = 16.04 LTS
# trusty = 14.04 LTS

FROM sd2e/base:ubuntu16

# Install Python and pip in one layer and remove the apt caches in that same
# layer so they are not baked into the image.
RUN apt-get update && \
    apt-get install python python-pip -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

RUN pip install --upgrade pip

# Customizing 101
#
# 1. Try to avoid working in / (unless that's your intent)
# 2. Do ADD and COPY operations as late as possible as
#    they invalidate the Docker cache on downstream layers
# 3. Import archive files from GitHub using tagged releases
# 4. Clean up your build directories when done
# 5. Put scripts and other assets in relatively standard places
# 6. Don't actually have the default ENTRYPOINT or
#    CMD do work. Enlist it for debugging instead.

WORKDIR /root

RUN mkdir -p /opt/scripts

# COPY rather than ADD: src is a plain local directory, so none of ADD's
# archive-extraction or URL-fetching behaviour is wanted here.
COPY src /opt/scripts

# After much discussion, it seems that a standard is for
# the default code in the container to at least return help/usage.
# Exec form so python runs directly instead of under `/bin/sh -c`.
CMD ["python", "/opt/scripts/df-helper.py", "--help"]
#!/usr/bin/env bash
#
# Remove staged test data from an app bundle so that it can be deployed.
#
# Arguments:
#   $1  path to the app bundle
#   $2  name of the test-data directory inside the bundle (default: localtest)

# Path to app bundle
APP=$1
DEST=${2-localtest}

TEST_DATA_CACHE=${TEST_DATA_CACHE:-test-data-cache}

# Abort when the target directory is missing. Without this, an empty APP
# would let the rm below expand to "/localtest/*".
if [ ! -d "$APP/$DEST" ]
then
    echo "Can't find or access $APP/$DEST. Re-run $0 " >&2
    exit 1
fi

# Delete everything inside <app>/$DEST and remove the .dirty marker files
# found under the app bundle.
#   $1  path to the app bundle
function clean_data() {

    local APATH=$1

    # ${VAR:?} makes the shell stop instead of expanding an empty variable
    # into a path rooted at "/".
    rm -rf -- "${APATH:?}/${DEST:?}"/*
    find "${APATH}" -name .dirty -exec rm -f {} +

}

clean_data "${APP}"
# Helpers for running a command inside a container image using whichever
# container engine (Singularity or Docker) is found on the host.
# This file is meant to be sourced, not executed.

# Run COMMAND with PARAMS inside CONTAINER_IMAGE.
#
# Usage: container_exec CONTAINER_IMAGE COMMAND [PARAMS...]
#
# Environment read:
#   DEBUG      if non-empty, write .container_exec.<pid>.log and
#              .container_exec.<pid>.env in the working directory and trace
#              the docker invocation
#   OPTS       extra options for `docker run` (word-split on whitespace)
#   ENVFILE    file handed to `docker run --env-file`
#   STOCKYARD  preferred parent for the Singularity pull/cache folders,
#              falling back to $HOME
#
# Sets the global _CONTAINER_ENGINE to "singularity" or "docker" when one is
# detected (Singularity is tried first).
#
# Returns the exit status of the container command, or 1 when no supported
# engine is available.
function container_exec() {

    # [TODO] Check for existence of docker or singularity executable
    # [TODO] Enable honoring a DEBUG global
    # [TODO] Figure out how to accept more optional arguments (env-file, etc)
    # [TODO] Better error handling and reporting
    # [TODO] Handle "urllib2.URLError: "

    local CONTAINER_IMAGE=$1
    shift
    local COMMAND=$1
    shift
    # Keep the remaining arguments as an array so each one reaches the
    # container exactly as the caller passed it.
    local -a PARAMS=("$@")
    local rc=0

    # A little logging to help with the edge cases
    if [ -n "$DEBUG" ]
    then
        local _PID=$$
        {
            echo "$CONTAINER_IMAGE"
            echo "$COMMAND"
            echo "${PARAMS[*]}"
            echo "$PWD"
            echo $(ls "$PWD")
        } > ".container_exec.${_PID}.log"
        env > ".container_exec.${_PID}.env"
    fi

    # Detect container engine
    if command -v singularity > /dev/null 2>&1
    then
        _CONTAINER_ENGINE="singularity"
    elif command -v docker > /dev/null 2>&1
    then
        _CONTAINER_ENGINE="docker"
    fi

    # Default the Singularity folders and export them: an unexported shell
    # variable is never seen by the singularity child process.
    if [ -z "$SINGULARITY_PULLFOLDER" ]
    then
        if [ -n "$STOCKYARD" ]
        then
            SINGULARITY_PULLFOLDER="${STOCKYARD}/.singularity"
        else
            SINGULARITY_PULLFOLDER="$HOME/.singularity"
        fi
    fi
    export SINGULARITY_PULLFOLDER

    if [ -z "$SINGULARITY_CACHEDIR" ]
    then
        if [ -n "$STOCKYARD" ]
        then
            SINGULARITY_CACHEDIR="${STOCKYARD}/.singularity"
        else
            SINGULARITY_CACHEDIR="$HOME/.singularity"
        fi
    fi
    export SINGULARITY_CACHEDIR

    local _GID=$(id -g)
    chmod g+rwxs .
    umask 002

    if [[ "$_CONTAINER_ENGINE" == "docker" ]]
    then
        #local OPTS="--network=none --cpus=1.0000 --memory=1G --device-read-iops=/dev/sda:1500 --device-read-iops=/dev/sda:1500"

        # Build the option list locally so repeated calls do not keep
        # appending to the caller's OPTS.
        # Set group ownership on all files making them readable by archive process
        local -a _opts=( $OPTS --rm "--user=0:${_GID}" -v "${PWD}:/home:rw" -w /home )
        if [ -n "$ENVFILE" ]
        then
            _opts+=( --env-file "${ENVFILE}" )
        fi
        if [ -n "$DEBUG" ]
        then
            set -x
        fi
        docker run "${_opts[@]}" "${CONTAINER_IMAGE}" "${COMMAND}" "${PARAMS[@]}"
        # Capture the status now; the DEBUG check below would overwrite $?.
        rc=$?
        if [ -n "$DEBUG" ]
        then
            set +x
        fi
    elif [[ "$_CONTAINER_ENGINE" == "singularity" ]]
    then
        # [TODO] Detect and deal if an .img has been passed it (rare)
        singularity exec "docker://${CONTAINER_IMAGE}" "${COMMAND}" "${PARAMS[@]}"
        rc=$?
    else
        echo "_CONTAINER_ENGINE needs to be 'docker' or 'singularity' [$_CONTAINER_ENGINE]"
        rc=1
    fi

    return $rc

}


# Print the number of logical CPU cores on this host.
# Uses sysctl on Darwin and /proc/cpuinfo on Linux; prints 4 on any other
# platform or when the probe does not yield a positive integer.
function count_logical_cores() {

    local _default_cores=4
    local _count_cores=

    case "$(uname)" in
        Darwin)
            _count_cores=$(sysctl -n hw.logicalcpu 2> /dev/null)
            ;;
        Linux)
            # Anchor the match so only the per-CPU "processor : N" lines count.
            _count_cores=$(grep -c '^processor' /proc/cpuinfo 2> /dev/null)
            ;;
    esac

    case "$_count_cores" in
        '' | 0 | *[!0-9]*)
            _count_cores=$_default_cores
            ;;
    esac

    echo "$_count_cores"

}

# Print a thread count that leaves one logical core free, never less than 1.
function auto_maxthreads() {

    local hwcore=$(count_logical_cores)
    hwcore=$((hwcore - 1))
    if [ "$hwcore" -lt 1 ]
    then
        hwcore=1
    fi
    echo "$hwcore"

}
"http://edamontology.org/format_2332", + "http://edamontology.org/format_3245" + ] + }, + "value": { + "default": "agave://data-sd2e-community/sample/biofab/yeast-gates_q0/3/manifest/107795-manifest.json", + "required": true, + "visible": true + } + }], + "defaultMaxRunTime": "00:30:00" +} \ No newline at end of file diff --git a/reactors/df-helper/df-helper-0.1.0/app-maverick.json b/reactors/df-helper/df-helper-0.1.0/app-maverick.json new file mode 100644 index 0000000..089bfb7 --- /dev/null +++ b/reactors/df-helper/df-helper-0.1.0/app-maverick.json @@ -0,0 +1,42 @@ +{ + "checkpointable": false, + "name": "df-helper-maverick", + "executionSystem": "hpc-tacc-maverick", + "executionType": "HPC", + "deploymentPath": "sd2eadm/apps/df-helper-0.1.0", + "deploymentSystem": "data-sd2e-projects-users", + "helpURI": "https://sd2e.org/develop/", + "label": "Dataframe helper [TACC Maverick]", + "longDescription": "", + "modules": ["load tacc-singularity/2.3.1"], + "ontology": ["http://edamontology.org/topic_3520"], + "parallelism": "SERIAL", + "shortDescription": "Dataframe helper", + "tags": ["df-helper", "docker://index.docker.io/sd2e/df-helper:latest"], + "templatePath": "runner-template.sh", + "testPath": "tester.sh", + "version": "0.1.0", + "inputs": [{ + "id": "manifestFile", + "details": { + "label": "manifest file", + "showAttribute": false, + "attribute": "--manifest " + }, + "semantics": { + "minCardinality": 1, + "maxCardinality": 1, + "ontology": [ + "http://edamontology.org/format_1929", + "http://edamontology.org/format_2332", + "http://edamontology.org/format_3245" + ] + }, + "value": { + "default": "agave://data-sd2e-community/sample/biofab/yeast-gates_q0/3/manifest/107795-manifest.json", + "required": true, + "visible": true + } + }], + "defaultMaxRunTime": "00:30:00" +} \ No newline at end of file diff --git a/reactors/df-helper/df-helper-0.1.0/app-wrangler.json b/reactors/df-helper/df-helper-0.1.0/app-wrangler.json new file mode 100644 index 
---
# Application descriptor for df-helper.
#
# Each top-level key appears exactly once: YAML mapping keys must be unique,
# and common parsers keep only the last duplicate, which would drop
# template_script and test_script.
inputs:
  manifestFile:
    default_value: "agave://data-sd2e-community/sample/biofab/yeast-gates_q0/3/manifest/107795-manifest.json"
    test_value: "localtest/107795-manifest.json"
    label: "experiment manifest"
app:
  name: "df-helper"
  version: "0.1.0"
  deploymentPath: "sd2eadm/apps/df-helper-0.1.0"
runtime:
  template_script: "runner-template.sh"
  test_script: "tester.sh"
  container:
    # Registry, organisation and tag are stored without separators.
    docker_reg: "index.docker.io"
    docker_org: "sd2e"
    docker_image: "df-helper"
    image_tag: "latest"
  execution_host:
    system_id: "hpc-tacc-maverick"
# The app.yml file is not yet part of the official TACC Agave and Reactors platform,
# but over the course of the next ~60 days we will be deploying template-driven
# tools for application & ETL component development that will both generate and
# consume files in this (or a related) format. Consider this advance notice.
{
  "name": "df-helper-test-1507148614",
  "appId": "df-helper-maverick-0.1.0",
  "archive": false,
  "inputs": {
    "manifestFile": "agave://data-sd2e-community/sample/biofab/yeast-gates_q0/3/manifest/107795-manifest.json"
  }
}
# Download an agave:// URI into the local test-data cache.
#   $1  source URI (agave://<system>/<path>)
#   $2  destination name handed to `files-get -N`
# A path ending in "/" is fetched recursively.
function _import_agave_uri() {

    local SRC=$1
    local DEST=$2

    local ASYS=$(get_agave_system "$SRC")
    local APATH=$(get_agave_path "$SRC")

    case "$APATH" in
        */)
            files-get "--recursive" -S "${ASYS}" -N "${DEST}" "${APATH}"
            ;;
        *)
            files-get -S "${ASYS}" -N "${DEST}" "${APATH}"
            ;;
    esac
}

# Placeholder for http/https sources; only prints a notice.
function _import_public_uri() {

    local SRC=$1
    local DEST=$2
    echo "Not implemented"
}

# Print the last path component of an agave:// URI (empty when the URI ends
# in "/").
function get_agave_fname() {

    local AGAVE_URI=$1
    # remove the protocol
    local url=${AGAVE_URI#agave://}
    printf '%s\n' "${url##*/}"

}

# Print the path portion of an agave:// URI, i.e. everything after the
# system id. Prints an empty line when the URI names only a system.
function get_agave_path() {

    local AGAVE_URI=$1

    # remove the protocol
    local url=${AGAVE_URI#agave://}
    case "$url" in
        */*)
            printf '%s\n' "${url#*/}"
            ;;
        *)
            # No "/" means there is no path; do not echo the system id back.
            printf '\n'
            ;;
    esac

}

# Print the system id of an agave:// URI (the text before the first "/").
function get_agave_system() {

    local AGAVE_URI=$1

    # remove the protocol
    local url=${AGAVE_URI#agave://}
    printf '%s\n' "${url%%/*}"

}

# Print the scheme of a URI ("agave", "https", ...), or an empty line when
# the argument contains no "://".
function get_uri_scheme() {

    local URL=$1
    local proto=

    case "$URL" in
        *://*)
            # Cut at the FIRST "://" so a later "://" inside the path cannot
            # bleed into the scheme.
            proto=${URL%%://*}
            ;;
    esac
    printf '%s\n' "$proto"

}


# Log an ERROR message and terminate the script with status 1.
function die() {

    mesg "ERROR" "$*"
    exit 1
}

# Log a WARNING message.
function warn() {

    mesg "WARNING" "$*"
}

# Log an INFO message.
function log() {

    mesg "INFO" "$*"
}

# Print "[LEVEL] message" on stdout.
#   $1    level tag
#   $2..  message words, joined with single spaces
function mesg() {

    local level=$1
    shift
    local message="$*"
    echo "[$level] $message"
}
def main(args):
    """Load the experiment manifest named on the command line.

    Args:
        args: Parsed command-line namespace; ``args.manifest`` is the path
            of a JSON manifest file.

    Returns:
        The decoded manifest (whatever JSON value the file holds).

    Raises:
        IOError/OSError: If the manifest file cannot be opened.
        ValueError: If the file does not contain valid JSON.
    """
    # json.load parses straight from the file object; the result is returned
    # so callers can use the manifest instead of it being thrown away.
    with open(args.manifest) as manifest_file:
        manifest_json = json.load(manifest_file)

    # TODO SBH manifest->plan query
    # This is not currently in SBH, Nic is working on adding it
    # See discussion in chaos-xplan channel

    return manifest_json
TEST_DATA_CACHE=${TEST_DATA_CACHE:-test-data-cache}

# APP, DEST and TEST_DATA_CSV are set at the top of this script.
# Abort when the staging directory is missing: otherwise cp would create a
# regular file with the directory's name and the .dirty marker could not be
# written.
if [ ! -d "$APP/$DEST" ]
then
    echo "Can't find or access $APP/$DEST. Re-run $0 " >&2
    exit 1
fi

# Copy one cached file or directory into <app>/$DEST and mark the directory
# as dirty so that deploy.sh refuses to ship it.
#   $1  path inside the test-data cache
function stage_data() {

    local APATH=$1

    # cp -R already handles both files and directories, so no separate
    # recursion flag is needed for paths ending in "/".
    cp -Rf -- "${APATH}" "${APP}/${DEST}" ; touch "${APP}/${DEST}/.dirty"

}

# Each row of the TSV is "<source-uri> <destination-path>"; only the
# destination (relative to the cache) is needed for staging.
# The "|| [ -n ... ]" keeps a final row that lacks a trailing newline.
while read -r SOURCE_URI DEST_PATH _ || [ -n "${SOURCE_URI}" ]
do
    # Skip blank or incomplete rows rather than copying the whole cache.
    if [ -z "${DEST_PATH}" ]
    then
        continue
    fi
    stage_data "${TEST_DATA_CACHE}/${DEST_PATH}"
done < "${TEST_DATA_CSV}"