From 8d35c613dd4082a68c5152ffb682b23880bdf4fc Mon Sep 17 00:00:00 2001
From: e2e slurm <fake@example.com>
Date: Tue, 2 Jan 2024 09:30:22 -0500
Subject: [PATCH 1/6] alphabetize requirements

---
 setup.cfg | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 288e627a..348a7117 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -24,17 +24,17 @@ classifiers =
 [options]
 python_requires = >=3.7
 install_requires =
+    datalad >= 0.17.2
+    datalad_container >= 1.1.6
+    filelock >= 3.8.0
     nibabel >=2.2.1
     numpy
     pandas
-    tqdm
     pyyaml >= 6.0
-    #ruamel.yaml >= 0.17.21
-    datalad >= 0.17.2
-    datalad_container >= 1.1.6
-    regex
-    filelock >= 3.8.0
     qstat >= 0.0.5
+    regex
+    #ruamel.yaml >= 0.17.21
+    tqdm
 packages = find:
 include_package_data = True
 

From c0d3479b75b6b1acc99ac01ecd2b17d48354680e Mon Sep 17 00:00:00 2001
From: Austin Macdonald <austin@dartmouth.edu>
Date: Wed, 13 Dec 2023 12:22:23 -0500
Subject: [PATCH 2/6] Setup e2e tests with a slurm microcluster

---
 .github/workflows/e2e-slurm.yml               | 30 +++++++
 .gitignore                                    |  5 +-
 Makefile                                      | 22 +++++
 babs/babs.py                                  |  4 +
 babs/utils.py                                 | 13 ++-
 setup.cfg                                     |  1 +
 tests/e2e-slurm/container/babs-user-script.sh | 88 +++++++++++++++++++
 .../container/config_toybidsapp.yaml          | 21 +++++
 tests/e2e-slurm/container/ensure-env.sh       | 11 +++
 tests/e2e-slurm/container/rerun.sh            |  6 ++
 .../e2e-slurm/container/walkthrough-tests.sh  | 58 ++++++++++++
 tests/e2e-slurm/install-babs.sh               | 15 ++++
 tests/e2e-slurm/main.sh                       | 50 +++++++++++
 13 files changed, 322 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/e2e-slurm.yml
 create mode 100644 Makefile
 create mode 100755 tests/e2e-slurm/container/babs-user-script.sh
 create mode 100644 tests/e2e-slurm/container/config_toybidsapp.yaml
 create mode 100755 tests/e2e-slurm/container/ensure-env.sh
 create mode 100755 tests/e2e-slurm/container/rerun.sh
 create mode 100755 tests/e2e-slurm/container/walkthrough-tests.sh
 create mode 100755 tests/e2e-slurm/install-babs.sh
 create mode 100755 tests/e2e-slurm/main.sh

diff --git a/.github/workflows/e2e-slurm.yml b/.github/workflows/e2e-slurm.yml
new file mode 100644
index 00000000..e98bc090
--- /dev/null
+++ b/.github/workflows/e2e-slurm.yml
@@ -0,0 +1,30 @@
+---
+name: Slurm
+
+on: [push]
+jobs:
+    e2e-slurm:
+      name: Test e2e with SLURM
+      runs-on: ubuntu-latest
+      steps:
+        - name: checkout our repo
+          uses: actions/checkout@v4
+        - name: Install apptainer
+          uses: eWaterCycle/setup-apptainer@v2
+          with:
+            apptainer-version: 1.1.2
+        - name: Install Conda
+          uses: conda-incubator/setup-miniconda@v3
+          with:
+            activate-environment: babs
+            auto-update-conda: true
+            python-version: 3.9
+        - name: Conda info
+          shell: bash -el {0}
+          run: conda info
+        - name: Install Babs
+          shell: bash -el {0}
+          run: ./tests/e2e-slurm/install-babs.sh
+        - name: Execute e2e with SLURM
+          shell: bash -el {0}
+          run: ./tests/e2e-slurm/main.sh
diff --git a/.gitignore b/.gitignore
index 18792aa0..cf103c72 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,7 @@ build/
 
 # Distribution / packaging
 dist/
-babs/VERSION
\ No newline at end of file
+babs/VERSION
+
+# e2e testdata
+.testdata*
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..b0375260
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+install:
+	./tests/e2e-slurm/install-babs.sh
+
+setup-user:
+	./tests/e2e-slurm/setup-user.sh
+
+e2e: clean
+	./tests/e2e-slurm/main.sh
+
+build: clean
+	podman build -f tests/e2e-slurm/Containerfile . -t testss
+
+# TODO testdata variable
+clean:
+	podman stop slurm 2>/dev/null || true
+	podman rm slurm 2>/dev/null || true
+	[ -e .testdata/babs_test_project/toybidsapp-container ] && \
+		datalad remove -d .testdata/babs_test_project/toybidsapp-container --reckless kill || :
+	rm -rf .testdata
+
+logs:
+	cat .testdata/ci-logs/*
diff --git a/babs/babs.py b/babs/babs.py
index f9719fe1..4c9025a2 100644
--- a/babs/babs.py
+++ b/babs/babs.py
@@ -2778,10 +2778,14 @@ def generate_job_submit_template(self, yaml_path, babs, system, test=False):
             env_flags = "-v DSLOCKFILE=" + babs.analysis_path + "/.SGE_datalad_lock"
         elif system.type == "slurm":
             submit_head = "sbatch"
+            # TODO: asmacdo
             env_flags = "--export=DSLOCKFILE=" + babs.analysis_path + "/.SLURM_datalad_lock"
         else:
             warnings.warn("not supporting systems other than sge...")
 
+        # TODO: rm asmacdo hack
+        # env_flags = env_flags + f",MINICONDA_PATH={os.getenv('MINICONDA_PATH')}"
+
         # Check if the bash file already exist:
         if op.exists(yaml_path):
             os.remove(yaml_path)  # remove it
diff --git a/babs/utils.py b/babs/utils.py
index e6004e3d..a22aa201 100644
--- a/babs/utils.py
+++ b/babs/utils.py
@@ -1674,6 +1674,7 @@ def submit_one_test_job(analysis_path, type_system, flag_print_message=True):
                               stdout=subprocess.PIPE)
 
     proc_cmd.check_returncode()
+    print(f"Return code: {proc_cmd.returncode}")
     msg = proc_cmd.stdout.decode('utf-8')
 
     if type_system == "sge":
@@ -1685,7 +1686,13 @@ def submit_one_test_job(analysis_path, type_system, flag_print_message=True):
         # e.g., on MIT OpenMind: no 1st line from MSI; only 2nd line.
     else:
         raise Exception("type system can be slurm or sge")
-    job_id = int(job_id_str)
+
+    # This is necessary because SLURM commands can fail but still have return code 0
+    try:
+        job_id = int(job_id_str)
+    except ValueError as e:
+        raise ValueError(f"Cannot convert {job_id_str!r} into an int: {e}. "
+            f"That output is a result of running command {cmd} which produced output {msg}.")
 
     # log filename:
     log_filename = job_name + ".*" + job_id_str
@@ -2072,8 +2079,12 @@ def get_last_line(fn):
                 # remove spaces at the beginning or the end; remove '\n':
                 last_line = last_line.strip().replace("\n", "")
             else:
+                print("empty file")
+                print(fn)
                 last_line = np.nan
     else:   # e.g., `qw` pending
+        print("file DNE")
+        print(fn)
         last_line = np.nan
 
     return last_line
diff --git a/setup.cfg b/setup.cfg
index 348a7117..698966d1 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -24,6 +24,7 @@ classifiers =
 [options]
 python_requires = >=3.7
 install_requires =
+    backoff
     datalad >= 0.17.2
     datalad_container >= 1.1.6
     filelock >= 3.8.0
diff --git a/tests/e2e-slurm/container/babs-user-script.sh b/tests/e2e-slurm/container/babs-user-script.sh
new file mode 100755
index 00000000..48c9c846
--- /dev/null
+++ b/tests/e2e-slurm/container/babs-user-script.sh
@@ -0,0 +1,88 @@
+#!/bin/bash -i
+
+SUBPROJECT_NAME=test_project
+
+set -eu
+
+echo "We are now running as user $(whoami)"
+echo "DEBUG: MINICONDA_PATH=${MINICONDA_PATH}"
+echo "DEBUG: TESTDATA=${TESTDATA}"
+
+source  "$MINICONDA_PATH/etc/profile.d/conda.sh"
+conda activate babs
+
+# record the miniconda path so it can be added to the test env (slurm jobs do not preserve env)
+cat > /home/"$USER"/miniconda.env << EOF
+. "$MINICONDA_PATH/etc/profile.d/conda.sh"
+EOF
+
+
+
+git config --global user.name "e2e testuser"
+git config --global user.email "testuser@example.com"
+echo "Git user: $(git config user.name)"
+echo "Git email: $(git config user.email)"
+
+# TODO switch back to osf project
+# Populate input data (Divergent from tuturial, bc https://github.com/datalad/datalad-osf/issues/191
+pushd ${TESTDATA}
+echo "Installing Input Data"
+datalad install ///dbic/QA
+
+# Singularity image created by root, then chowned to this user, and datalad must be run as this user
+datalad create -D "toy BIDS App" toybidsapp-container
+pushd toybidsapp-container
+datalad containers-add \
+    --url ${PWD}/../toybidsapp-0.0.7.sif \
+    toybidsapp-0-0-7
+popd
+rm -f toybidsapp-0.0.7.sif
+
+
+# TODO file an issue: --where_project must be an absolute path (relative paths are not handled)
+babs-init \
+    --where_project "${PWD}" \
+    --project_name $SUBPROJECT_NAME \
+    --input BIDS "${PWD}"/QA \
+    --container_ds "${PWD}"/toybidsapp-container \
+    --container_name toybidsapp-0-0-7 \
+    --container_config_yaml_file "${PWD}"/config_toybidsapp.yaml \
+    --type_session multi-ses \
+    --type_system slurm
+
+echo "PASSED: babs-init"
+echo "Check setup, without job"
+babs-check-setup --project_root "${PWD}"/test_project/
+echo "PASSED: Check setup, without job"
+
+babs-check-setup --project_root "${PWD}"/test_project/ --job-test
+echo "Job submitted: Check setup, with job"
+
+babs-status --project_root "${PWD}"/test_project/
+#
+# babs-submit --project_root "${PWD}"/test_project/
+#
+# babs-status --project_root "${PWD}"/test_project/
+# sleep 30s
+# babs-status --project_root "${PWD}"/test_project/
+#
+# echo "Print job logs--------------------------------------------"
+# find "${PWD}"/test_project/analysis/logs/* -type f -print -exec cat {} \;
+# echo "end job logs--------------------------------------------"
+# # TODO: babs-check-status-job
+#
+# # TODO babs-merge
+#
+# popd
+# # /tests/e2e-slurm/babs-tests.sh
+# # podman exec  \
+# # 	-e MINICONDA_PATH=${MINICONDA_PATH} \
+# # 	slurm \
+# # 	${PWD}/tests/e2e-slurm/babs-tests.sh
+# #
+#
+#
+# echo "--------------------------"
+# echo "     HUZZZZZZAHHHHHH!!!!!!"
+# echo "--------------------------"
+#
diff --git a/tests/e2e-slurm/container/config_toybidsapp.yaml b/tests/e2e-slurm/container/config_toybidsapp.yaml
new file mode 100644
index 00000000..b4839956
--- /dev/null
+++ b/tests/e2e-slurm/container/config_toybidsapp.yaml
@@ -0,0 +1,21 @@
+# Arguments in `singularity run`:
+singularity_run:
+    --no-zipped: ""
+    --dummy: "2"
+    -v: ""
+
+# Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s):
+zip_foldernames:
+    toybidsapp: "0-0-7"
+
+# How many cluster resources it needs:
+cluster_resources:
+    interpreting_shell: /bin/bash
+    hard_memory_limit: 2G
+
+script_preamble: |
+    . ~/miniconda.env
+    conda activate babs
+
+# Where to run the jobs:
+job_compute_space: "/tmp"
diff --git a/tests/e2e-slurm/container/ensure-env.sh b/tests/e2e-slurm/container/ensure-env.sh
new file mode 100755
index 00000000..67550ae1
--- /dev/null
+++ b/tests/e2e-slurm/container/ensure-env.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+#
+# exported for use in inner-slurm.sh
+if [ -z "${MINICONDA_PATH:-}" ]; then
+    if hash conda; then
+        export MINICONDA_PATH=$(/bin/which conda | xargs dirname | xargs dirname)
+    else
+        echo "ERROR: must have MINICONDA_PATH set or have 'conda' available"
+        exit 1
+    fi
+fi
diff --git a/tests/e2e-slurm/container/rerun.sh b/tests/e2e-slurm/container/rerun.sh
new file mode 100755
index 00000000..23b1d254
--- /dev/null
+++ b/tests/e2e-slurm/container/rerun.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+su "testuser" "rm -rf ${TESTDATA}"
+cp /opt/outer/* "${TESTDATA}"
+
+su "${BABS_USER}" "${TESTDATA}/babs-user-script.sh"
diff --git a/tests/e2e-slurm/container/walkthrough-tests.sh b/tests/e2e-slurm/container/walkthrough-tests.sh
new file mode 100755
index 00000000..df586761
--- /dev/null
+++ b/tests/e2e-slurm/container/walkthrough-tests.sh
@@ -0,0 +1,58 @@
+#!/bin/bash -i
+
+set -eu
+
+# add that outside user
+# groupadd --gid "$GID" "$USER"  && useradd --uid $UID --gid "$GID" "$USER"
+
+# Install singularity inside the container
+yum update -y && yum install -y epel-release &&  yum update -y &&  yum install -y singularity-runtime apptainer
+#
+# git version
+# git config user.name > /dev/null || git config --system user.name "e2e slurm"
+# git config user.email > /dev/null || git config --system user.email "fake@example.com"
+# git config --system --add safe.directory '*'
+
+export TESTDATA=/opt/testdata
+BABS_USER=testuser
+
+
+# Wait for slurm to be up
+max_retries=10
+delay=10  # seconds
+
+echo "Try connecting to slurm with sacct until it succeeds"
+set +e # We need to check the error code and allow failures until slurm has started up
+export PATH=${PWD}/tests/e2e-slurm/bin/:${PATH}
+for ((i=1; i<=max_retries; i++)); do
+	# Check if the command was successful
+	if sacct; then
+		echo "Slurm is up and running!"
+		break
+	else
+		echo "Waiting for Slurm to start... retry $i/$max_retries"
+		sleep $delay
+	fi
+	# exit if max retries reached
+	if [ $i -eq $max_retries ]; then
+		echo "Failed to start Slurm after $max_retries attempts."
+	exit 1
+    fi
+done
+set -e
+
+# Currently we are root inside the container. Now we create a user to own the testdata
+useradd "$BABS_USER"
+# cp rather than use bind directly so it can be owned by the container user and not cause issues outside
+mkdir "${TESTDATA}"
+cp /opt/outer/* "${TESTDATA}"
+
+
+# We build the singularity container now while we are root, and use it later as testuser
+pushd "${TESTDATA}"
+singularity build  \
+    toybidsapp-0.0.7.sif \
+    docker://pennlinc/toy_bids_app:0.0.7
+
+chown -R "$BABS_USER:$BABS_USER" "${TESTDATA}"
+su "${BABS_USER}" "${TESTDATA}/babs-user-script.sh"
diff --git a/tests/e2e-slurm/install-babs.sh b/tests/e2e-slurm/install-babs.sh
new file mode 100755
index 00000000..b9531fc8
--- /dev/null
+++ b/tests/e2e-slurm/install-babs.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+set -eu
+
+. tests/e2e-slurm/container/ensure-env.sh
+
+conda install -c conda-forge datalad git git-annex -y
+
+# Optional dependencies, required for e2e-slurm
+pip install datalad_container
+pip install datalad-osf
+
+# TODO non-dynamic for prod
+# pip install .
+pip install -e .
diff --git a/tests/e2e-slurm/main.sh b/tests/e2e-slurm/main.sh
new file mode 100755
index 00000000..101a72de
--- /dev/null
+++ b/tests/e2e-slurm/main.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+set -eux
+
+# Expects: Conda env to be activated
+# Expects: Babs to be installed
+#
+# WIP-NOT-WORKING
+# Reminder :Z for selinux
+
+# TODO switch back to upstream after build
+# Currently using asmacdo, OpenSSL bump upstream, but no new docker build
+# https://github.com/giovtorres/docker-centos7-slurm/pull/49
+REGISTRY=docker.io
+HUBUSER=asmacdo
+# HUBUSER=giovtorres
+REPO=centos7-slurm
+# REPO=docker-centos7-slurm
+TAG=23.11.07 # TODO
+
+FQDN_IMAGE=${REGISTRY}/${HUBUSER}/${REPO}:${TAG}
+THIS_DIR="$(readlink -f "$0" | xargs dirname )"
+TESTDATA=/opt/testdata
+
+. tests/e2e-slurm/container/ensure-env.sh
+
+if [ "$MINICONDA_PATH/envs/$CONDA_DEFAULT_ENV/bin/babs-init" != "$(which babs-init)" ]; then
+    echo "Error: This script expects to be run inside a conda env with 'babs-init'!" >&2
+    echo "       We have not found it in conda env '$CONDA_DEFAULT_ENV' under '$MINICONDA_PATH'" >&2
+    exit 1
+fi
+
+stop_container () {
+	podman stop slurm || true
+}
+
+echo "Success, we are in the conda env with babs-init!"
+ # Because babs is dev-installed from here. TODO: we can remove if we remove -e from pip install
+podman run -it --rm \
+	--name slurm \
+	--hostname slurmctl  \
+	-e "MINICONDA_PATH=${MINICONDA_PATH}" \
+	--privileged \
+	-v "${PWD}:${PWD}:ro,Z" \
+	-v "${MINICONDA_PATH}:${MINICONDA_PATH}:Z" \
+	-v "${THIS_DIR}/container:/opt/outer:ro,Z" \
+	"${FQDN_IMAGE}" \
+	/bin/bash -c ". /opt/outer/walkthrough-tests.sh" # TODO keep these logs?
+
+# trap stop_container EXIT

From 4727b55ce9cfa17cd58d5fa29b35ae882c22e99a Mon Sep 17 00:00:00 2001
From: Austin Macdonald <austin@dartmouth.edu>
Date: Thu, 11 Jan 2024 11:34:20 -0500
Subject: [PATCH 3/6] Add babs-submit and babs-merge

---
 tests/e2e-slurm/container/babs-user-script.sh | 65 +++++++++++++++++++
 .../e2e-slurm/container/walkthrough-tests.sh  |  2 +-
 tests/e2e-slurm/main.sh                       |  3 +-
 3 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/tests/e2e-slurm/container/babs-user-script.sh b/tests/e2e-slurm/container/babs-user-script.sh
index 48c9c846..60c7c97e 100755
--- a/tests/e2e-slurm/container/babs-user-script.sh
+++ b/tests/e2e-slurm/container/babs-user-script.sh
@@ -59,6 +59,71 @@ babs-check-setup --project_root "${PWD}"/test_project/ --job-test
 echo "Job submitted: Check setup, with job"
 
 babs-status --project_root "${PWD}"/test_project/
+
+# Wait for all running jobs to finish
+while [[ -n $(squeue -u $USER -t RUNNING,PENDING --noheader) ]]; do
+    echo "squeue -u $USER -t RUNNING,PENDING"
+    squeue -u $USER -t RUNNING,PENDING
+    echo "Waiting for running jobs to finish..."
+    sleep 5 # Wait for 60 seconds before checking again
+done
+
+echo "No running jobs."
+
+# TODO make sure this works
+# Check for failed jobs TODO state filter doesnt seem to be working as expected
+# if sacct -u $USER --state=FAILED --noheader | grep -q "FAILED"; then
+if sacct -u $USER --noheader | grep -q "FAILED"; then
+    sacct -u $USER
+    echo "There are failed jobs."
+    exit 1 # Exit with failure status
+else
+    sacct -u $USER
+    echo "PASSED: No failed jobs."
+fi
+
+babs-submit --project-root "${PWD}/test_project/"
+
+# # Wait for all running jobs to finish
+while [[ -n $(squeue -u $USER -t RUNNING,PENDING --noheader) ]]; do
+    echo "squeue -u $USER -t RUNNING,PENDING"
+    squeue -u $USER -t RUNNING,PENDING
+    echo "Waiting for running jobs to finish..."
+    sleep 5 # Wait for 60 seconds before checking again
+done
+
+echo "========================================================================="
+echo "babs-status:"
+babs-status --project_root "${PWD}"/test_project/
+echo "========================================================================="
+
+# Check for failed jobs TODO state filter doesnt seem to be working as expected
+# if sacct -u $USER --state=FAILED --noheader | grep -q "FAILED"; then
+if sacct -u $USER --noheader | grep -q "FAILED"; then
+    sacct -u $USER
+    echo "========================================================================="
+    echo "There are failed jobs."
+    exit 1 # Exit with failure status
+else
+    sacct -u $USER
+    echo "========================================================================="
+    echo "PASSED: No failed jobs."
+fi
+
+babs-merge --project_root "${PWD}"/test_project/
+
+
+# TODO: we need to fail if there is a failed job
+# fi
+
+# sleep 10
+# babs-status --project_root "${PWD}"/test_project/
+# sleep 10
+# babs-status --project_root "${PWD}"/test_project/
+# sleep 10
+# babs-status --project_root "${PWD}"/test_project/
+# sleep 10
+# babs-status --project_root "${PWD}"/test_project/
 #
 # babs-submit --project_root "${PWD}"/test_project/
 #
diff --git a/tests/e2e-slurm/container/walkthrough-tests.sh b/tests/e2e-slurm/container/walkthrough-tests.sh
index df586761..27f8b13a 100755
--- a/tests/e2e-slurm/container/walkthrough-tests.sh
+++ b/tests/e2e-slurm/container/walkthrough-tests.sh
@@ -26,7 +26,7 @@ set +e # We need to check the error code and allow failures until slurm has star
 export PATH=${PWD}/tests/e2e-slurm/bin/:${PATH}
 for ((i=1; i<=max_retries; i++)); do
 	# Check if the command was successful
-	if sacct; then
+	if sacct > /dev/null; then
 		echo "Slurm is up and running!"
 		break
 	else
diff --git a/tests/e2e-slurm/main.sh b/tests/e2e-slurm/main.sh
index 101a72de..b4b8125d 100755
--- a/tests/e2e-slurm/main.sh
+++ b/tests/e2e-slurm/main.sh
@@ -45,6 +45,7 @@ podman run -it --rm \
 	-v "${MINICONDA_PATH}:${MINICONDA_PATH}:Z" \
 	-v "${THIS_DIR}/container:/opt/outer:ro,Z" \
 	"${FQDN_IMAGE}" \
-	/bin/bash -c ". /opt/outer/walkthrough-tests.sh" # TODO keep these logs?
+	/bin/bash -c ". /opt/outer/walkthrough-tests.sh"
 
+	#/bin/bash -c ". /opt/outer/walkthrough-tests.sh && bash" # TODO remove, for debug only
 # trap stop_container EXIT

From 35f56120ce5a0408d13e49a7b8fb36553f9c7281 Mon Sep 17 00:00:00 2001
From: Austin Macdonald <austin@dartmouth.edu>
Date: Fri, 12 Jan 2024 10:03:49 -0500
Subject: [PATCH 4/6] remove dev artifacts and codespell

---
 Makefile                                      | 3 ---
 babs/utils.py                                 | 4 ----
 tests/e2e-slurm/container/babs-user-script.sh | 4 ++--
 3 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/Makefile b/Makefile
index b0375260..03949a2e 100644
--- a/Makefile
+++ b/Makefile
@@ -7,9 +7,6 @@ setup-user:
 e2e: clean
 	./tests/e2e-slurm/main.sh
 
-build: clean
-	podman build -f tests/e2e-slurm/Containerfile . -t testss
-
 # TODO testdata variable
 clean:
 	podman stop slurm 2>/dev/null || true
diff --git a/babs/utils.py b/babs/utils.py
index a22aa201..f6ca0c50 100644
--- a/babs/utils.py
+++ b/babs/utils.py
@@ -2079,12 +2079,8 @@ def get_last_line(fn):
                 # remove spaces at the beginning or the end; remove '\n':
                 last_line = last_line.strip().replace("\n", "")
             else:
-                print("empty file")
-                print(fn)
                 last_line = np.nan
     else:   # e.g., `qw` pending
-        print("file DNE")
-        print(fn)
         last_line = np.nan
 
     return last_line
diff --git a/tests/e2e-slurm/container/babs-user-script.sh b/tests/e2e-slurm/container/babs-user-script.sh
index 60c7c97e..c9bec9d4 100755
--- a/tests/e2e-slurm/container/babs-user-script.sh
+++ b/tests/e2e-slurm/container/babs-user-script.sh
@@ -71,7 +71,7 @@ done
 echo "No running jobs."
 
 # TODO make sure this works
-# Check for failed jobs TODO state filter doesnt seem to be working as expected
+# Check for failed jobs TODO state filter doesn't seem to be working as expected
 # if sacct -u $USER --state=FAILED --noheader | grep -q "FAILED"; then
 if sacct -u $USER --noheader | grep -q "FAILED"; then
     sacct -u $USER
@@ -97,7 +97,7 @@ echo "babs-status:"
 babs-status --project_root "${PWD}"/test_project/
 echo "========================================================================="
 
-# Check for failed jobs TODO state filter doesnt seem to be working as expected
+# Check for failed jobs TODO see above
 # if sacct -u $USER --state=FAILED --noheader | grep -q "FAILED"; then
 if sacct -u $USER --noheader | grep -q "FAILED"; then
     sacct -u $USER

From 3508d42d8e86d11d173e3af68969d5a6eae5c993 Mon Sep 17 00:00:00 2001
From: Austin Macdonald <austin@dartmouth.edu>
Date: Fri, 12 Jan 2024 10:23:36 -0500
Subject: [PATCH 5/6] Add shellcheck and fix

---
 .github/workflows/shellcheck.yml              | 24 +++++++++++++++
 tests/e2e-slurm/container/babs-user-script.sh | 30 ++++++++++---------
 tests/e2e-slurm/container/ensure-env.sh       |  2 ++
 tests/e2e-slurm/container/rerun.sh            |  6 ----
 .../e2e-slurm/container/walkthrough-tests.sh  |  2 +-
 tests/e2e-slurm/main.sh                       |  1 -
 6 files changed, 43 insertions(+), 22 deletions(-)
 create mode 100644 .github/workflows/shellcheck.yml
 delete mode 100755 tests/e2e-slurm/container/rerun.sh

diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml
new file mode 100644
index 00000000..2cc56d4d
--- /dev/null
+++ b/.github/workflows/shellcheck.yml
@@ -0,0 +1,24 @@
+name: Shellcheck scripts
+
+on: [push, pull_request]
+
+jobs:
+  test:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Set up system
+      shell: bash
+      run: |
+        sudo apt-get update -qq
+        sudo apt-get install shellcheck
+    - uses: actions/checkout@v4
+    - name: Run shellcheck
+      run: |
+        shellcheck \
+        tests/e2e-slurm/container/babs-user-script.sh \
+        tests/e2e-slurm/container/ensure-env.sh \
+        tests/e2e-slurm/container/walkthrough-tests.sh \
+        tests/e2e-slurm/install-babs.sh \
+        tests/e2e-slurm/main.sh
diff --git a/tests/e2e-slurm/container/babs-user-script.sh b/tests/e2e-slurm/container/babs-user-script.sh
index c9bec9d4..166a178b 100755
--- a/tests/e2e-slurm/container/babs-user-script.sh
+++ b/tests/e2e-slurm/container/babs-user-script.sh
@@ -8,6 +8,8 @@ echo "We are now running as user $(whoami)"
 echo "DEBUG: MINICONDA_PATH=${MINICONDA_PATH}"
 echo "DEBUG: TESTDATA=${TESTDATA}"
 
+# without MINICONDA_PATH set, shellcheck cannot follow
+# shellcheck disable=SC1091
 source  "$MINICONDA_PATH/etc/profile.d/conda.sh"
 conda activate babs
 
@@ -25,7 +27,7 @@ echo "Git email: $(git config user.email)"
 
 # TODO switch back to osf project
 # Populate input data (Divergent from tuturial, bc https://github.com/datalad/datalad-osf/issues/191
-pushd ${TESTDATA}
+pushd "${TESTDATA}"
 echo "Installing Input Data"
 datalad install ///dbic/QA
 
@@ -33,7 +35,7 @@ datalad install ///dbic/QA
 datalad create -D "toy BIDS App" toybidsapp-container
 pushd toybidsapp-container
 datalad containers-add \
-    --url ${PWD}/../toybidsapp-0.0.7.sif \
+    --url "${PWD}/../toybidsapp-0.0.7.sif" \
     toybidsapp-0-0-7
 popd
 rm -f toybidsapp-0.0.7.sif
@@ -61,9 +63,9 @@ echo "Job submitted: Check setup, with job"
 babs-status --project_root "${PWD}"/test_project/
 
 # Wait for all running jobs to finish
-while [[ -n $(squeue -u $USER -t RUNNING,PENDING --noheader) ]]; do
-    echo "squeue -u $USER -t RUNNING,PENDING"
-    squeue -u $USER -t RUNNING,PENDING
+while [[ -n $(squeue -u "$USER" -t RUNNING,PENDING --noheader) ]]; do
+    echo "squeue -u \"$USER\" -t RUNNING,PENDING"
+    squeue -u "$USER" -t RUNNING,PENDING
     echo "Waiting for running jobs to finish..."
     sleep 5 # Wait for 60 seconds before checking again
 done
@@ -73,21 +75,21 @@ echo "No running jobs."
 # TODO make sure this works
 # Check for failed jobs TODO state filter doesn't seem to be working as expected
 # if sacct -u $USER --state=FAILED --noheader | grep -q "FAILED"; then
-if sacct -u $USER --noheader | grep -q "FAILED"; then
-    sacct -u $USER
+if sacct -u "$USER" --noheader | grep -q "FAILED"; then
+    sacct -u "$USER"
     echo "There are failed jobs."
     exit 1 # Exit with failure status
 else
-    sacct -u $USER
+    sacct -u "$USER"
     echo "PASSED: No failed jobs."
 fi
 
 babs-submit --project-root "${PWD}/test_project/"
 
 # # Wait for all running jobs to finish
-while [[ -n $(squeue -u $USER -t RUNNING,PENDING --noheader) ]]; do
-    echo "squeue -u $USER -t RUNNING,PENDING"
-    squeue -u $USER -t RUNNING,PENDING
+while [[ -n $(squeue -u "$USER" -t RUNNING,PENDING --noheader) ]]; do
+    echo "squeue -u \"$USER\" -t RUNNING,PENDING"
+    squeue -u "$USER" -t RUNNING,PENDING
     echo "Waiting for running jobs to finish..."
     sleep 5 # Wait for 60 seconds before checking again
 done
@@ -99,13 +101,13 @@ echo "========================================================================="
 
 # Check for failed jobs TODO see above
 # if sacct -u $USER --state=FAILED --noheader | grep -q "FAILED"; then
-if sacct -u $USER --noheader | grep -q "FAILED"; then
-    sacct -u $USER
+if sacct -u "$USER" --noheader | grep -q "FAILED"; then
+    sacct -u "$USER"
     echo "========================================================================="
     echo "There are failed jobs."
     exit 1 # Exit with failure status
 else
-    sacct -u $USER
+    sacct -u "$USER"
     echo "========================================================================="
     echo "PASSED: No failed jobs."
 fi
diff --git a/tests/e2e-slurm/container/ensure-env.sh b/tests/e2e-slurm/container/ensure-env.sh
index 67550ae1..fee3162b 100755
--- a/tests/e2e-slurm/container/ensure-env.sh
+++ b/tests/e2e-slurm/container/ensure-env.sh
@@ -3,6 +3,8 @@
 # exported for use in inner-slurm.sh
 if [ -z "${MINICONDA_PATH:-}" ]; then
     if hash conda; then
+        # We don't need the return value, we already catch the error
+        # shellcheck disable=SC2155
         export MINICONDA_PATH=$(/bin/which conda | xargs dirname | xargs dirname)
     else
         echo "ERROR: must have MINICONDA_PATH set or have 'conda' available"
diff --git a/tests/e2e-slurm/container/rerun.sh b/tests/e2e-slurm/container/rerun.sh
deleted file mode 100755
index 23b1d254..00000000
--- a/tests/e2e-slurm/container/rerun.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-su "testuser" "rm -rf ${TESTDATA}"
-cp /opt/outer/* "${TESTDATA}"
-
-su "${BABS_USER}" "${TESTDATA}/babs-user-script.sh"
diff --git a/tests/e2e-slurm/container/walkthrough-tests.sh b/tests/e2e-slurm/container/walkthrough-tests.sh
index 27f8b13a..4c36f436 100755
--- a/tests/e2e-slurm/container/walkthrough-tests.sh
+++ b/tests/e2e-slurm/container/walkthrough-tests.sh
@@ -34,7 +34,7 @@ for ((i=1; i<=max_retries; i++)); do
 		sleep $delay
 	fi
 	# exit if max retries reached
-	if [ $i -eq $max_retries ]; then
+	if [ "$i" -eq "$max_retries" ]; then
 		echo "Failed to start Slurm after $max_retries attempts."
 	exit 1
     fi
diff --git a/tests/e2e-slurm/main.sh b/tests/e2e-slurm/main.sh
index b4b8125d..c50b002b 100755
--- a/tests/e2e-slurm/main.sh
+++ b/tests/e2e-slurm/main.sh
@@ -20,7 +20,6 @@ TAG=23.11.07 # TODO
 
 FQDN_IMAGE=${REGISTRY}/${HUBUSER}/${REPO}:${TAG}
 THIS_DIR="$(readlink -f "$0" | xargs dirname )"
-TESTDATA=/opt/testdata
 
 . tests/e2e-slurm/container/ensure-env.sh
 

From da66a12895208341894e88ede317a704c5ec022d Mon Sep 17 00:00:00 2001
From: Austin Macdonald <austin@dartmouth.edu>
Date: Tue, 16 Jan 2024 10:47:41 -0500
Subject: [PATCH 6/6] cleanup

---
 .gitignore                                    |  3 --
 Makefile                                      | 10 ++---
 babs/babs.py                                  |  4 --
 babs/utils.py                                 |  1 -
 tests/e2e-slurm/container/babs-user-script.sh | 42 +------------------
 tests/e2e-slurm/container/ensure-env.sh       |  1 -
 .../e2e-slurm/container/walkthrough-tests.sh  | 16 ++-----
 tests/e2e-slurm/install-babs.sh               |  4 +-
 tests/e2e-slurm/main.sh                       | 11 ++---
 9 files changed, 13 insertions(+), 79 deletions(-)

diff --git a/.gitignore b/.gitignore
index cf103c72..de79a6da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,3 @@ build/
 # Distribution / packaging
 dist/
 babs/VERSION
-
-# e2e testdata
-.testdata*
diff --git a/Makefile b/Makefile
index 03949a2e..a6143630 100644
--- a/Makefile
+++ b/Makefile
@@ -7,13 +7,9 @@ setup-user:
 e2e: clean
 	./tests/e2e-slurm/main.sh
 
-# TODO testdata variable
 clean:
-	podman stop slurm 2>/dev/null || true
-	podman rm slurm 2>/dev/null || true
-	[ -e .testdata/babs_test_project/toybidsapp-container ] && \
+	@ podman stop slurm 2>/dev/null || true
+	@ podman rm slurm 2>/dev/null || true
+	@[ -e .testdata/babs_test_project/toybidsapp-container ] && \
 		datalad remove -d .testdata/babs_test_project/toybidsapp-container --reckless kill || :
 	rm -rf .testdata
-
-logs:
-	cat .testdata/ci-logs/*
diff --git a/babs/babs.py b/babs/babs.py
index 4c9025a2..f9719fe1 100644
--- a/babs/babs.py
+++ b/babs/babs.py
@@ -2778,14 +2778,10 @@ def generate_job_submit_template(self, yaml_path, babs, system, test=False):
             env_flags = "-v DSLOCKFILE=" + babs.analysis_path + "/.SGE_datalad_lock"
         elif system.type == "slurm":
             submit_head = "sbatch"
-            # TODO: asmacdo
             env_flags = "--export=DSLOCKFILE=" + babs.analysis_path + "/.SLURM_datalad_lock"
         else:
             warnings.warn("not supporting systems other than sge...")
 
-        # TODO: rm asmacdo hack
-        # env_flags = env_flags + f",MINICONDA_PATH={os.getenv('MINICONDA_PATH')}"
-
         # Check if the bash file already exist:
         if op.exists(yaml_path):
             os.remove(yaml_path)  # remove it
diff --git a/babs/utils.py b/babs/utils.py
index f6ca0c50..d8103475 100644
--- a/babs/utils.py
+++ b/babs/utils.py
@@ -1674,7 +1674,6 @@ def submit_one_test_job(analysis_path, type_system, flag_print_message=True):
                               stdout=subprocess.PIPE)
 
     proc_cmd.check_returncode()
-    print(f"Return code: {proc_cmd.returncode}")
     msg = proc_cmd.stdout.decode('utf-8')
 
     if type_system == "sge":
diff --git a/tests/e2e-slurm/container/babs-user-script.sh b/tests/e2e-slurm/container/babs-user-script.sh
index 166a178b..1a484a93 100755
--- a/tests/e2e-slurm/container/babs-user-script.sh
+++ b/tests/e2e-slurm/container/babs-user-script.sh
@@ -4,6 +4,7 @@ SUBPROJECT_NAME=test_project
 
 set -eu
 
+echo "=============================================================="
 echo "We are now running as user $(whoami)"
 echo "DEBUG: MINICONDA_PATH=${MINICONDA_PATH}"
 echo "DEBUG: TESTDATA=${TESTDATA}"
@@ -113,43 +114,4 @@ else
 fi
 
 babs-merge --project_root "${PWD}"/test_project/
-
-
-# TODO: we need to fail if there is a failed job
-# fi
-
-# sleep 10
-# babs-status --project_root "${PWD}"/test_project/
-# sleep 10
-# babs-status --project_root "${PWD}"/test_project/
-# sleep 10
-# babs-status --project_root "${PWD}"/test_project/
-# sleep 10
-# babs-status --project_root "${PWD}"/test_project/
-#
-# babs-submit --project_root "${PWD}"/test_project/
-#
-# babs-status --project_root "${PWD}"/test_project/
-# sleep 30s
-# babs-status --project_root "${PWD}"/test_project/
-#
-# echo "Print job logs--------------------------------------------"
-# find "${PWD}"/test_project/analysis/logs/* -type f -print -exec cat {} \;
-# echo "end job logs--------------------------------------------"
-# # TODO: babs-check-status-job
-#
-# # TODO babs-merge
-#
-# popd
-# # /tests/e2e-slurm/babs-tests.sh
-# # podman exec  \
-# # 	-e MINICONDA_PATH=${MINICONDA_PATH} \
-# # 	slurm \
-# # 	${PWD}/tests/e2e-slurm/babs-tests.sh
-# #
-#
-#
-# echo "--------------------------"
-# echo "     HUZZZZZZAHHHHHH!!!!!!"
-# echo "--------------------------"
-#
+echo "PASSED: e2e walkthrough successful!"
diff --git a/tests/e2e-slurm/container/ensure-env.sh b/tests/e2e-slurm/container/ensure-env.sh
index fee3162b..352ed3a7 100755
--- a/tests/e2e-slurm/container/ensure-env.sh
+++ b/tests/e2e-slurm/container/ensure-env.sh
@@ -1,6 +1,5 @@
 #!/bin/bash
 #
-# exported for use in inner-slurm.sh
 if [ -z "${MINICONDA_PATH:-}" ]; then
     if hash conda; then
         # We don't need the return value, we already catch the error
diff --git a/tests/e2e-slurm/container/walkthrough-tests.sh b/tests/e2e-slurm/container/walkthrough-tests.sh
index 4c36f436..1c57f2d9 100755
--- a/tests/e2e-slurm/container/walkthrough-tests.sh
+++ b/tests/e2e-slurm/container/walkthrough-tests.sh
@@ -1,21 +1,13 @@
 #!/bin/bash -i
-
+# Here we perform all actions that must be done as root inside the container and then
+# execute the walkthrough as BABS_USER
 set -eu
 
-# add that outside user
-# groupadd --gid "$GID" "$USER"  && useradd --uid $UID --gid "$GID" "$USER"
-
-# Install singularity inside the container
-yum update -y && yum install -y epel-release &&  yum update -y &&  yum install -y singularity-runtime apptainer
-#
-# git version
-# git config user.name > /dev/null || git config --system user.name "e2e slurm"
-# git config user.email > /dev/null || git config --system user.email "fake@example.com"
-# git config --system --add safe.directory '*'
-
 export TESTDATA=/opt/testdata
 BABS_USER=testuser
 
+# Install singularity inside the container
+yum update -y && yum install -y epel-release &&  yum update -y &&  yum install -y singularity-runtime apptainer
 
 # Wait for slurm to be up
 max_retries=10
diff --git a/tests/e2e-slurm/install-babs.sh b/tests/e2e-slurm/install-babs.sh
index b9531fc8..a0674743 100755
--- a/tests/e2e-slurm/install-babs.sh
+++ b/tests/e2e-slurm/install-babs.sh
@@ -10,6 +10,4 @@ conda install -c conda-forge datalad git git-annex -y
 pip install datalad_container
 pip install datalad-osf
 
-# TODO non-dynamic for prod
-# pip install .
-pip install -e .
+pip install .
diff --git a/tests/e2e-slurm/main.sh b/tests/e2e-slurm/main.sh
index c50b002b..f4e35a4b 100755
--- a/tests/e2e-slurm/main.sh
+++ b/tests/e2e-slurm/main.sh
@@ -21,6 +21,7 @@ TAG=23.11.07 # TODO
 FQDN_IMAGE=${REGISTRY}/${HUBUSER}/${REPO}:${TAG}
 THIS_DIR="$(readlink -f "$0" | xargs dirname )"
 
+# Sets MINICONDA_PATH
 . tests/e2e-slurm/container/ensure-env.sh
 
 if [ "$MINICONDA_PATH/envs/$CONDA_DEFAULT_ENV/bin/babs-init" != "$(which babs-init)" ]; then
@@ -29,12 +30,9 @@ if [ "$MINICONDA_PATH/envs/$CONDA_DEFAULT_ENV/bin/babs-init" != "$(which babs-in
     exit 1
 fi
 
-stop_container () {
-	podman stop slurm || true
-}
-
 echo "Success, we are in the conda env with babs-init!"
- # Because babs is dev-installed from here. TODO: we can remove if we remove -e from pip install
+
+# PWD shared so babs can optionally be installed with develop install
 podman run -it --rm \
 	--name slurm \
 	--hostname slurmctl  \
@@ -45,6 +43,3 @@ podman run -it --rm \
 	-v "${THIS_DIR}/container:/opt/outer:ro,Z" \
 	"${FQDN_IMAGE}" \
 	/bin/bash -c ". /opt/outer/walkthrough-tests.sh"
-
-	#/bin/bash -c ". /opt/outer/walkthrough-tests.sh && bash" # TODO remove, for debug only
-# trap stop_container EXIT