-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #169 from asmacdo/e2e-slurm
Introducing e2e slurm tests
- Loading branch information
Showing
12 changed files
with
342 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
--- | ||
# Workflow that installs apptainer, conda and BABS, then runs the SLURM end-to-end test script. | ||
name: Slurm | ||
|
||
# Runs on every push. | ||
on: [push] | ||
jobs: | ||
e2e-slurm: | ||
name: Test e2e with SLURM | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: checkout our repo | ||
uses: actions/checkout@v4 | ||
- name: Install apptainer | ||
uses: eWaterCycle/setup-apptainer@v2 | ||
with: | ||
apptainer-version: 1.1.2 | ||
# Creates and activates a conda environment named "babs". | ||
- name: Install Conda | ||
uses: conda-incubator/setup-miniconda@v3 | ||
with: | ||
activate-environment: babs | ||
auto-update-conda: true | ||
python-version: 3.9 | ||
# The remaining steps use a login shell (bash -el) - presumably so the conda env | ||
# activated by setup-miniconda is picked up; confirm against the action's docs. | ||
- name: Conda info | ||
shell: bash -el {0} | ||
run: conda info | ||
- name: Install Babs | ||
shell: bash -el {0} | ||
run: ./tests/e2e-slurm/install-babs.sh | ||
- name: Execute e2e with SLURM | ||
shell: bash -el {0} | ||
run: ./tests/e2e-slurm/main.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Lints the e2e-slurm shell scripts with shellcheck on every push and pull request. | ||
name: Shellcheck scripts | ||
|
||
on: [push, pull_request] | ||
|
||
jobs: | ||
test: | ||
|
||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
# -y keeps apt-get from stopping at a confirmation prompt that nobody can answer in CI. | ||
- name: Set up system | ||
shell: bash | ||
run: | | ||
sudo apt-get update -qq | ||
sudo apt-get install -y shellcheck | ||
- uses: actions/checkout@v4 | ||
# The scripts are listed explicitly, so a new script must be added here to get linted. | ||
- name: Run shellcheck | ||
run: | | ||
shellcheck \ | ||
tests/e2e-slurm/container/babs-user-script.sh \ | ||
tests/e2e-slurm/container/ensure-env.sh \ | ||
tests/e2e-slurm/container/walkthrough-tests.sh \ | ||
tests/e2e-slurm/install-babs.sh \ | ||
tests/e2e-slurm/main.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,4 +11,4 @@ build/ | |
|
||
# Distribution / packaging | ||
dist/ | ||
babs/VERSION | ||
babs/VERSION |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Convenience targets for the SLURM end-to-end tests. | ||
# None of the targets creates a file of its own name, so declare them phony; | ||
# otherwise a stray file called e.g. "install" or "clean" would make the target a no-op. | ||
.PHONY: install setup-user e2e clean | ||
|
||
# Install BABS and its test dependencies via the helper script. | ||
install: | ||
./tests/e2e-slurm/install-babs.sh | ||
|
||
setup-user: | ||
./tests/e2e-slurm/setup-user.sh | ||
|
||
# Always start the end-to-end run from a cleaned-up state. | ||
e2e: clean | ||
./tests/e2e-slurm/main.sh | ||
|
||
# Stop and remove a leftover "slurm" container (errors are ignored when there is none), | ||
# remove the toybidsapp-container dataset through datalad if it exists, then delete .testdata. | ||
clean: | ||
@ podman stop slurm 2>/dev/null || true | ||
@ podman rm slurm 2>/dev/null || true | ||
@[ -e .testdata/babs_test_project/toybidsapp-container ] && \ | ||
datalad remove -d .testdata/babs_test_project/toybidsapp-container --reckless kill || : | ||
rm -rf .testdata |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
#!/bin/bash -i | ||
# Runs the BABS walkthrough (init, check-setup, submit, merge) against SLURM. | ||
# Requires MINICONDA_PATH and TESTDATA in the environment; with `set -u` the script | ||
# aborts at the first reference if either is unset. | ||
|
||
SUBPROJECT_NAME=test_project | ||
|
||
set -eu | ||
|
||
echo "==============================================================" | ||
echo "We are now running as user $(whoami)" | ||
echo "DEBUG: MINICONDA_PATH=${MINICONDA_PATH}" | ||
echo "DEBUG: TESTDATA=${TESTDATA}" | ||
|
||
# without MINICONDA_PATH set, shellcheck cannot follow | ||
# shellcheck disable=SC1091 | ||
source "$MINICONDA_PATH/etc/profile.d/conda.sh" | ||
conda activate babs | ||
|
||
# Record the miniconda path so it can be added to the test env (slurm jobs do not preserve env). | ||
# The heredoc delimiter is unquoted, so $MINICONDA_PATH is expanded when the file is written. | ||
cat > /home/"$USER"/miniconda.env << EOF | ||
. "$MINICONDA_PATH/etc/profile.d/conda.sh" | ||
EOF | ||
|
||
|
||
|
||
# Configure a global git identity for this user and print it back for the log. | ||
git config --global user.name "e2e testuser" | ||
git config --global user.email "[email protected]" | ||
echo "Git user: $(git config user.name)" | ||
echo "Git email: $(git config user.email)" | ||
|
||
# TODO switch back to osf project | ||
# Populate input data (divergent from the tutorial because of https://github.com/datalad/datalad-osf/issues/191) | ||
pushd "${TESTDATA}" | ||
echo "Installing Input Data" | ||
datalad install ///dbic/QA | ||
|
||
# Singularity image created by root, then chowned to this user, and datalad must be run as this user | ||
datalad create -D "toy BIDS App" toybidsapp-container | ||
pushd toybidsapp-container | ||
datalad containers-add \ | ||
--url "${PWD}/../toybidsapp-0.0.7.sif" \ | ||
toybidsapp-0-0-7 | ||
popd | ||
# Remove the standalone image file now that it has been passed to containers-add. | ||
rm -f toybidsapp-0.0.7.sif | ||
|
||
|
||
# TODO file an issue: --where_project currently has to be an absolute path (relative paths fail) | ||
# Create the BABS project in the current directory (TESTDATA) from the QA input dataset | ||
# and the toy BIDS App container dataset. | ||
babs-init \ | ||
--where_project "${PWD}" \ | ||
--project_name "${SUBPROJECT_NAME}" \ | ||
--input BIDS "${PWD}"/QA \ | ||
--container_ds "${PWD}"/toybidsapp-container \ | ||
--container_name toybidsapp-0-0-7 \ | ||
--container_config_yaml_file "${PWD}"/config_toybidsapp.yaml \ | ||
--type_session multi-ses \ | ||
--type_system slurm | ||
|
||
echo "PASSED: babs-init" | ||
# The project root is derived from SUBPROJECT_NAME so it always matches --project_name above. | ||
echo "Check setup, without job" | ||
babs-check-setup --project_root "${PWD}/${SUBPROJECT_NAME}/" | ||
echo "PASSED: Check setup, without job" | ||
|
||
babs-check-setup --project_root "${PWD}/${SUBPROJECT_NAME}/" --job-test | ||
echo "Job submitted: Check setup, with job" | ||
|
||
babs-status --project_root "${PWD}/${SUBPROJECT_NAME}/" | ||
|
||
# Poll squeue until this user has no RUNNING or PENDING jobs left | ||
until [[ -z $(squeue -u "$USER" -t RUNNING,PENDING --noheader) ]]; do | ||
echo "squeue -u \"$USER\" -t RUNNING,PENDING" | ||
squeue -u "$USER" -t RUNNING,PENDING | ||
echo "Waiting for running jobs to finish..." | ||
sleep 5 # poll every 5 seconds | ||
done | ||
|
||
echo "No running jobs." | ||
|
||
# TODO make sure this works | ||
# TODO the sacct state filter doesn't seem to work as expected, so the full listing is grepped instead of: | ||
# if sacct -u $USER --state=FAILED --noheader | grep -q "FAILED"; then | ||
if sacct -u "$USER" --noheader | grep -q "FAILED"; then | ||
found_failed=yes | ||
else | ||
found_failed=no | ||
fi | ||
# The accounting table is printed in both outcomes, before the verdict | ||
sacct -u "$USER" | ||
if [ "$found_failed" = yes ]; then | ||
echo "There are failed jobs." | ||
exit 1 # Exit with failure status | ||
fi | ||
echo "PASSED: No failed jobs." | ||
|
||
# Submit the jobs of the project created above; the path is derived from SUBPROJECT_NAME | ||
# so it cannot drift from the name passed to babs-init. | ||
babs-submit --project-root "${PWD}/${SUBPROJECT_NAME}/" | ||
|
||
# Wait until this user has no RUNNING or PENDING jobs left | ||
while [[ -n $(squeue -u "$USER" -t RUNNING,PENDING --noheader) ]]; do | ||
echo "squeue -u \"$USER\" -t RUNNING,PENDING" | ||
squeue -u "$USER" -t RUNNING,PENDING | ||
echo "Waiting for running jobs to finish..." | ||
sleep 5 # poll every 5 seconds | ||
done | ||
|
||
echo "=========================================================================" | ||
echo "babs-status:" | ||
babs-status --project_root "${PWD}/${SUBPROJECT_NAME}/" | ||
echo "=========================================================================" | ||
|
||
# Check for failed jobs. TODO the sacct state filter does not seem to work, so grep the full listing instead of: | ||
# if sacct -u $USER --state=FAILED --noheader | grep -q "FAILED"; then | ||
if sacct -u "$USER" --noheader | grep -q "FAILED"; then | ||
sacct -u "$USER" | ||
echo "=========================================================================" | ||
echo "There are failed jobs." | ||
exit 1 # Exit with failure status | ||
else | ||
sacct -u "$USER" | ||
echo "=========================================================================" | ||
echo "PASSED: No failed jobs." | ||
fi | ||
|
||
babs-merge --project_root "${PWD}/${SUBPROJECT_NAME}/" | ||
echo "PASSED: e2e walkthrough successful!" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Arguments in `singularity run`: | ||
singularity_run: | ||
--no-zipped: "" | ||
--dummy: "2" | ||
-v: "" | ||
|
||
# Output foldername(s) to be zipped, and the BIDS App version to be included in the zip filename(s): | ||
zip_foldernames: | ||
toybidsapp: "0-0-7" | ||
|
||
# Cluster resources the job needs: | ||
cluster_resources: | ||
interpreting_shell: /bin/bash | ||
hard_memory_limit: 2G | ||
|
||
# Shell lines that source ~/miniconda.env and activate the "babs" conda env | ||
# (presumably prepended to each generated job script - confirm against the BABS docs). | ||
script_preamble: | | ||
. ~/miniconda.env | ||
conda activate babs | ||
# Where to run the jobs: | ||
job_compute_space: "/tmp" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/bin/bash | ||
# | ||
# Makes sure MINICONDA_PATH is set and exported. | ||
# If it is unset or empty, it is derived from the location of the `conda` executable; | ||
# if `conda` cannot be found either, an error is printed to stderr and the shell exits with 1. | ||
if [ -z "${MINICONDA_PATH:-}" ]; then | ||
if hash conda; then | ||
# The prefix is two levels above the executable (<prefix>/bin/conda -> <prefix>). | ||
# Nested dirname calls are used instead of `xargs dirname` so that a path containing | ||
# whitespace or quotes is not split; assigning before exporting avoids SC2155. | ||
MINICONDA_PATH=$(dirname "$(dirname "$(/bin/which conda)")") | ||
export MINICONDA_PATH | ||
else | ||
echo "ERROR: must have MINICONDA_PATH set or have 'conda' available" >&2 | ||
exit 1 | ||
fi | ||
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#!/bin/bash -i | ||
# Here we perform all actions that must be done as root inside the container and then | ||
# execute the walkthrough as BABS_USER | ||
set -eu | ||
|
||
# TESTDATA is exported - presumably so the user script started at the end can read it; confirm. | ||
export TESTDATA=/opt/testdata | ||
BABS_USER=testuser | ||
|
||
# Install singularity inside the container: | ||
# epel-release is installed first, then the package index is updated again before | ||
# installing singularity-runtime and apptainer. | ||
yum update -y && yum install -y epel-release && yum update -y && yum install -y singularity-runtime apptainer | ||
|
||
# Wait for slurm to be up: probe with sacct up to max_retries times, delay seconds apart | ||
max_retries=10 | ||
delay=10 # seconds | ||
|
||
echo "Try connecting to slurm with sacct until it succeeds" | ||
set +e # We need to check the error code and allow failures until slurm has started up | ||
export PATH=${PWD}/tests/e2e-slurm/bin/:${PATH} | ||
for ((i=1; i<=max_retries; i++)); do | ||
# A successful sacct call means the slurm accounting service answers | ||
if sacct > /dev/null; then | ||
echo "Slurm is up and running!" | ||
break | ||
fi | ||
# Give up right after the last failed attempt instead of sleeping one more delay first | ||
if [ "$i" -eq "$max_retries" ]; then | ||
echo "Failed to start Slurm after $max_retries attempts." >&2 | ||
exit 1 | ||
fi | ||
echo "Waiting for Slurm to start... retry $i/$max_retries" | ||
sleep "$delay" | ||
done | ||
set -e | ||
|
||
# Currently we are root inside the container. Now we create a user to own the testdata | ||
useradd "$BABS_USER" | ||
# cp rather than use bind directly so it can be owned by the container user and not cause issues outside | ||
mkdir "${TESTDATA}" | ||
cp /opt/outer/* "${TESTDATA}" | ||
|
||
|
||
# We build the singularity container now while we are root, and use it later as testuser | ||
pushd "${TESTDATA}" | ||
singularity build \ | ||
toybidsapp-0.0.7.sif \ | ||
docker://pennlinc/toy_bids_app:0.0.7 | ||
|
||
# Hand the whole test data directory, including the image just built, to the test user, | ||
# then run the copied user script as that user. | ||
chown -R "$BABS_USER:$BABS_USER" "${TESTDATA}" | ||
su "${BABS_USER}" "${TESTDATA}/babs-user-script.sh" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash | ||
# Installs datalad, git, git-annex, the datalad extensions and BABS itself. | ||
# All paths are relative, so this must be run from the repository root. | ||
|
||
set -eu | ||
|
||
# Sourced (not executed) so that the MINICONDA_PATH it exports lands in this shell | ||
. tests/e2e-slurm/container/ensure-env.sh | ||
|
||
conda install -c conda-forge datalad git git-annex -y | ||
|
||
# Optional dependencies, required for e2e-slurm | ||
pip install datalad_container | ||
pip install datalad-osf | ||
|
||
# Install BABS from the current checkout | ||
pip install . |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#!/bin/bash | ||
# | ||
# Entry point of the SLURM end-to-end test: picks the slurm container image and | ||
# starts it with podman. -x traces every command for the CI log. | ||
set -eux | ||
|
||
# Expects: Conda env to be activated | ||
# Expects: Babs to be installed | ||
# | ||
# WIP-NOT-WORKING | ||
# Reminder :Z for selinux | ||
|
||
# TODO switch back to upstream after build | ||
# Currently using asmacdo, OpenSSL bump upstream, but no new docker build | ||
# https://github.com/giovtorres/docker-centos7-slurm/pull/49 | ||
# The commented-out values are the upstream coordinates to switch back to. | ||
REGISTRY=docker.io | ||
HUBUSER=asmacdo | ||
# HUBUSER=giovtorres | ||
REPO=centos7-slurm | ||
# REPO=docker-centos7-slurm | ||
TAG=23.11.07 # TODO | ||
|
||
# Full image reference: registry/user/repo:tag | ||
FQDN_IMAGE=${REGISTRY}/${HUBUSER}/${REPO}:${TAG} | ||
# Directory containing this script, with symlinks resolved. dirname is called directly | ||
# rather than through xargs so that a checkout path containing whitespace is not split. | ||
THIS_DIR="$(dirname "$(readlink -f "$0")")" | ||
|
||
# Make sure MINICONDA_PATH is available in this shell | ||
source tests/e2e-slurm/container/ensure-env.sh | ||
|
||
# babs-init on PATH must be the one inside the active conda env under MINICONDA_PATH | ||
if [[ "$MINICONDA_PATH/envs/$CONDA_DEFAULT_ENV/bin/babs-init" == "$(which babs-init)" ]]; then | ||
echo "Success, we are in the conda env with babs-init!" | ||
else | ||
{ | ||
echo "Error: This script expects to be run inside a conda env with 'babs-init'!" | ||
echo " We have not found it in conda env '$CONDA_DEFAULT_ENV' under '$MINICONDA_PATH'" | ||
} >&2 | ||
exit 1 | ||
fi | ||
|
||
# PWD is shared (read-only) so babs can optionally be installed with a develop install. | ||
# The miniconda tree is mounted at the same path inside the container, and this script's | ||
# container/ directory is mounted read-only at /opt/outer, from where the walkthrough is sourced. | ||
podman run -it --rm \ | ||
--name slurm \ | ||
--hostname slurmctl \ | ||
-e "MINICONDA_PATH=${MINICONDA_PATH}" \ | ||
--privileged \ | ||
-v "${PWD}:${PWD}:ro,Z" \ | ||
-v "${MINICONDA_PATH}:${MINICONDA_PATH}:Z" \ | ||
-v "${THIS_DIR}/container:/opt/outer:ro,Z" \ | ||
"${FQDN_IMAGE}" \ | ||
/bin/bash -c ". /opt/outer/walkthrough-tests.sh" |