Misc Updates from Testing with Skymap Scanner (Pt 2) (#105) #1545
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity, trigger and workflow-level environment.
name: wipac ci/cd

# Triggered by pushes to any branch; pushes of tags are ignored.
on:
  push:
    branches:
      - "**"
    tags-ignore:
      - "**"

# not using concurrency -- it makes A/B debugging difficult. Now, manually cancel prev tests if needed

env:
  # Broker image tags. Not referenced anywhere else in this file.
  # NOTE(review): presumably consumed by ./tests/run-broker.sh -- confirm.
  RABBITMQ_IMAGE_TAG: bitnami/rabbitmq:3.13.5
  PULSAR_IMAGE_TAG: apachepulsar/pulsar:2.6.0
  NATS_IMAGE_TAG: nats:2.10.18
  #
  # File that 'integration-tests' fills with the collected pytest test ids.
  SORTED_LIST_OF_TESTS_FILE: list-of-tests-sorted.txt
  #
  # Name given to the broker container started by 'integration-tests'.
  BROKER_CONTAINER_NAME: thebroker
  #
  # Every EWMS_* / _EWMS_* variable is forwarded into the containers by the
  # "env | grep '^EWMS_'" / "env | grep '^_EWMS_'" expansions in the jobs below.
  EWMS_PILOT_TIMEOUT_INCOMING: 1
  EWMS_PILOT_TIMEOUT_OUTGOING: 1
  EWMS_PILOT_KEEP_ALL_TASK_FILES: True
  EWMS_PILOT_DATA_DIR_PARENT_PATH_ON_HOST: /blah/
  #
  _EWMS_PILOT_APPTAINER_IMAGE_DIRECTORY_MUST_BE_PRESENT: False  # allow real-time conversion
  #
  # Tag used for every locally built pilot image.
  DOCKER_IMAGE_NAME: pilot/local
jobs: | |
############################################################################# | |
# PACKAGING & LINTERS | |
############################################################################# | |
# Publishes the Python-version matrix that the 'mypy', 'py-dependencies' and
# 'integration-tests' jobs read through needs.py-versions.outputs.matrix.
py-versions:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    # NOTE(review): the "name@version" part of this reference was mangled to
    # "[email protected]" in this copy of the file -- restore it from the repository.
    - uses: WIPACrepo/[email protected]
      id: versions
  outputs:
    matrix: ${{ steps.versions.outputs.matrix }}
# Lint job: checkout, set up the runner's default Python (no version is
# requested), then run the WIPAC flake8 action.
flake8:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v4
    # NOTE(review): the "name@version" part of this reference was mangled to
    # "[email protected]" in this copy of the file -- restore it from the repository.
    - uses: WIPACrepo/[email protected]
# Type-check job: one run per Python version published by 'py-versions'.
mypy:
  runs-on: ubuntu-latest
  needs:
    - py-versions
  strategy:
    # keep checking the remaining versions even if one of them fails
    fail-fast: false
    matrix:
      py3: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.py3 }}
    # NOTE(review): the "name@version" part of this reference was mangled to
    # "[email protected]" in this copy of the file -- restore it from the repository.
    - uses: WIPACrepo/[email protected]
# Packaging-metadata job. Both steps are skipped when the actor is dependabot,
# because the checkout needs the PERSONAL_ACCESS_TOKEN secret.
py-setup:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      if: github.actor != 'dependabot[bot]'
      with:
        token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}  # fyi, dependabot can't access normal secrets
    # NOTE(review): the "name@version" part of this reference and the
    # author_email value were mangled to "[email protected]" in this copy of
    # the file -- restore both from the repository.
    - uses: WIPACrepo/[email protected]
      if: github.actor != 'dependabot[bot]'
      with:
        pypi_name: ewms-pilot
        python_min: "3.10"
        python_max: "3.12"
        keywords: pilot "Observation Management Service" "Event Workflow Management Service" EWMS "message passing" MQ task
        author: WIPAC Developers
        author_email: [email protected]
# Builds every image flavor (default + each python version x broker, with and
# without the 'test' extra) in parallel, then runs the WIPAC dependencies action.
# All steps are skipped for dependabot, which cannot read the checkout token.
py-dependencies:
  needs: [ py-versions ]
  runs-on: ubuntu-latest
  concurrency:
    # one in-flight run of this job per workflow + ref; newer runs cancel older ones
    group: ${{ github.workflow }}-${{ github.ref }}-${{ github.job }}
    cancel-in-progress: true
  steps:
    - if: github.actor != 'dependabot[bot]'
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}  # fyi, dependabot can't access normal secrets
    - if: github.actor != 'dependabot[bot]'
      run: |
        set -x
        mkdir docker-build-logs/ && trap 'rm -rf docker-build-logs/' EXIT

        # first, build the vanilla/default image
        # ('tag' must be set before it is used in the log file name -- otherwise
        # the log lands in the hidden file '.docker-build.out', which the
        # 'docker-build-logs/*' glob below never prints)
        tag="default:py-dep-this"
        docker build --tag "$tag" . \
          >> docker-build-logs/$tag.docker-build.out 2>&1 & pidlist="$pidlist $!"

        # build all dockerfiles
        for py in $(echo ${{ needs.py-versions.outputs.matrix }} | sed 's/[][]//g; s/,/ /g'); do
          docker pull python:$py  # pre-pull to speed up dependent images
          for flavor in rabbitmq pulsar nats; do
            # normal
            tag="$py-$flavor:py-dep-this"
            docker build --tag $tag \
              --build-arg="PYTHON=$py" --build-arg="FLAVOR=$flavor" \
              . \
              >> docker-build-logs/$tag.docker-build.out 2>&1 & pidlist="$pidlist $!"
            sleep .1  # little sleep to help logs
            # another with 'tests'
            tag="$py-$flavor-test:py-dep-this"
            docker build --tag $tag \
              --build-arg="PYTHON=$py" --build-arg="FLAVOR=$flavor,test" \
              . \
              >> docker-build-logs/$tag.docker-build.out 2>&1 & pidlist="$pidlist $!"
            sleep .1  # little sleep to help logs
          done
        done

        # wait for all them
        for pid in $pidlist; do
          sleep .1  # little sleep to help logs
          echo "waiting for $pid..."
          if ! wait -n $pid; then
            sleep 5  # may need to wait for file to be written
            # '|| true': kill fails when no listed pid is alive any more; under
            # the default 'bash -e' shell that would abort before the logs print
            kill $pidlist 2>/dev/null || true
            sleep 5
            more docker-build-logs/* | cat  # cats with filenames (delimited by :::::::)
            exit 1
          fi
        done

        docker system prune --force  # save disk space
    # NOTE(review): the "name@version" part of this reference was mangled to
    # "[email protected]" in this copy of the file -- restore it from the repository.
    - if: github.actor != 'dependabot[bot]'
      uses: WIPACrepo/[email protected]
      with:
        use_directory: true
############################################################################ | |
# tests | |
############################################################################# | |
# Checks that the repository's Dockerfile builds, using buildx with the
# GitHub Actions cache backend. Runs after 'mypy'.
test-build-docker:
  runs-on: ubuntu-latest
  needs:
    - mypy
  steps:
    - uses: actions/checkout@v4
    - uses: docker/setup-buildx-action@v3
    - uses: docker/build-push-action@v5
      with:
        file: Dockerfile
        context: .
        tags: ${{ env.DOCKER_IMAGE_NAME }}:local
        cache-from: type=gha
        cache-to: type=gha,mode=min
# Smoke test: build the default image, run it with placeholder queue settings,
# and require that the last output line is the expected early RuntimeError.
vanilla-run:
  runs-on: ubuntu-latest
  needs:
    - py-versions
  env:
    EWMS_PILOT_QUEUE_INCOMING: foo
    EWMS_PILOT_QUEUE_OUTGOING: bar
    EWMS_PILOT_QUEUE_INCOMING_BROKER_TYPE: rabbitmq
    EWMS_PILOT_QUEUE_OUTGOING_BROKER_TYPE: rabbitmq
    EWMS_PILOT_QUEUE_INCOMING_BROKER_ADDRESS: blah
    EWMS_PILOT_QUEUE_OUTGOING_BROKER_ADDRESS: blah
    EWMS_PILOT_TIMEOUT_QUEUE_WAIT_FOR_FIRST_MESSAGE: 1  # added to figure out https://github.com/WIPACrepo/wipac-dev-tools/pull/106, not needed but not doing any harm
  steps:
    - uses: actions/checkout@v4
    - name: build docker image
      run: |
        docker build --tag ${{ env.DOCKER_IMAGE_NAME }} .
        docker images
    - run: |
        set -ex

        # forward every EWMS_* and _EWMS_* variable into the container;
        # all container output is captured in test.out
        docker run --rm \
          $(env | grep '^EWMS_' | awk '$0="--env "$0') \
          $(env | grep '^_EWMS_' | awk '$0="--env "$0') \
          ${{ env.DOCKER_IMAGE_NAME }} \
          2>&1 | tee -a test.out
        cat test.out

        # The intent of this test is to confirm that the pilot runs as a CL script
        # without error. Real testing is done in 'integration-tests'. This "test"
        # ends on the following error, which is relatively early in the pilot...
        expected="RuntimeError: Task image was not provided."
        last_line=$( tail -n 1 test.out )
        if [[ "$last_line" != "$expected" ]]; then
          echo "ERROR: the pilot did not fail as expected"
          exit 1
        fi
        echo "passed! (iow, this is the correct error ^^^)"
# Runs every collected pytest test in its own container, for each combination
# of container platform x python version x broker client. The broker always
# runs in a docker container; the tests run either docker-in-docker (sysbox)
# or inside an apptainer image converted from the docker image.
integration-tests:
  needs: [ py-versions, mypy ]
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      container_platform: [ docker, apptainer ]
      version: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
      broker_client: [ pulsar, rabbitmq, nats ]
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.version }}
    - if: ${{ matrix.broker_client == 'rabbitmq' }}
      uses: jlumbroso/free-disk-space@main
      with:
        android: false  # this is a LOT of space, so it takes longer to remove
        docker-images: false  # we may actually need this one
    - if: ${{ matrix.container_platform == 'docker' }}
      name: install sysbox (needed for docker-in-docker)
      run: |
        temp_dir=$(mktemp -d) && cd $temp_dir && trap 'rm -rf $temp_dir' EXIT  # save disk space
        # https://github.com/nestybox/sysbox/blob/master/docs/user-guide/install-package.md
        wget https://downloads.nestybox.com/sysbox/releases/v0.6.4/sysbox-ce_0.6.4-0.linux_amd64.deb
        docker rm $(docker ps -a -q) -f || echo "ok: no docker containers to remove"
        sudo apt-get install jq
        sudo apt-get install ./sysbox-ce_0.6.4-0.linux_amd64.deb
        # sudo systemctl status sysbox -n20  # used below
    - if: ${{ matrix.container_platform == 'apptainer' }}
      uses: eWaterCycle/setup-apptainer@v2
      with:
        apptainer-version: 1.3.2
    - name: build docker image
      run: |
        docker build --tag ${{ env.DOCKER_IMAGE_NAME }} \
          --build-arg="PYTHON=${{ matrix.version }}" \
          --build-arg="FLAVOR=${{ matrix.broker_client }},test" \
          --build-arg="CONTAINER_PLATFORM=${{ matrix.container_platform }}" \
          .
        docker images
    - name: pre-pull & save images for testing
      run: |
        set -x
        mkdir saved-images/

        # Runs "$1" (via eval) up to 5 times, retrying only on exit code 255.
        #   - returns 0 as soon as the command succeeds
        #   - exits the script immediately on any other non-zero code
        #   - returns 255 (failing the step) once all attempts are used up
        retry_command_if_255() {
          # some logic to get around docker hub's rate limiting
          local command="$1"
          local max_retries=5
          local exit_code=0
          local i
          for ((i=1; i<=max_retries; i++)); do
            # capture the status through '||': the step runs under 'bash -e',
            # so a bare failing 'eval' would abort before any retry happens
            exit_code=0
            eval "$command" || exit_code=$?
            if [[ $exit_code -eq 0 ]]; then
              return 0
            elif [[ $exit_code -eq 255 ]]; then
              # docker hub rate limiting
              echo "Command failed with exit code 255. Retry $i/$max_retries..."
            else
              exit $exit_code
            fi
            # Wait before retrying (pointless after the final attempt)
            if (( i < max_retries )); then
              sleep $((30*$i))  # 30, 60, 90, ...
            fi
          done
          # do not report success when every attempt failed
          echo "ERROR: command still failing after $max_retries attempts: $command"
          return $exit_code
        }

        if [[ "${{ matrix.container_platform }}" == 'docker' ]]; then
          retry_command_if_255 "docker pull python:alpine"
          docker save -o saved-images/python-alpine.tar python:alpine
        elif [[ "${{ matrix.container_platform }}" == "apptainer" ]]; then
          cd saved-images/
          # building as a sandbox (unpacked dir) allows apptainer-in-apptainer
          retry_command_if_255 "apptainer build --sandbox python_alpine_sandbox/ docker://python:alpine"
          # pilot converts .sif to apptainer dir -- only 1 test uses this
          retry_command_if_255 "apptainer build python_alpine.sif docker://python:alpine"
        else
          exit 2  # unknown container_platform
        fi
    - if: ${{ matrix.container_platform == 'apptainer' }}
      name: build apptainer image
      run: |
        docker images
        apptainer build $(basename ${{ env.DOCKER_IMAGE_NAME }}).sif docker-daemon://${{ env.DOCKER_IMAGE_NAME }}:latest
        ls -lh $(basename ${{ env.DOCKER_IMAGE_NAME }}).sif
    - name: get list of pytest tests
      run: |
        pip install .[test]  # for the for-loop below
        pytest --collect-only -q --disable-warnings tests | head -n -2 > $SORTED_LIST_OF_TESTS_FILE
        cat $SORTED_LIST_OF_TESTS_FILE
    - if: ${{ matrix.container_platform == 'docker' }}
      run: |
        # we're connecting all the containers (broker + apps) in the same docker network
        # create common network -- https://github.com/nestybox/sysbox/blob/master/docs/quickstart/kind.md#k8s-cluster-on-user-defined-bridge-networks
        docker network create mynet
    - name: Integrate
      timeout-minutes: ${{ matrix.broker_client == 'rabbitmq' && 20 || 10 }}  # adjust if fails, remember this is only time for *this* step
      run: |
        set -x

        # ACTIVATE FOR DOCKER-IN-DOCKER
        if [[ "${{ matrix.container_platform }}" == 'docker' ]]; then
          # activate sysbox -- https://github.com/nestybox/sysbox/blob/master/docs/user-guide/install-package.md
          sudo systemctl status sysbox -n20
        fi

        # STARTUP BROKER -- this is always in docker container, even in 'apptainer' tests
        if [[ "${{ matrix.container_platform }}" == 'docker' ]]; then
          # USE THE DOCKER NETWORK CREATED IN PREVIOUS STEP
          source ./tests/run-broker.sh "${{ matrix.broker_client }}" $BROKER_CONTAINER_NAME "--net=mynet --runtime=sysbox-runc"
          export EWMS_PILOT_QUEUE_INCOMING_BROKER_ADDRESS="$BROKER_CONTAINER_NAME"
          export EWMS_PILOT_QUEUE_OUTGOING_BROKER_ADDRESS="$BROKER_CONTAINER_NAME"
        elif [[ "${{ matrix.container_platform }}" == "apptainer" ]]; then
          # USE THE HOST NETWORK SINCE APPTAINER DOESN'T HAVE AS ISOLATED NETWORKING AS DOCKER DOES
          source ./tests/run-broker.sh "${{ matrix.broker_client }}" $BROKER_CONTAINER_NAME "--network=host"
          export EWMS_PILOT_QUEUE_INCOMING_BROKER_ADDRESS="localhost"  # localhost b/c using --network=host
          export EWMS_PILOT_QUEUE_OUTGOING_BROKER_ADDRESS="localhost"  # localhost b/c using --network=host
        else
          exit 2  # unknown container_platform
        fi
        docker ps

        set +x  # lots of output in these loops
        echo "--------------------------------------------------------------"
        echo "running tests..."

        # set queue env vars
        export EWMS_PILOT_QUEUE_INCOMING_BROKER_TYPE=${{ matrix.broker_client }}
        export EWMS_PILOT_QUEUE_OUTGOING_BROKER_TYPE=${{ matrix.broker_client }}

        # make external directories -- used in like 1 test
        export EWMS_PILOT_EXTERNAL_DIRECTORIES="/cvmfs/dummy-1/dir-A,/cvmfs/dummy-2/dir-B"
        mkdir -p $(pwd)/cvmfs/dummy-1/dir-A
        echo "alpha" > $(pwd)/cvmfs/dummy-1/dir-A/file.txt
        mkdir -p $(pwd)/cvmfs/dummy-2/dir-B
        echo "beta" > $(pwd)/cvmfs/dummy-2/dir-B/file.txt

        test_offset_delay=5

        # iterate each test, starting each in its own container
        cat $SORTED_LIST_OF_TESTS_FILE
        i=0
        while read test; do
          echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
          echo "starting test $test"
          echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
          # NOTE: re 'sleep N && ...': this allows all tests to be submitted in bulk,
          # then waited for in order, PLUS it has a built-in delay to not overwhelm
          # the broker. This way (as opposed to sleep between loop iterations), we
          # can stop all the tests if one fails early on (like pytest --exit-first).
          if [[ "${{ matrix.container_platform }}" == 'docker' ]]; then
            set -x  # lets see the command
            sleep "$(($i*$test_offset_delay))" && docker run --rm \
              --net=mynet --runtime=sysbox-runc --hostname=syscont \
              --mount type=bind,source=$(pwd),target=/repo/,readonly \
              --mount type=bind,source=$(pwd)/saved-images,target=/saved-images/ \
              --mount type=bind,source=$(pwd)/cvmfs/dummy-1/dir-A,target=/cvmfs/dummy-1/dir-A,readonly \
              --mount type=bind,source=$(pwd)/cvmfs/dummy-2/dir-B,target=/cvmfs/dummy-2/dir-B,readonly \
              $(env | grep '^EWMS_' | awk '$0="--env "$0') \
              $(env | grep '^_EWMS_' | awk '$0="--env "$0') \
              --env CI_TEST_ALPINE_PYTHON_IMAGE="python:alpine" \
              --env CI=$CI \
              ${{ env.DOCKER_IMAGE_NAME }} /bin/bash -c "docker load -i /saved-images/python-alpine.tar && ls -l && ls -l / && docker images && pytest -vvv -s $test" \
              >> $(basename $test).test.out 2>&1 & pidlist="$pidlist $!"
            set +x
          elif [[ "${{ matrix.container_platform }}" == "apptainer" ]]; then
            # NOTE: we want to mimic how htcondor launches apptainer-enabled EPs
            set -x  # lets see the command
            # one work dir per test; remember all of them so the EXIT trap
            # removes every dir, not just the one created by the last iteration
            # (the trap body is single-quoted, so $temp_dirs is expanded at exit)
            temp_dir=$(mktemp -d)
            temp_dirs="$temp_dirs $temp_dir"
            trap 'rm -rf $temp_dirs' EXIT  # save disk space
            # '--containall --writable-tmpfs --no-eval' gets us close to docker functionality
            # '--workdir' allows using host disk instead of memory
            sleep "$(($i*$test_offset_delay))" && apptainer run \
              --containall --writable-tmpfs --no-eval \
              --mount type=bind,source=$(pwd),target=/repo/,readonly \
              --mount type=bind,source=$(pwd)/saved-images,target=/saved-images/ \
              --mount type=bind,source=$(pwd)/cvmfs/dummy-1/dir-A,target=/cvmfs/dummy-1/dir-A,readonly \
              --mount type=bind,source=$(pwd)/cvmfs/dummy-2/dir-B,target=/cvmfs/dummy-2/dir-B,readonly \
              $(env | grep '^EWMS_' | awk '$0="--env "$0') \
              $(env | grep '^_EWMS_' | awk '$0="--env "$0') \
              --env CI_TEST_ALPINE_PYTHON_IMAGE="/saved-images/python_alpine_sandbox/" \
              --env CI_TEST_ALPINE_PYTHON_IMAGE_APPTAINER_SIF="/saved-images/python_alpine.sif" \
              --env CI_TEST_ALPINE_PYTHON_IMAGE_APPTAINER_FROM_DOCKER="python:alpine" \
              --env CI=$CI \
              --workdir "$temp_dir" -B "$temp_dir" \
              $(basename ${{ env.DOCKER_IMAGE_NAME }}).sif \
              /bin/bash -c "ls -l && ls -l / && pytest -vvv -s /app/$test" \
              >> $(basename $test).test.out 2>&1 & pidlist="$pidlist $!"
            set +x
          else
            exit 2  # unknown container_platform
          fi
          sleep .1  # little sleep to help logs
          i=$((i+1))
        done < $SORTED_LIST_OF_TESTS_FILE

        # wait for tests to finish
        # https://stackoverflow.com/a/32604828/13156561
        sleep 3  # short sleep to help logs
        for pid in $pidlist; do
          date --rfc-3339=seconds
          echo "waiting for $pid..."
          if ! wait -n $pid; then
            echo "ERROR: test(s) failed (ctrl+f $pid to match)"
            sleep 5  # may need to wait for output files to be written
            kill $pidlist 2>/dev/null
            exit 1
          fi
          echo "-> PASSED"
        done
    - name: pytest first failure
      if: always()
      run: |
        file=$(grep -l -m 1 "= FAILURES =" *.test.out | head -1)
        if [ -n "$file" ]; then
          cat $file
          echo "^^^^ this is the pytest out file -- this step exited with 1 because this *pytest* failed -- the ci step is okay"
          exit 1
        else
          echo "not found"
        fi
    - name: pytest container outputs
      if: always()
      run: |
        cat $SORTED_LIST_OF_TESTS_FILE
        while read test; do
          echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
          echo $test
          echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
          if [ -f $(basename $test).test.out ]; then
            cat $(basename $test).test.out
          else
            echo "CI: output file does not exist"
          fi
        done < $SORTED_LIST_OF_TESTS_FILE
    - name: broker output
      if: always()
      run: |
        set -x
        cat ./broker.out
        if [ -n "$(ls -A ./broker_logs/ 2>/dev/null)" ]; then
          ls ./broker_logs/
          more ./broker_logs/* | cat  # cats with filenames (delimited by :::::::)
        fi
    - name: broker docker logs
      if: always()
      run: |
        docker logs $BROKER_CONTAINER_NAME || true
############################################################################# | |
# GITHUB & PYPI RELEASE | |
############################################################################# | |
# Release job: version bump, then PyPI upload and GitHub release when the
# bump step reports released == 'true'. Gated on every other job succeeding.
# NOTE(review): the "name@version" part of the three third-party references
# below was mangled to "[email protected]" in this copy of the file -- restore
# them from the repository.
release:
  # only run on main/master/default
  if: ${{ github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main' }}
  runs-on: ubuntu-latest
  concurrency: release  # prevent any possible race conditions
  needs:
    - py-versions
    - flake8
    - mypy
    - py-setup
    - py-dependencies
    - test-build-docker
    - integration-tests
    - vanilla-run
  steps:
    # full history (fetch-depth: 0)
    - uses: actions/checkout@v4
      with:
        token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}  # using 'GITHUB_TOKEN' does not trigger ci on push
        fetch-depth: 0
    # Python-Package Version Bump
    - id: psr-psr
      uses: python-semantic-release/[email protected]
      with:
        github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}  # using 'GITHUB_TOKEN' does not trigger ci on push
    # PyPI Release
    - if: steps.psr-psr.outputs.released == 'true'
      uses: pypa/[email protected]
      with:
        password: ${{ secrets.PYPI_TOKEN }}
    # GitHub Release
    - if: steps.psr-psr.outputs.released == 'true'
      uses: python-semantic-release/[email protected]
      with:
        github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}  # using 'GITHUB_TOKEN' does not trigger ci on push