Skip to content

Misc Updates from Testing with Skymap Scanner (Pt 2) (#105) #1545

Misc Updates from Testing with Skymap Scanner (Pt 2) (#105)

Misc Updates from Testing with Skymap Scanner (Pt 2) (#105) #1545

Workflow file for this run

# Workflow-wide settings: display name, trigger, and the environment shared by
# every job below.
name: wipac ci/cd

# Start a run for a push to any branch; pushes of tags are ignored.
on:
  push:
    branches:
      - "**"
    tags-ignore:
      - "**"

# not using concurrency -- it makes A/B debugging difficult. Now, manually cancel prev tests if needed

env:
  # container images, one per message broker exercised by the test matrix
  RABBITMQ_IMAGE_TAG: bitnami/rabbitmq:3.13.5
  PULSAR_IMAGE_TAG: apachepulsar/pulsar:2.6.0
  NATS_IMAGE_TAG: nats:2.10.18

  # file that receives the collected pytest test ids (one per line)
  SORTED_LIST_OF_TESTS_FILE: list-of-tests-sorted.txt

  # container name handed to tests/run-broker.sh and later to 'docker logs'
  BROKER_CONTAINER_NAME: thebroker

  # pilot settings forwarded into the test containers (every EWMS_* variable
  # is passed along with '--env')
  EWMS_PILOT_TIMEOUT_INCOMING: 1
  EWMS_PILOT_TIMEOUT_OUTGOING: 1
  EWMS_PILOT_KEEP_ALL_TASK_FILES: True
  EWMS_PILOT_DATA_DIR_PARENT_PATH_ON_HOST: /blah/

  # allow real-time conversion
  _EWMS_PILOT_APPTAINER_IMAGE_DIRECTORY_MUST_BE_PRESENT: False

  # tag given to the locally built pilot image
  DOCKER_IMAGE_NAME: pilot/local

jobs:
#############################################################################
# PACKAGING & LINTERS
#############################################################################
# Exposes the 'matrix' output of the 'versions' step; other jobs run it through
# fromJSON() to build their strategy matrix.
py-versions:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    # NOTE(review): "[email protected]" looks like e-mail-obfuscation residue
    # from the page this file was captured from -- restore the real
    # <action>@<version> reference before use.
    - uses: WIPACrepo/[email protected]
      id: versions
  outputs:
    matrix: ${{ steps.versions.outputs.matrix }}
# Lint job: checkout, set up the runner's default Python, run the WIPAC action.
flake8:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    # no 'python-version' given, so the action's default interpreter is used
    - uses: actions/setup-python@v4

    # NOTE(review): "[email protected]" looks like e-mail-obfuscation residue
    # from the page capture -- restore the real <action>@<version> reference.
    - uses: WIPACrepo/[email protected]
# Type-check job, run once per Python version reported by 'py-versions'.
mypy:
  needs:
    - py-versions
  runs-on: ubuntu-latest
  strategy:
    # let every Python version finish even when one of them fails
    fail-fast: false
    matrix:
      py3: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
  steps:
    - uses: actions/checkout@v4

    - uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.py3 }}

    # NOTE(review): "[email protected]" looks like e-mail-obfuscation residue
    # from the page capture -- restore the real <action>@<version> reference.
    - uses: WIPACrepo/[email protected]
# Runs the WIPAC packaging-setup action with this project's metadata.
# Both steps are skipped for dependabot; the job itself still runs (and
# succeeds) so jobs that list it in 'needs' are not blocked.
py-setup:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      if: github.actor != 'dependabot[bot]'
      with:
        # fyi, dependabot can't access normal secrets
        token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}

    # NOTE(review): "[email protected]" (here and in 'author_email') looks like
    # e-mail-obfuscation residue from the page capture -- restore the real
    # <action>@<version> reference and the real address.
    - uses: WIPACrepo/[email protected]
      if: github.actor != 'dependabot[bot]'
      with:
        python_min: "3.10"
        python_max: "3.12"
        keywords: pilot "Observation Management Service" "Event Workflow Management Service" EWMS "message passing" MQ task
        pypi_name: ewms-pilot
        author: WIPAC Developers
        author_email: [email protected]
# Builds one docker image per (python version, broker flavor[, test]) plus the
# default image, all tagged '*:py-dep-this', then runs the WIPAC dependencies
# action. All steps are skipped for dependabot.
py-dependencies:
  needs: [ py-versions ]
  runs-on: ubuntu-latest
  # a newer run of this job on the same ref cancels the one still in progress
  concurrency:
    group: ${{ github.workflow }}-${{ github.ref }}-${{ github.job }}
    cancel-in-progress: true
  steps:
    - if: github.actor != 'dependabot[bot]'
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}  # fyi, dependabot can't access normal secrets
    - if: github.actor != 'dependabot[bot]'
      run: |
        set -x
        mkdir docker-build-logs/ && trap 'rm -rf docker-build-logs/' EXIT
        pidlist=""

        # first, build the vanilla/default image
        # ('tag' must be set *before* it is used in the log file name; left
        # unset, the log lands in the hidden file '.docker-build.out', which
        # the 'docker-build-logs/*' glob below never prints)
        tag="default:py-dep-this"
        docker build --tag "$tag" . \
          >> docker-build-logs/$tag.docker-build.out 2>&1 & pidlist="$pidlist $!"

        # build all dockerfiles
        for py in $(echo ${{ needs.py-versions.outputs.matrix }} | sed 's/[][]//g; s/,/ /g'); do
          docker pull python:$py  # pre-pull to speed up dependent images
          for flavor in rabbitmq pulsar nats; do
            # normal
            tag="$py-$flavor:py-dep-this"
            docker build --tag $tag \
              --build-arg="PYTHON=$py" --build-arg="FLAVOR=$flavor" \
              . \
              >> docker-build-logs/$tag.docker-build.out 2>&1 & pidlist="$pidlist $!"
            sleep .1  # little sleep to help logs
            # another with 'tests'
            tag="$py-$flavor-test:py-dep-this"
            docker build --tag $tag \
              --build-arg="PYTHON=$py" --build-arg="FLAVOR=$flavor,test" \
              . \
              >> docker-build-logs/$tag.docker-build.out 2>&1 & pidlist="$pidlist $!"
            sleep .1  # little sleep to help logs
          done
        done

        # wait for all them
        for pid in $pidlist; do
          sleep .1  # little sleep to help logs
          echo "waiting for $pid..."
          if ! wait -n $pid; then
            sleep 5  # may need to wait for file to be written
            # '|| true': kill fails when every other build has already exited;
            # under the runner's errexit shell that would abort the script
            # before the logs are printed
            kill $pidlist 2>/dev/null || true
            sleep 5
            more docker-build-logs/* | cat  # cats with filenames (delimited by :::::::)
            exit 1
          fi
        done

        docker system prune --force  # save disk space
    # NOTE(review): "[email protected]" looks like e-mail-obfuscation residue
    # from the page capture -- restore the real <action>@<version> reference.
    - if: github.actor != 'dependabot[bot]'
      uses: WIPACrepo/[email protected]
      with:
        use_directory: true
#############################################################################
# TESTS
#############################################################################
# Checks that the repository's Dockerfile builds, using buildx with the
# GitHub Actions cache backend.
test-build-docker:
  needs:
    - mypy
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - uses: docker/setup-buildx-action@v3

    - uses: docker/build-push-action@v5
      with:
        file: Dockerfile
        context: .
        tags: ${{ env.DOCKER_IMAGE_NAME }}:local
        cache-from: type=gha
        cache-to: type=gha,mode=min
# Smoke test: build the default image and run the pilot with placeholder queue
# settings, then check it stops on the expected "no task image" error.
vanilla-run:
  needs:
    - py-versions
  runs-on: ubuntu-latest
  env:
    EWMS_PILOT_QUEUE_INCOMING_BROKER_TYPE: rabbitmq
    EWMS_PILOT_QUEUE_OUTGOING_BROKER_TYPE: rabbitmq
    EWMS_PILOT_QUEUE_INCOMING_BROKER_ADDRESS: blah
    EWMS_PILOT_QUEUE_OUTGOING_BROKER_ADDRESS: blah
    EWMS_PILOT_QUEUE_INCOMING: foo
    EWMS_PILOT_QUEUE_OUTGOING: bar
    # added to figure out https://github.com/WIPACrepo/wipac-dev-tools/pull/106, not needed but not doing any harm
    EWMS_PILOT_TIMEOUT_QUEUE_WAIT_FOR_FIRST_MESSAGE: 1
  steps:
    - uses: actions/checkout@v4

    - name: build docker image
      run: |
        docker build --tag ${{ env.DOCKER_IMAGE_NAME }} .
        docker images

    - run: |
        set -ex

        # forward every EWMS_* / _EWMS_* variable into the container; the
        # pipe into tee means docker's own exit code does not stop the script
        docker run --rm \
          $(env | grep '^EWMS_' | awk '$0="--env "$0') \
          $(env | grep '^_EWMS_' | awk '$0="--env "$0') \
          ${{ env.DOCKER_IMAGE_NAME }} \
          2>&1 | tee -a test.out
        cat test.out

        # The intent of this test is to confirm that the pilot runs as a CL script
        # without error. Real testing is done in 'integration-tests'. This "test"
        # ends on the following error, which is relatively early in the pilot...
        expected="RuntimeError: Task image was not provided."
        if [[ $( tail -n 1 test.out ) != "$expected" ]]; then
          echo "ERROR: the pilot did not fail as expected"
          exit 1
        fi
        echo "passed! (iow, this is the correct error ^^^)"
# Full test matrix: container platform x python version x broker client.
integration-tests:
  needs:
    - py-versions
    - mypy
  runs-on: ubuntu-latest
  strategy:
    # let every combination finish even when one of them fails
    fail-fast: false
    matrix:
      container_platform:
        - docker
        - apptainer
      version: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
      broker_client:
        - pulsar
        - rabbitmq
        - nats
  steps:
    - uses: actions/checkout@v4

    - uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.version }}

    # only the rabbitmq runs free up disk space first
    # NOTE(review): '@main' is a mutable ref -- consider pinning a release tag
    - uses: jlumbroso/free-disk-space@main
      if: ${{ matrix.broker_client == 'rabbitmq' }}
      with:
        android: false  # this is a LOT of space, so it takes longer to remove
        docker-images: false  # we may actually need this one
# Docker runs only: install the sysbox runtime from its .deb package.
- if: ${{ matrix.container_platform == 'docker' }}
  name: install sysbox (needed for docker-in-docker)
  run: |
    temp_dir=$(mktemp -d) && cd $temp_dir && trap 'rm -rf $temp_dir' EXIT  # save disk space
    # https://github.com/nestybox/sysbox/blob/master/docs/user-guide/install-package.md
    wget https://downloads.nestybox.com/sysbox/releases/v0.6.4/sysbox-ce_0.6.4-0.linux_amd64.deb
    # existing containers are removed before the package is installed
    docker rm $(docker ps -a -q) -f || echo "ok: no docker containers to remove"
    # '-y': there is no terminal to answer a confirmation prompt, so without
    # it apt-get aborts whenever it needs to ask (e.g. extra dependencies)
    sudo apt-get install -y jq
    sudo apt-get install -y ./sysbox-ce_0.6.4-0.linux_amd64.deb
    # sudo systemctl status sysbox -n20  # used below
# Apptainer runs only: install the apptainer CLI on the runner.
- uses: eWaterCycle/setup-apptainer@v2
  if: ${{ matrix.container_platform == 'apptainer' }}
  with:
    apptainer-version: 1.3.2

# Build the pilot image for this matrix cell (the 'test' extra is added to the
# broker flavor), then list local images for the log.
- name: build docker image
  run: |
    docker build \
      --tag ${{ env.DOCKER_IMAGE_NAME }} \
      --build-arg="PYTHON=${{ matrix.version }}" \
      --build-arg="FLAVOR=${{ matrix.broker_client }},test" \
      --build-arg="CONTAINER_PLATFORM=${{ matrix.container_platform }}" \
      .
    docker images
# Fetch the python:alpine image once on the host and store it under
# saved-images/ (a docker tarball, or an apptainer sandbox dir plus a .sif) so
# the test containers can use it from the bind mount.
- name: pre-pull & save images for testing
  run: |
    set -x
    mkdir saved-images/

    # retry_command_if_255 CMD
    #   Runs CMD (a single string, via eval). Exit code 0 returns at once;
    #   exit code 255 (treated as docker hub rate limiting) is retried up to
    #   5 times with a growing pause; any other code, or running out of
    #   retries, ends the script with that code.
    retry_command_if_255() {
      # some logic to get around docker hub's rate limiting
      local command="$1"
      local max_retries=5
      local exit_code
      local i
      for ((i=1; i<=max_retries; i++)); do
        # '|| exit_code=$?' is required: the runner's shell has errexit on, so
        # a bare failing 'eval' would end the script before any retry
        exit_code=0
        eval "$command" || exit_code=$?
        if [[ $exit_code -eq 0 ]]; then
          return 0
        elif [[ $exit_code -ne 255 ]]; then
          exit $exit_code
        fi
        # docker hub rate limiting
        echo "Command failed with exit code 255. Retry $i/$max_retries..."
        # Wait before retrying (no point in waiting after the last attempt)
        if ((i < max_retries)); then
          sleep $((30*$i))  # 30, 60, 90, ...
        fi
      done
      # every attempt failed -- do not fall through as if it had worked
      echo "ERROR: command still failing after $max_retries attempts: $command"
      exit 255
    }

    if [[ "${{ matrix.container_platform }}" == 'docker' ]]; then
      retry_command_if_255 "docker pull python:alpine"
      docker save -o saved-images/python-alpine.tar python:alpine
    elif [[ "${{ matrix.container_platform }}" == "apptainer" ]]; then
      cd saved-images/
      # building as a sandbox (unpacked dir) allows apptainer-in-apptainer
      retry_command_if_255 "apptainer build --sandbox python_alpine_sandbox/ docker://python:alpine"
      # pilot converts .sif to apptainer dir -- only 1 test uses this
      retry_command_if_255 "apptainer build python_alpine.sif docker://python:alpine"
    else
      exit 2  # unknown container_platform
    fi
# Apptainer runs only: convert the freshly built docker image into a .sif file
# named after the last path component of DOCKER_IMAGE_NAME.
- name: build apptainer image
  if: ${{ matrix.container_platform == 'apptainer' }}
  run: |
    docker images
    apptainer build \
      $(basename ${{ env.DOCKER_IMAGE_NAME }}).sif \
      docker-daemon://${{ env.DOCKER_IMAGE_NAME }}:latest
    ls -lh $(basename ${{ env.DOCKER_IMAGE_NAME }}).sif

# Collect the pytest test ids on the host; 'head -n -2' drops the last two
# lines of pytest's quiet collection output.
- name: get list of pytest tests
  run: |
    pip install .[test]  # for the for-loop below
    pytest --collect-only -q --disable-warnings tests \
      | head -n -2 \
      > $SORTED_LIST_OF_TESTS_FILE
    cat $SORTED_LIST_OF_TESTS_FILE

# Docker runs only.
- if: ${{ matrix.container_platform == 'docker' }}
  run: |
    # we're connecting all the containers (broker + apps) in the same docker network
    # create common network -- https://github.com/nestybox/sysbox/blob/master/docs/quickstart/kind.md#k8s-cluster-on-user-defined-bridge-networks
    docker network create mynet
# Starts the broker, launches every collected test in its own container as a
# background job (staggered by a growing 'sleep'), then waits for the jobs in
# launch order and stops at the first failure.
- name: Integrate
  # adjust if fails, remember this is only time for *this* step
  timeout-minutes: ${{ matrix.broker_client == 'rabbitmq' && 20 || 10 }}
  run: |
    set -x

    # ACTIVATE FOR DOCKER-IN-DOCKER
    if [[ "${{ matrix.container_platform }}" == 'docker' ]]; then
      # activate sysbox -- https://github.com/nestybox/sysbox/blob/master/docs/user-guide/install-package.md
      sudo systemctl status sysbox -n20
    fi

    # STARTUP BROKER -- this is always in docker container, even in 'apptainer' tests
    if [[ "${{ matrix.container_platform }}" == 'docker' ]]; then
      # USE THE DOCKER NETWORK CREATED IN PREVIOUS STEP
      source ./tests/run-broker.sh "${{ matrix.broker_client }}" $BROKER_CONTAINER_NAME "--net=mynet --runtime=sysbox-runc"
      export EWMS_PILOT_QUEUE_INCOMING_BROKER_ADDRESS="$BROKER_CONTAINER_NAME"
      export EWMS_PILOT_QUEUE_OUTGOING_BROKER_ADDRESS="$BROKER_CONTAINER_NAME"
    elif [[ "${{ matrix.container_platform }}" == "apptainer" ]]; then
      # USE THE HOST NETWORK SINCE APPTAINER DOESN'T HAVE AS ISOLATED NETWORKING AS DOCKER DOES
      source ./tests/run-broker.sh "${{ matrix.broker_client }}" $BROKER_CONTAINER_NAME "--network=host"
      export EWMS_PILOT_QUEUE_INCOMING_BROKER_ADDRESS="localhost"  # localhost b/c using --network=host
      export EWMS_PILOT_QUEUE_OUTGOING_BROKER_ADDRESS="localhost"  # localhost b/c using --network=host
    else
      exit 2  # unknown container_platform
    fi
    docker ps

    set +x  # lots of output in these loops
    echo "--------------------------------------------------------------"
    echo "running tests..."

    # set queue env vars
    export EWMS_PILOT_QUEUE_INCOMING_BROKER_TYPE=${{ matrix.broker_client }}
    export EWMS_PILOT_QUEUE_OUTGOING_BROKER_TYPE=${{ matrix.broker_client }}

    # make external directories -- used in like 1 test
    export EWMS_PILOT_EXTERNAL_DIRECTORIES="/cvmfs/dummy-1/dir-A,/cvmfs/dummy-2/dir-B"
    mkdir -p $(pwd)/cvmfs/dummy-1/dir-A
    echo "alpha" > $(pwd)/cvmfs/dummy-1/dir-A/file.txt
    mkdir -p $(pwd)/cvmfs/dummy-2/dir-B
    echo "beta" > $(pwd)/cvmfs/dummy-2/dir-B/file.txt

    test_offset_delay=5

    # one scratch dir is created per apptainer test; keep track of ALL of them
    # so the EXIT trap can remove every one (a trap on a single '$temp_dir'
    # variable only ever cleans up the dir of the last test launched)
    temp_dirs=()

    # iterate each test, starting each in its own container
    cat $SORTED_LIST_OF_TESTS_FILE
    i=0
    while read test; do
      echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
      echo "starting test $test"
      echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
      # NOTE: re 'sleep N && ...': this allows all tests to be submitted in bulk,
      #   then waited for in order, PLUS it has a built-in delay to not overwhelm
      #   the broker. This way (as opposed to sleep between loop iterations), we
      #   can stop all the tests if one fails early on (like pytest --exit-first).
      if [[ "${{ matrix.container_platform }}" == 'docker' ]]; then
        set -x  # lets see the command
        sleep "$(($i*$test_offset_delay))" && docker run --rm \
          --net=mynet --runtime=sysbox-runc --hostname=syscont \
          --mount type=bind,source=$(pwd),target=/repo/,readonly \
          --mount type=bind,source=$(pwd)/saved-images,target=/saved-images/ \
          --mount type=bind,source=$(pwd)/cvmfs/dummy-1/dir-A,target=/cvmfs/dummy-1/dir-A,readonly \
          --mount type=bind,source=$(pwd)/cvmfs/dummy-2/dir-B,target=/cvmfs/dummy-2/dir-B,readonly \
          $(env | grep '^EWMS_' | awk '$0="--env "$0') \
          $(env | grep '^_EWMS_' | awk '$0="--env "$0') \
          --env CI_TEST_ALPINE_PYTHON_IMAGE="python:alpine" \
          --env CI=$CI \
          ${{ env.DOCKER_IMAGE_NAME }} /bin/bash -c "docker load -i /saved-images/python-alpine.tar && ls -l && ls -l / && docker images && pytest -vvv -s $test" \
          >> $(basename $test).test.out 2>&1 & pidlist="$pidlist $!"
        set +x
      elif [[ "${{ matrix.container_platform }}" == "apptainer" ]]; then
        # NOTE: we want to mimic how htcondor launches apptainer-enabled EPs
        set -x  # lets see the command
        # save disk space: remove every scratch dir when the script exits
        temp_dir=$(mktemp -d) && temp_dirs+=("$temp_dir") && trap 'rm -rf "${temp_dirs[@]}"' EXIT
        # '--containall --writable-tmpfs --no-eval' gets us close to docker functionality
        # '--workdir' allows using host disk instead of memory
        sleep "$(($i*$test_offset_delay))" && apptainer run \
          --containall --writable-tmpfs --no-eval \
          --mount type=bind,source=$(pwd),target=/repo/,readonly \
          --mount type=bind,source=$(pwd)/saved-images,target=/saved-images/ \
          --mount type=bind,source=$(pwd)/cvmfs/dummy-1/dir-A,target=/cvmfs/dummy-1/dir-A,readonly \
          --mount type=bind,source=$(pwd)/cvmfs/dummy-2/dir-B,target=/cvmfs/dummy-2/dir-B,readonly \
          $(env | grep '^EWMS_' | awk '$0="--env "$0') \
          $(env | grep '^_EWMS_' | awk '$0="--env "$0') \
          --env CI_TEST_ALPINE_PYTHON_IMAGE="/saved-images/python_alpine_sandbox/" \
          --env CI_TEST_ALPINE_PYTHON_IMAGE_APPTAINER_SIF="/saved-images/python_alpine.sif" \
          --env CI_TEST_ALPINE_PYTHON_IMAGE_APPTAINER_FROM_DOCKER="python:alpine" \
          --env CI=$CI \
          --workdir "$temp_dir" -B "$temp_dir" \
          $(basename ${{ env.DOCKER_IMAGE_NAME }}).sif \
          /bin/bash -c "ls -l && ls -l / && pytest -vvv -s /app/$test" \
          >> $(basename $test).test.out 2>&1 & pidlist="$pidlist $!"
        set +x
      else
        exit 2  # unknown container_platform
      fi
      sleep .1  # little sleep to help logs
      i=$((i+1))
    done < $SORTED_LIST_OF_TESTS_FILE

    # wait for tests to finish
    # https://stackoverflow.com/a/32604828/13156561
    sleep 3  # short sleep to help logs
    for pid in $pidlist; do
      date --rfc-3339=seconds
      echo "waiting for $pid..."
      if ! wait -n $pid; then
        echo "ERROR: test(s) failed (ctrl+f $pid to match)"
        sleep 5  # may need to wait for output files to be written
        kill $pidlist 2>/dev/null
        exit 1
      fi
      echo "-> PASSED"
    done
# The four steps below always run, whatever happened earlier in the job; they
# only print logs.

# Print the first per-test output file that contains a pytest FAILURES banner
# and fail this step if there is one.
- name: pytest first failure
  if: always()
  run: |
    failed_out=$(grep -l -m 1 "= FAILURES =" *.test.out | head -1)
    if [ -z "$failed_out" ]; then
      echo "not found"
    else
      cat $failed_out
      echo "^^^^ this is the pytest out file -- this step exited with 1 because this *pytest* failed -- the ci step is okay"
      exit 1
    fi

# Print every test's output file, in the order the tests were listed.
- name: pytest container outputs
  if: always()
  run: |
    cat $SORTED_LIST_OF_TESTS_FILE
    while read test; do
      echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
      echo $test
      echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
      if [ ! -f $(basename $test).test.out ]; then
        echo "CI: output file does not exist"
      else
        cat $(basename $test).test.out
      fi
    done < $SORTED_LIST_OF_TESTS_FILE

# Print ./broker.out and, when ./broker_logs/ is non-empty, its files as well.
- name: broker output
  if: always()
  run: |
    set -x
    cat ./broker.out
    if [ -n "$(ls -A ./broker_logs/ 2>/dev/null)" ]; then
      ls ./broker_logs/
      more ./broker_logs/* | cat  # cats with filenames (delimited by :::::::)
    fi

# Print the broker container's own log; never fails the step.
- name: broker docker logs
  if: always()
  run: |
    docker logs $BROKER_CONTAINER_NAME || true
#############################################################################
# GITHUB & PYPI RELEASE
#############################################################################
# Version bump, PyPI upload and GitHub release; runs only after every other
# job in this workflow.
# NOTE(review): each "[email protected]" below looks like e-mail-obfuscation
# residue from the page this file was captured from -- restore the real
# <action>@<version> references before use.
release:
  # only run on main/master/default
  if: ${{ github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main' }}
  needs:
    - py-versions
    - flake8
    - mypy
    - py-setup
    - py-dependencies
    - test-build-docker
    - integration-tests
    - vanilla-run
  runs-on: ubuntu-latest
  # prevent any possible race conditions
  concurrency: release
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        # using 'GITHUB_TOKEN' does not trigger ci on push
        token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}

    # Python-Package Version Bump
    - id: psr-psr
      uses: python-semantic-release/[email protected]
      with:
        # using 'GITHUB_TOKEN' does not trigger ci on push
        github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}

    # PyPI Release -- only when the bump step reports released == 'true'
    - if: steps.psr-psr.outputs.released == 'true'
      uses: pypa/[email protected]
      with:
        password: ${{ secrets.PYPI_TOKEN }}

    # GitHub Release -- only when the bump step reports released == 'true'
    - if: steps.psr-psr.outputs.released == 'true'
      uses: python-semantic-release/[email protected]
      with:
        # using 'GITHUB_TOKEN' does not trigger ci on push
        github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}