Misc Updates from Testing with Skymap Scanner (Pt 2) (#105) #1545

Workflow file for this run

.github/workflows/wipac-cicd.yml at bb8e6ff

	# Workflow: packaging/lint checks, container-based tests, then a release job.
	name: wipac ci/cd

	# Trigger on pushes to every branch; pushes of tags are ignored.
	on:
	  push:
	    branches:
	      - '**'
	    tags-ignore:
	      - '**'

	# not using concurrency -- it makes A/B debugging difficult. Now, manually cancel prev tests if needed

	# Workflow-wide environment, visible to the steps of every job.
	env:
	  # broker image tags -- presumably read by tests/run-broker.sh (TODO confirm)
	  RABBITMQ_IMAGE_TAG: bitnami/rabbitmq:3.13.5
	  PULSAR_IMAGE_TAG: apachepulsar/pulsar:2.6.0
	  NATS_IMAGE_TAG: nats:2.10.18

	  # file that the 'integration-tests' job writes the collected pytest ids to
	  SORTED_LIST_OF_TESTS_FILE: list-of-tests-sorted.txt

	  # container name handed to tests/run-broker.sh and to `docker logs`
	  BROKER_CONTAINER_NAME: thebroker

	  # EWMS_* and _EWMS_* variables are forwarded into the test containers
	  # (the jobs build `--env` flags from `env | grep '^EWMS_'`).
	  # NOTE(review): bare True/False are parsed as YAML booleans; quote them if
	  # the consumer needs the exact spelling -- confirm before changing.
	  EWMS_PILOT_TIMEOUT_INCOMING: 1
	  EWMS_PILOT_TIMEOUT_OUTGOING: 1
	  EWMS_PILOT_KEEP_ALL_TASK_FILES: True
	  EWMS_PILOT_DATA_DIR_PARENT_PATH_ON_HOST: /blah/

	  _EWMS_PILOT_APPTAINER_IMAGE_DIRECTORY_MUST_BE_PRESENT: False  # allow real-time conversion

	  # name used for `docker build --tag` throughout the jobs
	  DOCKER_IMAGE_NAME: pilot/local


	jobs:

	#############################################################################
	# PACKAGING & LINTERS
	#############################################################################


	# Exposes a `matrix` output that the jobs listing this one in `needs` read.
	py-versions:
	  runs-on: ubuntu-latest
	  outputs:
	    # forwarded verbatim from the 'versions' step below
	    matrix: ${{ steps.versions.outputs.matrix }}
	  steps:
	    - uses: actions/checkout@v4
	    # NOTE(review): the action reference below looks mangled by e-mail
	    # obfuscation in this copy ("[email protected]"); restore the real
	    # `owner/repo@version` before use.
	    - uses: WIPACrepo/[email protected]
	      id: versions

	# Lint job -- presumably runs flake8 through a WIPACrepo action (TODO confirm).
	flake8:
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v4
	    # no `python-version` given, so the action's default resolution applies
	    - uses: actions/setup-python@v4
	    # NOTE(review): reference looks mangled by e-mail obfuscation in this
	    # copy ("[email protected]"); restore the real `owner/repo@version`.
	    - uses: WIPACrepo/[email protected]

	# Runs once per entry of the `matrix` output published by 'py-versions'.
	mypy:
	  needs:
	    - py-versions
	  runs-on: ubuntu-latest
	  strategy:
	    fail-fast: false  # one failing version does not cancel the others
	    matrix:
	      py3: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
	  steps:
	    - uses: actions/checkout@v4
	    - uses: actions/setup-python@v4
	      with:
	        python-version: ${{ matrix.py3 }}
	    # NOTE(review): reference looks mangled by e-mail obfuscation in this
	    # copy ("[email protected]"); restore the real `owner/repo@version`.
	    - uses: WIPACrepo/[email protected]

	# Passes package metadata to a WIPACrepo setup action; every step is skipped
	# when the actor is dependabot.
	py-setup:
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v4
	      if: github.actor != 'dependabot[bot]'
	      with:
	        token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}  # fyi, dependabot can't access normal secrets
	    # NOTE(review): the `uses` reference and `author_email` below look mangled
	    # by e-mail obfuscation in this copy ("[email protected]"); restore the
	    # real values before use.
	    - uses: WIPACrepo/[email protected]
	      if: github.actor != 'dependabot[bot]'
	      with:
	        pypi_name: ewms-pilot
	        python_min: "3.10"
	        python_max: "3.12"
	        keywords: pilot "Observation Management Service" "Event Workflow Management Service" EWMS "message passing" MQ task
	        author: WIPAC Developers
	        author_email: [email protected]

	# Builds the default image plus one image per (python version x broker flavor),
	# with and without the 'test' extra, all in parallel; fails if any build fails.
	# Afterwards hands over to a WIPACrepo action. Every step is skipped when the
	# actor is dependabot.
	py-dependencies:
	  needs: [ py-versions ]
	  runs-on: ubuntu-latest
	  concurrency:
	    # one in-flight run per workflow/ref/job; a newer run cancels the older one
	    group: ${{ github.workflow }}-${{ github.ref }}-${{ github.job }}
	    cancel-in-progress: true
	  steps:
	    - if: github.actor != 'dependabot[bot]'
	      uses: actions/checkout@v4
	      with:
	        token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}  # fyi, dependabot can't access normal secrets
	    - if: github.actor != 'dependabot[bot]'
	      run: |
	        set -x

	        mkdir docker-build-logs/ && trap 'rm -rf docker-build-logs/' EXIT
	        pidlist=""

	        # first, build the vanilla/default image
	        # ($tag must be set before it names the log file -- otherwise the log
	        # lands in the hidden '.docker-build.out', which the glob used for
	        # printing logs on failure does not match)
	        tag="default:py-dep-this"
	        docker build --tag "$tag" . \
	          >> docker-build-logs/$tag.docker-build.out 2>&1 & pidlist="$pidlist $!"

	        # build all dockerfiles
	        for py in $(echo ${{ needs.py-versions.outputs.matrix }} | sed 's/[][]//g; s/,/ /g'); do
	          docker pull python:$py # pre-pull to speed up dependent images
	          for flavor in rabbitmq pulsar nats; do
	            # normal
	            tag="$py-$flavor:py-dep-this"
	            docker build --tag $tag \
	              --build-arg="PYTHON=$py" --build-arg="FLAVOR=$flavor" \
	              . \
	              >> docker-build-logs/$tag.docker-build.out 2>&1 & pidlist="$pidlist $!"
	            sleep .1 # little sleep to help logs
	            # another with 'tests'
	            tag="$py-$flavor-test:py-dep-this"
	            docker build --tag $tag \
	              --build-arg="PYTHON=$py" --build-arg="FLAVOR=$flavor,test" \
	              . \
	              >> docker-build-logs/$tag.docker-build.out 2>&1 & pidlist="$pidlist $!"
	            sleep .1 # little sleep to help logs
	          done
	        done

	        # wait for all them
	        for pid in $pidlist; do
	          sleep .1 # little sleep to help logs
	          echo "waiting for $pid..."
	          if ! wait -n $pid; then
	            sleep 5 # may need to wait for file to be written
	            kill $pidlist 2>/dev/null
	            sleep 5
	            more docker-build-logs/* | cat # cats with filenames (delimited by :::::::)
	            exit 1
	          fi
	        done

	        docker system prune --force # save disk space

	    # NOTE(review): reference looks mangled by e-mail obfuscation in this
	    # copy ("[email protected]"); restore the real `owner/repo@version`.
	    - if: github.actor != 'dependabot[bot]'
	      uses: WIPACrepo/[email protected]
	      with:
	        use_directory: true


	#############################################################################
	# TESTS
	#############################################################################


	# Builds the repository's Dockerfile with buildx, using the GitHub Actions
	# cache backend; the image is tagged but no push option is set here.
	test-build-docker:
	  needs:
	    - mypy
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v4
	    - uses: docker/setup-buildx-action@v3
	    - uses: docker/build-push-action@v5
	      with:
	        file: Dockerfile
	        context: .
	        tags: ${{ env.DOCKER_IMAGE_NAME }}:local
	        cache-from: type=gha
	        cache-to: type=gha,mode=min



	# Smoke test: builds the default image, runs it with placeholder queue
	# settings, and passes only if the last output line is the expected
	# "Task image was not provided" error.
	vanilla-run:
	  needs: [ py-versions ]
	  runs-on: ubuntu-latest
	  env:
	    # placeholder broker settings -- the run is expected to stop before using them
	    EWMS_PILOT_QUEUE_INCOMING_BROKER_TYPE: rabbitmq
	    EWMS_PILOT_QUEUE_OUTGOING_BROKER_TYPE: rabbitmq
	    EWMS_PILOT_QUEUE_INCOMING_BROKER_ADDRESS: blah
	    EWMS_PILOT_QUEUE_OUTGOING_BROKER_ADDRESS: blah
	    EWMS_PILOT_QUEUE_INCOMING: foo
	    EWMS_PILOT_QUEUE_OUTGOING: bar
	    EWMS_PILOT_TIMEOUT_QUEUE_WAIT_FOR_FIRST_MESSAGE: 1  # added to figure out https://github.com/WIPACrepo/wipac-dev-tools/pull/106, not needed but not doing any harm
	  steps:
	    - uses: actions/checkout@v4
	    - name: build docker image
	      run: |
	        docker build --tag ${{ env.DOCKER_IMAGE_NAME }} .
	        docker images
	    - run: |
	        set -ex

	        # every EWMS_* / _EWMS_* variable is forwarded as '--env NAME=VALUE'.
	        # The pipeline's status is tee's (pipefail is not set here), so the
	        # pilot's expected non-zero exit does not trip 'set -e'.
	        docker run --rm \
	          $(env | grep '^EWMS_' | awk '$0="--env "$0') \
	          $(env | grep '^_EWMS_' | awk '$0="--env "$0') \
	          ${{ env.DOCKER_IMAGE_NAME }} \
	          2>&1 | tee -a test.out
	        cat test.out

	        # The intent of this test is to confirm that the pilot runs as a CL script
	        # without error. Real testing is done in 'integration-tests'. This "test"
	        # ends on the following error, which is relatively early in the pilot...

	        expected="RuntimeError: Task image was not provided."
	        if [[ $( tail -n 1 test.out ) == "$expected" ]]; then
	          echo "passed! (iow, this is the correct error ^^^)"
	        else
	          echo "ERROR: the pilot did not fail as expected"
	          exit 1
	        fi


	# For every (container platform x python version x broker client) combination:
	# builds the pilot image, starts a broker container, launches each collected
	# pytest test in its own container (staggered starts, all in the background),
	# waits for them in order, then prints test and broker logs.
	integration-tests:
	  needs: [ py-versions, mypy ]
	  runs-on: ubuntu-latest
	  strategy:
	    fail-fast: false
	    matrix:
	      container_platform: [ docker, apptainer ]
	      version: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
	      broker_client: [ pulsar, rabbitmq, nats ]
	  steps:
	    - uses: actions/checkout@v4
	    - uses: actions/setup-python@v4
	      with:
	        python-version: ${{ matrix.version }}

	    # pinned to a release tag instead of the mutable 'main' branch
	    - if: ${{ matrix.broker_client == 'rabbitmq' }}
	      uses: jlumbroso/free-disk-space@v1.3.1
	      with:
	        android: false  # this is a LOT of space, so it takes longer to remove
	        docker-images: false  # we may actually need this one

	    - if: ${{ matrix.container_platform == 'docker' }}
	      name: install sysbox (needed for docker-in-docker)
	      run: |
	        temp_dir=$(mktemp -d) && cd $temp_dir && trap 'rm -rf $temp_dir' EXIT # save disk space
	        # https://github.com/nestybox/sysbox/blob/master/docs/user-guide/install-package.md
	        wget https://downloads.nestybox.com/sysbox/releases/v0.6.4/sysbox-ce_0.6.4-0.linux_amd64.deb
	        docker rm $(docker ps -a -q) -f || echo "ok: no docker containers to remove"
	        # '-y': never wait on a confirmation prompt in CI
	        sudo apt-get install -y jq
	        sudo apt-get install -y ./sysbox-ce_0.6.4-0.linux_amd64.deb
	        # sudo systemctl status sysbox -n20 # used below

	    - if: ${{ matrix.container_platform == 'apptainer' }}
	      uses: eWaterCycle/setup-apptainer@v2
	      with:
	        apptainer-version: 1.3.2

	    - name: build docker image
	      run: |
	        docker build --tag ${{ env.DOCKER_IMAGE_NAME }} \
	          --build-arg="PYTHON=${{ matrix.version }}" \
	          --build-arg="FLAVOR=${{ matrix.broker_client }},test" \
	          --build-arg="CONTAINER_PLATFORM=${{ matrix.container_platform }}" \
	          .
	        docker images

	    - name: pre-pull & save images for testing
	      run: |
	        set -x
	        mkdir saved-images/

	        # Runs "$1" up to 5 times, retrying only on exit code 255.
	        # Any other failure exits the script with that code; running out
	        # of retries exits the script with 255.
	        retry_command_if_255() {
	          # some logic to get around docker hub's rate limiting
	          local command="$1"
	          local max_retries=5
	          local exit_code i
	          for ((i=1; i<=max_retries; i++)); do
	            # '|| exit_code=$?' keeps errexit (the default 'bash -e' shell) from
	            # aborting the script before the exit code can be inspected
	            exit_code=0
	            eval "$command" || exit_code=$?
	            if [[ $exit_code -eq 0 ]]; then
	              return 0
	            elif [[ $exit_code -ne 255 ]]; then
	              exit $exit_code
	            fi
	            # docker hub rate limiting
	            echo "Command failed with exit code 255. Retry $i/$max_retries..."
	            # Wait before retrying (pointless after the final attempt)
	            if (( i < max_retries )); then
	              sleep $((30*i)) # 30, 60, 90, ...
	            fi
	          done
	          echo "ERROR: command still failing after $max_retries attempts: $command"
	          exit 255
	        }

	        if [[ "${{ matrix.container_platform }}" == 'docker' ]]; then
	          retry_command_if_255 "docker pull python:alpine"
	          docker save -o saved-images/python-alpine.tar python:alpine
	        elif [[ "${{ matrix.container_platform }}" == "apptainer" ]]; then
	          cd saved-images/
	          # building as a sandbox (unpacked dir) allows apptainer-in-apptainer
	          retry_command_if_255 "apptainer build --sandbox python_alpine_sandbox/ docker://python:alpine"
	          # pilot converts .sif to apptainer dir -- only 1 test uses this
	          retry_command_if_255 "apptainer build python_alpine.sif docker://python:alpine"
	        else
	          exit 2 # unknown container_platform
	        fi

	    - if: ${{ matrix.container_platform == 'apptainer' }}
	      name: build apptainer image
	      run: |
	        docker images
	        apptainer build $(basename ${{ env.DOCKER_IMAGE_NAME }}).sif docker-daemon://${{ env.DOCKER_IMAGE_NAME }}:latest
	        ls -lh $(basename ${{ env.DOCKER_IMAGE_NAME }}).sif

	    - name: get list of pytest tests
	      run: |
	        pip install .[test] # for the for-loop below
	        # 'head -n -2' drops the last two lines of pytest's collection output
	        pytest --collect-only -q --disable-warnings tests | head -n -2 > $SORTED_LIST_OF_TESTS_FILE
	        cat $SORTED_LIST_OF_TESTS_FILE

	    - if: ${{ matrix.container_platform == 'docker' }}
	      run: |
	        # we're connecting all the containers (broker + apps) in the same docker network
	        # create common network -- https://github.com/nestybox/sysbox/blob/master/docs/quickstart/kind.md#k8s-cluster-on-user-defined-bridge-networks
	        docker network create mynet

	    - name: Integrate
	      timeout-minutes: ${{ matrix.broker_client == 'rabbitmq' && 20 || 10 }}  # adjust if fails, remember this is only time for this step
	      run: |
	        set -x

	        # ACTIVATE FOR DOCKER-IN-DOCKER
	        if [[ "${{ matrix.container_platform }}" == 'docker' ]]; then
	          # activate sysbox -- https://github.com/nestybox/sysbox/blob/master/docs/user-guide/install-package.md
	          sudo systemctl status sysbox -n20
	        fi

	        # STARTUP BROKER -- this is always in docker container, even in 'apptainer' tests
	        if [[ "${{ matrix.container_platform }}" == 'docker' ]]; then
	          # USE THE DOCKER NETWORK CREATED IN PREVIOUS STEP
	          source ./tests/run-broker.sh "${{ matrix.broker_client }}" $BROKER_CONTAINER_NAME "--net=mynet --runtime=sysbox-runc"
	          export EWMS_PILOT_QUEUE_INCOMING_BROKER_ADDRESS="$BROKER_CONTAINER_NAME"
	          export EWMS_PILOT_QUEUE_OUTGOING_BROKER_ADDRESS="$BROKER_CONTAINER_NAME"
	        elif [[ "${{ matrix.container_platform }}" == "apptainer" ]]; then
	          # USE THE HOST NETWORK SINCE APPTAINER DOESN'T HAVE AS ISOLATED NETWORKING AS DOCKER DOES
	          source ./tests/run-broker.sh "${{ matrix.broker_client }}" $BROKER_CONTAINER_NAME "--network=host"
	          export EWMS_PILOT_QUEUE_INCOMING_BROKER_ADDRESS="localhost" # localhost b/c using --network=host
	          export EWMS_PILOT_QUEUE_OUTGOING_BROKER_ADDRESS="localhost" # localhost b/c using --network=host
	        else
	          exit 2 # unknown container_platform
	        fi
	        docker ps

	        set +x # lots of output in these loops
	        echo "--------------------------------------------------------------"
	        echo "running tests..."

	        # set queue env vars
	        export EWMS_PILOT_QUEUE_INCOMING_BROKER_TYPE=${{ matrix.broker_client }}
	        export EWMS_PILOT_QUEUE_OUTGOING_BROKER_TYPE=${{ matrix.broker_client }}

	        # make external directories -- used in like 1 test
	        export EWMS_PILOT_EXTERNAL_DIRECTORIES="/cvmfs/dummy-1/dir-A,/cvmfs/dummy-2/dir-B"
	        mkdir -p $(pwd)/cvmfs/dummy-1/dir-A
	        echo "alpha" > $(pwd)/cvmfs/dummy-1/dir-A/file.txt
	        mkdir -p $(pwd)/cvmfs/dummy-2/dir-B
	        echo "beta" > $(pwd)/cvmfs/dummy-2/dir-B/file.txt

	        test_offset_delay=5

	        # background pids of all launched tests, in launch order
	        pidlist=""
	        # every per-test apptainer workdir is collected here and removed by ONE
	        # trap; re-arming the trap per iteration would only clean the last dir
	        temp_dirs=""
	        trap 'rm -rf $temp_dirs' EXIT # save disk space

	        # iterate each test, starting each in its own container
	        cat $SORTED_LIST_OF_TESTS_FILE
	        i=0
	        while read test; do
	          echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
	          echo "starting test $test"
	          echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

	          # NOTE: re 'sleep N && ...': this allows all tests to be submitted in bulk,
	          # then waited for in order, PLUS it has a built-in delay to not overwhelm
	          # the broker. This way (as opposed to sleep between loop iterations), we
	          # can stop all the tests if one fails early on (like pytest --exit-first).

	          if [[ "${{ matrix.container_platform }}" == 'docker' ]]; then

	            set -x # lets see the command
	            sleep "$(($i*$test_offset_delay))" && docker run --rm \
	              --net=mynet --runtime=sysbox-runc --hostname=syscont \
	              --mount type=bind,source=$(pwd),target=/repo/,readonly \
	              --mount type=bind,source=$(pwd)/saved-images,target=/saved-images/ \
	              --mount type=bind,source=$(pwd)/cvmfs/dummy-1/dir-A,target=/cvmfs/dummy-1/dir-A,readonly \
	              --mount type=bind,source=$(pwd)/cvmfs/dummy-2/dir-B,target=/cvmfs/dummy-2/dir-B,readonly \
	              $(env | grep '^EWMS_' | awk '$0="--env "$0') \
	              $(env | grep '^_EWMS_' | awk '$0="--env "$0') \
	              --env CI_TEST_ALPINE_PYTHON_IMAGE="python:alpine" \
	              --env CI=$CI \
	              ${{ env.DOCKER_IMAGE_NAME }} /bin/bash -c "docker load -i /saved-images/python-alpine.tar && ls -l && ls -l / && docker images && pytest -vvv -s $test" \
	              >> $(basename $test).test.out 2>&1 & pidlist="$pidlist $!"
	            set +x

	          elif [[ "${{ matrix.container_platform }}" == "apptainer" ]]; then

	            # NOTE: we want to mimic how htcondor launches apptainer-enabled EPs

	            set -x # lets see the command
	            temp_dir=$(mktemp -d) && temp_dirs="$temp_dirs $temp_dir"
	            # '--containall --writable-tmpfs --no-eval' gets us close to docker functionality
	            # '--workdir' allows using host disk instead of memory
	            sleep "$(($i*$test_offset_delay))" && apptainer run \
	              --containall --writable-tmpfs --no-eval \
	              --mount type=bind,source=$(pwd),target=/repo/,readonly \
	              --mount type=bind,source=$(pwd)/saved-images,target=/saved-images/ \
	              --mount type=bind,source=$(pwd)/cvmfs/dummy-1/dir-A,target=/cvmfs/dummy-1/dir-A,readonly \
	              --mount type=bind,source=$(pwd)/cvmfs/dummy-2/dir-B,target=/cvmfs/dummy-2/dir-B,readonly \
	              $(env | grep '^EWMS_' | awk '$0="--env "$0') \
	              $(env | grep '^_EWMS_' | awk '$0="--env "$0') \
	              --env CI_TEST_ALPINE_PYTHON_IMAGE="/saved-images/python_alpine_sandbox/" \
	              --env CI_TEST_ALPINE_PYTHON_IMAGE_APPTAINER_SIF="/saved-images/python_alpine.sif" \
	              --env CI_TEST_ALPINE_PYTHON_IMAGE_APPTAINER_FROM_DOCKER="python:alpine" \
	              --env CI=$CI \
	              --workdir "$temp_dir" -B "$temp_dir" \
	              $(basename ${{ env.DOCKER_IMAGE_NAME }}).sif \
	              /bin/bash -c "ls -l && ls -l / && pytest -vvv -s /app/$test" \
	              >> $(basename $test).test.out 2>&1 & pidlist="$pidlist $!"
	            set +x

	          else

	            exit 2 # unknown container_platform

	          fi

	          sleep .1 # little sleep to help logs
	          i=$((i+1))
	        done < $SORTED_LIST_OF_TESTS_FILE

	        # wait for tests to finish
	        # https://stackoverflow.com/a/32604828/13156561
	        sleep 3 # short sleep to help logs
	        for pid in $pidlist; do
	          date --rfc-3339=seconds
	          echo "waiting for $pid..."
	          if ! wait -n $pid; then
	            echo "ERROR: test(s) failed (ctrl+f $pid to match)"
	            sleep 5 # may need to wait for output files to be written
	            kill $pidlist 2>/dev/null
	            exit 1
	          fi
	          echo "-> PASSED"
	        done

	    # prints the first output file containing a pytest FAILURES section and
	    # marks this step failed so it stands out in the run summary
	    - name: pytest first failure
	      if: always()
	      run: |
	        file=$(grep -l -m 1 "= FAILURES =" *.test.out | head -1)
	        if [ -n "$file" ]; then
	          cat $file
	          echo "^^^^ this is the pytest out file -- this step exited with 1 because this pytest failed -- the ci step is okay"
	          exit 1
	        else
	          echo "not found"
	        fi

	    - name: pytest container outputs
	      if: always()
	      run: |
	        cat $SORTED_LIST_OF_TESTS_FILE
	        while read test; do
	          echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
	          echo $test
	          echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
	          if [ -f $(basename $test).test.out ]; then
	            cat $(basename $test).test.out
	          else
	            echo "CI: output file does not exist"
	          fi
	        done < $SORTED_LIST_OF_TESTS_FILE

	    - name: broker output
	      if: always()
	      run: |
	        set -x
	        cat ./broker.out
	        if [ -n "$(ls -A ./broker_logs/ 2>/dev/null)" ]; then
	          ls ./broker_logs/
	          more ./broker_logs/* | cat # cats with filenames (delimited by :::::::)
	        fi

	    - name: broker docker logs
	      if: always()
	      run: |
	        docker logs $BROKER_CONTAINER_NAME || true


	#############################################################################
	# GITHUB & PYPI RELEASE
	#############################################################################


	# Runs after every other job has succeeded, on the main/master branch only:
	# semantic-release step first, then PyPI and GitHub publishing when that
	# step reports `released == 'true'`.
	release:
	  # only run on main/master/default
	  if: ${{ github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main' }}
	  needs:
	    - py-versions
	    - flake8
	    - mypy
	    - py-setup
	    - py-dependencies
	    - test-build-docker
	    - integration-tests
	    - vanilla-run
	  runs-on: ubuntu-latest
	  concurrency: release  # prevent any possible race conditions
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}  # using 'GITHUB_TOKEN' does not trigger ci on push
	    # NOTE(review): the three `uses` references below look mangled by e-mail
	    # obfuscation in this copy ("[email protected]"); restore the real
	    # `owner/repo@version` values before use.
	    # Python-Package Version Bump
	    - uses: python-semantic-release/[email protected]
	      id: psr-psr
	      with:
	        github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}  # using 'GITHUB_TOKEN' does not trigger ci on push
	    # PyPI Release
	    - uses: pypa/[email protected]
	      if: steps.psr-psr.outputs.released == 'true'
	      with:
	        password: ${{ secrets.PYPI_TOKEN }}
	    # GitHub Release
	    - uses: python-semantic-release/[email protected]
	      if: steps.psr-psr.outputs.released == 'true'
	      with:
	        github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}  # using 'GITHUB_TOKEN' does not trigger ci on push

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Misc Updates from Testing with Skymap Scanner (Pt 2) (#105) #1545

Workflow file

Misc Updates from Testing with Skymap Scanner (Pt 2) (#105) #1545

Jobs

Run details

Workflow file for this run