ci/cd

add test - 2 #87

Workflow file for this run

.github/workflows/wipac-cicd.yaml at c6415ac

	# Display name of the workflow.
	name: ci/cd

	# Triggered by pushes: every branch name matches the `branches` filter,
	# and every tag name is listed under `tags-ignore`.
	on:
	  push:
	    branches: ['**']
	    tags-ignore: ['**']

	jobs:

	###########################################################################
	# PACKAGING
	###########################################################################

	# Packaging job: checks the repository out with a personal access token and
	# hands this project's metadata to a WIPACrepo action.
	py-setup:
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
	    # NOTE(review): the action reference below was replaced by an
	    # "[email protected]" placeholder in this capture; restore the real
	    # `owner/repo@version` from the repository before using this file.
	    - uses: WIPACrepo/[email protected]
	      with:
	        # Quoted so the versions stay strings; an unquoted value such as
	        # 3.10 would be parsed as the float 3.1.
	        python_min: '3.11'
	        python_max: '3.11'
	        pypi_name: icecube-simprod-histogram
	        author: IceCube
	        # NOTE(review): redacted placeholder; restore the real address.
	        author_email: [email protected]
	        # Literal block scalar: the keyword line is passed through as written.
	        keywords: |
	          "histogram sampling" simulation statistics


	###########################################################################
	# LINTERS
	###########################################################################

	# Exposes a `matrix` output that the flake8, mypy and unit-tests jobs read
	# through `needs.py-versions.outputs.matrix`.
	py-versions:
	  runs-on: ubuntu-latest
	  outputs:
	    # Forwarded from the step whose id is `versions`.
	    matrix: ${{ steps.versions.outputs.matrix }}
	  steps:
	    - uses: actions/checkout@v3
	    # NOTE(review): the action reference below was replaced by an
	    # "[email protected]" placeholder in this capture; restore the real
	    # `owner/repo@version` from the repository.
	    - uses: WIPACrepo/[email protected]
	      id: versions

	# Lint job: one run per entry of the matrix published by `py-versions`.
	flake8:
	  needs:
	    - py-versions
	  runs-on: ubuntu-latest
	  strategy:
	    # Let the remaining matrix entries finish when one of them fails.
	    fail-fast: false
	    matrix:
	      py3: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
	  steps:
	    - uses: actions/checkout@v3
	    - uses: actions/setup-python@v4
	      with:
	        python-version: ${{ matrix.py3 }}
	    # NOTE(review): the action reference below was replaced by an
	    # "[email protected]" placeholder in this capture; restore the real
	    # `owner/repo@version` from the repository.
	    - uses: WIPACrepo/[email protected]

	# Type-check job: one run per entry of the matrix published by `py-versions`.
	mypy:
	  needs:
	    - py-versions
	  runs-on: ubuntu-latest
	  strategy:
	    # Let the remaining matrix entries finish when one of them fails.
	    fail-fast: false
	    matrix:
	      py3: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
	  steps:
	    - uses: actions/checkout@v3
	    - uses: actions/setup-python@v4
	      with:
	        python-version: ${{ matrix.py3 }}
	    # NOTE(review): the action reference below was replaced by an
	    # "[email protected]" placeholder in this capture; restore the real
	    # `owner/repo@version` from the repository.
	    - uses: WIPACrepo/[email protected]

	###########################################################################
	# FORMATTER
	###########################################################################

	# Runs Ruff's auto-fix for rule C408 over the checkout, then commits and
	# pushes whatever changed.
	code-format:
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v3
	    - uses: actions/setup-python@v4
	    - name: Run Ruff for code formatting
	      run: |
	        set -euo pipefail
	        pip install ruff
	        ruff check --select C408 --fix . --unsafe-fixes
	    - name: Commit formatted code
	      run: |
	        set -euo pipefail
	        git config user.name github-actions
	        # NOTE(review): the address below is a redacted placeholder from
	        # this capture; restore the real bot e-mail.
	        git config user.email [email protected]
	        git add .
	        # Best-effort by design: `|| true` keeps the job green when there
	        # is nothing to commit or the push is rejected.
	        git commit -m "<bot> auto code format file(s)" || true
	        git push || true

	###########################################################################
	# TESTS
	###########################################################################

	# Installs the package with its `tests` extra and runs the unit tests once
	# per entry of the matrix published by `py-versions`.
	unit-tests:
	  needs: [py-versions]
	  runs-on: ubuntu-latest
	  strategy:
	    # Let the remaining matrix entries finish when one of them fails.
	    fail-fast: false
	    matrix:
	      py3: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
	  steps:
	    - uses: actions/checkout@v3
	    - uses: actions/setup-python@v4
	      with:
	        python-version: ${{ matrix.py3 }}
	    - name: install
	      run: |
	        set -euo pipefail
	        # Quoted so the shell never treats `[tests]` as a glob pattern.
	        pip install '.[tests]'
	    - name: Run unit tests
	      run: |
	        set -euo pipefail
	        pytest -vvv tests/unit/

	# Integration test for scripts/sample-each-dataset.sh.
	#
	# Builds a fake /tmp/data/sim tree (48 dataset dirs, each with three
	# job-range subdirs holding 1-5 copied .pkl files), runs the script on one
	# `base_path` with a cap of `max_num_datasets`, then checks that the number
	# of '*.histo.hdf5' files under `base_path` equals
	# min(available datasets, max_num_datasets).
	test-sample-each-dataset-sh:
	  runs-on: ubuntu-latest
	  strategy:
	    fail-fast: false
	    matrix:
	      max_num_datasets:
	        - 1
	        - 25
	        - 100  # aka all of them, currently, there are 48
	      base_path:
	        - /tmp/data/sim/Upgrade/2022/generated/neutrino-generator/88888
	        - /tmp/data/sim/IceCube/2023/filtered/CORSIKA
	        - /tmp/data/sim/Upgrade/2022/filtered
	        - /tmp/data/sim/IceCube/2023
	        - /tmp/data/sim/Upgrade
	        - /tmp/data/sim
	  steps:
	    - name: Checkout repository
	      uses: actions/checkout@v3
	    - name: Set up Python environment
	      uses: actions/setup-python@v4

	    - name: Create source dataset dirs/files
	      run: |
	        set -euo pipefail
	        # Brace expansion yields every job-range directory path.
	        job_range_dpaths=(
	          /tmp/data/sim/{IceCube,Upgrade}/{2022,2023}/{generated,filtered}/{CORSIKA,neutrino-generator}/{77777,88888,99999}/{00-11,22-33,44-55}
	        )

	        for dpath in "${job_range_dpaths[@]}"; do
	          mkdir -p "$dpath"/histos/
	          # create 1-5 pkl files
	          for i in $( seq 1 "$(( (RANDOM % 5) + 1 ))" ); do
	            random_file=$(find "tests/data/simprod-histograms" -type f -name "*.pkl" | shuf -n 1)
	            cp "$random_file" "$dpath/histos/histo_$i.pkl"
	          done
	        done

	    - name: Look at filetree (before)
	      run: |
	        set -euo pipefail
	        tree /tmp/data/sim/

	    - name: Run script
	      run: |
	        set -euo pipefail
	        set -x
	        ./scripts/sample-each-dataset.sh "${{ matrix.base_path }}" 0.5 ${{ matrix.max_num_datasets }}

	    - name: Validate script execution
	      run: |
	        set -euo pipefail
	        echo "Max num of datasets: ${{ matrix.max_num_datasets }}"

	        # Count dataset directories, i.e. the unique parents of job-range
	        # directories (names like "00-11") found under base_path
	        available_datasets=$(find "${{ matrix.base_path }}" -type d -regex ".*/[0-9]+-[0-9]+$" -exec dirname {} \; | sort -u | wc -l)
	        echo "Available datasets: $available_datasets"

	        # Use the lesser of available_datasets and num_datasets for validation
	        expected_num_datasets=$(( available_datasets < ${{ matrix.max_num_datasets }} ? available_datasets : ${{ matrix.max_num_datasets }} ))
	        echo "Expected datasets: $expected_num_datasets"

	        # Check processed count
	        processed_count=$(find "${{ matrix.base_path }}" -name '*.histo.hdf5' | wc -l)
	        echo "Processed count: $processed_count"

	        if [[ $processed_count -ne $expected_num_datasets ]]; then
	          echo "Script did not process the expected number of datasets!"
	          exit 1
	        fi

	        echo "All tests passed."

	    - name: Look at filetree (after)
	      run: |
	        set -euo pipefail
	        tree /tmp/data/sim/

	# Integration test for cp-dataset-histos.sh.
	#
	# Creates an empty '<dataset>.histo.hdf5' in every source dataset dir and,
	# for the `overwrite` and `keep` settings, pre-creates roughly a quarter of
	# them under `dest_dir`. The oldest pre-created mtime is exported as
	# OLD_FILE_MODTIME (it stays 0 for `none`). After running the script
	# (with --force only for `overwrite`) the job compares file counts and
	# checks the oldest mtime found under `dest_dir` against that baseline.
	test-cp-dataset-histos-sh:
	  runs-on: ubuntu-latest
	  strategy:
	    fail-fast: false
	    matrix:
	      prev_histos_setting:
	        - none
	        - overwrite
	        - keep
	      base_path:
	        - /tmp/data/sim/IceCube/2023/generated/neutrino-generator
	        - /tmp/data/sim/Upgrade/2022/filtered/CORSIKA
	      dest_dir:
	        - /tmp/mycopy
	  env:
	    # Baseline used when nothing is pre-created; quoted because env values
	    # are strings.
	    OLD_FILE_MODTIME: '0'
	  steps:
	    - name: Checkout repository
	      uses: actions/checkout@v3
	    - name: Set up Python environment
	      uses: actions/setup-python@v4

	    - name: Create source dataset dirs/files
	      run: |
	        set -euo pipefail
	        dataset_dpaths=(
	          /tmp/data/sim/{IceCube,Upgrade}/{2022,2023}/{generated,filtered}/{CORSIKA,neutrino-generator}/{77777,88888,99999}/
	        )

	        for dpath in "${dataset_dpaths[@]}"; do
	          histo="$dpath"/"$(basename "$dpath").histo.hdf5"
	          mkdir -p "$(dirname "$histo")"
	          touch "$histo"
	          # pre-create some of these files in the destination
	          if [[ "${{ matrix.prev_histos_setting }}" == "overwrite" || "${{ matrix.prev_histos_setting }}" == "keep" ]]; then
	            if (( RANDOM % 100 < 25 )); then
	              relative_path="${dpath#*/sim/}"
	              dest_dataset_dir="${{ matrix.dest_dir }}/sim/$relative_path"
	              # the destination tree does not exist yet; without this the
	              # touch below fails and `set -e` aborts the step
	              mkdir -p "$dest_dataset_dir"
	              touch "$dest_dataset_dir"/"$(basename "$dest_dataset_dir").histo.hdf5"
	            fi
	          fi
	        done
	        # NOTE(review): files are pre-created for datasets outside base_path
	        # too, while the validation step compares counts under base_path with
	        # counts under the whole dest_dir -- confirm this is intended.

	        # set the oldest file's mod time
	        if [[ "${{ matrix.prev_histos_setting }}" == "overwrite" || "${{ matrix.prev_histos_setting }}" == "keep" ]]; then
	          oldest_modtime=$(find "${{ matrix.dest_dir }}" -name "*.histo.hdf5" -type f -exec stat --format='%Y' {} + | sort -n | head -1)
	          echo "OLD_FILE_MODTIME=$oldest_modtime" >> "$GITHUB_ENV"
	        fi

	    - name: Look at src filetree (before)
	      run: |
	        set -euo pipefail
	        tree /tmp/data/sim/

	    - name: Look at dest filetree (before)
	      run: |
	        set -euo pipefail
	        tree ${{ matrix.dest_dir }}

	    - name: Run script
	      run: |
	        set -euo pipefail
	        # must be initialised: under `set -u` an unset $force_flag aborts
	        # the step for the `none` and `keep` settings
	        force_flag=""
	        if [[ "${{ matrix.prev_histos_setting }}" == "overwrite" ]]; then
	          force_flag="--force"
	        fi
	        set -x
	        # NOTE(review): the other script in this workflow is invoked as
	        # ./scripts/sample-each-dataset.sh -- confirm this path is right.
	        # $force_flag is deliberately unquoted so an empty value adds no argument.
	        ./cp-dataset-histos.sh "${{ matrix.base_path }}" "${{ matrix.dest_dir }}" $force_flag

	    - name: Validate copied histograms
	      run: |
	        set -euo pipefail

	        src_count=$(find "${{ matrix.base_path }}" -name "*.histo.hdf5" | wc -l)
	        dest_count=$(find "${{ matrix.dest_dir }}" -name "*.histo.hdf5" | wc -l)
	        echo "Source histograms: $src_count"
	        echo "Copied histograms: $dest_count"
	        if [[ $src_count -ne $dest_count ]]; then
	          echo "Copied histograms count ($dest_count) does not match source histograms count ($src_count)!"
	          exit 1
	        fi

	        # check the overwriting settings
	        oldest_modtime=$(find "${{ matrix.dest_dir }}" -name "*.histo.hdf5" -type f -exec stat --format='%Y' {} + | sort -n | head -1)
	        echo "Oldest histo file modtime: $oldest_modtime"
	        case "${{ matrix.prev_histos_setting }}" in
	          none)
	            # oldest modtime should be younger (greater) than the
	            # previously-stored value, which is 0 for this setting
	            if [[ ! $oldest_modtime -gt $OLD_FILE_MODTIME ]]; then
	              echo "ERROR: there is an older file in here!" >&2
	              exit 1
	            fi
	            ;;
	          overwrite)
	            # oldest modtime should be younger (greater) than the
	            # previously-stored value; mtimes have one-second resolution,
	            # so an overwrite within the same second would not register
	            if [[ ! $oldest_modtime -gt $OLD_FILE_MODTIME ]]; then
	              echo "ERROR: there is an older file in here! aka script didn't overwrite" >&2
	              exit 1
	            fi
	            ;;
	          keep)
	            # oldest modtime should be the previously-stored value
	            if [[ $oldest_modtime -ne $OLD_FILE_MODTIME ]]; then
	              echo "ERROR: there is no older file in here! aka the scrip did overwrite" >&2
	              exit 1
	            fi
	            ;;
	          *)
	            # report the matrix value; no shell variable of that name is
	            # set in this step
	            echo "Error: Unknown value for prev_histos_setting: ${{ matrix.prev_histos_setting }}" >&2
	            exit 1
	            ;;
	        esac

	        echo "All tests passed for base_path=${{ matrix.base_path }} and dest_dir=${{ matrix.dest_dir }}."

	    - name: Look at dest filetree (after)
	      run: |
	        set -euo pipefail
	        tree ${{ matrix.dest_dir }}


	###########################################################################
	# RELEASE
	###########################################################################

	# Release job: evaluated only when the ref is refs/heads/main and only
	# after every job listed under `needs` has succeeded. The two publishing
	# steps run only when the version-bump step reports `released == 'true'`.
	release:
	  if: github.ref == 'refs/heads/main'
	  needs:
	    - py-setup
	    - flake8
	    - mypy
	    - code-format
	    - unit-tests
	    - test-sample-each-dataset-sh
	    - test-cp-dataset-histos-sh
	  runs-on: ubuntu-latest
	  concurrency: release  # prevent any possible race conditions
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	    # NOTE(review): the three action references below were replaced by
	    # "[email protected]" placeholders in this capture; restore the real
	    # `owner/repo@version` values from the repository.
	    # Python-Package Version Bump
	    - id: psr-psr
	      uses: python-semantic-release/[email protected]
	      with:
	        github_token: ${{ secrets.GITHUB_TOKEN }}
	    # PyPI Release
	    - if: steps.psr-psr.outputs.released == 'true'
	      uses: pypa/[email protected]
	      with:
	        password: ${{ secrets.WIPAC_PYPI_TOKEN }}
	    # GitHub Release
	    - if: steps.psr-psr.outputs.released == 'true'
	      uses: python-semantic-release/[email protected]
	      with:
	        github_token: ${{ secrets.GITHUB_TOKEN }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

add test - 2 #87

Workflow file

add test - 2 #87

Jobs

Run details

Workflow file for this run