Skip to content

fix test - 9 (existing histos) #98

fix test - 9 (existing histos)

fix test - 9 (existing histos) #98

Workflow file for this run

name: ci/cd

# Trigger: every push to any branch; pushes of tags are ignored.
on:
  push:
    branches:
      - '**'
    tags-ignore:
      - '**'

# All jobs of this workflow are defined under this key.
jobs:
###########################################################################
# PACKAGING
###########################################################################
py-setup:
  # Packaging job: runs the WIPAC python-setup action with this project's metadata.
  runs-on: ubuntu-latest
  steps:
    # Checks out with a personal access token instead of the default token.
    # NOTE(review): presumably so that commits pushed by the setup action can
    # trigger further workflow runs -- confirm.
    - uses: actions/checkout@v4
      with:
        token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
    # NOTE(review): `[email protected]` (in this `uses:` ref and in `author_email`)
    # looks like an e-mail-obfuscation artifact of a web capture; restore the
    # real `<action>@<version>` ref and the real address before running.
    - uses: WIPACrepo/[email protected]
      with:
        # Versions are quoted so YAML keeps them as strings; an unquoted
        # value such as 3.10 would be parsed as the float 3.1.
        python_min: '3.11'
        python_max: '3.11'
        pypi_name: icecube-simprod-histogram
        author: IceCube
        author_email: [email protected]
        keywords: |
          "histogram sampling" simulation statistics
###########################################################################
# LINTERS
###########################################################################
py-versions:
  # Publishes the output of the `versions` step as the job output `matrix`;
  # the lint and unit-test jobs read it through
  # `fromJSON(needs.py-versions.outputs.matrix)`.
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.versions.outputs.matrix }}
  steps:
    # checkout@v4 matches the version already used by `py-setup` and `release`
    - uses: actions/checkout@v4
    # NOTE(review): `[email protected]` looks like an e-mail-obfuscation artifact
    # of a web capture; restore the real `<action>@<version>` ref.
    - id: versions
      uses: WIPACrepo/[email protected]
flake8:
  # Lint job, run once per Python version emitted by `py-versions`.
  needs:
    - py-versions
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false  # keep the other versions running when one fails
    matrix:
      py3: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
  steps:
    # checkout@v4 matches the version already used by `py-setup` and `release`
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.py3 }}
    # NOTE(review): `[email protected]` looks like an e-mail-obfuscation artifact
    # of a web capture (presumably the WIPAC flake8 action); restore the
    # real `<action>@<version>` ref.
    - uses: WIPACrepo/[email protected]
mypy:
  # Type-check job, run once per Python version emitted by `py-versions`.
  needs:
    - py-versions
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false  # keep the other versions running when one fails
    matrix:
      py3: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
  steps:
    # checkout@v4 matches the version already used by `py-setup` and `release`
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.py3 }}
    # NOTE(review): `[email protected]` looks like an e-mail-obfuscation artifact
    # of a web capture (presumably the WIPAC mypy action); restore the
    # real `<action>@<version>` ref.
    - uses: WIPACrepo/[email protected]
###########################################################################
# FORMATTER
###########################################################################
code-format:
  # Applies Ruff's C408 auto-fix to the checkout and pushes the result back.
  runs-on: ubuntu-latest
  steps:
    # checkout@v4 matches the version already used by `py-setup` and `release`
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v4
    - name: Run Ruff for code formatting
      run: |
        set -euo pipefail
        pip install ruff
        ruff check --select C408 --fix . --unsafe-fixes
    - name: Commit formatted code
      run: |
        set -euo pipefail
        git config user.name github-actions
        # NOTE(review): the captured file had the garbled text
        # `[email protected]` here (which the shell splits into two words);
        # the conventional github-actions bot address is assumed -- confirm.
        git config user.email github-actions@github.com
        git add .
        # best effort: "nothing to commit" and a rejected push are tolerated
        git commit -m "<bot> auto code format file(s)" || true
        git push || true
###########################################################################
# TESTS
###########################################################################
unit-tests:
  # Runs the pytest unit suite once per Python version emitted by `py-versions`.
  needs:
    - py-versions
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false  # keep the other versions running when one fails
    matrix:
      py3: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
  steps:
    # checkout@v4 matches the version already used by `py-setup` and `release`
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.py3 }}
    - name: install
      run: |
        set -euo pipefail
        # quoted so the shell never treats `[tests]` as a glob pattern
        pip install '.[tests]'
    - name: Run unit tests
      run: |
        set -euo pipefail
        pytest -vvv tests/unit/
test-sample-each-dataset-sh:
  # Runs scripts/sample-each-dataset.sh against a synthetic /tmp/data/sim tree
  # and checks how many '*.histo.hdf5' files exist under the source path
  # afterwards.
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      max_num_datasets:
        - 1
        - 25
        - 100  # aka all of them, currently, there are 48
      src_path:
        - /tmp/data/sim/Upgrade/2022/generated/neutrino-generator/88888
        - /tmp/data/sim/IceCube/2023/filtered/CORSIKA
        - /tmp/data/sim/Upgrade/2022/filtered
        - /tmp/data/sim/IceCube/2023
        - /tmp/data/sim/Upgrade
        - /tmp/data/sim
  steps:
    # checkout@v4 matches the version already used by `py-setup` and `release`
    - name: Checkout repository
      uses: actions/checkout@v4
    - name: Set up Python environment
      uses: actions/setup-python@v4
    - name: Create source dataset dirs/files
      run: |
        set -euo pipefail
        # 48 dataset dirs (2 x 2 x 2 x 2 x 3), each with 3 job-range subdirs
        job_range_dpaths=(
          /tmp/data/sim/{IceCube,Upgrade}/{2022,2023}/{generated,filtered}/{CORSIKA,neutrino-generator}/{77777,88888,99999}/{00-11,22-33,44-55}
        )
        # list the sample pickles once instead of re-running 'find' per copy
        mapfile -t sample_pkls < <(find "tests/data/simprod-histograms" -type f -name "*.pkl")
        if (( ${#sample_pkls[@]} == 0 )); then
          echo "ERROR: no sample *.pkl files found in tests/data/simprod-histograms" >&2
          exit 1
        fi
        for dpath in "${job_range_dpaths[@]}"; do
          mkdir -p "$dpath"/histos/
          # create 1-5 pkl files, each a copy of a randomly chosen sample
          for i in $( seq 1 "$(( (RANDOM % 5) + 1 ))" ); do
            random_file="${sample_pkls[RANDOM % ${#sample_pkls[@]}]}"
            cp "$random_file" "$dpath/histos/histo_$i.pkl"
          done
        done
    - name: Look at filetree (before)
      run: |
        set -euo pipefail
        tree /tmp/data/sim/
    - name: Run script
      run: |
        set -euo pipefail
        set -x
        ./scripts/sample-each-dataset.sh "${{ matrix.src_path }}" 0.5 ${{ matrix.max_num_datasets }}
    - name: Validate script execution
      run: |
        set -euo pipefail
        echo "Max num of datasets: ${{ matrix.max_num_datasets }}"
        # Count the dataset directories under the source path, i.e. the unique
        # parents of the job-range ("<digits>-<digits>") directories
        available_datasets=$(find "${{ matrix.src_path }}" -type d -regex ".*/[0-9]+-[0-9]+$" -exec dirname {} \; | sort -u | wc -l)
        echo "Available datasets: $available_datasets"
        # Use the lesser of available_datasets and max_num_datasets for validation
        expected_num_datasets=$(( available_datasets < ${{ matrix.max_num_datasets }} ? available_datasets : ${{ matrix.max_num_datasets }} ))
        echo "Expected datasets: $expected_num_datasets"
        # Check processed count: one '*.histo.hdf5' file is expected per dataset
        processed_count=$(find "${{ matrix.src_path }}" -name '*.histo.hdf5' | wc -l)
        echo "Processed count: $processed_count"
        if [[ $processed_count -ne $expected_num_datasets ]]; then
          echo "Script did not process the expected number of datasets!"
          exit 1
        fi
        echo "All tests passed."
    - name: Look at filetree (after)
      run: |
        set -euo pipefail
        tree /tmp/data/sim/
test-cp-dataset-histos-sh:
  # Runs scripts/cp-dataset-histos.sh from a synthetic source tree into
  # $DEST_DIR and validates the copy for three scenarios:
  #   none      - the destination holds no histo files beforehand
  #   overwrite - some destination histos pre-exist; script runs with --force
  #   keep      - some destination histos pre-exist; script runs without --force
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      prev_histos_setting:
        - none
        - overwrite
        - keep
      src_path:
        - /tmp/data/sim/IceCube/2023/generated/neutrino-generator
        - /tmp/data/sim/Upgrade/2022/
  env:
    DEST_DIR: /tmp/mycopy
    # default for the 'none' scenario; replaced through $GITHUB_ENV otherwise
    OLD_FILE_MODTIME: '0'
  steps:
    # checkout@v4 matches the version already used by `py-setup` and `release`
    - name: Checkout repository
      uses: actions/checkout@v4
    - name: Set up Python environment
      uses: actions/setup-python@v4
    - name: Create source dataset dirs/files
      run: |
        set -euo pipefail
        dataset_dpaths=(
          /tmp/data/sim/{IceCube,Upgrade}/{2022,2023}/{generated,filtered}/{CORSIKA,neutrino-generator}/{77777,88888,99999}
        )
        # number of 'existing' histo files pre-created in the destination
        num_precreated=0
        for dpath in "${dataset_dpaths[@]}"; do
          echo
          echo "adding: $dpath"
          histo="$dpath/$(basename "$dpath").histo.hdf5"
          mkdir -p "$(dirname "$histo")"
          set -x
          touch "$histo"
          set +x
          # pre-create some of these files in the destination
          if [[ "${{ matrix.prev_histos_setting }}" == "overwrite" || "${{ matrix.prev_histos_setting }}" == "keep" ]]; then
            echo "creating 'existing' histo file (always for the first match, then 25% chance)"
            # check that this histo would be touched by the script; the first
            # match is always pre-created so the modtime lookup below never
            # runs on an empty/missing destination, the rest 25% of the time
            if [[ "$dpath" == "${{ matrix.src_path }}"* ]] && (( num_precreated == 0 || RANDOM % 100 < 25 )); then
              relative_path="${dpath#*/sim/}"
              dest_dataset_dir="$DEST_DIR/sim/$relative_path"
              mkdir -p "$dest_dataset_dir"
              set -x
              touch "$dest_dataset_dir"/"$(basename "$dest_dataset_dir").histo.hdf5"
              set +x
              # not '(( num_precreated++ ))': that returns 1 when the old
              # value is 0, which would abort the step under 'set -e'
              num_precreated=$(( num_precreated + 1 ))
            else
              echo "nevermind :o)"
            fi
          fi
        done
        # set the oldest file's mod time
        if [[ "${{ matrix.prev_histos_setting }}" == "overwrite" || "${{ matrix.prev_histos_setting }}" == "keep" ]]; then
          oldest_modtime=$(find "$DEST_DIR" -name "*.histo.hdf5" -type f -exec stat --format='%Y' {} + | sort -n | head -1)
          echo "OLD_FILE_MODTIME=$oldest_modtime" >> "$GITHUB_ENV"
          # modtimes are compared in whole seconds; wait so that anything the
          # script writes later is strictly newer than the pre-created files
          sleep 1
        fi
    - name: Look at src filetree (before)
      run: |
        set -euo pipefail
        tree /tmp/data/sim/
    - name: Look at dest filetree (before)
      run: |
        set -euo pipefail
        tree "$DEST_DIR" || echo "no files here"
    - name: Run script
      run: |
        set -euo pipefail
        if [[ "${{ matrix.prev_histos_setting }}" == "overwrite" ]]; then
          force_flag="--force"
        else
          force_flag=""
        fi
        set -x
        # $force_flag is deliberately unquoted: when empty it must expand to
        # no argument at all rather than to an empty-string argument
        ./scripts/cp-dataset-histos.sh "${{ matrix.src_path }}" "$DEST_DIR" $force_flag
    - name: Validate copied histograms
      run: |
        set -euo pipefail
        src_count=$(find "${{ matrix.src_path }}" -name "*.histo.hdf5" | wc -l)
        dest_count=$(find "$DEST_DIR" -name "*.histo.hdf5" | wc -l)
        echo "Source histograms: $src_count"
        echo "Copied histograms: $dest_count"
        if [[ $src_count -ne $dest_count ]]; then
          echo "Copied histograms count ($dest_count) does not match source histograms count ($src_count)!"
          exit 1
        fi
        # check the overwriting settings
        oldest_modtime=$(find "$DEST_DIR" -name "*.histo.hdf5" -type f -exec stat --format='%Y' {} + | sort -n | head -1)
        echo "Oldest histo file modtime: $oldest_modtime"
        echo "Previous oldest histo file modtime: $OLD_FILE_MODTIME"
        case "${{ matrix.prev_histos_setting }}" in
          none)
            # oldest modtime should be younger (greater) than previously-stored value
            if [[ $oldest_modtime -le $OLD_FILE_MODTIME ]]; then
              echo "ERROR: there is an older file in here!" >&2
              exit 1
            fi
            ;;
          overwrite)
            # oldest modtime should be younger (greater) than previously-stored value
            if [[ $oldest_modtime -le $OLD_FILE_MODTIME ]]; then
              echo "ERROR: there is an older file in here! aka script didn't overwrite" >&2
              exit 1
            fi
            ;;
          keep)
            # oldest modtime should be the previously-stored value
            if [[ $oldest_modtime -ne $OLD_FILE_MODTIME ]]; then
              echo "ERROR: there is no older file in here! aka the script did overwrite" >&2
              exit 1
            fi
            ;;
          *)
            # uses the workflow expression: no shell variable of that name
            # exists, so '$prev_histos_setting' would trip 'set -u'
            echo "Error: Unknown value for prev_histos_setting: ${{ matrix.prev_histos_setting }}" >&2
            exit 1
            ;;
        esac
        echo "All tests passed for src_path=${{ matrix.src_path }} and dest_dir=$DEST_DIR."
    - name: Look at dest filetree (after)
      run: |
        set -euo pipefail
        tree "$DEST_DIR"
###########################################################################
# RELEASE
###########################################################################
release:
  # Runs only for the main branch, after every other job has succeeded:
  # version bump, then (only if a release was made) PyPI upload and GitHub
  # release.
  needs:
    - py-setup
    - flake8
    - mypy
    - code-format
    - unit-tests
    - test-sample-each-dataset-sh
    - test-cp-dataset-histos-sh
  if: github.ref == 'refs/heads/main'
  runs-on: ubuntu-latest
  concurrency: release  # prevent any possible race conditions
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
    # NOTE(review): each `[email protected]` below looks like an e-mail-obfuscation
    # artifact of a web capture; restore the real `<action>@<version>` refs.

    # Python-Package Version Bump
    - id: psr-psr
      uses: python-semantic-release/[email protected]
      with:
        github_token: ${{ secrets.GITHUB_TOKEN }}

    # PyPI Release
    - if: steps.psr-psr.outputs.released == 'true'
      uses: pypa/[email protected]
      with:
        password: ${{ secrets.WIPAC_PYPI_TOKEN }}

    # GitHub Release
    - if: steps.psr-psr.outputs.released == 'true'
      uses: python-semantic-release/[email protected]
      with:
        github_token: ${{ secrets.GITHUB_TOKEN }}