fix test - 8 (existing histos) #97
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI/CD workflow: runs on pushes to any branch and never on tag pushes.
name: ci/cd

on:
  push:
    # '**' matches every branch name
    branches: ['**']
    # '**' excludes every tag
    tags-ignore: ['**']

jobs:
###########################################################################
# PACKAGING
###########################################################################
# Packaging job: checks the repository out with the PERSONAL_ACCESS_TOKEN
# secret, then runs a WIPACrepo action with the package metadata below.
py-setup:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
    # NOTE(review): "[email protected]" looks like e-mail-obfuscation residue
    # in place of the real "<action>@<version>" -- restore before use
    - uses: WIPACrepo/[email protected]
      with:
        # quoted so the versions stay strings (a bare 3.10 would load as 3.1)
        python_min: '3.11'
        python_max: '3.11'
        pypi_name: icecube-simprod-histogram
        author: IceCube
        # NOTE(review): the address also appears redacted -- restore it
        author_email: [email protected]
        # literal block scalar: the inner double quotes are part of the value
        keywords: |
          "histogram sampling" simulation statistics
###########################################################################
# LINTERS
###########################################################################
# Publishes the `matrix` output of the `versions` step as a job output;
# the flake8, mypy and unit-tests jobs expand it with fromJSON().
py-versions:
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.versions.outputs.matrix }}
  steps:
    # v4, matching the checkout version used by the py-setup and release jobs
    - uses: actions/checkout@v4
    # NOTE(review): "[email protected]" looks like e-mail-obfuscation residue
    # in place of the real "<action>@<version>" -- restore before use
    - id: versions
      uses: WIPACrepo/[email protected]
# Lint job: one run per entry of the matrix published by py-versions.
flake8:
  needs: [py-versions]
  runs-on: ubuntu-latest
  strategy:
    # let every Python version finish even if one of them fails
    fail-fast: false
    matrix:
      py3: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
  steps:
    # v4, matching the checkout version used by the py-setup and release jobs
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.py3 }}
    # NOTE(review): "[email protected]" looks like e-mail-obfuscation residue
    # in place of the real "<action>@<version>" -- restore before use
    - uses: WIPACrepo/[email protected]
# Type-check job: one run per entry of the matrix published by py-versions.
mypy:
  needs: [py-versions]
  runs-on: ubuntu-latest
  strategy:
    # let every Python version finish even if one of them fails
    fail-fast: false
    matrix:
      py3: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
  steps:
    # v4, matching the checkout version used by the py-setup and release jobs
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.py3 }}
    # NOTE(review): "[email protected]" looks like e-mail-obfuscation residue
    # in place of the real "<action>@<version>" -- restore before use
    - uses: WIPACrepo/[email protected]
###########################################################################
# FORMATTER
###########################################################################
# Formatter job: applies Ruff's C408 fixes to the checkout, then commits and
# pushes whatever changed.
code-format:
  runs-on: ubuntu-latest
  steps:
    # v4, matching the checkout version used by the py-setup and release jobs
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v4
    - name: Run Ruff for code formatting
      run: |
        set -euo pipefail
        pip install ruff
        ruff check --select C408 --fix . --unsafe-fixes
    - name: Commit formatted code
      run: |
        set -euo pipefail
        git config user.name github-actions
        # NOTE(review): "[email protected]" looks like e-mail-obfuscation
        # residue in place of the real address -- restore before use
        git config user.email [email protected]
        git add .
        # best-effort: '|| true' keeps the step green when the commit or the
        # push exits non-zero (e.g. nothing was staged)
        git commit -m "<bot> auto code format file(s)" || true
        git push || true
###########################################################################
# TESTS
###########################################################################
# Unit-test job: installs the package with its `tests` extra and runs pytest
# on tests/unit/, once per entry of the matrix published by py-versions.
unit-tests:
  needs: [py-versions]
  runs-on: ubuntu-latest
  strategy:
    # let every Python version finish even if one of them fails
    fail-fast: false
    matrix:
      py3: ${{ fromJSON(needs.py-versions.outputs.matrix) }}
  steps:
    # v4, matching the checkout version used by the py-setup and release jobs
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.py3 }}
    - name: install
      run: |
        set -euo pipefail
        # quoted so the shell cannot treat '[tests]' as a glob pattern
        pip install '.[tests]'
    - name: Run unit tests
      run: |
        set -euo pipefail
        pytest -vvv tests/unit/
# Integration test for scripts/sample-each-dataset.sh: builds a fake
# /tmp/data/sim tree of job-range dirs holding copied *.pkl histograms, runs
# the script on one src_path, then checks how many '*.histo.hdf5' files exist.
test-sample-each-dataset-sh:
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      max_num_datasets:
        - 1
        - 25
        - 100  # aka all of them, currently, there are 48
      src_path:
        - /tmp/data/sim/Upgrade/2022/generated/neutrino-generator/88888
        - /tmp/data/sim/IceCube/2023/filtered/CORSIKA
        - /tmp/data/sim/Upgrade/2022/filtered
        - /tmp/data/sim/IceCube/2023
        - /tmp/data/sim/Upgrade
        - /tmp/data/sim
  steps:
    - name: Checkout repository
      # v4, matching the checkout version used by the py-setup and release jobs
      uses: actions/checkout@v4
    - name: Set up Python environment
      uses: actions/setup-python@v4
    - name: Create source dataset dirs/files
      run: |
        set -euo pipefail
        # brace expansion yields every job-range dir of every dataset
        job_range_dpaths=(
          /tmp/data/sim/{IceCube,Upgrade}/{2022,2023}/{generated,filtered}/{CORSIKA,neutrino-generator}/{77777,88888,99999}/{00-11,22-33,44-55}
        )
        for dpath in "${job_range_dpaths[@]}"; do
          mkdir -p "$dpath"/histos/
          # create 1-5 pkl files
          for i in $( seq 1 "$(( (RANDOM % 5) + 1 ))" ); do
            random_file=$(find "tests/data/simprod-histograms" -type f -name "*.pkl" | shuf -n 1)
            cp "$random_file" "$dpath/histos/histo_$i.pkl"
          done
        done
    - name: Look at filetree (before)
      run: |
        set -euo pipefail
        tree /tmp/data/sim/
    - name: Run script
      run: |
        set -euo pipefail
        set -x
        ./scripts/sample-each-dataset.sh "${{ matrix.src_path }}" 0.5 ${{ matrix.max_num_datasets }}
    - name: Validate script execution
      run: |
        set -euo pipefail
        echo "Max num of datasets: ${{ matrix.max_num_datasets }}"
        # Count dataset directories, i.e. the distinct parent dirs of the
        # job-range dirs (named '<digits>-<digits>') found under src_path
        available_datasets=$(find "${{ matrix.src_path }}" -type d -regex ".*/[0-9]+-[0-9]+$" -exec dirname {} \; | sort -u | wc -l)
        echo "Available datasets: $available_datasets"
        # Use the lesser of available_datasets and max_num_datasets for validation
        expected_num_datasets=$(( available_datasets < ${{ matrix.max_num_datasets }} ? available_datasets : ${{ matrix.max_num_datasets }} ))
        echo "Expected datasets: $expected_num_datasets"
        # Check processed count: number of '*.histo.hdf5' files under src_path
        processed_count=$(find "${{ matrix.src_path }}" -name '*.histo.hdf5' | wc -l)
        echo "Processed count: $processed_count"
        if [[ $processed_count -ne $expected_num_datasets ]]; then
          echo "Script did not process the expected number of datasets!"
          exit 1
        fi
        echo "All tests passed."
    - name: Look at filetree (after)
      run: |
        set -euo pipefail
        tree /tmp/data/sim/
# Integration test for scripts/cp-dataset-histos.sh: creates one empty
# '<dataset>.histo.hdf5' per dataset under /tmp/data/sim, optionally
# pre-creates some of them under $DEST_DIR, runs the script (with --force for
# 'overwrite'), then compares file counts and the oldest modification time.
test-cp-dataset-histos-sh:
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      prev_histos_setting:
        - none
        - overwrite
        - keep
      src_path:
        - /tmp/data/sim/IceCube/2023/generated/neutrino-generator
        - /tmp/data/sim/Upgrade/2022/
  env:
    DEST_DIR: /tmp/mycopy
    # default for the 'none' setting; replaced through $GITHUB_ENV otherwise
    OLD_FILE_MODTIME: 0
  steps:
    - name: Checkout repository
      # v4, matching the checkout version used by the py-setup and release jobs
      uses: actions/checkout@v4
    - name: Set up Python environment
      uses: actions/setup-python@v4
    - name: Create source dataset dirs/files
      run: |
        set -euo pipefail
        dataset_dpaths=(
          /tmp/data/sim/{IceCube,Upgrade}/{2022,2023}/{generated,filtered}/{CORSIKA,neutrino-generator}/{77777,88888,99999}
        )
        # how many 'existing' histo files were pre-created in the destination
        num_precreated=0
        for dpath in "${dataset_dpaths[@]}"; do
          histo="$dpath/$(basename "$dpath").histo.hdf5"
          mkdir -p "$(dirname "$histo")"
          set -x
          touch "$histo"
          set +x
          # pre-create some of these files in the destination
          if [[ "${{ matrix.prev_histos_setting }}" == "overwrite" || "${{ matrix.prev_histos_setting }}" == "keep" ]]; then
            echo "creating 'existing' histo file (25% chance)"
            # only datasets under src_path qualify; the first qualifying one is
            # always pre-created so that at least one 'existing' file is
            # guaranteed, the remaining ones only 25% of the time
            if [[ "$dpath" == "${{ matrix.src_path }}"* ]] && (( num_precreated == 0 || RANDOM % 100 < 25 )); then
              relative_path="${dpath#*/sim/}"
              dest_dataset_dir="$DEST_DIR/sim/$relative_path"
              mkdir -p "$dest_dataset_dir"
              set -x
              touch "$dest_dataset_dir"/"$(basename "$dest_dataset_dir").histo.hdf5"
              set +x
              # assignment form: '(( n++ ))' returns non-zero when n was 0,
              # which would abort the step under 'set -e'
              num_precreated=$(( num_precreated + 1 ))
            else
              echo "nevermind :o)"
            fi
          fi
        done
        # set the oldest file's mod time
        if [[ "${{ matrix.prev_histos_setting }}" == "overwrite" || "${{ matrix.prev_histos_setting }}" == "keep" ]]; then
          # without a pre-created file, $DEST_DIR does not exist and the
          # validation step would have nothing to compare against
          if (( num_precreated == 0 )); then
            echo "ERROR: no 'existing' histo file was pre-created under $DEST_DIR" >&2
            exit 1
          fi
          oldest_modtime=$(find "$DEST_DIR" -name "*.histo.hdf5" -type f -exec stat --format='%Y' {} + | sort -n | head -1)
          echo "OLD_FILE_MODTIME=$oldest_modtime" >> "$GITHUB_ENV"
        fi
    - name: Look at src filetree (before)
      run: |
        set -euo pipefail
        tree /tmp/data/sim/
    - name: Look at dest filetree (before)
      run: |
        set -euo pipefail
        tree "$DEST_DIR" || echo "no files here"
    - name: Run script
      run: |
        set -euo pipefail
        if [[ "${{ matrix.prev_histos_setting }}" == "overwrite" ]]; then
          force_flag="--force"
        else
          force_flag=""
        fi
        set -x
        # $force_flag stays unquoted on purpose: when empty it must expand to
        # no argument at all
        ./scripts/cp-dataset-histos.sh "${{ matrix.src_path }}" "$DEST_DIR" $force_flag
    - name: Validate copied histograms
      run: |
        set -euo pipefail
        src_count=$(find "${{ matrix.src_path }}" -name "*.histo.hdf5" | wc -l)
        dest_count=$(find "$DEST_DIR" -name "*.histo.hdf5" | wc -l)
        echo "Source histograms: $src_count"
        echo "Copied histograms: $dest_count"
        if [[ $src_count -ne $dest_count ]]; then
          echo "Copied histograms count ($dest_count) does not match source histograms count ($src_count)!"
          exit 1
        fi
        # check the overwriting settings
        # NOTE(review): '%Y' has one-second granularity, so the comparisons
        # below assume the script ran at least one second after the files
        # were pre-created -- confirm
        oldest_modtime=$(find "$DEST_DIR" -name "*.histo.hdf5" -type f -exec stat --format='%Y' {} + | sort -n | head -1)
        echo "Oldest histo file modtime: $oldest_modtime"
        echo "Previous oldest histo file modtime: $OLD_FILE_MODTIME"
        case "${{ matrix.prev_histos_setting }}" in
          none)
            # oldest modtime should be younger (greater) than previously-stored value
            if [[ $oldest_modtime -le $OLD_FILE_MODTIME ]]; then
              echo "ERROR: there is an older file in here!" >&2
              exit 1
            fi
            ;;
          overwrite)
            # oldest modtime should be younger (greater) than previously-stored value
            if [[ $oldest_modtime -le $OLD_FILE_MODTIME ]]; then
              echo "ERROR: there is an older file in here! aka script didn't overwrite" >&2
              exit 1
            fi
            ;;
          keep)
            # oldest modtime should be the previously-stored value
            if [[ $oldest_modtime -ne $OLD_FILE_MODTIME ]]; then
              echo "ERROR: there is no older file in here! aka the scrip did overwrite" >&2
              exit 1
            fi
            ;;
          *)
            # the setting only exists as a matrix value, not as a shell variable
            echo "Error: Unknown value for prev_histos_setting: ${{ matrix.prev_histos_setting }}" >&2
            exit 1
            ;;
        esac
        echo "All tests passed for src_path=${{ matrix.src_path }} and dest_dir=$DEST_DIR."
    - name: Look at dest filetree (after)
      run: |
        set -euo pipefail
        tree "$DEST_DIR"
###########################################################################
# RELEASE
###########################################################################
# Release job: runs only for refs/heads/main, after every job listed under
# `needs`; the last two steps run only when the `psr-psr` step reports
# released == 'true'.
release:
  if: github.ref == 'refs/heads/main'
  needs:
    - py-setup
    - flake8
    - mypy
    - code-format
    - unit-tests
    - test-sample-each-dataset-sh
    - test-cp-dataset-histos-sh
  runs-on: ubuntu-latest
  concurrency: release  # prevent any possible race conditions
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    # Python-Package Version Bump
    # NOTE(review): each "[email protected]" in this job looks like
    # e-mail-obfuscation residue in place of the real "<action>@<version>"
    - id: psr-psr
      uses: python-semantic-release/[email protected]
      with:
        github_token: ${{ secrets.GITHUB_TOKEN }}

    # PyPI Release
    - if: steps.psr-psr.outputs.released == 'true'
      uses: pypa/[email protected]
      with:
        password: ${{ secrets.WIPAC_PYPI_TOKEN }}

    # GitHub Release
    - if: steps.psr-psr.outputs.released == 'true'
      uses: python-semantic-release/[email protected]
      with:
        github_token: ${{ secrets.GITHUB_TOKEN }}