Skip to content

Commit

Permalink
Merge branch 'master' into main_dev
Browse files Browse the repository at this point in the history
  • Loading branch information
lgray authored Oct 17, 2023
2 parents 3980198 + 8662c9f commit eff2556
Show file tree
Hide file tree
Showing 17 changed files with 333 additions and 236 deletions.
18 changes: 9 additions & 9 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
name: pre-commit
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
- uses: pre-commit/action@v3.0.0
with:
Expand All @@ -45,7 +45,7 @@ jobs:
name: test coffea (${{ matrix.os }}) - python ${{ matrix.python-version }}, JDK${{ matrix.java-version }}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
Expand All @@ -69,7 +69,7 @@ jobs:
python -m pip install xgboost
python -m pip install tritonclient[grpc,http]
# install checked out coffea
python -m pip install -q -e '.[dev,parsl,dask,spark]'
python -m pip install -q -e '.[dev,parsl,dask,spark]' --upgrade --upgrade-strategy eager
python -m pip list
java -version
- name: Install dependencies (MacOS)
Expand All @@ -80,7 +80,7 @@ jobs:
python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
python -m pip install xgboost
# install checked out coffea
python -m pip install -q -e '.[dev,dask,spark]'
python -m pip install -q -e '.[dev,dask,spark]' --upgrade --upgrade-strategy eager
python -m pip list
java -version
- name: Install dependencies (Windows)
Expand All @@ -91,14 +91,14 @@ jobs:
python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
python -m pip install xgboost
# install checked out coffea
python -m pip install -q -e '.[dev,dask]'
python -m pip install -q -e '.[dev,dask]' --upgrade --upgrade-strategy eager
python -m pip list
java -version
- name: Start triton server with example model
if: matrix.os == 'ubuntu-latest'
run: |
docker run -d --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${{ github.workspace }}/tests/samples/triton_models_test:/models nvcr.io/nvidia/tritonserver:23.04-py3 tritonserver --model-repository=/models
docker run -d --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${{ github.workspace }}/tests/samples/triton_models_test:/models nvcr.io/nvidia/tritonserver:23.04-pyt-python-py3 tritonserver --model-repository=/models
- name: Test with pytest
run: |
Expand All @@ -119,7 +119,7 @@ jobs:
touch build/html/.nojekyll
- name: Deploy documentation
if: github.event_name == 'push' && matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11
uses: crazy-max/ghaction-github-pages@v3
uses: crazy-max/ghaction-github-pages@v4
with:
target_branch: gh-pages
build_dir: docs/build/html
Expand All @@ -135,7 +135,7 @@ jobs:
name: test coffea-workqueue

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Conda
uses: conda-incubator/setup-miniconda@v2
env:
Expand Down Expand Up @@ -185,7 +185,7 @@ jobs:
name: deploy release

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ jobs:
name: Validate PR title
runs-on: ubuntu-latest
steps:
- uses: amannn/action-semantic-pull-request@v5.2.0
- uses: amannn/action-semantic-pull-request@v5.3.0
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ ci:
for more information, see https://pre-commit.ci
autofix_prs: true
autoupdate_branch: ''
autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
autoupdate_commit_msg: 'ci(pre-commit): pre-commit autoupdate'
autoupdate_schedule: weekly
skip: []
submodules: false

repos:
- repo: https://github.com/psf/black
rev: 23.7.0
rev: 23.9.1
hooks:
- id: black

Expand All @@ -24,7 +24,7 @@ repos:
args: ["--profile", "black", "--filter-files"]

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: check-case-conflict
- id: check-merge-conflict
Expand All @@ -37,24 +37,24 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/asottile/pyupgrade
rev: v3.9.0
rev: v3.15.0
hooks:
- id: pyupgrade
args: ["--py38-plus"]

- repo: https://github.com/asottile/setup-cfg-fmt
rev: v2.4.0
rev: v2.5.0
hooks:
- id: setup-cfg-fmt

- repo: https://github.com/pycqa/flake8
rev: 6.0.0
rev: 6.1.0
hooks:
- id: flake8
exclude: coffea/processor/templates

- repo: https://github.com/codespell-project/codespell
rev: v2.2.5
rev: v2.2.6
hooks:
- id: codespell
args: ["--skip=*.ipynb","-L hist,Hist,nd,SubJet,subjet,Subjet,PTD,ptd,fPt,fpt,Ser,ser"]
14 changes: 7 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,17 @@ classifiers = [
"Topic :: Utilities",
]
dependencies = [
"awkward>=2.3.3",
"uproot>=5.0.10",
"awkward>=2.4.6",
"uproot>=5.1.1",
"dask[array]>=2023.4.0",
"dask-awkward>=2023.7.1,!=2023.8.0",
"dask-histogram>=2023.6.0",
"correctionlib>=2.0.0",
"dask-awkward>=2023.10.0",
"dask-histogram>=2023.10.0",
"correctionlib>=2.3.3",
"pyarrow>=6.0.0",
"fsspec",
"matplotlib>=3",
"numba>=0.57.0",
"numpy>=1.22.0,<1.25", # < 1.25 for numba 0.57 series
"numba>=0.58.0",
"numpy>=1.22.0,<1.26", # < 1.26 for numba 0.58 series
"scipy>=1.1.0",
"tqdm>=4.27.0",
"lz4",
Expand Down
101 changes: 61 additions & 40 deletions src/coffea/analysis_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def variations(self):


class NminusOneToNpz:
"""Object to be returned by NmiusOne.to_npz()"""
"""Object to be returned by NminusOne.to_npz()"""

def __init__(self, file, labels, nev, masks, saver):
self._file = file
Expand Down Expand Up @@ -494,11 +494,17 @@ def maskscutflow(self):
return self._maskscutflow

def compute(self):
self._nevonecut = list(dask.compute(*self._nevonecut))
self._nevcutflow = list(dask.compute(*self._nevcutflow))
self._masksonecut = list(dask.compute(*self._masksonecut))
self._maskscutflow = list(dask.compute(*self._maskscutflow))
numpy.savez(
self._nevonecut, self._nevcutflow = dask.compute(
self._nevonecut, self._nevcutflow
)
self._masksonecut, self._maskscutflow = dask.compute(
self._masksonecut, self._maskscutflow
)
self._nevonecut = list(self._nevonecut)
self._nevcutflow = list(self._nevcutflow)
self._masksonecut = list(self._masksonecut)
self._maskscutflow = list(self._maskscutflow)
self._saver(
self._file,
labels=self._labels,
nevonecut=self._nevonecut,
Expand Down Expand Up @@ -538,7 +544,7 @@ def result(self):
labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"]
return NminusOneResult(labels, self._nev, self._masks)

def to_npz(self, file, compressed=False, compute=True):
def to_npz(self, file, compressed=False, compute=False):
"""Saves the results of the N-1 selection to a .npz file
Parameters
Expand All @@ -554,7 +560,7 @@ def to_npz(self, file, compressed=False, compute=True):
compute : bool, optional
Whether to immediately start writing or to return an object
that the user can choose when to start writing by calling compute().
Default is True.
Default is False.
Returns
-------
Expand All @@ -580,22 +586,29 @@ def print(self):
"""Prints the statistics of the N-1 selection"""

if self._delayed_mode:
warnings.warn(
"Printing the N-1 selection statistics is going to compute dask_awkward objects."
)
self._nev = list(dask.compute(*self._nev))

nev = self._nev
print("N-1 selection stats:")
for i, name in enumerate(self._names):
print(
f"Ignoring {name:<20}: pass = {nev[i+1]:<20}\
all = {nev[0]:<20}\
-- eff = {nev[i+1]*100/nev[0]:.1f} %"
stats = (
f"Ignoring {name:<20}"
f"pass = {nev[i+1]:<20}"
f"all = {nev[0]:<20}"
f"-- eff = {nev[i+1]*100/nev[0]:.1f} %"
)
print(stats)

if True:
print(
f"All cuts {'':<20}: pass = {nev[-1]:<20}\
all = {nev[0]:<20}\
-- eff = {nev[-1]*100/nev[0]:.1f} %"
)
stats_all = (
f"All cuts {'':<20}"
f"pass = {nev[-1]:<20}"
f"all = {nev[0]:<20}"
f"-- eff = {nev[-1]*100/nev[0]:.1f} %"
)
print(stats_all)

def yieldhist(self):
"""Returns the N-1 selection yields as a ``hist.Hist`` object
Expand All @@ -610,13 +623,13 @@ def yieldhist(self):
labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"]
if not self._delayed_mode:
h = hist.Hist(hist.axis.Integer(0, len(labels), name="N-1"))
h.fill(numpy.arange(len(labels)), weight=self._nev)
h.fill(numpy.arange(len(labels), dtype=int), weight=self._nev)

else:
h = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="N-1"))
for i, weight in enumerate(self._masks, 1):
h.fill(dask_awkward.full_like(weight, i, dtype=int), weight=weight)
h.fill(dask_awkward.zeros_like(weight))
h.fill(dask_awkward.zeros_like(weight, dtype=int))

return h, labels

Expand Down Expand Up @@ -712,7 +725,7 @@ def plot_vars(
hist.axis.Integer(0, len(labels), name="N-1"),
)
arr = awkward.flatten(var)
h.fill(arr, awkward.zeros_like(arr))
h.fill(arr, awkward.zeros_like(arr, dtype=int))
for i, mask in enumerate(self.result().masks, 1):
arr = awkward.flatten(var[mask])
h.fill(arr, awkward.full_like(arr, i, dtype=int))
Expand All @@ -725,7 +738,7 @@ def plot_vars(
hist.axis.Integer(0, len(labels), name="N-1"),
)
arr = dask_awkward.flatten(var)
h.fill(arr, dask_awkward.zeros_like(arr))
h.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
for i, mask in enumerate(self.result().masks, 1):
arr = dask_awkward.flatten(var[mask])
h.fill(arr, dask_awkward.full_like(arr, i, dtype=int))
Expand Down Expand Up @@ -780,7 +793,7 @@ def result(self):
self._maskscutflow,
)

def to_npz(self, file, compressed=False, compute=True):
def to_npz(self, file, compressed=False, compute=False):
"""Saves the results of the cutflow to a .npz file
Parameters
Expand All @@ -796,7 +809,7 @@ def to_npz(self, file, compressed=False, compute=True):
compute : bool, optional
Whether to immediately start writing or to return an object
that the user can choose when to start writing by calling compute().
Default is True.
Default is False.
Returns
-------
Expand Down Expand Up @@ -824,19 +837,27 @@ def print(self):
"""Prints the statistics of the Cutflow"""

if self._delayed_mode:
self._nevonecut = list(dask.compute(*self._nevonecut))
self._nevcutflow = list(dask.compute(*self._nevcutflow))
warnings.warn(
"Printing the cutflow statistics is going to compute dask_awkward objects."
)
self._nevonecut, self._nevcutflow = dask.compute(
self._nevonecut, self._nevcutflow
)

nevonecut = self._nevonecut
nevcutflow = self._nevcutflow

print("Cutflow stats:")
for i, name in enumerate(self._names):
print(
f"Cut {name:<20}: pass = {nevonecut[i+1]:<20}\
cumulative pass = {nevcutflow[i+1]:<20}\
all = {nevonecut[0]:<20}\
-- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %\
-- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %"
stats = (
f"Cut {name:<20}:"
f"pass = {nevonecut[i+1]:<20}"
f"cumulative pass = {nevcutflow[i+1]:<20}"
f"all = {nevonecut[0]:<20}"
f"-- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %{'':<20}"
f"-- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %"
)
print(stats)

def yieldhist(self):
"""Returns the cutflow yields as ``hist.Hist`` objects
Expand All @@ -856,8 +877,8 @@ def yieldhist(self):
honecut = hist.Hist(hist.axis.Integer(0, len(labels), name="onecut"))
hcutflow = honecut.copy()
hcutflow.axes.name = ("cutflow",)
honecut.fill(numpy.arange(len(labels)), weight=self._nevonecut)
hcutflow.fill(numpy.arange(len(labels)), weight=self._nevcutflow)
honecut.fill(numpy.arange(len(labels), dtype=int), weight=self._nevonecut)
hcutflow.fill(numpy.arange(len(labels), dtype=int), weight=self._nevcutflow)

else:
honecut = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="onecut"))
Expand All @@ -868,12 +889,12 @@ def yieldhist(self):
honecut.fill(
dask_awkward.full_like(weight, i, dtype=int), weight=weight
)
honecut.fill(dask_awkward.zeros_like(weight))
honecut.fill(dask_awkward.zeros_like(weight, dtype=int))
for i, weight in enumerate(self._maskscutflow, 1):
hcutflow.fill(
dask_awkward.full_like(weight, i, dtype=int), weight=weight
)
hcutflow.fill(dask_awkward.zeros_like(weight))
hcutflow.fill(dask_awkward.zeros_like(weight, dtype=int))

return honecut, hcutflow, labels

Expand Down Expand Up @@ -975,8 +996,8 @@ def plot_vars(
hcutflow.axes.name = name, "cutflow"

arr = awkward.flatten(var)
honecut.fill(arr, awkward.zeros_like(arr))
hcutflow.fill(arr, awkward.zeros_like(arr))
honecut.fill(arr, awkward.zeros_like(arr, dtype=int))
hcutflow.fill(arr, awkward.zeros_like(arr, dtype=int))

for i, mask in enumerate(self.result().masksonecut, 1):
arr = awkward.flatten(var[mask])
Expand All @@ -998,8 +1019,8 @@ def plot_vars(
hcutflow.axes.name = name, "cutflow"

arr = dask_awkward.flatten(var)
honecut.fill(arr, dask_awkward.zeros_like(arr))
hcutflow.fill(arr, dask_awkward.zeros_like(arr))
honecut.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
hcutflow.fill(arr, dask_awkward.zeros_like(arr, dtype=int))

for i, mask in enumerate(self.result().masksonecut, 1):
arr = dask_awkward.flatten(var[mask])
Expand Down
Loading

0 comments on commit eff2556

Please sign in to comment.