Skip to content

Commit

Permalink
Merge branch 'master' into dask-awkward-pin-skooch
Browse files: browse the repository at this point in the history
  • Loading branch information
lgray authored Sep 20, 2023
2 parents 6d88021 + fa73cb5 commit 7b09975
Show file tree
Hide file tree
Showing 8 changed files with 188 additions and 134 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ jobs:
touch build/html/.nojekyll
- name: Deploy documentation
if: github.event_name == 'push' && matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11
uses: crazy-max/ghaction-github-pages@v3
uses: crazy-max/ghaction-github-pages@v4
with:
target_branch: gh-pages
build_dir: docs/build/html
Expand Down
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ci:

repos:
- repo: https://github.com/psf/black
rev: 23.7.0
rev: 23.9.1
hooks:
- id: black

Expand All @@ -37,7 +37,7 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/asottile/pyupgrade
rev: v3.9.0
rev: v3.11.0
hooks:
- id: pyupgrade
args: ["--py38-plus"]
Expand All @@ -48,7 +48,7 @@ repos:
- id: setup-cfg-fmt

- repo: https://github.com/pycqa/flake8
rev: 6.0.0
rev: 6.1.0
hooks:
- id: flake8
exclude: coffea/processor/templates
Expand Down
101 changes: 61 additions & 40 deletions src/coffea/analysis_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def variations(self):


class NminusOneToNpz:
"""Object to be returned by NmiusOne.to_npz()"""
"""Object to be returned by NminusOne.to_npz()"""

def __init__(self, file, labels, nev, masks, saver):
self._file = file
Expand Down Expand Up @@ -494,11 +494,17 @@ def maskscutflow(self):
return self._maskscutflow

def compute(self):
self._nevonecut = list(dask.compute(*self._nevonecut))
self._nevcutflow = list(dask.compute(*self._nevcutflow))
self._masksonecut = list(dask.compute(*self._masksonecut))
self._maskscutflow = list(dask.compute(*self._maskscutflow))
numpy.savez(
self._nevonecut, self._nevcutflow = dask.compute(
self._nevonecut, self._nevcutflow
)
self._masksonecut, self._maskscutflow = dask.compute(
self._masksonecut, self._maskscutflow
)
self._nevonecut = list(self._nevonecut)
self._nevcutflow = list(self._nevcutflow)
self._masksonecut = list(self._masksonecut)
self._maskscutflow = list(self._maskscutflow)
self._saver(
self._file,
labels=self._labels,
nevonecut=self._nevonecut,
Expand Down Expand Up @@ -538,7 +544,7 @@ def result(self):
labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"]
return NminusOneResult(labels, self._nev, self._masks)

def to_npz(self, file, compressed=False, compute=True):
def to_npz(self, file, compressed=False, compute=False):
"""Saves the results of the N-1 selection to a .npz file
Parameters
Expand All @@ -554,7 +560,7 @@ def to_npz(self, file, compressed=False, compute=True):
compute : bool, optional
Whether to immediately start writing or to return an object
that the user can choose when to start writing by calling compute().
Default is True.
Default is False.
Returns
-------
Expand All @@ -580,22 +586,29 @@ def print(self):
"""Prints the statistics of the N-1 selection"""

if self._delayed_mode:
warnings.warn(
"Printing the N-1 selection statistics is going to compute dask_awkward objects."
)
self._nev = list(dask.compute(*self._nev))

nev = self._nev
print("N-1 selection stats:")
for i, name in enumerate(self._names):
print(
f"Ignoring {name:<20}: pass = {nev[i+1]:<20}\
all = {nev[0]:<20}\
-- eff = {nev[i+1]*100/nev[0]:.1f} %"
stats = (
f"Ignoring {name:<20}"
f"pass = {nev[i+1]:<20}"
f"all = {nev[0]:<20}"
f"-- eff = {nev[i+1]*100/nev[0]:.1f} %"
)
print(stats)

if True:
print(
f"All cuts {'':<20}: pass = {nev[-1]:<20}\
all = {nev[0]:<20}\
-- eff = {nev[-1]*100/nev[0]:.1f} %"
)
stats_all = (
f"All cuts {'':<20}"
f"pass = {nev[-1]:<20}"
f"all = {nev[0]:<20}"
f"-- eff = {nev[-1]*100/nev[0]:.1f} %"
)
print(stats_all)

def yieldhist(self):
"""Returns the N-1 selection yields as a ``hist.Hist`` object
Expand All @@ -610,13 +623,13 @@ def yieldhist(self):
labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"]
if not self._delayed_mode:
h = hist.Hist(hist.axis.Integer(0, len(labels), name="N-1"))
h.fill(numpy.arange(len(labels)), weight=self._nev)
h.fill(numpy.arange(len(labels), dtype=int), weight=self._nev)

else:
h = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="N-1"))
for i, weight in enumerate(self._masks, 1):
h.fill(dask_awkward.full_like(weight, i, dtype=int), weight=weight)
h.fill(dask_awkward.zeros_like(weight))
h.fill(dask_awkward.zeros_like(weight, dtype=int))

return h, labels

Expand Down Expand Up @@ -712,7 +725,7 @@ def plot_vars(
hist.axis.Integer(0, len(labels), name="N-1"),
)
arr = awkward.flatten(var)
h.fill(arr, awkward.zeros_like(arr))
h.fill(arr, awkward.zeros_like(arr, dtype=int))
for i, mask in enumerate(self.result().masks, 1):
arr = awkward.flatten(var[mask])
h.fill(arr, awkward.full_like(arr, i, dtype=int))
Expand All @@ -725,7 +738,7 @@ def plot_vars(
hist.axis.Integer(0, len(labels), name="N-1"),
)
arr = dask_awkward.flatten(var)
h.fill(arr, dask_awkward.zeros_like(arr))
h.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
for i, mask in enumerate(self.result().masks, 1):
arr = dask_awkward.flatten(var[mask])
h.fill(arr, dask_awkward.full_like(arr, i, dtype=int))
Expand Down Expand Up @@ -780,7 +793,7 @@ def result(self):
self._maskscutflow,
)

def to_npz(self, file, compressed=False, compute=True):
def to_npz(self, file, compressed=False, compute=False):
"""Saves the results of the cutflow to a .npz file
Parameters
Expand All @@ -796,7 +809,7 @@ def to_npz(self, file, compressed=False, compute=True):
compute : bool, optional
Whether to immediately start writing or to return an object
that the user can choose when to start writing by calling compute().
Default is True.
Default is False.
Returns
-------
Expand Down Expand Up @@ -824,19 +837,27 @@ def print(self):
"""Prints the statistics of the Cutflow"""

if self._delayed_mode:
self._nevonecut = list(dask.compute(*self._nevonecut))
self._nevcutflow = list(dask.compute(*self._nevcutflow))
warnings.warn(
"Printing the cutflow statistics is going to compute dask_awkward objects."
)
self._nevonecut, self._nevcutflow = dask.compute(
self._nevonecut, self._nevcutflow
)

nevonecut = self._nevonecut
nevcutflow = self._nevcutflow

print("Cutflow stats:")
for i, name in enumerate(self._names):
print(
f"Cut {name:<20}: pass = {nevonecut[i+1]:<20}\
cumulative pass = {nevcutflow[i+1]:<20}\
all = {nevonecut[0]:<20}\
-- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %\
-- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %"
stats = (
f"Cut {name:<20}:"
f"pass = {nevonecut[i+1]:<20}"
f"cumulative pass = {nevcutflow[i+1]:<20}"
f"all = {nevonecut[0]:<20}"
f"-- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %{'':<20}"
f"-- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %"
)
print(stats)

def yieldhist(self):
"""Returns the cutflow yields as ``hist.Hist`` objects
Expand All @@ -856,8 +877,8 @@ def yieldhist(self):
honecut = hist.Hist(hist.axis.Integer(0, len(labels), name="onecut"))
hcutflow = honecut.copy()
hcutflow.axes.name = ("cutflow",)
honecut.fill(numpy.arange(len(labels)), weight=self._nevonecut)
hcutflow.fill(numpy.arange(len(labels)), weight=self._nevcutflow)
honecut.fill(numpy.arange(len(labels), dtype=int), weight=self._nevonecut)
hcutflow.fill(numpy.arange(len(labels), dtype=int), weight=self._nevcutflow)

else:
honecut = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="onecut"))
Expand All @@ -868,12 +889,12 @@ def yieldhist(self):
honecut.fill(
dask_awkward.full_like(weight, i, dtype=int), weight=weight
)
honecut.fill(dask_awkward.zeros_like(weight))
honecut.fill(dask_awkward.zeros_like(weight, dtype=int))
for i, weight in enumerate(self._maskscutflow, 1):
hcutflow.fill(
dask_awkward.full_like(weight, i, dtype=int), weight=weight
)
hcutflow.fill(dask_awkward.zeros_like(weight))
hcutflow.fill(dask_awkward.zeros_like(weight, dtype=int))

return honecut, hcutflow, labels

Expand Down Expand Up @@ -975,8 +996,8 @@ def plot_vars(
hcutflow.axes.name = name, "cutflow"

arr = awkward.flatten(var)
honecut.fill(arr, awkward.zeros_like(arr))
hcutflow.fill(arr, awkward.zeros_like(arr))
honecut.fill(arr, awkward.zeros_like(arr, dtype=int))
hcutflow.fill(arr, awkward.zeros_like(arr, dtype=int))

for i, mask in enumerate(self.result().masksonecut, 1):
arr = awkward.flatten(var[mask])
Expand All @@ -998,8 +1019,8 @@ def plot_vars(
hcutflow.axes.name = name, "cutflow"

arr = dask_awkward.flatten(var)
honecut.fill(arr, dask_awkward.zeros_like(arr))
hcutflow.fill(arr, dask_awkward.zeros_like(arr))
honecut.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
hcutflow.fill(arr, dask_awkward.zeros_like(arr, dtype=int))

for i, mask in enumerate(self.result().masksonecut, 1):
arr = dask_awkward.flatten(var[mask])
Expand Down
Loading

0 comments on commit 7b09975

Please sign in to comment.