Merge branch 'master' into main_dev

scikit-hep · Oct 17, 2023 · eff2556 · eff2556
2 parents 3980198 + 8662c9f
commit eff2556
Show file tree

Hide file tree

Showing 17 changed files with 333 additions and 236 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -26,7 +26,7 @@ jobs:
     name: pre-commit
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - uses: actions/setup-python@v4
     - uses: pre-commit/action@v3.0.0
       with:
@@ -45,7 +45,7 @@ jobs:
     name: test coffea (${{ matrix.os }}) - python ${{ matrix.python-version }}, JDK${{ matrix.java-version }}
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v4
       with:
@@ -69,7 +69,7 @@ jobs:
         python -m pip install xgboost
         python -m pip install tritonclient[grpc,http]
         # install checked out coffea
-        python -m pip install -q -e '.[dev,parsl,dask,spark]'
+        python -m pip install -q -e '.[dev,parsl,dask,spark]' --upgrade --upgrade-strategy eager
         python -m pip list
         java -version
     - name: Install dependencies (MacOS)
@@ -80,7 +80,7 @@ jobs:
         python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
         python -m pip install xgboost
         # install checked out coffea
-        python -m pip install -q -e '.[dev,dask,spark]'
+        python -m pip install -q -e '.[dev,dask,spark]' --upgrade --upgrade-strategy eager
         python -m pip list
         java -version
     - name: Install dependencies (Windows)
@@ -91,14 +91,14 @@ jobs:
         python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
         python -m pip install xgboost
         # install checked out coffea
-        python -m pip install -q -e '.[dev,dask]'
+        python -m pip install -q -e '.[dev,dask]' --upgrade --upgrade-strategy eager
         python -m pip list
         java -version
 
     - name: Start triton server with example model
       if: matrix.os == 'ubuntu-latest'
       run: |
-        docker run -d --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${{ github.workspace }}/tests/samples/triton_models_test:/models nvcr.io/nvidia/tritonserver:23.04-py3 tritonserver --model-repository=/models
+        docker run -d --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${{ github.workspace }}/tests/samples/triton_models_test:/models nvcr.io/nvidia/tritonserver:23.04-pyt-python-py3 tritonserver --model-repository=/models
 
     - name: Test with pytest
       run: |
@@ -119,7 +119,7 @@ jobs:
         touch build/html/.nojekyll
     - name: Deploy documentation
       if: github.event_name == 'push' && matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11
-      uses: crazy-max/ghaction-github-pages@v3
+      uses: crazy-max/ghaction-github-pages@v4
       with:
         target_branch: gh-pages
         build_dir: docs/build/html
@@ -135,7 +135,7 @@ jobs:
     name: test coffea-workqueue
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Conda
       uses: conda-incubator/setup-miniconda@v2
       env:
@@ -185,7 +185,7 @@ jobs:
     name: deploy release
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v4
       with:

diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
@@ -17,6 +17,6 @@ jobs:
     name: Validate PR title
     runs-on: ubuntu-latest
     steps:
-      - uses: amannn/action-semantic-pull-request@v5.2.0
+      - uses: amannn/action-semantic-pull-request@v5.3.0
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -5,14 +5,14 @@ ci:
         for more information, see https://pre-commit.ci
     autofix_prs: true
     autoupdate_branch: ''
-    autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
+    autoupdate_commit_msg: 'ci(pre-commit): pre-commit autoupdate'
     autoupdate_schedule: weekly
     skip: []
     submodules: false
 
 repos:
 - repo: https://github.com/psf/black
-  rev: 23.7.0
+  rev: 23.9.1
   hooks:
   - id: black
 
@@ -24,7 +24,7 @@ repos:
     args: ["--profile", "black", "--filter-files"]
 
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.4.0
+  rev: v4.5.0
   hooks:
   - id: check-case-conflict
   - id: check-merge-conflict
@@ -37,24 +37,24 @@ repos:
   - id: trailing-whitespace
 
 - repo: https://github.com/asottile/pyupgrade
-  rev: v3.9.0
+  rev: v3.15.0
   hooks:
   - id: pyupgrade
     args: ["--py38-plus"]
 
 - repo: https://github.com/asottile/setup-cfg-fmt
-  rev: v2.4.0
+  rev: v2.5.0
   hooks:
   - id: setup-cfg-fmt
 
 - repo: https://github.com/pycqa/flake8
-  rev: 6.0.0
+  rev: 6.1.0
   hooks:
   - id: flake8
     exclude: coffea/processor/templates
 
 - repo: https://github.com/codespell-project/codespell
-  rev: v2.2.5
+  rev: v2.2.6
   hooks:
   - id: codespell
     args: ["--skip=*.ipynb","-L hist,Hist,nd,SubJet,subjet,Subjet,PTD,ptd,fPt,fpt,Ser,ser"]
diff --git a/pyproject.toml b/pyproject.toml
@@ -37,17 +37,17 @@ classifiers = [
   "Topic :: Utilities",
 ]
 dependencies = [
-  "awkward>=2.3.3",
-  "uproot>=5.0.10",
+  "awkward>=2.4.6",
+  "uproot>=5.1.1",
   "dask[array]>=2023.4.0",
-  "dask-awkward>=2023.7.1,!=2023.8.0",
-  "dask-histogram>=2023.6.0",
-  "correctionlib>=2.0.0",
+  "dask-awkward>=2023.10.0",
+  "dask-histogram>=2023.10.0",
+  "correctionlib>=2.3.3",
   "pyarrow>=6.0.0",
   "fsspec",
   "matplotlib>=3",
-  "numba>=0.57.0",
-  "numpy>=1.22.0,<1.25",  # < 1.25 for numba 0.57 series
+  "numba>=0.58.0",
+  "numpy>=1.22.0,<1.26",  # < 1.26 for numba 0.58 series
   "scipy>=1.1.0",
   "tqdm>=4.27.0",
   "lz4",

diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py
@@ -418,7 +418,7 @@ def variations(self):
 
 
 class NminusOneToNpz:
-    """Object to be returned by NmiusOne.to_npz()"""
+    """Object to be returned by NminusOne.to_npz()"""
 
     def __init__(self, file, labels, nev, masks, saver):
         self._file = file
@@ -494,11 +494,17 @@ def maskscutflow(self):
         return self._maskscutflow
 
     def compute(self):
-        self._nevonecut = list(dask.compute(*self._nevonecut))
-        self._nevcutflow = list(dask.compute(*self._nevcutflow))
-        self._masksonecut = list(dask.compute(*self._masksonecut))
-        self._maskscutflow = list(dask.compute(*self._maskscutflow))
-        numpy.savez(
+        self._nevonecut, self._nevcutflow = dask.compute(
+            self._nevonecut, self._nevcutflow
+        )
+        self._masksonecut, self._maskscutflow = dask.compute(
+            self._masksonecut, self._maskscutflow
+        )
+        self._nevonecut = list(self._nevonecut)
+        self._nevcutflow = list(self._nevcutflow)
+        self._masksonecut = list(self._masksonecut)
+        self._maskscutflow = list(self._maskscutflow)
+        self._saver(
             self._file,
             labels=self._labels,
             nevonecut=self._nevonecut,
@@ -538,7 +544,7 @@ def result(self):
         labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"]
         return NminusOneResult(labels, self._nev, self._masks)
 
-    def to_npz(self, file, compressed=False, compute=True):
+    def to_npz(self, file, compressed=False, compute=False):
         """Saves the results of the N-1 selection to a .npz file
 
         Parameters
@@ -554,7 +560,7 @@ def to_npz(self, file, compressed=False, compute=True):
             compute : bool, optional
                 Whether to immediately start writing or to return an object
                 that the user can choose when to start writing by calling compute().
-                Default is True.
+                Default is False.
 
         Returns
         -------
@@ -580,22 +586,29 @@ def print(self):
         """Prints the statistics of the N-1 selection"""
 
         if self._delayed_mode:
+            warnings.warn(
+                "Printing the N-1 selection statistics is going to compute dask_awkward objects."
+            )
             self._nev = list(dask.compute(*self._nev))
+
         nev = self._nev
         print("N-1 selection stats:")
         for i, name in enumerate(self._names):
-            print(
-                f"Ignoring {name:<20}: pass = {nev[i+1]:<20}\
-                all = {nev[0]:<20}\
-                -- eff = {nev[i+1]*100/nev[0]:.1f} %"
+            stats = (
+                f"Ignoring {name:<20}"
+                f"pass = {nev[i+1]:<20}"
+                f"all = {nev[0]:<20}"
+                f"-- eff = {nev[i+1]*100/nev[0]:.1f} %"
             )
+            print(stats)
 
-        if True:
-            print(
-                f"All cuts {'':<20}: pass = {nev[-1]:<20}\
-                all = {nev[0]:<20}\
-                -- eff = {nev[-1]*100/nev[0]:.1f} %"
-            )
+        stats_all = (
+            f"All cuts {'':<20}"
+            f"pass = {nev[-1]:<20}"
+            f"all = {nev[0]:<20}"
+            f"-- eff = {nev[-1]*100/nev[0]:.1f} %"
+        )
+        print(stats_all)
 
     def yieldhist(self):
         """Returns the N-1 selection yields as a ``hist.Hist`` object
@@ -610,13 +623,13 @@ def yieldhist(self):
         labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"]
         if not self._delayed_mode:
             h = hist.Hist(hist.axis.Integer(0, len(labels), name="N-1"))
-            h.fill(numpy.arange(len(labels)), weight=self._nev)
+            h.fill(numpy.arange(len(labels), dtype=int), weight=self._nev)
 
         else:
             h = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="N-1"))
             for i, weight in enumerate(self._masks, 1):
                 h.fill(dask_awkward.full_like(weight, i, dtype=int), weight=weight)
-            h.fill(dask_awkward.zeros_like(weight))
+            h.fill(dask_awkward.zeros_like(weight, dtype=int))
 
         return h, labels
 
@@ -712,7 +725,7 @@ def plot_vars(
                     hist.axis.Integer(0, len(labels), name="N-1"),
                 )
                 arr = awkward.flatten(var)
-                h.fill(arr, awkward.zeros_like(arr))
+                h.fill(arr, awkward.zeros_like(arr, dtype=int))
                 for i, mask in enumerate(self.result().masks, 1):
                     arr = awkward.flatten(var[mask])
                     h.fill(arr, awkward.full_like(arr, i, dtype=int))
@@ -725,7 +738,7 @@ def plot_vars(
                     hist.axis.Integer(0, len(labels), name="N-1"),
                 )
                 arr = dask_awkward.flatten(var)
-                h.fill(arr, dask_awkward.zeros_like(arr))
+                h.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
                 for i, mask in enumerate(self.result().masks, 1):
                     arr = dask_awkward.flatten(var[mask])
                     h.fill(arr, dask_awkward.full_like(arr, i, dtype=int))
@@ -780,7 +793,7 @@ def result(self):
             self._maskscutflow,
         )
 
-    def to_npz(self, file, compressed=False, compute=True):
+    def to_npz(self, file, compressed=False, compute=False):
         """Saves the results of the cutflow to a .npz file
 
         Parameters
@@ -796,7 +809,7 @@ def to_npz(self, file, compressed=False, compute=True):
             compute : bool, optional
                 Whether to immediately start writing or to return an object
                 that the user can choose when to start writing by calling compute().
-                Default is True.
+                Default is False.
 
         Returns
         -------
@@ -824,19 +837,27 @@ def print(self):
         """Prints the statistics of the Cutflow"""
 
         if self._delayed_mode:
-            self._nevonecut = list(dask.compute(*self._nevonecut))
-            self._nevcutflow = list(dask.compute(*self._nevcutflow))
+            warnings.warn(
+                "Printing the cutflow statistics is going to compute dask_awkward objects."
+            )
+            self._nevonecut, self._nevcutflow = dask.compute(
+                self._nevonecut, self._nevcutflow
+            )
+
         nevonecut = self._nevonecut
         nevcutflow = self._nevcutflow
+
         print("Cutflow stats:")
         for i, name in enumerate(self._names):
-            print(
-                f"Cut {name:<20}: pass = {nevonecut[i+1]:<20}\
-                cumulative pass = {nevcutflow[i+1]:<20}\
-                all = {nevonecut[0]:<20}\
-                --  eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %\
-                -- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %"
+            stats = (
+                f"Cut {name:<20}:"
+                f"pass = {nevonecut[i+1]:<20}"
+                f"cumulative pass = {nevcutflow[i+1]:<20}"
+                f"all = {nevonecut[0]:<20}"
+                f"-- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %{'':<20}"
+                f"-- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %"
             )
+            print(stats)
 
     def yieldhist(self):
         """Returns the cutflow yields as ``hist.Hist`` objects
@@ -856,8 +877,8 @@ def yieldhist(self):
             honecut = hist.Hist(hist.axis.Integer(0, len(labels), name="onecut"))
             hcutflow = honecut.copy()
             hcutflow.axes.name = ("cutflow",)
-            honecut.fill(numpy.arange(len(labels)), weight=self._nevonecut)
-            hcutflow.fill(numpy.arange(len(labels)), weight=self._nevcutflow)
+            honecut.fill(numpy.arange(len(labels), dtype=int), weight=self._nevonecut)
+            hcutflow.fill(numpy.arange(len(labels), dtype=int), weight=self._nevcutflow)
 
         else:
             honecut = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="onecut"))
@@ -868,12 +889,12 @@ def yieldhist(self):
                 honecut.fill(
                     dask_awkward.full_like(weight, i, dtype=int), weight=weight
                 )
-            honecut.fill(dask_awkward.zeros_like(weight))
+            honecut.fill(dask_awkward.zeros_like(weight, dtype=int))
             for i, weight in enumerate(self._maskscutflow, 1):
                 hcutflow.fill(
                     dask_awkward.full_like(weight, i, dtype=int), weight=weight
                 )
-            hcutflow.fill(dask_awkward.zeros_like(weight))
+            hcutflow.fill(dask_awkward.zeros_like(weight, dtype=int))
 
         return honecut, hcutflow, labels
 
@@ -975,8 +996,8 @@ def plot_vars(
                 hcutflow.axes.name = name, "cutflow"
 
                 arr = awkward.flatten(var)
-                honecut.fill(arr, awkward.zeros_like(arr))
-                hcutflow.fill(arr, awkward.zeros_like(arr))
+                honecut.fill(arr, awkward.zeros_like(arr, dtype=int))
+                hcutflow.fill(arr, awkward.zeros_like(arr, dtype=int))
 
                 for i, mask in enumerate(self.result().masksonecut, 1):
                     arr = awkward.flatten(var[mask])
@@ -998,8 +1019,8 @@ def plot_vars(
                 hcutflow.axes.name = name, "cutflow"
 
                 arr = dask_awkward.flatten(var)
-                honecut.fill(arr, dask_awkward.zeros_like(arr))
-                hcutflow.fill(arr, dask_awkward.zeros_like(arr))
+                honecut.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
+                hcutflow.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
 
                 for i, mask in enumerate(self.result().masksonecut, 1):
                     arr = dask_awkward.flatten(var[mask])