From aeb53c4bf81926ca7e810918576ce23d3bcbbd6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20R=C3=BCbenach?= <jonas.ruebenach@desy.de>
Date: Thu, 3 Aug 2023 11:28:19 +0200
Subject: [PATCH 01/75] Make rochester_lookup compatible with dask_awkward

---
 src/coffea/lookup_tools/rochester_lookup.py | 24 +++++++++++++++------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/coffea/lookup_tools/rochester_lookup.py b/src/coffea/lookup_tools/rochester_lookup.py
index 3e8bf6eab..bc462f4ba 100644
--- a/src/coffea/lookup_tools/rochester_lookup.py
+++ b/src/coffea/lookup_tools/rochester_lookup.py
@@ -1,4 +1,5 @@
 import awkward
+import dask_awkward as dak
 import numpy
 
 from coffea.lookup_tools.dense_lookup import dense_lookup
@@ -75,7 +76,7 @@ def _error(self, func, *args):
 
         newargs = args + (0, 0)
         default = func(*newargs)
-        result = numpy.zeros_like(default)
+        result = awkward.zeros_like(default)
         for s in range(self._nsets):
             oneOver = 1.0 / self._members[s]
             for m in range(self._members[s]):
@@ -226,12 +227,21 @@ def _kExtra(self, kpt, eta, nl, u, s=0, m=0):
         cbN_flat = awkward.flatten(cbN)
         cbS_flat = awkward.flatten(cbS)
 
-        invcdf = awkward.unflatten(
-            doublecrystalball.ppf(
-                u_flat, cbA_flat, cbA_flat, cbN_flat, cbN_flat, loc, cbS_flat
-            ),
-            counts,
-        )
+        args = (u_flat, cbA_flat, cbA_flat, cbN_flat, cbN_flat, loc, cbS_flat)
+
+        if any(isinstance(arg, dak.Array) for arg in args):
+            def apply(*args):
+                args_lz = [awkward.typetracer.length_zero_if_typetracer(arg) for arg in args]
+                out = awkward.Array(doublecrystalball.ppf(*args_lz))
+                if awkward.backend(args[0]) == "typetracer":
+                    out = awkward.Array(out.layout.to_typetracer(forget_length=True), behavior=out.behavior)
+                return out
+
+            invcdf = dak.map_partitions(apply, *args)
+        else:
+            invcdf = doublecrystalball.ppf(*args)
+
+        invcdf = awkward.unflatten(invcdf, counts)
 
         x = awkward.where(
             mask,

From e6acf51e3d362947c04e9fc6d2e35201e31f638a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 3 Aug 2023 09:35:01 +0000
Subject: [PATCH 02/75] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/coffea/lookup_tools/rochester_lookup.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/coffea/lookup_tools/rochester_lookup.py b/src/coffea/lookup_tools/rochester_lookup.py
index bc462f4ba..fece55d6d 100644
--- a/src/coffea/lookup_tools/rochester_lookup.py
+++ b/src/coffea/lookup_tools/rochester_lookup.py
@@ -230,11 +230,17 @@ def _kExtra(self, kpt, eta, nl, u, s=0, m=0):
         args = (u_flat, cbA_flat, cbA_flat, cbN_flat, cbN_flat, loc, cbS_flat)
 
         if any(isinstance(arg, dak.Array) for arg in args):
+
             def apply(*args):
-                args_lz = [awkward.typetracer.length_zero_if_typetracer(arg) for arg in args]
+                args_lz = [
+                    awkward.typetracer.length_zero_if_typetracer(arg) for arg in args
+                ]
                 out = awkward.Array(doublecrystalball.ppf(*args_lz))
                 if awkward.backend(args[0]) == "typetracer":
-                    out = awkward.Array(out.layout.to_typetracer(forget_length=True), behavior=out.behavior)
+                    out = awkward.Array(
+                        out.layout.to_typetracer(forget_length=True),
+                        behavior=out.behavior,
+                    )
                 return out
 
             invcdf = dak.map_partitions(apply, *args)

From 5feaff63a5fd03c913a60caa3e01b2195db6b8d9 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Thu, 3 Aug 2023 10:08:53 -0500
Subject: [PATCH 03/75] reactivate rochester corrections tests

---
 tests/test_lookup_tools.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py
index f4d5a3b30..4b86ec3df 100644
--- a/tests/test_lookup_tools.py
+++ b/tests/test_lookup_tools.py
@@ -372,8 +372,6 @@ def test_jec_txt_effareas():
 
 
 def test_rochester():
-    pytest.xfail("weird side effect from running other tests... passes by itself")
-
     rochester_data = lookup_tools.txt_converters.convert_rochester_file(
         "tests/samples/RoccoR2018.txt.gz", loaduncs=True
     )

From fbfbb0d0357f6d143eae821a2da5b005bdff60c8 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Thu, 3 Aug 2023 10:45:40 -0500
Subject: [PATCH 04/75] hoisted by my own petard

---
 tests/test_lookup_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py
index 4b86ec3df..a57c6b7cf 100644
--- a/tests/test_lookup_tools.py
+++ b/tests/test_lookup_tools.py
@@ -388,7 +388,7 @@ def test_rochester():
 
     # test against nanoaod
     events = NanoEventsFactory.from_root(
-        os.path.abspath("tests/samples/nano_dimuon.root")
+        {os.path.abspath("tests/samples/nano_dimuon.root"): "Events"}
     ).events()
 
     data_k = rochester.kScaleDT(

From 78d2ed2f0e378772ff212e8a0d0473190ef18142 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Thu, 3 Aug 2023 15:36:23 -0500
Subject: [PATCH 05/75] this time, with feeling

---
 tests/test_lookup_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py
index a57c6b7cf..7bebd7e18 100644
--- a/tests/test_lookup_tools.py
+++ b/tests/test_lookup_tools.py
@@ -404,7 +404,7 @@ def test_rochester():
 
     # test against mc
     events = NanoEventsFactory.from_root(
-        os.path.abspath("tests/samples/nano_dy.root")
+        {os.path.abspath("tests/samples/nano_dy.root"): "Events"},
     ).events()
 
     hasgen = ~np.isnan(ak.fill_none(events.Muon.matched_gen.pt, np.nan))

From 129e2691f14c5b49787186b401726fb18c7c877b Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Fri, 4 Aug 2023 13:06:24 -0500
Subject: [PATCH 06/75] daskify tests

---
 tests/test_lookup_tools.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py
index 7bebd7e18..9a6a7954d 100644
--- a/tests/test_lookup_tools.py
+++ b/tests/test_lookup_tools.py
@@ -388,23 +388,25 @@ def test_rochester():
 
     # test against nanoaod
     events = NanoEventsFactory.from_root(
-        {os.path.abspath("tests/samples/nano_dimuon.root"): "Events"}
+        {os.path.abspath("tests/samples/nano_dimuon.root"): "Events"},
+        permit_dask=True,
     ).events()
 
     data_k = rochester.kScaleDT(
         events.Muon.charge, events.Muon.pt, events.Muon.eta, events.Muon.phi
     )
-    data_k = np.array(ak.flatten(data_k))
+    data_k = ak.flatten(data_k).compute().to_numpy()
     assert all(np.isclose(data_k, official_data_k))
     data_err = rochester.kScaleDTerror(
         events.Muon.charge, events.Muon.pt, events.Muon.eta, events.Muon.phi
     )
-    data_err = np.array(ak.flatten(data_err), dtype=float)
+    data_err = ak.flatten(data_err).compute().to_numpy()
     assert all(np.isclose(data_err, official_data_err, atol=1e-8))
 
     # test against mc
     events = NanoEventsFactory.from_root(
         {os.path.abspath("tests/samples/nano_dy.root"): "Events"},
+        permit_dask=True,
     ).events()
 
     hasgen = ~np.isnan(ak.fill_none(events.Muon.matched_gen.pt, np.nan))
@@ -424,10 +426,10 @@ def test_rochester():
         events.Muon.nTrackerLayers[~hasgen],
         mc_rand[~hasgen],
     )
-    mc_k = np.array(ak.flatten(ak.ones_like(events.Muon.pt)))
-    hasgen_flat = np.array(ak.flatten(hasgen))
-    mc_k[hasgen_flat] = np.array(ak.flatten(mc_kspread))
-    mc_k[~hasgen_flat] = np.array(ak.flatten(mc_ksmear))
+    mc_k = ak.flatten(ak.ones_like(events.Muon.pt)).compute().to_numpy()
+    hasgen_flat = ak.flatten(hasgen).compute().to_numpy()
+    mc_k[hasgen_flat] = ak.flatten(mc_kspread).compute().to_numpy()
+    mc_k[~hasgen_flat] = ak.flatten(mc_ksmear).compute().to_numpy()
     assert all(np.isclose(mc_k, official_mc_k))
 
     mc_errspread = rochester.kSpreadMCerror(
@@ -445,9 +447,9 @@ def test_rochester():
         events.Muon.nTrackerLayers[~hasgen],
         mc_rand[~hasgen],
     )
-    mc_err = np.array(ak.flatten(ak.ones_like(events.Muon.pt)))
-    mc_err[hasgen_flat] = np.array(ak.flatten(mc_errspread))
-    mc_err[~hasgen_flat] = np.array(ak.flatten(mc_errsmear))
+    mc_err = ak.flatten(ak.ones_like(events.Muon.pt)).compute().to_numpy()
+    mc_err[hasgen_flat] = ak.flatten(mc_errspread).compute().to_numpy()
+    mc_err[~hasgen_flat] = ak.flatten(mc_errsmear).compute().to_numpy()
     assert all(np.isclose(mc_err, official_mc_err, atol=1e-8))
 
 

From dcbc45b97eddf5c22fc3d7e2d7bae195f27c9143 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20R=C3=BCbenach?= <jonas.ruebenach@desy.de>
Date: Tue, 8 Aug 2023 18:26:57 +0200
Subject: [PATCH 07/75] Fix daskification of test_rochester

---
 tests/test_lookup_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py
index 9a6a7954d..7fde363de 100644
--- a/tests/test_lookup_tools.py
+++ b/tests/test_lookup_tools.py
@@ -410,7 +410,7 @@ def test_rochester():
     ).events()
 
     hasgen = ~np.isnan(ak.fill_none(events.Muon.matched_gen.pt, np.nan))
-    mc_rand = ak.unflatten(mc_rand, ak.num(hasgen))
+    mc_rand = ak.unflatten(dak.from_awkward(ak.Array(mc_rand), 1), ak.num(hasgen))
     mc_kspread = rochester.kSpreadMC(
         events.Muon.charge[hasgen],
         events.Muon.pt[hasgen],

From 9e832e0335bc121cab17c2fdbee5b8d09c208d96 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Thu, 27 Jul 2023 10:22:53 -0400
Subject: [PATCH 08/75] global index fetching working

---
 src/coffea/nanoevents/methods/physlite.py |  49 ++++++++-
 tests/test_nanoevents_physlite.py         | 125 ++++++++++++----------
 2 files changed, 111 insertions(+), 63 deletions(-)

diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py
index c0efcdc39..0d3cc2898 100644
--- a/src/coffea/nanoevents/methods/physlite.py
+++ b/src/coffea/nanoevents/methods/physlite.py
@@ -3,6 +3,7 @@
 
 import awkward
 import numpy
+import dask_awkward
 
 from coffea.nanoevents.methods import base, vector
 
@@ -64,7 +65,8 @@ def where(unique_keys):
     return out
 
 
-def _get_target_offsets(offsets, event_index):
+def _concrete_get_target_offsets(load_column, event_index):
+    offsets = awkward.typetracer.length_one_if_typetracer(load_column.layout.offsets.data)
     if isinstance(event_index, Number):
         return offsets[event_index]
 
@@ -72,15 +74,52 @@ def descend(layout, depth, **kwargs):
         if layout.purelist_depth == 1:
             return awkward.contents.NumpyArray(offsets)[layout]
 
-    return awkward.transform(descend, event_index)
+    return awkward.transform(descend, event_index.layout)
+
+
+def _dask_get_target_offsets(load_column, event_index):
+    return dask_awkward.map_partitions(
+        _concrete_get_target_offsets,
+        load_column,
+        event_index
+    )
+
+
+def _get_target_offsets(load_column, event_index):
+    # TODO check event_index as well
+    if isinstance(load_column, dask_awkward.Array):
+        return _dask_get_target_offsets(load_column, event_index)
+    return _concrete_get_target_offsets(load_column, event_index)
 
 
 def _get_global_index(target, eventindex, index):
     load_column = target[
         target.fields[0]
-    ]  # awkward is eager-mode now (will need to dask this)
-    target_offsets = _get_target_offsets(load_column.layout.offsets, eventindex)
-    return target_offsets + index
+    ]
+    target_offsets = _get_target_offsets(load_column, eventindex)
+    return target_offsets + index # here i get
+
+
+# def _concrete_get_global_index(target, eventindex, index):
+#     load_column = target[
+#         target.fields[0]
+#     ]
+#     target_offsets = _get_target_offsets(load_column.layout.offsets, eventindex)
+#     return target_offsets + index
+
+# def _dask_get_global_index(target, eventindex, index):
+#     return dask_awkward.map_partitions(
+#         _concrete_get_global_index,
+#         target,
+#         eventindex,
+#         index,
+#     )
+
+# def _get_global_index(target, eventindex, index):
+#     # check target, eventindex, index all dak
+#     if isinstance(target, dask_awkward.Array):
+#         return _dask_get_global_index(target, eventindex, index)
+#     return _concrete_get_global_index(target, eventindex, index)
 
 
 @awkward.mixin_class(behavior)
diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py
index f82471198..488395183 100644
--- a/tests/test_nanoevents_physlite.py
+++ b/tests/test_nanoevents_physlite.py
@@ -5,6 +5,11 @@
 
 from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema
 
+from coffea.nanoevents.methods.physlite import _get_global_index
+
+import dask
+dask.config.set({"awkward.optimization.enabled": False, "awkward.raise-failed-meta": True, "awkward.optimization.on-fail": "raise"})
+
 pytestmark = pytest.mark.skip(reason="uproot is upset with this file...")
 
 
@@ -13,64 +18,68 @@ def _events():
     factory = NanoEventsFactory.from_root(
         {path: "CollectionTree"},
         schemaclass=PHYSLITESchema,
-        permit_dask=False,
+        permit_dask=True,
+        #permit_dask=False,
     )
     return factory.events()
 
-
-@pytest.fixture(scope="module")
-def events():
-    return _events()
-
-
-@pytest.mark.parametrize("do_slice", [False, True])
-def test_electron_track_links(events, do_slice):
-    if do_slice:
-        events = events[np.random.randint(2, size=len(events)).astype(bool)]
-    for event in events:
-        for electron in event.Electrons:
-            for link_index, link in enumerate(electron.trackParticleLinks):
-                track_index = link.m_persIndex
-                print(track_index)
-                print(event.GSFTrackParticles)
-                print(electron.trackParticleLinks)
-                print(electron.trackParticles)
-
-                assert (
-                    event.GSFTrackParticles[track_index].z0
-                    == electron.trackParticles[link_index].z0
-                )
-
-
-# from MetaData/EventFormat
-_hash_to_target_name = {
-    13267281: "TruthPhotons",
-    342174277: "TruthMuons",
-    368360608: "TruthNeutrinos",
-    375408000: "TruthTaus",
-    394100163: "TruthElectrons",
-    614719239: "TruthBoson",
-    660928181: "TruthTop",
-    779635413: "TruthBottom",
-}
-
-
-def test_truth_links_toplevel(events):
-    children_px = events.TruthBoson.children.px
-    for i_event, event in enumerate(events):
-        for i_particle, particle in enumerate(event.TruthBoson):
-            for i_link, link in enumerate(particle.childLinks):
-                assert (
-                    event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px
-                    == children_px[i_event][i_particle][i_link]
-                )
-
-
-def test_truth_links(events):
-    for i_event, event in enumerate(events):
-        for i_particle, particle in enumerate(event.TruthBoson):
-            for i_link, link in enumerate(particle.childLinks):
-                assert (
-                    event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px
-                    == particle.children[i_link].px
-                )
+events = _events()
+
+gi = _get_global_index(events.GSFTrackParticles, events.Electrons._eventindex, events.Electrons.trackParticleLinks.m_persIndex)
+
+# @pytest.fixture(scope="module")
+# def events():
+#     return _events()
+
+
+# @pytest.mark.parametrize("do_slice", [False, True])
+# def test_electron_track_links(events, do_slice):
+#     if do_slice:
+#         events = events[np.random.randint(2, size=len(events)).astype(bool)]
+#     for event in events:
+#         for electron in event.Electrons:
+#             for link_index, link in enumerate(electron.trackParticleLinks):
+#                 track_index = link.m_persIndex
+#                 print(track_index)
+#                 print(event.GSFTrackParticles)
+#                 print(electron.trackParticleLinks)
+#                 print(electron.trackParticles)
+
+#                 assert (
+#                     event.GSFTrackParticles[track_index].z0
+#                     == electron.trackParticles[link_index].z0
+#                 )
+
+
+# # from MetaData/EventFormat
+# _hash_to_target_name = {
+#     13267281: "TruthPhotons",
+#     342174277: "TruthMuons",
+#     368360608: "TruthNeutrinos",
+#     375408000: "TruthTaus",
+#     394100163: "TruthElectrons",
+#     614719239: "TruthBoson",
+#     660928181: "TruthTop",
+#     779635413: "TruthBottom",
+# }
+
+
+# def test_truth_links_toplevel(events):
+#     children_px = events.TruthBoson.children.px
+#     for i_event, event in enumerate(events):
+#         for i_particle, particle in enumerate(event.TruthBoson):
+#             for i_link, link in enumerate(particle.childLinks):
+#                 assert (
+#                     event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px
+#                     == children_px[i_event][i_particle][i_link]
+#                 )
+
+
+# def test_truth_links(events):
+#     for i_event, event in enumerate(events):
+#         for i_particle, particle in enumerate(event.TruthBoson):
+#             for i_link, link in enumerate(particle.childLinks):
+#                 assert (
+#                     event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px
+#                     == particle.children[i_link].px
+#                 )

From 29c359e026cb00d3b18f017975981f25489e2f17 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Fri, 28 Jul 2023 12:40:07 -0400
Subject: [PATCH 09/75] track particles working

---
 src/coffea/nanoevents/methods/physlite.py | 53 +++++++++++++----------
 tests/test_nanoevents_physlite.py         | 16 ++++++-
 2 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py
index 0d3cc2898..ddbd4b218 100644
--- a/src/coffea/nanoevents/methods/physlite.py
+++ b/src/coffea/nanoevents/methods/physlite.py
@@ -65,11 +65,24 @@ def where(unique_keys):
     return out
 
 
-def _concrete_get_target_offsets(load_column, event_index):
-    offsets = awkward.typetracer.length_one_if_typetracer(load_column.layout.offsets.data)
+def _get_target_offsets(load_column, event_index):
+    if isinstance(load_column, dask_awkward.Array):
+        # TODO check event_index as well
+        return dask_awkward.map_partitions(
+            _get_target_offsets, load_column, event_index
+        )
+
+    offsets = load_column.layout.offsets.data
+
     if isinstance(event_index, Number):
+        # TODO i think this is not working yet in dask
         return offsets[event_index]
 
+    # nescessary to stick it into the `NumpyArray` constructor
+    offsets = awkward.typetracer.length_zero_if_typetracer(
+        load_column.layout.offsets.data
+    )
+
     def descend(layout, depth, **kwargs):
         if layout.purelist_depth == 1:
             return awkward.contents.NumpyArray(offsets)[layout]
@@ -77,27 +90,10 @@ def descend(layout, depth, **kwargs):
     return awkward.transform(descend, event_index.layout)
 
 
-def _dask_get_target_offsets(load_column, event_index):
-    return dask_awkward.map_partitions(
-        _concrete_get_target_offsets,
-        load_column,
-        event_index
-    )
-
-
-def _get_target_offsets(load_column, event_index):
-    # TODO check event_index as well
-    if isinstance(load_column, dask_awkward.Array):
-        return _dask_get_target_offsets(load_column, event_index)
-    return _concrete_get_target_offsets(load_column, event_index)
-
-
 def _get_global_index(target, eventindex, index):
-    load_column = target[
-        target.fields[0]
-    ]
+    load_column = target[target.fields[0]]
     target_offsets = _get_target_offsets(load_column, eventindex)
-    return target_offsets + index # here i get
+    return target_offsets + index  # here i get
 
 
 # def _concrete_get_global_index(target, eventindex, index):
@@ -196,10 +192,21 @@ class Electron(Particle):
     """Electron collection, following `xAOD::Electron_v1
     <https://gitlab.cern.ch/atlas/athena/-/blob/21.2/Event/xAOD/xAODEgamma/Root/Electron_v1.cxx>`_.
     """
-
     @property
-    def trackParticles(self):
+    def trackParticles(self, _dask_array_=None):
+
+        if _dask_array_ is not None:
+            target = _dask_array_.behavior["__original_array__"]().GSFTrackParticles
+            links = _dask_array_.trackParticleLinks
+            return _element_link(
+                target,
+                _dask_array_._eventindex,
+                links.m_persIndex,
+                links.m_persKey,
+            )
+
         links = self.trackParticleLinks
+
         return _element_link(
             self._events().GSFTrackParticles,
             self._eventindex,
diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py
index 488395183..6354f0376 100644
--- a/tests/test_nanoevents_physlite.py
+++ b/tests/test_nanoevents_physlite.py
@@ -5,9 +5,10 @@
 
 from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema
 
-from coffea.nanoevents.methods.physlite import _get_global_index
+from coffea.nanoevents.methods.physlite import _get_global_index, _element_link
 
 import dask
+import dask_awkward as dak
 dask.config.set({"awkward.optimization.enabled": False, "awkward.raise-failed-meta": True, "awkward.optimization.on-fail": "raise"})
 
 pytestmark = pytest.mark.skip(reason="uproot is upset with this file...")
@@ -25,7 +26,18 @@ def _events():
 
 events = _events()
 
-gi = _get_global_index(events.GSFTrackParticles, events.Electrons._eventindex, events.Electrons.trackParticleLinks.m_persIndex)
+gi = _get_global_index(
+    events.GSFTrackParticles,
+    events.Electrons._eventindex,
+    events.Electrons.trackParticleLinks.m_persIndex
+)
+
+el = _element_link(
+    events.GSFTrackParticles,
+    events.Electrons._eventindex,
+    events.Electrons.trackParticleLinks.m_persIndex,
+    events.Electrons.trackParticleLinks.m_persKey
+)
 
 # @pytest.fixture(scope="module")
 # def events():

From 109e73ab02a706460dde8c953a1776b0f2dc8479 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Fri, 28 Jul 2023 13:29:24 -0400
Subject: [PATCH 10/75] trackParticle

---
 src/coffea/nanoevents/methods/physlite.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py
index ddbd4b218..1fa2fe013 100644
--- a/src/coffea/nanoevents/methods/physlite.py
+++ b/src/coffea/nanoevents/methods/physlite.py
@@ -192,9 +192,9 @@ class Electron(Particle):
     """Electron collection, following `xAOD::Electron_v1
     <https://gitlab.cern.ch/atlas/athena/-/blob/21.2/Event/xAOD/xAODEgamma/Root/Electron_v1.cxx>`_.
     """
+
     @property
     def trackParticles(self, _dask_array_=None):
-
         if _dask_array_ is not None:
             target = _dask_array_.behavior["__original_array__"]().GSFTrackParticles
             links = _dask_array_.trackParticleLinks
@@ -215,7 +215,9 @@ def trackParticles(self, _dask_array_=None):
         )
 
     @property
-    def trackParticle(self):
+    def trackParticle(self, _dask_array_=None):
+        if _dask_array_ is not None:
+            self = _dask_array_  # TODO: is this what i should be doing?
         trackParticles = self.trackParticles
         return self.trackParticles[
             tuple([slice(None) for i in range(trackParticles.ndim - 1)] + [0])

From 2e16a9e6c57b64a1ebfc9129b2645a1ac28fb1a9 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Fri, 28 Jul 2023 18:21:25 -0400
Subject: [PATCH 11/75] cleanup and add caloclusters

---
 src/coffea/nanoevents/methods/physlite.py | 88 ++++++++++-------------
 src/coffea/nanoevents/schemas/physlite.py |  1 +
 2 files changed, 40 insertions(+), 49 deletions(-)

diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py
index 1fa2fe013..b6badb9d9 100644
--- a/src/coffea/nanoevents/methods/physlite.py
+++ b/src/coffea/nanoevents/methods/physlite.py
@@ -39,6 +39,25 @@ def _element_link(target_collection, eventindex, index, key):
     return target_collection._apply_global_index(global_index)
 
 
+def _element_link_method(self, link_name, target_name, _dask_array_):
+    if _dask_array_ is not None:
+        target = _dask_array_.behavior["__original_array__"]()[target_name]
+        links = _dask_array_[link_name]
+        return _element_link(
+            target,
+            _dask_array_._eventindex,
+            links.m_persIndex,
+            links.m_persKey,
+        )
+    links = self[link_name]
+    return _element_link(
+        self._events()[target_name],
+        self._eventindex,
+        links.m_persIndex,
+        links.m_persKey,
+    )
+
+
 def _element_link_multiple(events, obj, link_field, with_name=None):
     link = obj[link_field]
     key = link.m_persKey
@@ -66,8 +85,10 @@ def where(unique_keys):
 
 
 def _get_target_offsets(load_column, event_index):
-    if isinstance(load_column, dask_awkward.Array):
-        # TODO check event_index as well
+    if isinstance(load_column, dask_awkward.Array) and isinstance(
+        event_index, dask_awkward.Array
+    ):
+        # wrap in map_partitions if dask arrays
         return dask_awkward.map_partitions(
             _get_target_offsets, load_column, event_index
         )
@@ -75,10 +96,10 @@ def _get_target_offsets(load_column, event_index):
     offsets = load_column.layout.offsets.data
 
     if isinstance(event_index, Number):
-        # TODO i think this is not working yet in dask
         return offsets[event_index]
 
     # nescessary to stick it into the `NumpyArray` constructor
+    # if typetracer is passed through
     offsets = awkward.typetracer.length_zero_if_typetracer(
         load_column.layout.offsets.data
     )
@@ -93,29 +114,7 @@ def descend(layout, depth, **kwargs):
 def _get_global_index(target, eventindex, index):
     load_column = target[target.fields[0]]
     target_offsets = _get_target_offsets(load_column, eventindex)
-    return target_offsets + index  # here i get
-
-
-# def _concrete_get_global_index(target, eventindex, index):
-#     load_column = target[
-#         target.fields[0]
-#     ]
-#     target_offsets = _get_target_offsets(load_column.layout.offsets, eventindex)
-#     return target_offsets + index
-
-# def _dask_get_global_index(target, eventindex, index):
-#     return dask_awkward.map_partitions(
-#         _concrete_get_global_index,
-#         target,
-#         eventindex,
-#         index,
-#     )
-
-# def _get_global_index(target, eventindex, index):
-#     # check target, eventindex, index all dak
-#     if isinstance(target, dask_awkward.Array):
-#         return _dask_get_global_index(target, eventindex, index)
-#     return _concrete_get_global_index(target, eventindex, index)
+    return target_offsets + index
 
 
 @awkward.mixin_class(behavior)
@@ -175,12 +174,12 @@ class Muon(Particle):
     """
 
     @property
-    def trackParticle(self):
-        return _element_link(
-            self._events().CombinedMuonTrackParticles,
-            self._eventindex,
-            self["combinedTrackParticleLink.m_persIndex"],
-            self["combinedTrackParticleLink.m_persKey"],
+    def trackParticle(self, _dask_array_=None):
+        return _element_link_method(
+            self,
+            "combinedTrackParticleLink",
+            "CombinedMuonTrackParticles",
+            _dask_array_,
         )
 
 
@@ -195,23 +194,8 @@ class Electron(Particle):
 
     @property
     def trackParticles(self, _dask_array_=None):
-        if _dask_array_ is not None:
-            target = _dask_array_.behavior["__original_array__"]().GSFTrackParticles
-            links = _dask_array_.trackParticleLinks
-            return _element_link(
-                target,
-                _dask_array_._eventindex,
-                links.m_persIndex,
-                links.m_persKey,
-            )
-
-        links = self.trackParticleLinks
-
-        return _element_link(
-            self._events().GSFTrackParticles,
-            self._eventindex,
-            links.m_persIndex,
-            links.m_persKey,
+        return _element_link_method(
+            self, "trackParticleLinks", "GSFTrackParticles", _dask_array_
         )
 
     @property
@@ -223,6 +207,12 @@ def trackParticle(self, _dask_array_=None):
             tuple([slice(None) for i in range(trackParticles.ndim - 1)] + [0])
         ]
 
+    @property
+    def caloClusters(self, _dask_array_=None):
+        return _element_link_method(
+            self, "caloClusterLinks", "CaloCalTopoClusters", _dask_array_
+        )
+
 
 _set_repr_name("Electron")
 
diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py
index 1b9b89205..6a6aa8659 100644
--- a/src/coffea/nanoevents/schemas/physlite.py
+++ b/src/coffea/nanoevents/schemas/physlite.py
@@ -53,6 +53,7 @@ class PHYSLITESchema(BaseSchema):
         "GSFTrackParticles": "TrackParticle",
         "InDetTrackParticles": "TrackParticle",
         "MuonSpectrometerTrackParticles": "TrackParticle",
+        "CaloCalTopoClusters": "NanoCollection",
     }
     """Default configuration for mixin types, based on the collection name.
 

From f4d66682cec0e3b62a3d7ec96a2b7ced42fa5c76 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Fri, 28 Jul 2023 18:46:56 -0400
Subject: [PATCH 12/75] comment about multiple elementlinks

---
 src/coffea/nanoevents/methods/physlite.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py
index b6badb9d9..979fff0a0 100644
--- a/src/coffea/nanoevents/methods/physlite.py
+++ b/src/coffea/nanoevents/methods/physlite.py
@@ -59,6 +59,10 @@ def _element_link_method(self, link_name, target_name, _dask_array_):
 
 
 def _element_link_multiple(events, obj, link_field, with_name=None):
+    # currently not working in dask because:
+    # - we don't know the resulting type beforehand
+    # - also not the targets, so no way to find out which columns to load?
+    # - could consider to treat the case of truth collections by just loading all truth columns
     link = obj[link_field]
     key = link.m_persKey
     index = link.m_persIndex

From 4da309f4d745ccbefdcd913b755ee47fe74c8b82 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Fri, 28 Jul 2023 19:34:30 -0400
Subject: [PATCH 13/75] cleanup tests and add test for single field of linked
 collection

---
 tests/test_nanoevents_physlite.py | 93 ++++++-------------------------
 1 file changed, 18 insertions(+), 75 deletions(-)

diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py
index 6354f0376..6b464cdb8 100644
--- a/tests/test_nanoevents_physlite.py
+++ b/tests/test_nanoevents_physlite.py
@@ -5,14 +5,6 @@
 
 from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema
 
-from coffea.nanoevents.methods.physlite import _get_global_index, _element_link
-
-import dask
-import dask_awkward as dak
-dask.config.set({"awkward.optimization.enabled": False, "awkward.raise-failed-meta": True, "awkward.optimization.on-fail": "raise"})
-
-pytestmark = pytest.mark.skip(reason="uproot is upset with this file...")
-
 
 def _events():
     path = os.path.abspath("tests/samples/DAOD_PHYSLITE_21.2.108.0.art.pool.root")
@@ -20,78 +12,29 @@ def _events():
         {path: "CollectionTree"},
         schemaclass=PHYSLITESchema,
         permit_dask=True,
-        #permit_dask=False,
     )
     return factory.events()
 
-events = _events()
-
-gi = _get_global_index(
-    events.GSFTrackParticles,
-    events.Electrons._eventindex,
-    events.Electrons.trackParticleLinks.m_persIndex
-)
-
-el = _element_link(
-    events.GSFTrackParticles,
-    events.Electrons._eventindex,
-    events.Electrons.trackParticleLinks.m_persIndex,
-    events.Electrons.trackParticleLinks.m_persKey
-)
-
-# @pytest.fixture(scope="module")
-# def events():
-#     return _events()
-
-
-# @pytest.mark.parametrize("do_slice", [False, True])
-# def test_electron_track_links(events, do_slice):
-#     if do_slice:
-#         events = events[np.random.randint(2, size=len(events)).astype(bool)]
-#     for event in events:
-#         for electron in event.Electrons:
-#             for link_index, link in enumerate(electron.trackParticleLinks):
-#                 track_index = link.m_persIndex
-#                 print(track_index)
-#                 print(event.GSFTrackParticles)
-#                 print(electron.trackParticleLinks)
-#                 print(electron.trackParticles)
-
-#                 assert (
-#                     event.GSFTrackParticles[track_index].z0
-#                     == electron.trackParticles[link_index].z0
-#                 )
-
 
-# # from MetaData/EventFormat
-# _hash_to_target_name = {
-#     13267281: "TruthPhotons",
-#     342174277: "TruthMuons",
-#     368360608: "TruthNeutrinos",
-#     375408000: "TruthTaus",
-#     394100163: "TruthElectrons",
-#     614719239: "TruthBoson",
-#     660928181: "TruthTop",
-#     779635413: "TruthBottom",
-# }
+@pytest.fixture(scope="module")
+def events():
+    return _events()
 
 
-# def test_truth_links_toplevel(events):
-#     children_px = events.TruthBoson.children.px
-#     for i_event, event in enumerate(events):
-#         for i_particle, particle in enumerate(event.TruthBoson):
-#             for i_link, link in enumerate(particle.childLinks):
-#                 assert (
-#                     event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px
-#                     == children_px[i_event][i_particle][i_link]
-#                 )
+def test_load_single_field_of_linked(events):
+    events.Electrons.caloClusters.calE.compute()
 
 
-# def test_truth_links(events):
-#     for i_event, event in enumerate(events):
-#         for i_particle, particle in enumerate(event.TruthBoson):
-#             for i_link, link in enumerate(particle.childLinks):
-#                 assert (
-#                     event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px
-#                     == particle.children[i_link].px
-#                 )
+@pytest.mark.parametrize("do_slice", [False, True])
+def test_electron_track_links(events, do_slice):
+    if do_slice:
+        events = events[::2]
+    trackParticles = events.Electrons.trackParticles.compute()
+    for i, event in enumerate(events[["Electrons", "GSFTrackParticles"]].compute()):
+        for j, electron in enumerate(event.Electrons):
+            for link_index, link in enumerate(electron.trackParticleLinks):
+                track_index = link.m_persIndex
+                assert (
+                    event.GSFTrackParticles[track_index].z0
+                    == trackParticles[i][j][link_index].z0
+                )

From dbccef044c9e9ca3e98ba8dce5795206c2c98f9b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 28 Jul 2023 23:59:34 +0000
Subject: [PATCH 14/75] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/coffea/nanoevents/methods/physlite.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py
index 979fff0a0..90a449004 100644
--- a/src/coffea/nanoevents/methods/physlite.py
+++ b/src/coffea/nanoevents/methods/physlite.py
@@ -2,8 +2,8 @@
 from numbers import Number
 
 import awkward
-import numpy
 import dask_awkward
+import numpy
 
 from coffea.nanoevents.methods import base, vector
 

From dbfadd8cb9841786c10b43e6f08b5663b690ddbe Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Fri, 28 Jul 2023 20:04:33 -0400
Subject: [PATCH 15/75] pylint

---
 src/coffea/nanoevents/methods/physlite.py | 2 +-
 tests/test_nanoevents_physlite.py         | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py
index 90a449004..5b1cbe50b 100644
--- a/src/coffea/nanoevents/methods/physlite.py
+++ b/src/coffea/nanoevents/methods/physlite.py
@@ -102,7 +102,7 @@ def _get_target_offsets(load_column, event_index):
     if isinstance(event_index, Number):
         return offsets[event_index]
 
-    # nescessary to stick it into the `NumpyArray` constructor
+    # necessary to stick it into the `NumpyArray` constructor
     # if typetracer is passed through
     offsets = awkward.typetracer.length_zero_if_typetracer(
         load_column.layout.offsets.data
diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py
index 6b464cdb8..55293a164 100644
--- a/tests/test_nanoevents_physlite.py
+++ b/tests/test_nanoevents_physlite.py
@@ -1,6 +1,5 @@
 import os
 
-import numpy as np
 import pytest
 
 from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema

From ab5164bd9c0689a59678c87ff8024c077a518e4e Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Tue, 22 Aug 2023 14:25:51 +0200
Subject: [PATCH 16/75] flat calling structure for trackParticle(s) behavior
 methods

---
 src/coffea/nanoevents/methods/physlite.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py
index 5b1cbe50b..72ca50165 100644
--- a/src/coffea/nanoevents/methods/physlite.py
+++ b/src/coffea/nanoevents/methods/physlite.py
@@ -204,12 +204,12 @@ def trackParticles(self, _dask_array_=None):
 
     @property
     def trackParticle(self, _dask_array_=None):
-        if _dask_array_ is not None:
-            self = _dask_array_  # TODO: is this what i should be doing?
-        trackParticles = self.trackParticles
-        return self.trackParticles[
-            tuple([slice(None) for i in range(trackParticles.ndim - 1)] + [0])
-        ]
+        trackParticles = _element_link_method(
+            self, "trackParticleLinks", "GSFTrackParticles", _dask_array_
+        )
+        # Ellipsis (..., 0) slicing not supported yet by dask_awkward
+        slicer = tuple([slice(None) for i in range(trackParticles.ndim - 1)] + [0])
+        return trackParticles[slicer]
 
     @property
     def caloClusters(self, _dask_array_=None):

From 8ec38cfda7c374568ab06025c423aef0272caa9d Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Wed, 30 Aug 2023 15:32:00 +0200
Subject: [PATCH 17/75] fix column touching for _get_target_offsets

---
 src/coffea/nanoevents/methods/physlite.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py
index 72ca50165..eefff7eb1 100644
--- a/src/coffea/nanoevents/methods/physlite.py
+++ b/src/coffea/nanoevents/methods/physlite.py
@@ -102,6 +102,11 @@ def _get_target_offsets(load_column, event_index):
     if isinstance(event_index, Number):
         return offsets[event_index]
 
+    # let the necessary column optimization know that we need to load this
+    # column to get the offsets
+    if awkward.backend(load_column) == "typetracer":
+        awkward.typetracer.touch_data(load_column)
+
     # necessary to stick it into the `NumpyArray` constructor
     # if typetracer is passed through
     offsets = awkward.typetracer.length_zero_if_typetracer(

From e6127d5e54ef12516760e51d1e4a229923bed2c2 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Thu, 31 Aug 2023 15:24:29 +0200
Subject: [PATCH 18/75] make test actually fail

---
 tests/test_nanoevents_physlite.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py
index 55293a164..95f58491d 100644
--- a/tests/test_nanoevents_physlite.py
+++ b/tests/test_nanoevents_physlite.py
@@ -1,5 +1,6 @@
 import os
 
+import dask
 import pytest
 
 from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema
@@ -21,7 +22,8 @@ def events():
 
 
 def test_load_single_field_of_linked(events):
-    events.Electrons.caloClusters.calE.compute()
+    with dask.config.set({"awkward.raise-failed-meta": True}):
+        events.Electrons.caloClusters.calE.compute()
 
 
 @pytest.mark.parametrize("do_slice", [False, True])

From c4385b14e41b5df770fe9bb1ca7bac51b6aa4a85 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Thu, 31 Aug 2023 15:27:17 +0200
Subject: [PATCH 19/75] use layout._touch_data since public touch_data not yet
 available in ak 2.3.3

---
 src/coffea/nanoevents/methods/physlite.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py
index eefff7eb1..f8147082b 100644
--- a/src/coffea/nanoevents/methods/physlite.py
+++ b/src/coffea/nanoevents/methods/physlite.py
@@ -105,7 +105,8 @@ def _get_target_offsets(load_column, event_index):
     # let the necessary column optimization know that we need to load this
     # column to get the offsets
     if awkward.backend(load_column) == "typetracer":
-        awkward.typetracer.touch_data(load_column)
+        # awkward.typetracer.touch_data(load_column) # available in awkward > 2.3.3
+        load_column.layout._touch_data(recursive=True)
 
     # necessary to stick it into the `NumpyArray` constructor
     # if typetracer is passed through

From e2dd3f03534e7cb166cb146ad2511a5906b2c289 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Thu, 31 Aug 2023 15:29:05 +0200
Subject: [PATCH 20/75] try to avoid loading double-jagged columns for getting
 offsets in elementlink calculation

---
 src/coffea/nanoevents/methods/physlite.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py
index f8147082b..7b9f98580 100644
--- a/src/coffea/nanoevents/methods/physlite.py
+++ b/src/coffea/nanoevents/methods/physlite.py
@@ -122,7 +122,12 @@ def descend(layout, depth, **kwargs):
 
 
 def _get_global_index(target, eventindex, index):
-    load_column = target[target.fields[0]]
+    for field in target.fields:
+        # fetch first column to get offsets from
+        # (but try to avoid the double-jagged ones if possible)
+        load_column = target[field]
+        if load_column.ndim < 3:
+            break
     target_offsets = _get_target_offsets(load_column, eventindex)
     return target_offsets + index
 

From e72d3731db814e24d56b3e3f3d37b3626dac75a7 Mon Sep 17 00:00:00 2001
From: iasonkrom <iason.krom@gmail.com>
Date: Thu, 31 Aug 2023 09:56:50 -0500
Subject: [PATCH 21/75] add uproot_options to uproot.dask in factory

---
 src/coffea/nanoevents/factory.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index 38e06d601..fb9a6c4b5 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -232,7 +232,7 @@ def from_root(
         treepath="/Events",
         entry_start=None,
         entry_stop=None,
-        chunks_per_file=1,
+        chunks_per_file=None,
         runtime_cache=None,
         persistent_cache=None,
         schemaclass=NanoAODSchema,
@@ -268,7 +268,7 @@ def from_root(
             metadata : dict, optional
                 Arbitrary metadata to add to the `base.NanoEvents` object
             uproot_options : dict, optional
-                Any options to pass to ``uproot.open``
+                Any options to pass to ``uproot.open`` or ``uproot.dask``
             access_log : list, optional
                 Pass a list instance to record which branches were lazily accessed by this instance
             use_ak_forth:
@@ -326,6 +326,17 @@ def from_root(
                     ak_add_doc=True,
                     filter_branch=_remove_not_interpretable,
                     steps_per_file=chunks_per_file,
+                    **uproot_options,
+                )
+            elif chunks_per_file is None:
+                opener = partial(
+                    uproot.dask,
+                    file,
+                    full_paths=True,
+                    open_files=False,
+                    ak_add_doc=True,
+                    filter_branch=_remove_not_interpretable,
+                    **uproot_options,
                 )
             else:
                 opener = partial(
@@ -336,6 +347,7 @@ def from_root(
                     ak_add_doc=True,
                     filter_branch=_remove_not_interpretable,
                     steps_per_file=chunks_per_file,
+                    **uproot_options,
                 )
             return cls(map_schema, opener, None, cache=None, is_dask=True)
         elif permit_dask and not schemaclass.__dask_capable__:

From f491a36c04e1fa2c3b63d14468345465d398afa6 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Thu, 31 Aug 2023 17:20:00 +0200
Subject: [PATCH 22/75] allow for collections that contain non-list fields

---
 src/coffea/nanoevents/schemas/physlite.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py
index 1b9b89205..3b6508e2e 100644
--- a/src/coffea/nanoevents/schemas/physlite.py
+++ b/src/coffea/nanoevents/schemas/physlite.py
@@ -118,14 +118,21 @@ def _build_collections(self, branch_forms):
                     to_zip,
                     objname,
                     self.mixins.get(objname, None),
-                    bypass=True,
-                )
-                content = contents[objname]["content"]
-                content["parameters"] = dict(
-                    content.get("parameters", {}), collection_name=objname
+                    bypass=False,
                 )
             except NotImplementedError:
                 warnings.warn(f"Can't zip collection {objname}")
+            if "content" in contents[objname]:
+                # in this case we were able to zip everything together to a ListOffsetArray(RecordArray)
+                assert "List" in contents[objname]["class"]
+                content = contents[objname]["content"]
+            else:
+                # in this case this was not possible (e.g. because we also had non-list fields)
+                assert contents[objname]["class"] == "RecordArray"
+                content = contents[objname]
+            content["parameters"] = dict(
+                content.get("parameters", {}), collection_name=objname
+            )
         return contents
 
     @staticmethod

From 79ae6d5d7e4a9eb018b5cf1efb57713ea3c96b34 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Thu, 31 Aug 2023 17:34:35 +0200
Subject: [PATCH 23/75] skip empty records

---
 src/coffea/nanoevents/schemas/physlite.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py
index 3b6508e2e..11446b7a2 100644
--- a/src/coffea/nanoevents/schemas/physlite.py
+++ b/src/coffea/nanoevents/schemas/physlite.py
@@ -79,6 +79,9 @@ def _build_collections(self, branch_forms):
             key_fields = key.split("/")[-1].split(".")
             top_key = key_fields[0]
             sub_key = ".".join(key_fields[1:])
+            if ak_form["class"] == "RecordArray" and not ak_form["fields"]:
+                # skip empty records (e.g. the branches ending in "." only containing the base class)
+                continue
             objname = top_key.replace("Analysis", "").replace("AuxDyn", "")
 
             zip_groups[objname].append(((key, sub_key), ak_form))

From 94b648fd0926ad6150813d9d4f0cd289bde4515d Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Thu, 31 Aug 2023 17:38:25 +0200
Subject: [PATCH 24/75] don't zip branches that are not grouped with anything
 else (e.g. index_ref in newer PHYSLITE)

---
 src/coffea/nanoevents/schemas/physlite.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py
index 11446b7a2..52e3ac747 100644
--- a/src/coffea/nanoevents/schemas/physlite.py
+++ b/src/coffea/nanoevents/schemas/physlite.py
@@ -100,6 +100,10 @@ def _build_collections(self, branch_forms):
         # zip the forms
         contents = {}
         for objname, keys_and_form in zip_groups.items():
+            if len(keys_and_form) == 1:
+                # don't zip if there is only one item
+                contents[objname] = keys_and_form[0][1]
+                continue
             to_zip = {}
             for (key, sub_key), form in keys_and_form:
                 if "." in sub_key:

From 0334acd8feb208602b0f173d473921e155215cd4 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Thu, 31 Aug 2023 18:03:57 +0200
Subject: [PATCH 25/75] also remove Aux from branch names to zip them with
 AuxDyn and potentially non-aux branches

---
 src/coffea/nanoevents/schemas/physlite.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py
index 52e3ac747..c45240d6a 100644
--- a/src/coffea/nanoevents/schemas/physlite.py
+++ b/src/coffea/nanoevents/schemas/physlite.py
@@ -82,7 +82,9 @@ def _build_collections(self, branch_forms):
             if ak_form["class"] == "RecordArray" and not ak_form["fields"]:
                 # skip empty records (e.g. the branches ending in "." only containing the base class)
                 continue
-            objname = top_key.replace("Analysis", "").replace("AuxDyn", "")
+            objname = (
+                top_key.replace("Analysis", "").replace("AuxDyn", "").replace("Aux", "")
+            )
 
             zip_groups[objname].append(((key, sub_key), ak_form))
 

From 881e4e23c5d00eba203956c80c2f758444ee96d0 Mon Sep 17 00:00:00 2001
From: iasonkrom <iason.krom@gmail.com>
Date: Thu, 31 Aug 2023 11:29:55 -0500
Subject: [PATCH 26/75] leave chunks_per_file=1 for now

---
 src/coffea/nanoevents/factory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index fb9a6c4b5..f429f04bd 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -232,7 +232,7 @@ def from_root(
         treepath="/Events",
         entry_start=None,
         entry_stop=None,
-        chunks_per_file=None,
+        chunks_per_file=1,
         runtime_cache=None,
         persistent_cache=None,
         schemaclass=NanoAODSchema,

From 3fe091b12bc06b4a0cdc866ac0c4183dde422620 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 4 Sep 2023 14:41:39 +0000
Subject: [PATCH 27/75] Bump actions/checkout from 3 to 4

Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 .github/workflows/ci.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 9a387fcbc..628df33dd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -26,7 +26,7 @@ jobs:
     name: pre-commit
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - uses: actions/setup-python@v4
     - uses: pre-commit/action@v3.0.0
       with:
@@ -45,7 +45,7 @@ jobs:
     name: test coffea (${{ matrix.os }}) - python ${{ matrix.python-version }}, JDK${{ matrix.java-version }}
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v4
       with:
@@ -135,7 +135,7 @@ jobs:
     name: test coffea-workqueue
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Conda
       uses: conda-incubator/setup-miniconda@v2
       env:
@@ -185,7 +185,7 @@ jobs:
     name: deploy release
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v4
       with:

From 88176f22a853032c519c2b2ef0cb4460bd9549a1 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Wed, 6 Sep 2023 10:09:24 -0500
Subject: [PATCH 28/75] repin to awkward 2.4.1

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 454ed3319..87b29d513 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ classifiers = [
   "Topic :: Utilities",
 ]
 dependencies = [
-  "awkward>=2.3.3",
+  "awkward>=2.4.1",
   "uproot>=5.0.10",
   "dask[array]>=2023.4.0",
   "dask-awkward>=2023.7.1,!=2023.8.0",

From d20468aa5895444c89828815f6bf9b9db81df7f9 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Wed, 6 Sep 2023 14:59:14 -0500
Subject: [PATCH 29/75] repin to latest fixed awkward

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 454ed3319..f5b168c1e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ classifiers = [
   "Topic :: Utilities",
 ]
 dependencies = [
-  "awkward>=2.3.3",
+  "awkward>=2.4.2",
   "uproot>=5.0.10",
   "dask[array]>=2023.4.0",
   "dask-awkward>=2023.7.1,!=2023.8.0",

From 5dd6868d98534f960e04c973cba5d06a751534b7 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Wed, 6 Sep 2023 15:00:36 -0500
Subject: [PATCH 30/75] awkward 2.4.2 (just to not clobber main)

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 87b29d513..f5b168c1e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ classifiers = [
   "Topic :: Utilities",
 ]
 dependencies = [
-  "awkward>=2.4.1",
+  "awkward>=2.4.2",
   "uproot>=5.0.10",
   "dask[array]>=2023.4.0",
   "dask-awkward>=2023.7.1,!=2023.8.0",

From 5527a02e94aa2685be59d6f6ca334d56c62d1a77 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Wed, 6 Sep 2023 16:10:38 -0500
Subject: [PATCH 31/75] use uproot._util.unset as the default value of
 chunks_per_file to ensure correct behavior

---
 src/coffea/nanoevents/factory.py | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index f429f04bd..f1c8ebb64 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -232,7 +232,7 @@ def from_root(
         treepath="/Events",
         entry_start=None,
         entry_stop=None,
-        chunks_per_file=1,
+        chunks_per_file=uproot._util.unset,
         runtime_cache=None,
         persistent_cache=None,
         schemaclass=NanoAODSchema,
@@ -327,17 +327,7 @@ def from_root(
                     filter_branch=_remove_not_interpretable,
                     steps_per_file=chunks_per_file,
                     **uproot_options,
-                )
-            elif chunks_per_file is None:
-                opener = partial(
-                    uproot.dask,
-                    file,
-                    full_paths=True,
-                    open_files=False,
-                    ak_add_doc=True,
-                    filter_branch=_remove_not_interpretable,
-                    **uproot_options,
-                )
+                )           
             else:
                 opener = partial(
                     uproot.dask,

From 130a90342912311bfa2379c1aeaa4a064cb9817e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 6 Sep 2023 21:11:24 +0000
Subject: [PATCH 32/75] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/coffea/nanoevents/factory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index f1c8ebb64..58dd55ad5 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -327,7 +327,7 @@ def from_root(
                     filter_branch=_remove_not_interpretable,
                     steps_per_file=chunks_per_file,
                     **uproot_options,
-                )           
+                )
             else:
                 opener = partial(
                     uproot.dask,

From 6abc42c768b0e51392c5ca275876f1d38d45cfd6 Mon Sep 17 00:00:00 2001
From: Nikolai Hartmann <nikoladze@posteo.de>
Date: Thu, 7 Sep 2023 08:43:44 +0200
Subject: [PATCH 33/75] go back to using public touch_data since we have ak
 2.4.2 now

---
 src/coffea/nanoevents/methods/physlite.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py
index 7b9f98580..751c5d03f 100644
--- a/src/coffea/nanoevents/methods/physlite.py
+++ b/src/coffea/nanoevents/methods/physlite.py
@@ -105,8 +105,7 @@ def _get_target_offsets(load_column, event_index):
     # let the necessary column optimization know that we need to load this
     # column to get the offsets
     if awkward.backend(load_column) == "typetracer":
-        # awkward.typetracer.touch_data(load_column) # available in awkward > 2.3.3
-        load_column.layout._touch_data(recursive=True)
+        awkward.typetracer.touch_data(load_column)
 
     # necessary to stick it into the `NumpyArray` constructor
     # if typetracer is passed through

From 1aaaa63c03af7ded0881cf40a1e35512d81b7361 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 11 Sep 2023 14:29:31 +0000
Subject: [PATCH 34/75] Bump crazy-max/ghaction-github-pages from 3 to 4

Bumps [crazy-max/ghaction-github-pages](https://github.com/crazy-max/ghaction-github-pages) from 3 to 4.
- [Release notes](https://github.com/crazy-max/ghaction-github-pages/releases)
- [Commits](https://github.com/crazy-max/ghaction-github-pages/compare/v3...v4)

---
updated-dependencies:
- dependency-name: crazy-max/ghaction-github-pages
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 628df33dd..fe8453b8d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -119,7 +119,7 @@ jobs:
         touch build/html/.nojekyll
     - name: Deploy documentation
       if: github.event_name == 'push' && matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11
-      uses: crazy-max/ghaction-github-pages@v3
+      uses: crazy-max/ghaction-github-pages@v4
       with:
         target_branch: gh-pages
         build_dir: docs/build/html

From 7a64bb62777c30d50f04c50c3848e6c25111b3cf Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 12 Sep 2023 06:14:36 +0000
Subject: [PATCH 35/75] [pre-commit.ci] pre-commit autoupdate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/psf/black: 23.7.0 → 23.9.1](https://github.com/psf/black/compare/23.7.0...23.9.1)
- [github.com/asottile/pyupgrade: v3.9.0 → v3.10.1](https://github.com/asottile/pyupgrade/compare/v3.9.0...v3.10.1)
- [github.com/pycqa/flake8: 6.0.0 → 6.1.0](https://github.com/pycqa/flake8/compare/6.0.0...6.1.0)
---
 .pre-commit-config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a4d511b07..1b3695665 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -12,7 +12,7 @@ ci:
 
 repos:
 - repo: https://github.com/psf/black
-  rev: 23.7.0
+  rev: 23.9.1
   hooks:
   - id: black
 
@@ -37,7 +37,7 @@ repos:
   - id: trailing-whitespace
 
 - repo: https://github.com/asottile/pyupgrade
-  rev: v3.9.0
+  rev: v3.10.1
   hooks:
   - id: pyupgrade
     args: ["--py38-plus"]
@@ -48,7 +48,7 @@ repos:
   - id: setup-cfg-fmt
 
 - repo: https://github.com/pycqa/flake8
-  rev: 6.0.0
+  rev: 6.1.0
   hooks:
   - id: flake8
     exclude: coffea/processor/templates

From 13ebcfc530def1ab6b324cd0edffd5f207d3b60d Mon Sep 17 00:00:00 2001
From: iasonkrom <iason.krom@gmail.com>
Date: Fri, 15 Sep 2023 18:44:51 -0500
Subject: [PATCH 36/75] request dtype from np.arange and ak.zeros_like

---
 src/coffea/analysis_tools.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py
index 66b92fe2b..f8bbd21e2 100644
--- a/src/coffea/analysis_tools.py
+++ b/src/coffea/analysis_tools.py
@@ -610,13 +610,13 @@ def yieldhist(self):
         labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"]
         if not self._delayed_mode:
             h = hist.Hist(hist.axis.Integer(0, len(labels), name="N-1"))
-            h.fill(numpy.arange(len(labels)), weight=self._nev)
+            h.fill(numpy.arange(len(labels), dtype=int), weight=self._nev)
 
         else:
             h = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="N-1"))
             for i, weight in enumerate(self._masks, 1):
                 h.fill(dask_awkward.full_like(weight, i, dtype=int), weight=weight)
-            h.fill(dask_awkward.zeros_like(weight))
+            h.fill(dask_awkward.zeros_like(weight, dtype=int))
 
         return h, labels
 
@@ -712,7 +712,7 @@ def plot_vars(
                     hist.axis.Integer(0, len(labels), name="N-1"),
                 )
                 arr = awkward.flatten(var)
-                h.fill(arr, awkward.zeros_like(arr))
+                h.fill(arr, awkward.zeros_like(arr, dtype=int))
                 for i, mask in enumerate(self.result().masks, 1):
                     arr = awkward.flatten(var[mask])
                     h.fill(arr, awkward.full_like(arr, i, dtype=int))
@@ -725,7 +725,7 @@ def plot_vars(
                     hist.axis.Integer(0, len(labels), name="N-1"),
                 )
                 arr = dask_awkward.flatten(var)
-                h.fill(arr, dask_awkward.zeros_like(arr))
+                h.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
                 for i, mask in enumerate(self.result().masks, 1):
                     arr = dask_awkward.flatten(var[mask])
                     h.fill(arr, dask_awkward.full_like(arr, i, dtype=int))
@@ -856,8 +856,8 @@ def yieldhist(self):
             honecut = hist.Hist(hist.axis.Integer(0, len(labels), name="onecut"))
             hcutflow = honecut.copy()
             hcutflow.axes.name = ("cutflow",)
-            honecut.fill(numpy.arange(len(labels)), weight=self._nevonecut)
-            hcutflow.fill(numpy.arange(len(labels)), weight=self._nevcutflow)
+            honecut.fill(numpy.arange(len(labels), dtype=int), weight=self._nevonecut)
+            hcutflow.fill(numpy.arange(len(labels), dtype=int), weight=self._nevcutflow)
 
         else:
             honecut = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="onecut"))
@@ -868,12 +868,12 @@ def yieldhist(self):
                 honecut.fill(
                     dask_awkward.full_like(weight, i, dtype=int), weight=weight
                 )
-            honecut.fill(dask_awkward.zeros_like(weight))
+            honecut.fill(dask_awkward.zeros_like(weight, dtype=int))
             for i, weight in enumerate(self._maskscutflow, 1):
                 hcutflow.fill(
                     dask_awkward.full_like(weight, i, dtype=int), weight=weight
                 )
-            hcutflow.fill(dask_awkward.zeros_like(weight))
+            hcutflow.fill(dask_awkward.zeros_like(weight, dtype=int))
 
         return honecut, hcutflow, labels
 
@@ -975,8 +975,8 @@ def plot_vars(
                 hcutflow.axes.name = name, "cutflow"
 
                 arr = awkward.flatten(var)
-                honecut.fill(arr, awkward.zeros_like(arr))
-                hcutflow.fill(arr, awkward.zeros_like(arr))
+                honecut.fill(arr, awkward.zeros_like(arr, dtype=int))
+                hcutflow.fill(arr, awkward.zeros_like(arr, dtype=int))
 
                 for i, mask in enumerate(self.result().masksonecut, 1):
                     arr = awkward.flatten(var[mask])
@@ -998,8 +998,8 @@ def plot_vars(
                 hcutflow.axes.name = name, "cutflow"
 
                 arr = dask_awkward.flatten(var)
-                honecut.fill(arr, dask_awkward.zeros_like(arr))
-                hcutflow.fill(arr, dask_awkward.zeros_like(arr))
+                honecut.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
+                hcutflow.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
 
                 for i, mask in enumerate(self.result().masksonecut, 1):
                     arr = dask_awkward.flatten(var[mask])

From a7434fe393418885cba80cfa2c57fb2dfcf8e223 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Fri, 15 Sep 2023 21:07:17 -0500
Subject: [PATCH 37/75] remove weirdly shadowed member variable from base
 schema

---
 src/coffea/nanoevents/schemas/base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/coffea/nanoevents/schemas/base.py b/src/coffea/nanoevents/schemas/base.py
index 09812eee0..8a1f2251e 100644
--- a/src/coffea/nanoevents/schemas/base.py
+++ b/src/coffea/nanoevents/schemas/base.py
@@ -105,7 +105,6 @@ class BaseSchema:
     """
 
     __dask_capable__ = True
-    behavior = {}
 
     def __init__(self, base_form, *args, **kwargs):
         params = dict(base_form.get("parameters", {}))

From a65a3dfea35df64a778ee1fb1387d3fd6d35260e Mon Sep 17 00:00:00 2001
From: iasonkrom <iason.krom@gmail.com>
Date: Sat, 16 Sep 2023 12:38:55 -0500
Subject: [PATCH 38/75] found a random typo along the way

---
 src/coffea/analysis_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py
index f8bbd21e2..e1176f95c 100644
--- a/src/coffea/analysis_tools.py
+++ b/src/coffea/analysis_tools.py
@@ -418,7 +418,7 @@ def variations(self):
 
 
 class NminusOneToNpz:
-    """Object to be returned by NmiusOne.to_npz()"""
+    """Object to be returned by NminusOne.to_npz()"""
 
     def __init__(self, file, labels, nev, masks, saver):
         self._file = file

From acdb1d829db5437f863caa144853676a6ea1d04e Mon Sep 17 00:00:00 2001
From: iasonkrom <iason.krom@gmail.com>
Date: Sat, 16 Sep 2023 14:07:20 -0500
Subject: [PATCH 39/75] prettier print statements and dask.compute reduction

---
 src/coffea/analysis_tools.py | 61 ++++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 23 deletions(-)

diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py
index e1176f95c..6b2ebc77e 100644
--- a/src/coffea/analysis_tools.py
+++ b/src/coffea/analysis_tools.py
@@ -494,11 +494,17 @@ def maskscutflow(self):
         return self._maskscutflow
 
     def compute(self):
-        self._nevonecut = list(dask.compute(*self._nevonecut))
-        self._nevcutflow = list(dask.compute(*self._nevcutflow))
-        self._masksonecut = list(dask.compute(*self._masksonecut))
-        self._maskscutflow = list(dask.compute(*self._maskscutflow))
-        numpy.savez(
+        self._nevonecut, self._nevcutflow = dask.compute(
+            self._nevonecut, self._nevcutflow
+        )
+        self._masksonecut, self._maskscutflow = dask.compute(
+            self._masksonecut, self._maskscutflow
+        )
+        self._nevonecut = list(self._nevonecut)
+        self._nevcutflow = list(self._nevcutflow)
+        self._masksonecut = list(self._masksonecut)
+        self._maskscutflow = list(self._maskscutflow)
+        self._saver(
             self._file,
             labels=self._labels,
             nevonecut=self._nevonecut,
@@ -581,21 +587,25 @@ def print(self):
 
         if self._delayed_mode:
             self._nev = list(dask.compute(*self._nev))
+
         nev = self._nev
         print("N-1 selection stats:")
         for i, name in enumerate(self._names):
-            print(
-                f"Ignoring {name:<20}: pass = {nev[i+1]:<20}\
-                all = {nev[0]:<20}\
-                -- eff = {nev[i+1]*100/nev[0]:.1f} %"
+            stats = (
+                f"Ignoring {name:<20}"
+                f"pass = {nev[i+1]:<20}"
+                f"all = {nev[0]:<20}"
+                f"-- eff = {nev[i+1]*100/nev[0]:.1f} %"
             )
+            print(stats)
 
-        if True:
-            print(
-                f"All cuts {'':<20}: pass = {nev[-1]:<20}\
-                all = {nev[0]:<20}\
-                -- eff = {nev[-1]*100/nev[0]:.1f} %"
-            )
+        stats_all = (
+            f"All cuts {'':<20}"
+            f"pass = {nev[-1]:<20}"
+            f"all = {nev[0]:<20}"
+            f"-- eff = {nev[-1]*100/nev[0]:.1f} %"
+        )
+        print(stats_all)
 
     def yieldhist(self):
         """Returns the N-1 selection yields as a ``hist.Hist`` object
@@ -824,19 +834,24 @@ def print(self):
         """Prints the statistics of the Cutflow"""
 
         if self._delayed_mode:
-            self._nevonecut = list(dask.compute(*self._nevonecut))
-            self._nevcutflow = list(dask.compute(*self._nevcutflow))
+            self._nevonecut, self._nevcutflow = dask.compute(
+                self._nevonecut, self._nevcutflow
+            )
+
         nevonecut = self._nevonecut
         nevcutflow = self._nevcutflow
+
         print("Cutflow stats:")
         for i, name in enumerate(self._names):
-            print(
-                f"Cut {name:<20}: pass = {nevonecut[i+1]:<20}\
-                cumulative pass = {nevcutflow[i+1]:<20}\
-                all = {nevonecut[0]:<20}\
-                --  eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %\
-                -- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %"
+            stats = (
+                f"Cut {name:<20}:"
+                f"pass = {nevonecut[i+1]:<20}"
+                f"cumulative pass = {nevcutflow[i+1]:<20}"
+                f"all = {nevonecut[0]:<20}"
+                f"-- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %{'':<20}"
+                f"-- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %"
             )
+            print(stats)
 
     def yieldhist(self):
         """Returns the cutflow yields as ``hist.Hist`` objects

From 2fee783b79cee6c51ee1f89c244b1b272d1f6356 Mon Sep 17 00:00:00 2001
From: iasonkrom <iason.krom@gmail.com>
Date: Sat, 16 Sep 2023 15:58:36 -0500
Subject: [PATCH 40/75] make the default to be compute=False for to_npz()

---
 src/coffea/analysis_tools.py |  8 ++++----
 tests/test_analysis_tools.py | 24 ++++++++++++------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py
index 6b2ebc77e..14fd170f3 100644
--- a/src/coffea/analysis_tools.py
+++ b/src/coffea/analysis_tools.py
@@ -544,7 +544,7 @@ def result(self):
         labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"]
         return NminusOneResult(labels, self._nev, self._masks)
 
-    def to_npz(self, file, compressed=False, compute=True):
+    def to_npz(self, file, compressed=False, compute=False):
         """Saves the results of the N-1 selection to a .npz file
 
         Parameters
@@ -560,7 +560,7 @@ def to_npz(self, file, compressed=False, compute=True):
             compute : bool, optional
                 Whether to immediately start writing or to return an object
                 that the user can choose when to start writing by calling compute().
-                Default is True.
+                Default is False.
 
         Returns
         -------
@@ -790,7 +790,7 @@ def result(self):
             self._maskscutflow,
         )
 
-    def to_npz(self, file, compressed=False, compute=True):
+    def to_npz(self, file, compressed=False, compute=False):
         """Saves the results of the cutflow to a .npz file
 
         Parameters
@@ -806,7 +806,7 @@ def to_npz(self, file, compressed=False, compute=True):
             compute : bool, optional
                 Whether to immediately start writing or to return an object
                 that the user can choose when to start writing by calling compute().
-                Default is True.
+                Default is False.
 
         Returns
         -------
diff --git a/tests/test_analysis_tools.py b/tests/test_analysis_tools.py
index 1e8c46ec1..bb3221432 100644
--- a/tests/test_analysis_tools.py
+++ b/tests/test_analysis_tools.py
@@ -513,14 +513,14 @@ def test_packed_selection_nminusone():
     ):
         assert np.all(mask == truth)
 
-    nminusone.to_npz("nminusone.npz", compressed=False)
+    nminusone.to_npz("nminusone.npz", compressed=False).compute()
     with np.load("nminusone.npz") as file:
         assert np.all(file["labels"] == labels)
         assert np.all(file["nev"] == nev)
         assert np.all(file["masks"] == masks)
     os.remove("nminusone.npz")
 
-    nminusone.to_npz("nminusone.npz", compressed=True)
+    nminusone.to_npz("nminusone.npz", compressed=True).compute()
     with np.load("nminusone.npz") as file:
         assert np.all(file["labels"] == labels)
         assert np.all(file["nev"] == nev)
@@ -619,7 +619,7 @@ def test_packed_selection_cutflow():
     ):
         assert np.all(mask == truth)
 
-    cutflow.to_npz("cutflow.npz", compressed=False)
+    cutflow.to_npz("cutflow.npz", compressed=False).compute()
     with np.load("cutflow.npz") as file:
         assert np.all(file["labels"] == labels)
         assert np.all(file["nevonecut"] == nevonecut)
@@ -628,7 +628,7 @@ def test_packed_selection_cutflow():
         assert np.all(file["maskscutflow"] == maskscutflow)
     os.remove("cutflow.npz")
 
-    cutflow.to_npz("cutflow.npz", compressed=True)
+    cutflow.to_npz("cutflow.npz", compressed=True).compute()
     with np.load("cutflow.npz") as file:
         assert np.all(file["labels"] == labels)
         assert np.all(file["nevonecut"] == nevonecut)
@@ -854,14 +854,14 @@ def test_packed_selection_nminusone_dak(optimization_enabled):
         ):
             assert np.all(mask.compute() == truth.compute())
 
-        nminusone.to_npz("nminusone.npz", compressed=False)
+        nminusone.to_npz("nminusone.npz", compressed=False).compute()
         with np.load("nminusone.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nev"] == list(dask.compute(*nev)))
             assert np.all(file["masks"] == list(dask.compute(*masks)))
         os.remove("nminusone.npz")
 
-        nminusone.to_npz("nminusone.npz", compressed=True)
+        nminusone.to_npz("nminusone.npz", compressed=True).compute()
         with np.load("nminusone.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nev"] == list(dask.compute(*nev)))
@@ -978,7 +978,7 @@ def test_packed_selection_cutflow_dak(optimization_enabled):
         ):
             assert np.all(mask.compute() == truth.compute())
 
-        cutflow.to_npz("cutflow.npz", compressed=False)
+        cutflow.to_npz("cutflow.npz", compressed=False).compute()
         with np.load("cutflow.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut)))
@@ -987,7 +987,7 @@ def test_packed_selection_cutflow_dak(optimization_enabled):
             assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow)))
         os.remove("cutflow.npz")
 
-        cutflow.to_npz("cutflow.npz", compressed=True)
+        cutflow.to_npz("cutflow.npz", compressed=True).compute()
         with np.load("cutflow.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut)))
@@ -1109,14 +1109,14 @@ def test_packed_selection_nminusone_dak_uproot_only(optimization_enabled):
         ):
             assert np.all(mask.compute() == truth.compute())
 
-        nminusone.to_npz("nminusone.npz", compressed=False)
+        nminusone.to_npz("nminusone.npz", compressed=False).compute()
         with np.load("nminusone.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nev"] == list(dask.compute(*nev)))
             assert np.all(file["masks"] == list(dask.compute(*masks)))
         os.remove("nminusone.npz")
 
-        nminusone.to_npz("nminusone.npz", compressed=True)
+        nminusone.to_npz("nminusone.npz", compressed=True).compute()
         with np.load("nminusone.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nev"] == list(dask.compute(*nev)))
@@ -1233,7 +1233,7 @@ def test_packed_selection_cutflow_dak_uproot_only(optimization_enabled):
         ):
             assert np.all(mask.compute() == truth.compute())
 
-        cutflow.to_npz("cutflow.npz", compressed=False)
+        cutflow.to_npz("cutflow.npz", compressed=False).compute()
         with np.load("cutflow.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut)))
@@ -1242,7 +1242,7 @@ def test_packed_selection_cutflow_dak_uproot_only(optimization_enabled):
             assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow)))
         os.remove("cutflow.npz")
 
-        cutflow.to_npz("cutflow.npz", compressed=True)
+        cutflow.to_npz("cutflow.npz", compressed=True).compute()
         with np.load("cutflow.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut)))

From 3bdff8c3d33e73b7fcb57edad830605b0eb27fae Mon Sep 17 00:00:00 2001
From: iasonkrom <iason.krom@gmail.com>
Date: Sat, 16 Sep 2023 20:21:51 -0500
Subject: [PATCH 41/75] warn in print() when user is about to compute dask
 stuff

---
 src/coffea/analysis_tools.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py
index 14fd170f3..a68124d87 100644
--- a/src/coffea/analysis_tools.py
+++ b/src/coffea/analysis_tools.py
@@ -582,10 +582,16 @@ def to_npz(self, file, compressed=False, compute=False):
         else:
             return out
 
-    def print(self):
+    def print(self, compute=False):
         """Prints the statistics of the N-1 selection"""
 
-        if self._delayed_mode:
+        if self._delayed_mode and not compute:
+            warnings.warn(
+                "This will compute dask_awkward arrays. If you really want to do this now, call print(compute=True)"
+            )
+            return
+
+        if self._delayed_mode and compute:
             self._nev = list(dask.compute(*self._nev))
 
         nev = self._nev
@@ -830,10 +836,16 @@ def to_npz(self, file, compressed=False, compute=False):
         else:
             return out
 
-    def print(self):
+    def print(self, compute=False):
         """Prints the statistics of the Cutflow"""
 
-        if self._delayed_mode:
+        if self._delayed_mode and not compute:
+            warnings.warn(
+                "This will compute dask_awkward arrays. If you really want to do this now, call print(compute=True)"
+            )
+            return
+
+        if self._delayed_mode and compute:
             self._nevonecut, self._nevcutflow = dask.compute(
                 self._nevonecut, self._nevcutflow
             )

From 8e6bb10d8917d59293fd0f613feba05edeb9b8a6 Mon Sep 17 00:00:00 2001
From: iasonkrom <iason.krom@gmail.com>
Date: Mon, 18 Sep 2023 09:02:25 -0500
Subject: [PATCH 42/75] Revert "warn in print() when user is about to compute
 dask stuff"

This reverts commit 3bdff8c3d33e73b7fcb57edad830605b0eb27fae.
---
 src/coffea/analysis_tools.py | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py
index a68124d87..14fd170f3 100644
--- a/src/coffea/analysis_tools.py
+++ b/src/coffea/analysis_tools.py
@@ -582,16 +582,10 @@ def to_npz(self, file, compressed=False, compute=False):
         else:
             return out
 
-    def print(self, compute=False):
+    def print(self):
         """Prints the statistics of the N-1 selection"""
 
-        if self._delayed_mode and not compute:
-            warnings.warn(
-                "This will compute dask_awkward arrays. If you really want to do this now, call print(compute=True)"
-            )
-            return
-
-        if self._delayed_mode and compute:
+        if self._delayed_mode:
             self._nev = list(dask.compute(*self._nev))
 
         nev = self._nev
@@ -836,16 +830,10 @@ def to_npz(self, file, compressed=False, compute=False):
         else:
             return out
 
-    def print(self, compute=False):
+    def print(self):
         """Prints the statistics of the Cutflow"""
 
-        if self._delayed_mode and not compute:
-            warnings.warn(
-                "This will compute dask_awkward arrays. If you really want to do this now, call print(compute=True)"
-            )
-            return
-
-        if self._delayed_mode and compute:
+        if self._delayed_mode:
             self._nevonecut, self._nevcutflow = dask.compute(
                 self._nevonecut, self._nevcutflow
             )

From 522f38b036507b0b79d57cfacedb41a0822a7e5c Mon Sep 17 00:00:00 2001
From: iasonkrom <iason.krom@gmail.com>
Date: Mon, 18 Sep 2023 09:07:50 -0500
Subject: [PATCH 43/75] only warn and not add compute argument in print

---
 src/coffea/analysis_tools.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py
index 14fd170f3..facf14e97 100644
--- a/src/coffea/analysis_tools.py
+++ b/src/coffea/analysis_tools.py
@@ -586,6 +586,9 @@ def print(self):
         """Prints the statistics of the N-1 selection"""
 
         if self._delayed_mode:
+            warnings.warn(
+                "Printing the N-1 selection statistics is going to compute dask_awkward objects."
+            )
             self._nev = list(dask.compute(*self._nev))
 
         nev = self._nev
@@ -834,6 +837,9 @@ def print(self):
         """Prints the statistics of the Cutflow"""
 
         if self._delayed_mode:
+            warnings.warn(
+                "Printing the cutflow statistics is going to compute dask_awkward objects."
+            )
             self._nevonecut, self._nevcutflow = dask.compute(
                 self._nevonecut, self._nevcutflow
             )

From 9feea2b3ac5c2d5b14f5cfb52aa94ce86b26db2e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Sep 2023 05:39:03 +0000
Subject: [PATCH 44/75] [pre-commit.ci] pre-commit autoupdate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/asottile/pyupgrade: v3.10.1 → v3.11.0](https://github.com/asottile/pyupgrade/compare/v3.10.1...v3.11.0)
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1b3695665..d535cd79e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -37,7 +37,7 @@ repos:
   - id: trailing-whitespace
 
 - repo: https://github.com/asottile/pyupgrade
-  rev: v3.10.1
+  rev: v3.11.0
   hooks:
   - id: pyupgrade
     args: ["--py38-plus"]

From dbf0b641b60cdb485dc6672be00419e5824d9f2f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 25 Sep 2023 14:53:14 +0000
Subject: [PATCH 45/75] Bump amannn/action-semantic-pull-request from 5.2.0 to
 5.3.0

Bumps [amannn/action-semantic-pull-request](https://github.com/amannn/action-semantic-pull-request) from 5.2.0 to 5.3.0.
- [Release notes](https://github.com/amannn/action-semantic-pull-request/releases)
- [Changelog](https://github.com/amannn/action-semantic-pull-request/blob/main/CHANGELOG.md)
- [Commits](https://github.com/amannn/action-semantic-pull-request/compare/v5.2.0...v5.3.0)

---
updated-dependencies:
- dependency-name: amannn/action-semantic-pull-request
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 .github/workflows/pr.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index 99d3f9e26..44dbedb0b 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -17,6 +17,6 @@ jobs:
     name: Validate PR title
     runs-on: ubuntu-latest
     steps:
-      - uses: amannn/action-semantic-pull-request@v5.2.0
+      - uses: amannn/action-semantic-pull-request@v5.3.0
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

From 67d29034485b990665f1c82eeca0bd5b0640f673 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 3 Oct 2023 07:10:05 +0000
Subject: [PATCH 46/75] [pre-commit.ci] pre-commit autoupdate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/asottile/pyupgrade: v3.11.0 → v3.14.0](https://github.com/asottile/pyupgrade/compare/v3.11.0...v3.14.0)
- [github.com/asottile/setup-cfg-fmt: v2.4.0 → v2.5.0](https://github.com/asottile/setup-cfg-fmt/compare/v2.4.0...v2.5.0)
- [github.com/codespell-project/codespell: v2.2.5 → v2.2.6](https://github.com/codespell-project/codespell/compare/v2.2.5...v2.2.6)
---
 .pre-commit-config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d535cd79e..8060d85d9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -37,13 +37,13 @@ repos:
   - id: trailing-whitespace
 
 - repo: https://github.com/asottile/pyupgrade
-  rev: v3.11.0
+  rev: v3.14.0
   hooks:
   - id: pyupgrade
     args: ["--py38-plus"]
 
 - repo: https://github.com/asottile/setup-cfg-fmt
-  rev: v2.4.0
+  rev: v2.5.0
   hooks:
   - id: setup-cfg-fmt
 
@@ -54,7 +54,7 @@ repos:
     exclude: coffea/processor/templates
 
 - repo: https://github.com/codespell-project/codespell
-  rev: v2.2.5
+  rev: v2.2.6
   hooks:
   - id: codespell
     args: ["--skip=*.ipynb","-L hist,Hist,nd,SubJet,subjet,Subjet,PTD,ptd,fPt,fpt,Ser,ser"]

From b8fc7fe86eb2cf04b0e78e7965da32ad7d19aa77 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Tue, 3 Oct 2023 17:41:09 +0100
Subject: [PATCH 47/75] wip: initial commit

---
 src/coffea/nanoevents/factory.py | 66 +++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 6 deletions(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index 38e06d601..c43b182ad 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -77,23 +77,48 @@ def __init__(
         self.metadata = metadata
         self.version = version
 
-    def extract_form_keys_base_columns(self, form_keys):
-        base_columns = []
-        for form_key in form_keys:
-            base_columns.extend(
+    def keys_for_buffer_keys(self, buffer_keys):
+        base_columns = set()
+        for buffer_key in buffer_keys:
+            form_key, attribute = self.parse_buffer_key(buffer_key)
+            base_columns.update(
                 [
                     acolumn
                     for acolumn in urllib.parse.unquote(form_key).split(",")
                     if not acolumn.startswith("!")
                 ]
             )
-        return list(set(base_columns))
+        return base_columns
+
+    def parse_buffer_key(self, buffer_key):
+        prefix, attribute, form_key = buffer_key.rsplit("/", maxsplit=2)
+        if attribute == "offsets":
+            return (form_key[: -len("%2C%21offsets")], attribute)
+        else:
+            return (form_key, attribute)
+
+    @property
+    def buffer_key(self):
+        return partial(self._key_formatter, "")
 
     def _key_formatter(self, prefix, form_key, form, attribute):
         if attribute == "offsets":
             form_key += "%2C%21offsets"
         return prefix + f"/{attribute}/{form_key}"
 
+    # TODO: deprecate
+    def extract_form_keys_base_columns(self, form_keys):
+        base_columns = []
+        for form_key in form_keys:
+            base_columns.extend(
+                [
+                    acolumn
+                    for acolumn in urllib.parse.unquote(form_key).split(",")
+                    if not acolumn.startswith("!")
+                ]
+            )
+        return list(set(base_columns))
+
 
 class _map_schema_uproot(_map_schema_base):
     def __init__(
@@ -125,7 +150,36 @@ def __call__(self, form):
             },
             "form_key": None,
         }
-        return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form)
+        return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form), self
+
+    def create_column_mapping(self, tree, keys, start, stop, interp_options):
+        from functools import partial
+
+        from coffea.nanoevents.util import tuple_to_key
+
+        partition_key = (
+            str(tree.file.uuid),
+            tree.object_path,
+            f"{start}-{stop}",
+        )
+        uuidpfn = {partition_key[0]: tree.file.file_path}
+        mapping = UprootSourceMapping(
+            TrivialUprootOpener(uuidpfn, interp_options),
+            start,
+            stop,
+            cache={},
+            access_log=None,
+            use_ak_forth=True,
+        )
+        mapping.preload_column_source(partition_key[0], partition_key[1], tree)
+        buffer_key = partial(self._key_formatter, tuple_to_key(partition_key))
+
+        class TranslateBufferKeys:
+            def __getitem__(this, key):
+                form_key, attribute = self.parse_buffer_key(key)
+                return mapping[buffer_key(form_key=form_key, attribute=attribute, form=None)]
+
+        return TranslateBufferKeys()
 
     def create_column_mapping_and_key(self, tree, start, stop, interp_options):
         from functools import partial

From 1b4bd50af71c7e06166a7d7d285e341e606c3ee4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 3 Oct 2023 16:43:11 +0000
Subject: [PATCH 48/75] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/coffea/nanoevents/factory.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index c43b182ad..cc0eff0ad 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -150,7 +150,10 @@ def __call__(self, form):
             },
             "form_key": None,
         }
-        return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form), self
+        return (
+            awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form),
+            self,
+        )
 
     def create_column_mapping(self, tree, keys, start, stop, interp_options):
         from functools import partial
@@ -177,7 +180,9 @@ def create_column_mapping(self, tree, keys, start, stop, interp_options):
         class TranslateBufferKeys:
             def __getitem__(this, key):
                 form_key, attribute = self.parse_buffer_key(key)
-                return mapping[buffer_key(form_key=form_key, attribute=attribute, form=None)]
+                return mapping[
+                    buffer_key(form_key=form_key, attribute=attribute, form=None)
+                ]
 
         return TranslateBufferKeys()
 

From daa8529cfb7ea5027d0ae8606615c575f0119519 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Wed, 4 Oct 2023 10:18:45 +0100
Subject: [PATCH 49/75] fix: rename function

---
 src/coffea/nanoevents/factory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index c43b182ad..559504c57 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -152,7 +152,7 @@ def __call__(self, form):
         }
         return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form), self
 
-    def create_column_mapping(self, tree, keys, start, stop, interp_options):
+    def load_buffers(self, tree, keys, start, stop, interp_options):
         from functools import partial
 
         from coffea.nanoevents.util import tuple_to_key

From 66c8710c3ea1630a519a2de6eabd3af6327329d3 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Wed, 4 Oct 2023 11:17:43 +0100
Subject: [PATCH 50/75] fix: use report_necessary_buffers

---
 src/coffea/processor/executor.py | 6 +++---
 tests/test_jetmet_tools.py       | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/coffea/processor/executor.py b/src/coffea/processor/executor.py
index 618b1c741..9698fa296 100644
--- a/src/coffea/processor/executor.py
+++ b/src/coffea/processor/executor.py
@@ -1718,7 +1718,7 @@ def _work_function(
                     import dask_awkward
 
                     to_compute = processor_instance.process(events)
-                    materialized = dask_awkward.necessary_columns(to_compute)
+                    # materialized = dask_awkward.report_necessary_buffers(to_compute)
                     out = dask.compute(to_compute, scheduler="single-threaded")[0]
             except Exception as e:
                 raise Exception(f"Failed processing file: {item!r}") from e
@@ -1734,11 +1734,11 @@ def _work_function(
                     metrics = {}
                     if isinstance(file, uproot.ReadOnlyDirectory):
                         metrics["bytesread"] = file.file.source.num_requested_bytes
+                    # metrics["data_and_shape_buffers"] = set(materialized)
+                    # metrics["shape_only_buffers"] = set(materialized)
                     if schema is not None and issubclass(schema, schemas.BaseSchema):
-                        metrics["columns"] = set(materialized)
                         metrics["entries"] = len(events)
                     else:
-                        metrics["columns"] = set(materialized)
                         metrics["entries"] = events.size
                     metrics["processtime"] = toc - tic
                     return {"out": out, "metrics": metrics, "processed": {item}}
diff --git a/tests/test_jetmet_tools.py b/tests/test_jetmet_tools.py
index a7ef91385..b1375afa2 100644
--- a/tests/test_jetmet_tools.py
+++ b/tests/test_jetmet_tools.py
@@ -837,9 +837,9 @@ def test_corrected_jets_factory(optimization_enabled):
             **{name: evaluator[name] for name in jec_stack_names[5:6]}
         )
 
-        print(dak.necessary_columns(jets.eta))
+        print(dak.report_necessary_buffers(jets.eta))
         print(
-            dak.necessary_columns(
+            dak.report_necessary_buffers(
                 resosf.getScaleFactor(
                     JetEta=jets.eta,
                 )

From 2353a2306aae1ab57e72bb181b37d6e6f03f5e7d Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Wed, 4 Oct 2023 12:19:57 +0100
Subject: [PATCH 51/75] fix: properly parse form keys

---
 src/coffea/nanoevents/factory.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index 9b25a6c6a..24b31feed 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -81,11 +81,16 @@ def keys_for_buffer_keys(self, buffer_keys):
         base_columns = set()
         for buffer_key in buffer_keys:
             form_key, attribute = self.parse_buffer_key(buffer_key)
+            operands = urllib.parse.unquote(form_key).split(",")
+
+            it_operands = iter(operands)
+            next(it_operands)
+
             base_columns.update(
                 [
-                    acolumn
-                    for acolumn in urllib.parse.unquote(form_key).split(",")
-                    if not acolumn.startswith("!")
+                    name
+                    for name, maybe_transform in zip(operands, it_operands)
+                    if maybe_transform == "!load"
                 ]
             )
         return base_columns

From bd07d03fada9a61b1de1db726d560a917525aa44 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Thu, 5 Oct 2023 14:05:25 +0100
Subject: [PATCH 52/75] hack: convert Content to array

---
 src/coffea/nanoevents/mapping/base.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/coffea/nanoevents/mapping/base.py b/src/coffea/nanoevents/mapping/base.py
index c6a5e8e2e..f20b3bb2c 100644
--- a/src/coffea/nanoevents/mapping/base.py
+++ b/src/coffea/nanoevents/mapping/base.py
@@ -111,14 +111,18 @@ def __getitem__(self, key):
         if len(stack) != 1:
             raise RuntimeError(f"Syntax error in form key {nodes}")
         out = stack.pop()
-        try:
-            out = numpy.array(out)
-        except ValueError:
-            if self._debug:
-                print(out)
-            raise RuntimeError(
-                f"Left with non-bare array after evaluating form key {nodes}"
-            )
+        import awkward
+        if isinstance(out, awkward.contents.Content):
+            out = awkward.to_numpy(out)
+        else:
+            try:
+                out = numpy.array(out)
+            except ValueError:
+                if self._debug:
+                    print(out)
+                raise RuntimeError(
+                    f"Left with non-bare array after evaluating form key {nodes}"
+                )
         return out
 
     @abstractmethod

From a6848a0824d94be45793f5ab58505bca60754fd2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 5 Oct 2023 13:10:21 +0000
Subject: [PATCH 53/75] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/coffea/nanoevents/mapping/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/coffea/nanoevents/mapping/base.py b/src/coffea/nanoevents/mapping/base.py
index f20b3bb2c..3d87b410c 100644
--- a/src/coffea/nanoevents/mapping/base.py
+++ b/src/coffea/nanoevents/mapping/base.py
@@ -112,6 +112,7 @@ def __getitem__(self, key):
             raise RuntimeError(f"Syntax error in form key {nodes}")
         out = stack.pop()
         import awkward
+
         if isinstance(out, awkward.contents.Content):
             out = awkward.to_numpy(out)
         else:

From 9c90205c576b10d3334e3dc482c6fb8c6d374fa3 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Thu, 5 Oct 2023 22:14:05 +0100
Subject: [PATCH 54/75] fix: ensure layout nodes converted to arrays

---
 src/coffea/nanoevents/transforms.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/coffea/nanoevents/transforms.py b/src/coffea/nanoevents/transforms.py
index e969310b2..2985f9709 100644
--- a/src/coffea/nanoevents/transforms.py
+++ b/src/coffea/nanoevents/transforms.py
@@ -13,6 +13,15 @@ def to_layout(array):
     return array.layout
 
 
+def ensure_array(arraylike):
+    if isinstance(arraylike, (awkward.contents.Content, awkward.Array)):
+        return awkward.to_numpy(arraylike)
+    elif isinstance(arraylike, awkward.index.Index):
+        return arraylike.data
+    else:
+        return numpy.asarray(arraylike)
+
+
 def data(stack):
     """Extract content from array
     (currently a noop, can probably take place of !content)
@@ -96,7 +105,7 @@ def counts2offsets(stack):
     Signature: counts,!counts2offsets
     Outputs an array with length one larger than input
     """
-    counts = numpy.array(stack.pop())
+    counts = ensure_array(stack.pop())
     offsets = numpy.empty(len(counts) + 1, dtype=numpy.int64)
     offsets[0] = 0
     numpy.cumsum(counts, out=offsets[1:])
@@ -123,11 +132,11 @@ def local2global(stack):
     Signature: index,target_offsets,!local2global
     Outputs a content array with same shape as index content
     """
-    target_offsets = numpy.asarray(stack.pop())
+    target_offsets = ensure_array(stack.pop())
     index = stack.pop()
     index = index.mask[index >= 0] + target_offsets[:-1]
     index = index.mask[index < target_offsets[1:]]
-    out = numpy.array(awkward.flatten(awkward.fill_none(index, -1), axis=None))
+    out = ensure_array(awkward.flatten(awkward.fill_none(index, -1), axis=None))
     if out.dtype != numpy.int64:
         raise RuntimeError
     stack.append(out)

From 04b5a1a235a14ed80500054b59e7b921aefc335e Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Sat, 7 Oct 2023 09:13:07 -0500
Subject: [PATCH 55/75] adjust coffea pins to latest releases and pre-releases

---
 pyproject.toml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f5b168c1e..177cd9926 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,11 +37,11 @@ classifiers = [
   "Topic :: Utilities",
 ]
 dependencies = [
-  "awkward>=2.4.2",
-  "uproot>=5.0.10",
+  "awkward>=2.4.5",
+  "uproot>=5.1.0rc1",
   "dask[array]>=2023.4.0",
-  "dask-awkward>=2023.7.1,!=2023.8.0",
-  "dask-histogram>=2023.6.0",
+  "dask-awkward>=2023.10a1,!=2023.8.0",
+  "dask-histogram>=2023.7a0",
   "correctionlib>=2.0.0",
   "pyarrow>=6.0.0",
   "fsspec",

From f19c11b17e8bdbcc6024f7456f515b96d9fc085c Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Sat, 7 Oct 2023 09:36:27 -0500
Subject: [PATCH 56/75] use pytorch-only triton image

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fe8453b8d..234ac2e21 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -98,7 +98,7 @@ jobs:
     - name: Start triton server with example model
       if: matrix.os == 'ubuntu-latest'
       run: |
-        docker run -d --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${{ github.workspace }}/tests/samples/triton_models_test:/models nvcr.io/nvidia/tritonserver:23.04-py3 tritonserver --model-repository=/models
+        docker run -d --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${{ github.workspace }}/tests/samples/triton_models_test:/models nvcr.io/nvidia/tritonserver:23.04-pyt-python-py3 tritonserver --model-repository=/models
 
     - name: Test with pytest
       run: |

From 7051d2e40a8655b7d9aa86359a014d50bc9dd1a1 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Sat, 7 Oct 2023 09:40:30 -0500
Subject: [PATCH 57/75] streamline version requirements

Co-authored-by: Angus Hollands <goosey15@gmail.com>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 177cd9926..36fada2d0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ dependencies = [
   "awkward>=2.4.5",
   "uproot>=5.1.0rc1",
   "dask[array]>=2023.4.0",
-  "dask-awkward>=2023.10a1,!=2023.8.0",
+  "dask-awkward>=2023.10a1",
   "dask-histogram>=2023.7a0",
   "correctionlib>=2.0.0",
   "pyarrow>=6.0.0",

From d14e4635011d970e86f9b81edf74d840e2ad22b7 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Sat, 7 Oct 2023 12:22:56 -0500
Subject: [PATCH 58/75] codespell

---
 src/coffea/processor/executor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coffea/processor/executor.py b/src/coffea/processor/executor.py
index 618b1c741..42df52eeb 100644
--- a/src/coffea/processor/executor.py
+++ b/src/coffea/processor/executor.py
@@ -694,7 +694,7 @@ class FuturesExecutor(ExecutorBase):
             An accumulator to collect the output of the function
         pool : concurrent.futures.Executor class or instance, optional
             The type of futures executor to use, defaults to ProcessPoolExecutor.
-            You can pass an instance instead of a class to re-use an executor
+            You can pass an instance instead of a class to reuse an executor
         workers : int, optional
             Number of parallel processes for futures (default 1)
         status : bool, optional

From 33d2e681301c1c37b257dbbfe6d50ef7cf56c47a Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Sun, 8 Oct 2023 12:19:48 +0100
Subject: [PATCH 59/75] fix: don't import protocol

---
 src/coffea/nanoevents/factory.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index 40d1bda53..8361eaaa2 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -11,7 +11,6 @@
 import dask_awkward
 import fsspec
 import uproot
-from dask_awkward import ImplementsFormTransformation
 
 from coffea.nanoevents.mapping import (
     CachedMapping,
@@ -68,7 +67,7 @@ def _key_formatter(prefix, form_key, form, attribute):
     return prefix + f"/{attribute}/{form_key}"
 
 
-class _map_schema_base(ImplementsFormTransformation):
+class _map_schema_base: # ImplementsFormMapping, ImplementsFormMappingInfo
     def __init__(
         self, schemaclass=BaseSchema, metadata=None, behavior=None, version=None
     ):

From 9d94cb0b8d50e89d6e79ff3a4be3ba88602dc0e3 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 8 Oct 2023 11:20:09 +0000
Subject: [PATCH 60/75] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/coffea/nanoevents/factory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index 8361eaaa2..9b2557ac6 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -67,7 +67,7 @@ def _key_formatter(prefix, form_key, form, attribute):
     return prefix + f"/{attribute}/{form_key}"
 
 
-class _map_schema_base: # ImplementsFormMapping, ImplementsFormMappingInfo
+class _map_schema_base:  # ImplementsFormMapping, ImplementsFormMappingInfo
     def __init__(
         self, schemaclass=BaseSchema, metadata=None, behavior=None, version=None
     ):

From c451d60eea51c94b92e411cd0fe877bca9421505 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 9 Oct 2023 20:10:17 +0000
Subject: [PATCH 61/75] [pre-commit.ci] pre-commit autoupdate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/pre-commit/pre-commit-hooks: v4.4.0 → v4.5.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.4.0...v4.5.0)
- [github.com/asottile/pyupgrade: v3.14.0 → v3.15.0](https://github.com/asottile/pyupgrade/compare/v3.14.0...v3.15.0)
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8060d85d9..52f3c2023 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -24,7 +24,7 @@ repos:
     args: ["--profile", "black", "--filter-files"]
 
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.4.0
+  rev: v4.5.0
   hooks:
   - id: check-case-conflict
   - id: check-merge-conflict
@@ -37,7 +37,7 @@ repos:
   - id: trailing-whitespace
 
 - repo: https://github.com/asottile/pyupgrade
-  rev: v3.14.0
+  rev: v3.15.0
   hooks:
   - id: pyupgrade
     args: ["--py38-plus"]

From 746bd422499291e4ab266068064200eaadb054d3 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Tue, 10 Oct 2023 08:46:39 -0500
Subject: [PATCH 62/75] fix title in bot config

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 52f3c2023..baa961304 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,7 +5,7 @@ ci:
         for more information, see https://pre-commit.ci
     autofix_prs: true
     autoupdate_branch: ''
-    autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
+    autoupdate_commit_msg: 'ci(pre-commit): pre-commit autoupdate'
     autoupdate_schedule: weekly
     skip: []
     submodules: false

From 0d9c913c1be461d669b8313208f5ee52764a72a5 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Wed, 11 Oct 2023 02:17:32 -0500
Subject: [PATCH 63/75] remove deprecated interface definition

---
 src/coffea/nanoevents/factory.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index 9b2557ac6..d82b434e4 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -110,19 +110,6 @@ def _key_formatter(self, prefix, form_key, form, attribute):
             form_key += "%2C%21offsets"
         return prefix + f"/{attribute}/{form_key}"
 
-    # TODO: deprecate
-    def extract_form_keys_base_columns(self, form_keys):
-        base_columns = []
-        for form_key in form_keys:
-            base_columns.extend(
-                [
-                    acolumn
-                    for acolumn in urllib.parse.unquote(form_key).split(",")
-                    if not acolumn.startswith("!")
-                ]
-            )
-        return list(set(base_columns))
-
 
 class _map_schema_uproot(_map_schema_base):
     def __init__(

From bb4df59dc10f8aabcb979b51e18dba661aa5df9c Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Wed, 11 Oct 2023 08:28:51 +0100
Subject: [PATCH 64/75] Update tests/test_jetmet_tools.py

---
 tests/test_jetmet_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_jetmet_tools.py b/tests/test_jetmet_tools.py
index b1375afa2..aace9b1bf 100644
--- a/tests/test_jetmet_tools.py
+++ b/tests/test_jetmet_tools.py
@@ -837,7 +837,7 @@ def test_corrected_jets_factory(optimization_enabled):
             **{name: evaluator[name] for name in jec_stack_names[5:6]}
         )
 
-        print(dak.report_necessary_buffers(jets.eta))
+        print(dak.report_necessary_columns(jets.eta))
         print(
             dak.report_necessary_buffers(
                 resosf.getScaleFactor(

From e0694ad6c112b5a15a2aa2b976daced0d3a4da44 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Wed, 11 Oct 2023 08:29:04 +0100
Subject: [PATCH 65/75] Update tests/test_jetmet_tools.py

---
 tests/test_jetmet_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_jetmet_tools.py b/tests/test_jetmet_tools.py
index aace9b1bf..8be3a97f3 100644
--- a/tests/test_jetmet_tools.py
+++ b/tests/test_jetmet_tools.py
@@ -839,7 +839,7 @@ def test_corrected_jets_factory(optimization_enabled):
 
         print(dak.report_necessary_columns(jets.eta))
         print(
-            dak.report_necessary_buffers(
+            dak.report_necessary_columns(
                 resosf.getScaleFactor(
                     JetEta=jets.eta,
                 )

From e7384f995853733faaef19514632399a109e5064 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Wed, 11 Oct 2023 03:00:13 -0500
Subject: [PATCH 66/75] remove further remnants of old remapping interface

---
 src/coffea/nanoevents/factory.py | 48 --------------------------------
 1 file changed, 48 deletions(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index d82b434e4..ee9e75d0b 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -177,29 +177,6 @@ def __getitem__(this, key):
 
         return TranslateBufferKeys()
 
-    def create_column_mapping_and_key(self, tree, start, stop, interp_options):
-        from functools import partial
-
-        from coffea.nanoevents.util import tuple_to_key
-
-        partition_key = (
-            str(tree.file.uuid),
-            tree.object_path,
-            f"{start}-{stop}",
-        )
-        uuidpfn = {partition_key[0]: tree.file.file_path}
-        mapping = UprootSourceMapping(
-            TrivialUprootOpener(uuidpfn, interp_options),
-            start,
-            stop,
-            cache={},
-            access_log=None,
-            use_ak_forth=True,
-        )
-        mapping.preload_column_source(partition_key[0], partition_key[1], tree)
-
-        return mapping, partial(self._key_formatter, tuple_to_key(partition_key))
-
 
 class _map_schema_parquet(_map_schema_base):
     def __init__(
@@ -224,31 +201,6 @@ def __call__(self, form):
 
         return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form)
 
-    def create_column_mapping_and_key(self, columns, start, stop, interp_options):
-        from functools import partial
-
-        from coffea.nanoevents.util import tuple_to_key
-
-        uuid = "NO_UUID"
-        obj_path = "NO_OBJECT_PATH"
-
-        partition_key = (
-            str(uuid),
-            obj_path,
-            f"{start}-{stop}",
-        )
-        uuidpfn = {uuid: columns}
-        mapping = PreloadedSourceMapping(
-            PreloadedOpener(uuidpfn),
-            start,
-            stop,
-            cache={},
-            access_log=None,
-        )
-        mapping.preload_column_source(partition_key[0], partition_key[1], columns)
-
-        return mapping, partial(self._key_formatter, tuple_to_key(partition_key))
-
 
 class NanoEventsFactory:
     """A factory class to build NanoEvents objects"""

From 92efdb20012675be136f18f17ed3cc0ed3e92044 Mon Sep 17 00:00:00 2001
From: Angus Hollands <goosey15@gmail.com>
Date: Wed, 11 Oct 2023 13:48:16 +0100
Subject: [PATCH 67/75] refactor: make key translation obvious

---
 src/coffea/nanoevents/factory.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index ee9e75d0b..9bed55ed2 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -111,6 +111,15 @@ def _key_formatter(self, prefix, form_key, form, attribute):
         return prefix + f"/{attribute}/{form_key}"
 
 
+class _TranslatedMapping:
+    def __init__(self, func, mapping):
+        self._func = func
+        self._mapping = mapping
+
+    def __getitem__(self, index):
+        return self._mapping[self._func(index)]
+
+
 class _map_schema_uproot(_map_schema_base):
     def __init__(
         self, schemaclass=BaseSchema, metadata=None, behavior=None, version=None
@@ -168,14 +177,15 @@ def load_buffers(self, tree, keys, start, stop, interp_options):
         mapping.preload_column_source(partition_key[0], partition_key[1], tree)
         buffer_key = partial(self._key_formatter, tuple_to_key(partition_key))
 
-        class TranslateBufferKeys:
-            def __getitem__(this, key):
-                form_key, attribute = self.parse_buffer_key(key)
-                return mapping[
-                    buffer_key(form_key=form_key, attribute=attribute, form=None)
-                ]
+        # The buffer-keys that dask-awkward knows about will not include the
+        # partition key. Therefore, we must translate the keys here.
+        def translate_key(index):
+            form_key, attribute = self.parse_buffer_key(index)
+            return mapping[
+                buffer_key(form_key=form_key, attribute=attribute, form=None)
+            ]
 
-        return TranslateBufferKeys()
+        return _TranslatedMapping(translate_key, mapping)
 
 
 class _map_schema_parquet(_map_schema_base):

From 8ae3cd5660bbf1acb6c2e7dd30683a1eda84bfbb Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Thu, 12 Oct 2023 02:35:06 -0500
Subject: [PATCH 68/75] fix typo from refactor

---
 src/coffea/nanoevents/factory.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index 9bed55ed2..123f6a131 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -181,9 +181,7 @@ def load_buffers(self, tree, keys, start, stop, interp_options):
         # partition key. Therefore, we must translate the keys here.
         def translate_key(index):
             form_key, attribute = self.parse_buffer_key(index)
-            return mapping[
-                buffer_key(form_key=form_key, attribute=attribute, form=None)
-            ]
+            return buffer_key(form_key=form_key, attribute=attribute, form=None)
 
         return _TranslatedMapping(translate_key, mapping)
 

From 45a006004349ce5800e3c7221623004f2b4bf4f3 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Sat, 14 Oct 2023 10:22:42 -0500
Subject: [PATCH 69/75] update pins (note uncapped numpy and numba skooch)

---
 pyproject.toml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 36fada2d0..a12f82a74 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,17 +37,17 @@ classifiers = [
   "Topic :: Utilities",
 ]
 dependencies = [
-  "awkward>=2.4.5",
-  "uproot>=5.1.0rc1",
+  "awkward>=2.4.6",
+  "uproot>=5.1.1",
   "dask[array]>=2023.4.0",
-  "dask-awkward>=2023.10a1",
-  "dask-histogram>=2023.7a0",
-  "correctionlib>=2.0.0",
+  "dask-awkward>=2023.10.0",
+  "dask-histogram>=2023.10.0",
+  "correctionlib>=2.3.3",
   "pyarrow>=6.0.0",
   "fsspec",
   "matplotlib>=3",
-  "numba>=0.57.0",
-  "numpy>=1.22.0,<1.25",  # < 1.25 for numba 0.57 series
+  "numba>=0.58.0",
+  "numpy>=1.22.0",
   "scipy>=1.1.0",
   "tqdm>=4.27.0",
   "lz4",

From 14d2cc2ed36fe07a2cacdef791574d6fffcfb65f Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Sat, 14 Oct 2023 11:02:51 -0500
Subject: [PATCH 70/75] try to convince pip to upgrade numpy upon installing
 coffea

---
 .github/workflows/ci.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 234ac2e21..16aed3abc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -69,7 +69,7 @@ jobs:
         python -m pip install xgboost
         python -m pip install tritonclient[grpc,http]
         # install checked out coffea
-        python -m pip install -q -e '.[dev,parsl,dask,spark]'
+        python -m pip install -q -e '.[dev,parsl,dask,spark]' --upgrade
         python -m pip list
         java -version
     - name: Install dependencies (MacOS)
@@ -80,7 +80,7 @@ jobs:
         python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
         python -m pip install xgboost
         # install checked out coffea
-        python -m pip install -q -e '.[dev,dask,spark]'
+        python -m pip install -q -e '.[dev,dask,spark]' --upgrade
         python -m pip list
         java -version
     - name: Install dependencies (Windows)
@@ -91,7 +91,7 @@ jobs:
         python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
         python -m pip install xgboost
         # install checked out coffea
-        python -m pip install -q -e '.[dev,dask]'
+        python -m pip install -q -e '.[dev,dask]' --upgrade
         python -m pip list
         java -version
 

From ab3599e5ef408788068da6b2d8dd82cb86b57ce7 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Sat, 14 Oct 2023 11:11:49 -0500
Subject: [PATCH 71/75] be more insistent

---
 .github/workflows/ci.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 16aed3abc..ea61615a7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -69,7 +69,7 @@ jobs:
         python -m pip install xgboost
         python -m pip install tritonclient[grpc,http]
         # install checked out coffea
-        python -m pip install -q -e '.[dev,parsl,dask,spark]' --upgrade
+        python -m pip install -q -e '.[dev,parsl,dask,spark]' --upgrade --upgrade-strategy eager
         python -m pip list
         java -version
     - name: Install dependencies (MacOS)
@@ -80,7 +80,7 @@ jobs:
         python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
         python -m pip install xgboost
         # install checked out coffea
-        python -m pip install -q -e '.[dev,dask,spark]' --upgrade
+        python -m pip install -q -e '.[dev,dask,spark]' --upgrade --upgrade-strategy eager
         python -m pip list
         java -version
     - name: Install dependencies (Windows)
@@ -91,7 +91,7 @@ jobs:
         python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
         python -m pip install xgboost
         # install checked out coffea
-        python -m pip install -q -e '.[dev,dask]' --upgrade
+        python -m pip install -q -e '.[dev,dask]' --upgrade --upgrade-strategy eager
         python -m pip list
         java -version
 

From faff41ec1bdbd33ac7ca2d1d8ddda3ac11a3f427 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Sat, 14 Oct 2023 11:36:56 -0500
Subject: [PATCH 72/75] numba 0.58 pins numpy from above < 1.26

NB: it is safer to re-pin numpy in coffea for users because of numba's sliding window of supported numpy versions; otherwise it is very easy to get a mismatch.
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index a12f82a74..689e03ff0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,7 +47,7 @@ dependencies = [
   "fsspec",
   "matplotlib>=3",
   "numba>=0.58.0",
-  "numpy>=1.22.0",
+  "numpy>=1.22.0,<1.26",  # < 1.26 for numba 0.58 series
   "scipy>=1.1.0",
   "tqdm>=4.27.0",
   "lz4",

From 864f7094e92fe11792dba736f093f8f3115b3ac0 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Tue, 17 Oct 2023 10:22:52 -0500
Subject: [PATCH 73/75] clean up usage of quoted ",!offsets"

---
 src/coffea/nanoevents/factory.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index 123f6a131..e97f556f0 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -29,8 +29,9 @@
     PHYSLITESchema,
     TreeMakerSchema,
 )
-from coffea.nanoevents.util import key_to_tuple, tuple_to_key
+from coffea.nanoevents.util import quote, unquote, key_to_tuple, tuple_to_key
 
+_offsets_label = quote(",!offsets")
 
 def _remove_not_interpretable(branch):
     if isinstance(
@@ -63,7 +64,7 @@ def _remove_not_interpretable(branch):
 
 def _key_formatter(prefix, form_key, form, attribute):
     if attribute == "offsets":
-        form_key += "%2C%21offsets"
+        form_key += _offsets_label
     return prefix + f"/{attribute}/{form_key}"
 
 
@@ -80,7 +81,7 @@ def keys_for_buffer_keys(self, buffer_keys):
         base_columns = set()
         for buffer_key in buffer_keys:
             form_key, attribute = self.parse_buffer_key(buffer_key)
-            operands = urllib.parse.unquote(form_key).split(",")
+            operands = unquote(form_key).split(",")
 
             it_operands = iter(operands)
             next(it_operands)
@@ -97,7 +98,7 @@ def keys_for_buffer_keys(self, buffer_keys):
     def parse_buffer_key(self, buffer_key):
         prefix, attribute, form_key = buffer_key.rsplit("/", maxsplit=2)
         if attribute == "offsets":
-            return (form_key[: -len("%2C%21offsets")], attribute)
+            return (form_key[: -len(_offsets_label)], attribute)
         else:
             return (form_key, attribute)
 
@@ -107,7 +108,7 @@ def buffer_key(self):
 
     def _key_formatter(self, prefix, form_key, form, attribute):
         if attribute == "offsets":
-            form_key += "%2C%21offsets"
+            form_key += _offsets_label
         return prefix + f"/{attribute}/{form_key}"
 
 

From 9b96f7b8d3d1613e0ed22e111cf5c0221f03a024 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 17 Oct 2023 15:23:12 +0000
Subject: [PATCH 74/75] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/coffea/nanoevents/factory.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index e97f556f0..66f3cb482 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -29,10 +29,11 @@
     PHYSLITESchema,
     TreeMakerSchema,
 )
-from coffea.nanoevents.util import quote, unquote, key_to_tuple, tuple_to_key
+from coffea.nanoevents.util import key_to_tuple, quote, tuple_to_key, unquote
 
 _offsets_label = quote(",!offsets")
 
+
 def _remove_not_interpretable(branch):
     if isinstance(
         branch.interpretation, uproot.interpretation.identify.uproot.AsGrouped

From 0a525d0340aa0afc240cd313cd0067578c970556 Mon Sep 17 00:00:00 2001
From: Lindsey Gray <lindsey.gray@gmail.com>
Date: Tue, 17 Oct 2023 10:24:32 -0500
Subject: [PATCH 75/75] flake8 lint

---
 src/coffea/nanoevents/factory.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
index 66f3cb482..b6656282f 100644
--- a/src/coffea/nanoevents/factory.py
+++ b/src/coffea/nanoevents/factory.py
@@ -1,6 +1,5 @@
 import io
 import pathlib
-import urllib.parse
 import warnings
 import weakref
 from functools import partial