From aeb53c4bf81926ca7e810918576ce23d3bcbbd6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20R=C3=BCbenach?= Date: Thu, 3 Aug 2023 11:28:19 +0200 Subject: [PATCH 01/75] Make rochester_lookup compatible with dask_awkward --- src/coffea/lookup_tools/rochester_lookup.py | 24 +++++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/coffea/lookup_tools/rochester_lookup.py b/src/coffea/lookup_tools/rochester_lookup.py index 3e8bf6eab..bc462f4ba 100644 --- a/src/coffea/lookup_tools/rochester_lookup.py +++ b/src/coffea/lookup_tools/rochester_lookup.py @@ -1,4 +1,5 @@ import awkward +import dask_awkward as dak import numpy from coffea.lookup_tools.dense_lookup import dense_lookup @@ -75,7 +76,7 @@ def _error(self, func, *args): newargs = args + (0, 0) default = func(*newargs) - result = numpy.zeros_like(default) + result = awkward.zeros_like(default) for s in range(self._nsets): oneOver = 1.0 / self._members[s] for m in range(self._members[s]): @@ -226,12 +227,21 @@ def _kExtra(self, kpt, eta, nl, u, s=0, m=0): cbN_flat = awkward.flatten(cbN) cbS_flat = awkward.flatten(cbS) - invcdf = awkward.unflatten( - doublecrystalball.ppf( - u_flat, cbA_flat, cbA_flat, cbN_flat, cbN_flat, loc, cbS_flat - ), - counts, - ) + args = (u_flat, cbA_flat, cbA_flat, cbN_flat, cbN_flat, loc, cbS_flat) + + if any(isinstance(arg, dak.Array) for arg in args): + def apply(*args): + args_lz = [awkward.typetracer.length_zero_if_typetracer(arg) for arg in args] + out = awkward.Array(doublecrystalball.ppf(*args_lz)) + if awkward.backend(args[0]) == "typetracer": + out = awkward.Array(out.layout.to_typetracer(forget_length=True), behavior=out.behavior) + return out + + invcdf = dak.map_partitions(apply, *args) + else: + invcdf = doublecrystalball.ppf(*args) + + invcdf = awkward.unflatten(invcdf, counts) x = awkward.where( mask, From e6acf51e3d362947c04e9fc6d2e35201e31f638a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Aug 2023 09:35:01 +0000 Subject: [PATCH 02/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/coffea/lookup_tools/rochester_lookup.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/coffea/lookup_tools/rochester_lookup.py b/src/coffea/lookup_tools/rochester_lookup.py index bc462f4ba..fece55d6d 100644 --- a/src/coffea/lookup_tools/rochester_lookup.py +++ b/src/coffea/lookup_tools/rochester_lookup.py @@ -230,11 +230,17 @@ def _kExtra(self, kpt, eta, nl, u, s=0, m=0): args = (u_flat, cbA_flat, cbA_flat, cbN_flat, cbN_flat, loc, cbS_flat) if any(isinstance(arg, dak.Array) for arg in args): + def apply(*args): - args_lz = [awkward.typetracer.length_zero_if_typetracer(arg) for arg in args] + args_lz = [ + awkward.typetracer.length_zero_if_typetracer(arg) for arg in args + ] out = awkward.Array(doublecrystalball.ppf(*args_lz)) if awkward.backend(args[0]) == "typetracer": - out = awkward.Array(out.layout.to_typetracer(forget_length=True), behavior=out.behavior) + out = awkward.Array( + out.layout.to_typetracer(forget_length=True), + behavior=out.behavior, + ) return out invcdf = dak.map_partitions(apply, *args) From 5feaff63a5fd03c913a60caa3e01b2195db6b8d9 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Thu, 3 Aug 2023 10:08:53 -0500 Subject: [PATCH 03/75] reactivate rochester corrections tests --- tests/test_lookup_tools.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py index f4d5a3b30..4b86ec3df 100644 --- a/tests/test_lookup_tools.py +++ b/tests/test_lookup_tools.py @@ -372,8 +372,6 @@ def test_jec_txt_effareas(): def test_rochester(): - pytest.xfail("weird side effect from running other tests... passes by itself") - rochester_data = lookup_tools.txt_converters.convert_rochester_file( "tests/samples/RoccoR2018.txt.gz", loaduncs=True ) From fbfbb0d0357f6d143eae821a2da5b005bdff60c8 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Thu, 3 Aug 2023 10:45:40 -0500 Subject: [PATCH 04/75] hoisted by my own petard --- tests/test_lookup_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py index 4b86ec3df..a57c6b7cf 100644 --- a/tests/test_lookup_tools.py +++ b/tests/test_lookup_tools.py @@ -388,7 +388,7 @@ def test_rochester(): # test against nanoaod events = NanoEventsFactory.from_root( - os.path.abspath("tests/samples/nano_dimuon.root") + {os.path.abspath("tests/samples/nano_dimuon.root"): "Events"} ).events() data_k = rochester.kScaleDT( From 78d2ed2f0e378772ff212e8a0d0473190ef18142 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Thu, 3 Aug 2023 15:36:23 -0500 Subject: [PATCH 05/75] this time, with feeling --- tests/test_lookup_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py index a57c6b7cf..7bebd7e18 100644 --- a/tests/test_lookup_tools.py +++ b/tests/test_lookup_tools.py @@ -404,7 +404,7 @@ def test_rochester(): # test against mc events = NanoEventsFactory.from_root( - os.path.abspath("tests/samples/nano_dy.root") + {os.path.abspath("tests/samples/nano_dy.root"): "Events"}, ).events() hasgen = ~np.isnan(ak.fill_none(events.Muon.matched_gen.pt, np.nan)) From 129e2691f14c5b49787186b401726fb18c7c877b Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Fri, 4 Aug 2023 13:06:24 -0500 Subject: [PATCH 06/75] daskify tests --- tests/test_lookup_tools.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py index 7bebd7e18..9a6a7954d 100644 --- a/tests/test_lookup_tools.py +++ b/tests/test_lookup_tools.py @@ -388,23 +388,25 @@ def test_rochester(): # test against nanoaod events = NanoEventsFactory.from_root( - {os.path.abspath("tests/samples/nano_dimuon.root"): "Events"} + {os.path.abspath("tests/samples/nano_dimuon.root"): "Events"}, + permit_dask=True, ).events() data_k = rochester.kScaleDT( events.Muon.charge, events.Muon.pt, events.Muon.eta, events.Muon.phi ) - data_k = np.array(ak.flatten(data_k)) + data_k = ak.flatten(data_k).compute().to_numpy() assert all(np.isclose(data_k, official_data_k)) data_err = rochester.kScaleDTerror( events.Muon.charge, events.Muon.pt, events.Muon.eta, events.Muon.phi ) - data_err = np.array(ak.flatten(data_err), dtype=float) + data_err = ak.flatten(data_err).compute().to_numpy() assert all(np.isclose(data_err, official_data_err, atol=1e-8)) # test against mc events = NanoEventsFactory.from_root( {os.path.abspath("tests/samples/nano_dy.root"): "Events"}, + permit_dask=True, ).events() hasgen = ~np.isnan(ak.fill_none(events.Muon.matched_gen.pt, np.nan)) @@ -424,10 +426,10 @@ def test_rochester(): events.Muon.nTrackerLayers[~hasgen], mc_rand[~hasgen], ) - mc_k = np.array(ak.flatten(ak.ones_like(events.Muon.pt))) - hasgen_flat = np.array(ak.flatten(hasgen)) - mc_k[hasgen_flat] = np.array(ak.flatten(mc_kspread)) - mc_k[~hasgen_flat] = np.array(ak.flatten(mc_ksmear)) + mc_k = ak.flatten(ak.ones_like(events.Muon.pt)).compute().to_numpy() + hasgen_flat = ak.flatten(hasgen).compute().to_numpy() + mc_k[hasgen_flat] = ak.flatten(mc_kspread).compute().to_numpy() + mc_k[~hasgen_flat] = ak.flatten(mc_ksmear).compute().to_numpy() assert all(np.isclose(mc_k, official_mc_k)) mc_errspread = rochester.kSpreadMCerror( @@ -445,9 +447,9 @@ def test_rochester(): events.Muon.nTrackerLayers[~hasgen], mc_rand[~hasgen], ) - mc_err = np.array(ak.flatten(ak.ones_like(events.Muon.pt))) - mc_err[hasgen_flat] = np.array(ak.flatten(mc_errspread)) - mc_err[~hasgen_flat] = np.array(ak.flatten(mc_errsmear)) + mc_err = ak.flatten(ak.ones_like(events.Muon.pt)).compute().to_numpy() + mc_err[hasgen_flat] = ak.flatten(mc_errspread).compute().to_numpy() + mc_err[~hasgen_flat] = ak.flatten(mc_errsmear).compute().to_numpy() assert all(np.isclose(mc_err, official_mc_err, atol=1e-8)) From dcbc45b97eddf5c22fc3d7e2d7bae195f27c9143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20R=C3=BCbenach?= Date: Tue, 8 Aug 2023 18:26:57 +0200 Subject: [PATCH 07/75] Fix daskification of test_rochester --- tests/test_lookup_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py index 9a6a7954d..7fde363de 100644 --- a/tests/test_lookup_tools.py +++ b/tests/test_lookup_tools.py @@ -410,7 +410,7 @@ def test_rochester(): ).events() hasgen = ~np.isnan(ak.fill_none(events.Muon.matched_gen.pt, np.nan)) - mc_rand = ak.unflatten(mc_rand, ak.num(hasgen)) + mc_rand = ak.unflatten(dak.from_awkward(ak.Array(mc_rand), 1), ak.num(hasgen)) mc_kspread = rochester.kSpreadMC( events.Muon.charge[hasgen], events.Muon.pt[hasgen], From 9e832e0335bc121cab17c2fdbee5b8d09c208d96 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 27 Jul 2023 10:22:53 -0400 Subject: [PATCH 08/75] global index fetching working --- src/coffea/nanoevents/methods/physlite.py | 49 ++++++++- tests/test_nanoevents_physlite.py | 125 ++++++++++++---------- 2 files changed, 111 insertions(+), 63 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index c0efcdc39..0d3cc2898 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -3,6 +3,7 @@ import awkward import numpy +import dask_awkward from coffea.nanoevents.methods import base, vector @@ -64,7 +65,8 @@ def where(unique_keys): return out -def _get_target_offsets(offsets, event_index): +def _concrete_get_target_offsets(load_column, event_index): + offsets = awkward.typetracer.length_one_if_typetracer(load_column.layout.offsets.data) if isinstance(event_index, Number): return offsets[event_index] @@ -72,15 +74,52 @@ def descend(layout, depth, **kwargs): if layout.purelist_depth == 1: return awkward.contents.NumpyArray(offsets)[layout] - return awkward.transform(descend, event_index) + return awkward.transform(descend, event_index.layout) + + +def _dask_get_target_offsets(load_column, event_index): + return dask_awkward.map_partitions( + _concrete_get_target_offsets, + load_column, + event_index + ) + + +def _get_target_offsets(load_column, event_index): + # TODO check event_index as well + if isinstance(load_column, dask_awkward.Array): + return _dask_get_target_offsets(load_column, event_index) + return _concrete_get_target_offsets(load_column, event_index) def _get_global_index(target, eventindex, index): load_column = target[ target.fields[0] - ] # awkward is eager-mode now (will need to dask this) - target_offsets = _get_target_offsets(load_column.layout.offsets, eventindex) - return target_offsets + index + ] + target_offsets = _get_target_offsets(load_column, eventindex) + return target_offsets + index # here i get + + +# def _concrete_get_global_index(target, eventindex, index): +# load_column = target[ +# target.fields[0] +# ] +# target_offsets = _get_target_offsets(load_column.layout.offsets, eventindex) +# return target_offsets + index + +# def _dask_get_global_index(target, eventindex, index): +# return dask_awkward.map_partitions( +# _concrete_get_global_index, +# target, +# eventindex, +# index, +# ) + +# def _get_global_index(target, eventindex, index): +# # check target, eventindex, index all dak +# if isinstance(target, dask_awkward.Array): +# return _dask_get_global_index(target, eventindex, index) +# return _concrete_get_global_index(target, eventindex, index) @awkward.mixin_class(behavior) diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py index f82471198..488395183 100644 --- a/tests/test_nanoevents_physlite.py +++ b/tests/test_nanoevents_physlite.py @@ -5,6 +5,11 @@ from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema +from coffea.nanoevents.methods.physlite import _get_global_index + +import dask +dask.config.set({"awkward.optimization.enabled": False, "awkward.raise-failed-meta": True, "awkward.optimization.on-fail": "raise"}) + pytestmark = pytest.mark.skip(reason="uproot is upset with this file...") @@ -13,64 +18,68 @@ def _events(): factory = NanoEventsFactory.from_root( {path: "CollectionTree"}, schemaclass=PHYSLITESchema, - permit_dask=False, + permit_dask=True, + #permit_dask=False, ) return factory.events() - -@pytest.fixture(scope="module") -def events(): - return _events() - - -@pytest.mark.parametrize("do_slice", [False, True]) -def test_electron_track_links(events, do_slice): - if do_slice: - events = events[np.random.randint(2, size=len(events)).astype(bool)] - for event in events: - for electron in event.Electrons: - for link_index, link in enumerate(electron.trackParticleLinks): - track_index = link.m_persIndex - print(track_index) - print(event.GSFTrackParticles) - print(electron.trackParticleLinks) - print(electron.trackParticles) - - assert ( - event.GSFTrackParticles[track_index].z0 - == electron.trackParticles[link_index].z0 - ) - - -# from MetaData/EventFormat -_hash_to_target_name = { - 13267281: "TruthPhotons", - 342174277: "TruthMuons", - 368360608: "TruthNeutrinos", - 375408000: "TruthTaus", - 394100163: "TruthElectrons", - 614719239: "TruthBoson", - 660928181: "TruthTop", - 779635413: "TruthBottom", -} - - -def test_truth_links_toplevel(events): - children_px = events.TruthBoson.children.px - for i_event, event in enumerate(events): - for i_particle, particle in enumerate(event.TruthBoson): - for i_link, link in enumerate(particle.childLinks): - assert ( - event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px - == children_px[i_event][i_particle][i_link] - ) - - -def test_truth_links(events): - for i_event, event in enumerate(events): - for i_particle, particle in enumerate(event.TruthBoson): - for i_link, link in enumerate(particle.childLinks): - assert ( - event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px - == particle.children[i_link].px - ) +events = _events() + +gi = _get_global_index(events.GSFTrackParticles, events.Electrons._eventindex, events.Electrons.trackParticleLinks.m_persIndex) + +# @pytest.fixture(scope="module") +# def events(): +# return _events() + + +# @pytest.mark.parametrize("do_slice", [False, True]) +# def test_electron_track_links(events, do_slice): +# if do_slice: +# events = events[np.random.randint(2, size=len(events)).astype(bool)] +# for event in events: +# for electron in event.Electrons: +# for link_index, link in enumerate(electron.trackParticleLinks): +# track_index = link.m_persIndex +# print(track_index) +# print(event.GSFTrackParticles) +# print(electron.trackParticleLinks) +# print(electron.trackParticles) + +# assert ( +# event.GSFTrackParticles[track_index].z0 +# == electron.trackParticles[link_index].z0 +# ) + + +# # from MetaData/EventFormat +# _hash_to_target_name = { +# 13267281: "TruthPhotons", +# 342174277: "TruthMuons", +# 368360608: "TruthNeutrinos", +# 375408000: "TruthTaus", +# 394100163: "TruthElectrons", +# 614719239: "TruthBoson", +# 660928181: "TruthTop", +# 779635413: "TruthBottom", +# } + + +# def test_truth_links_toplevel(events): +# children_px = events.TruthBoson.children.px +# for i_event, event in enumerate(events): +# for i_particle, particle in enumerate(event.TruthBoson): +# for i_link, link in enumerate(particle.childLinks): +# assert ( +# event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px +# == children_px[i_event][i_particle][i_link] +# ) + + +# def test_truth_links(events): +# for i_event, event in enumerate(events): +# for i_particle, particle in enumerate(event.TruthBoson): +# for i_link, link in enumerate(particle.childLinks): +# assert ( +# event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px +# == particle.children[i_link].px +# ) From 29c359e026cb00d3b18f017975981f25489e2f17 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Fri, 28 Jul 2023 12:40:07 -0400 Subject: [PATCH 09/75] track particles working --- src/coffea/nanoevents/methods/physlite.py | 53 +++++++++++++---------- tests/test_nanoevents_physlite.py | 16 ++++++- 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 0d3cc2898..ddbd4b218 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -65,11 +65,24 @@ def where(unique_keys): return out -def _concrete_get_target_offsets(load_column, event_index): - offsets = awkward.typetracer.length_one_if_typetracer(load_column.layout.offsets.data) +def _get_target_offsets(load_column, event_index): + if isinstance(load_column, dask_awkward.Array): + # TODO check event_index as well + return dask_awkward.map_partitions( + _get_target_offsets, load_column, event_index + ) + + offsets = load_column.layout.offsets.data + if isinstance(event_index, Number): + # TODO i think this is not working yet in dask return offsets[event_index] + # nescessary to stick it into the `NumpyArray` constructor + offsets = awkward.typetracer.length_zero_if_typetracer( + load_column.layout.offsets.data + ) + def descend(layout, depth, **kwargs): if layout.purelist_depth == 1: return awkward.contents.NumpyArray(offsets)[layout] @@ -77,27 +90,10 @@ def descend(layout, depth, **kwargs): return awkward.transform(descend, event_index.layout) -def _dask_get_target_offsets(load_column, event_index): - return dask_awkward.map_partitions( - _concrete_get_target_offsets, - load_column, - event_index - ) - - -def _get_target_offsets(load_column, event_index): - # TODO check event_index as well - if isinstance(load_column, dask_awkward.Array): - return _dask_get_target_offsets(load_column, event_index) - return _concrete_get_target_offsets(load_column, event_index) - - def _get_global_index(target, eventindex, index): - load_column = target[ - target.fields[0] - ] + load_column = target[target.fields[0]] target_offsets = _get_target_offsets(load_column, eventindex) - return target_offsets + index # here i get + return target_offsets + index # here i get # def _concrete_get_global_index(target, eventindex, index): @@ -196,10 +192,21 @@ class Electron(Particle): """Electron collection, following `xAOD::Electron_v1 `_. """ - @property - def trackParticles(self): + def trackParticles(self, _dask_array_=None): + + if _dask_array_ is not None: + target = _dask_array_.behavior["__original_array__"]().GSFTrackParticles + links = _dask_array_.trackParticleLinks + return _element_link( + target, + _dask_array_._eventindex, + links.m_persIndex, + links.m_persKey, + ) + links = self.trackParticleLinks + return _element_link( self._events().GSFTrackParticles, self._eventindex, diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py index 488395183..6354f0376 100644 --- a/tests/test_nanoevents_physlite.py +++ b/tests/test_nanoevents_physlite.py @@ -5,9 +5,10 @@ from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema -from coffea.nanoevents.methods.physlite import _get_global_index +from coffea.nanoevents.methods.physlite import _get_global_index, _element_link import dask +import dask_awkward as dak dask.config.set({"awkward.optimization.enabled": False, "awkward.raise-failed-meta": True, "awkward.optimization.on-fail": "raise"}) pytestmark = pytest.mark.skip(reason="uproot is upset with this file...") @@ -25,7 +26,18 @@ def _events(): events = _events() -gi = _get_global_index(events.GSFTrackParticles, events.Electrons._eventindex, events.Electrons.trackParticleLinks.m_persIndex) +gi = _get_global_index( + events.GSFTrackParticles, + events.Electrons._eventindex, + events.Electrons.trackParticleLinks.m_persIndex +) + +el = _element_link( + events.GSFTrackParticles, + events.Electrons._eventindex, + events.Electrons.trackParticleLinks.m_persIndex, + events.Electrons.trackParticleLinks.m_persKey +) # @pytest.fixture(scope="module") # def events(): From 109e73ab02a706460dde8c953a1776b0f2dc8479 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Fri, 28 Jul 2023 13:29:24 -0400 Subject: [PATCH 10/75] trackParticle --- src/coffea/nanoevents/methods/physlite.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index ddbd4b218..1fa2fe013 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -192,9 +192,9 @@ class Electron(Particle): """Electron collection, following `xAOD::Electron_v1 `_. """ + @property def trackParticles(self, _dask_array_=None): - if _dask_array_ is not None: target = _dask_array_.behavior["__original_array__"]().GSFTrackParticles links = _dask_array_.trackParticleLinks @@ -215,7 +215,9 @@ def trackParticles(self, _dask_array_=None): ) @property - def trackParticle(self): + def trackParticle(self, _dask_array_=None): + if _dask_array_ is not None: + self = _dask_array_ # TODO: is this what i should be doing? trackParticles = self.trackParticles return self.trackParticles[ tuple([slice(None) for i in range(trackParticles.ndim - 1)] + [0]) From 2e16a9e6c57b64a1ebfc9129b2645a1ac28fb1a9 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Fri, 28 Jul 2023 18:21:25 -0400 Subject: [PATCH 11/75] cleanup and add caloclusters --- src/coffea/nanoevents/methods/physlite.py | 88 ++++++++++------------- src/coffea/nanoevents/schemas/physlite.py | 1 + 2 files changed, 40 insertions(+), 49 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 1fa2fe013..b6badb9d9 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -39,6 +39,25 @@ def _element_link(target_collection, eventindex, index, key): return target_collection._apply_global_index(global_index) +def _element_link_method(self, link_name, target_name, _dask_array_): + if _dask_array_ is not None: + target = _dask_array_.behavior["__original_array__"]()[target_name] + links = _dask_array_[link_name] + return _element_link( + target, + _dask_array_._eventindex, + links.m_persIndex, + links.m_persKey, + ) + links = self[link_name] + return _element_link( + self._events()[target_name], + self._eventindex, + links.m_persIndex, + links.m_persKey, + ) + + def _element_link_multiple(events, obj, link_field, with_name=None): link = obj[link_field] key = link.m_persKey @@ -66,8 +85,10 @@ def where(unique_keys): def _get_target_offsets(load_column, event_index): - if isinstance(load_column, dask_awkward.Array): - # TODO check event_index as well + if isinstance(load_column, dask_awkward.Array) and isinstance( + event_index, dask_awkward.Array + ): + # wrap in map_partitions if dask arrays return dask_awkward.map_partitions( _get_target_offsets, load_column, event_index ) @@ -75,10 +96,10 @@ def _get_target_offsets(load_column, event_index): offsets = load_column.layout.offsets.data if isinstance(event_index, Number): - # TODO i think this is not working yet in dask return offsets[event_index] # nescessary to stick it into the `NumpyArray` constructor + # if typetracer is passed through offsets = awkward.typetracer.length_zero_if_typetracer( load_column.layout.offsets.data ) @@ -93,29 +114,7 @@ def descend(layout, depth, **kwargs): def _get_global_index(target, eventindex, index): load_column = target[target.fields[0]] target_offsets = _get_target_offsets(load_column, eventindex) - return target_offsets + index # here i get - - -# def _concrete_get_global_index(target, eventindex, index): -# load_column = target[ -# target.fields[0] -# ] -# target_offsets = _get_target_offsets(load_column.layout.offsets, eventindex) -# return target_offsets + index - -# def _dask_get_global_index(target, eventindex, index): -# return dask_awkward.map_partitions( -# _concrete_get_global_index, -# target, -# eventindex, -# index, -# ) - -# def _get_global_index(target, eventindex, index): -# # check target, eventindex, index all dak -# if isinstance(target, dask_awkward.Array): -# return _dask_get_global_index(target, eventindex, index) -# return _concrete_get_global_index(target, eventindex, index) + return target_offsets + index @awkward.mixin_class(behavior) @@ -175,12 +174,12 @@ class Muon(Particle): """ @property - def trackParticle(self): - return _element_link( - self._events().CombinedMuonTrackParticles, - self._eventindex, - self["combinedTrackParticleLink.m_persIndex"], - self["combinedTrackParticleLink.m_persKey"], + def trackParticle(self, _dask_array_=None): + return _element_link_method( + self, + "combinedTrackParticleLink", + "CombinedMuonTrackParticles", + _dask_array_, ) @@ -195,23 +194,8 @@ class Electron(Particle): @property def trackParticles(self, _dask_array_=None): - if _dask_array_ is not None: - target = _dask_array_.behavior["__original_array__"]().GSFTrackParticles - links = _dask_array_.trackParticleLinks - return _element_link( - target, - _dask_array_._eventindex, - links.m_persIndex, - links.m_persKey, - ) - - links = self.trackParticleLinks - - return _element_link( - self._events().GSFTrackParticles, - self._eventindex, - links.m_persIndex, - links.m_persKey, + return _element_link_method( + self, "trackParticleLinks", "GSFTrackParticles", _dask_array_ ) @property @@ -223,6 +207,12 @@ def trackParticle(self, _dask_array_=None): tuple([slice(None) for i in range(trackParticles.ndim - 1)] + [0]) ] + @property + def caloClusters(self, _dask_array_=None): + return _element_link_method( + self, "caloClusterLinks", "CaloCalTopoClusters", _dask_array_ + ) + _set_repr_name("Electron") diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 1b9b89205..6a6aa8659 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -53,6 +53,7 @@ class PHYSLITESchema(BaseSchema): "GSFTrackParticles": "TrackParticle", "InDetTrackParticles": "TrackParticle", "MuonSpectrometerTrackParticles": "TrackParticle", + "CaloCalTopoClusters": "NanoCollection", } """Default configuration for mixin types, based on the collection name. From f4d66682cec0e3b62a3d7ec96a2b7ced42fa5c76 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Fri, 28 Jul 2023 18:46:56 -0400 Subject: [PATCH 12/75] comment about multiple elementlinks --- src/coffea/nanoevents/methods/physlite.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index b6badb9d9..979fff0a0 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -59,6 +59,10 @@ def _element_link_method(self, link_name, target_name, _dask_array_): def _element_link_multiple(events, obj, link_field, with_name=None): + # currently not working in dask because: + # - we don't know the resulting type beforehand + # - also not the targets, so no way to find out which columns to load? + # - could consider to treat the case of truth collections by just loading all truth columns link = obj[link_field] key = link.m_persKey index = link.m_persIndex From 4da309f4d745ccbefdcd913b755ee47fe74c8b82 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Fri, 28 Jul 2023 19:34:30 -0400 Subject: [PATCH 13/75] cleanup tests and add test for single field of linked collection --- tests/test_nanoevents_physlite.py | 93 ++++++------------------------- 1 file changed, 18 insertions(+), 75 deletions(-) diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py index 6354f0376..6b464cdb8 100644 --- a/tests/test_nanoevents_physlite.py +++ b/tests/test_nanoevents_physlite.py @@ -5,14 +5,6 @@ from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema -from coffea.nanoevents.methods.physlite import _get_global_index, _element_link - -import dask -import dask_awkward as dak -dask.config.set({"awkward.optimization.enabled": False, "awkward.raise-failed-meta": True, "awkward.optimization.on-fail": "raise"}) - -pytestmark = pytest.mark.skip(reason="uproot is upset with this file...") - def _events(): path = os.path.abspath("tests/samples/DAOD_PHYSLITE_21.2.108.0.art.pool.root") @@ -20,78 +12,29 @@ def _events(): {path: "CollectionTree"}, schemaclass=PHYSLITESchema, permit_dask=True, - #permit_dask=False, ) return factory.events() -events = _events() - -gi = _get_global_index( - events.GSFTrackParticles, - events.Electrons._eventindex, - events.Electrons.trackParticleLinks.m_persIndex -) - -el = _element_link( - events.GSFTrackParticles, - events.Electrons._eventindex, - events.Electrons.trackParticleLinks.m_persIndex, - events.Electrons.trackParticleLinks.m_persKey -) - -# @pytest.fixture(scope="module") -# def events(): -# return _events() - - -# @pytest.mark.parametrize("do_slice", [False, True]) -# def test_electron_track_links(events, do_slice): -# if do_slice: -# events = events[np.random.randint(2, size=len(events)).astype(bool)] -# for event in events: -# for electron in event.Electrons: -# for link_index, link in enumerate(electron.trackParticleLinks): -# track_index = link.m_persIndex -# print(track_index) -# print(event.GSFTrackParticles) -# print(electron.trackParticleLinks) -# print(electron.trackParticles) - -# assert ( -# event.GSFTrackParticles[track_index].z0 -# == electron.trackParticles[link_index].z0 -# ) - -# # from MetaData/EventFormat -# _hash_to_target_name = { -# 13267281: "TruthPhotons", -# 342174277: "TruthMuons", -# 368360608: "TruthNeutrinos", -# 375408000: "TruthTaus", -# 394100163: "TruthElectrons", -# 614719239: "TruthBoson", -# 660928181: "TruthTop", -# 779635413: "TruthBottom", -# } +@pytest.fixture(scope="module") +def events(): + return _events() -# def test_truth_links_toplevel(events): -# children_px = events.TruthBoson.children.px -# for i_event, event in enumerate(events): -# for i_particle, particle in enumerate(event.TruthBoson): -# for i_link, link in enumerate(particle.childLinks): -# assert ( -# event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px -# == children_px[i_event][i_particle][i_link] -# ) +def test_load_single_field_of_linked(events): + events.Electrons.caloClusters.calE.compute() -# def test_truth_links(events): -# for i_event, event in enumerate(events): -# for i_particle, particle in enumerate(event.TruthBoson): -# for i_link, link in enumerate(particle.childLinks): -# assert ( -# event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px -# == particle.children[i_link].px -# ) +@pytest.mark.parametrize("do_slice", [False, True]) +def test_electron_track_links(events, do_slice): + if do_slice: + events = events[::2] + trackParticles = events.Electrons.trackParticles.compute() + for i, event in enumerate(events[["Electrons", "GSFTrackParticles"]].compute()): + for j, electron in enumerate(event.Electrons): + for link_index, link in enumerate(electron.trackParticleLinks): + track_index = link.m_persIndex + assert ( + event.GSFTrackParticles[track_index].z0 + == trackParticles[i][j][link_index].z0 + ) From dbccef044c9e9ca3e98ba8dce5795206c2c98f9b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Jul 2023 23:59:34 +0000 Subject: [PATCH 14/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/coffea/nanoevents/methods/physlite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 979fff0a0..90a449004 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -2,8 +2,8 @@ from numbers import Number import awkward -import numpy import dask_awkward +import numpy from coffea.nanoevents.methods import base, vector From dbfadd8cb9841786c10b43e6f08b5663b690ddbe Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Fri, 28 Jul 2023 20:04:33 -0400 Subject: [PATCH 15/75] pylint --- src/coffea/nanoevents/methods/physlite.py | 2 +- tests/test_nanoevents_physlite.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 90a449004..5b1cbe50b 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -102,7 +102,7 @@ def _get_target_offsets(load_column, event_index): if isinstance(event_index, Number): return offsets[event_index] - # nescessary to stick it into the `NumpyArray` constructor + # necessary to stick it into the `NumpyArray` constructor # if typetracer is passed through offsets = awkward.typetracer.length_zero_if_typetracer( load_column.layout.offsets.data diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py index 6b464cdb8..55293a164 100644 --- a/tests/test_nanoevents_physlite.py +++ b/tests/test_nanoevents_physlite.py @@ -1,6 +1,5 @@ import os -import numpy as np import pytest from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema From ab5164bd9c0689a59678c87ff8024c077a518e4e Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Tue, 22 Aug 2023 14:25:51 +0200 Subject: [PATCH 16/75] flat calling structure for trackParticle(s) behavior methods --- src/coffea/nanoevents/methods/physlite.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 5b1cbe50b..72ca50165 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -204,12 +204,12 @@ def trackParticles(self, _dask_array_=None): @property def trackParticle(self, _dask_array_=None): - if _dask_array_ is not None: - self = _dask_array_ # TODO: is this what i should be doing? - trackParticles = self.trackParticles - return self.trackParticles[ - tuple([slice(None) for i in range(trackParticles.ndim - 1)] + [0]) - ] + trackParticles = _element_link_method( + self, "trackParticleLinks", "GSFTrackParticles", _dask_array_ + ) + # Ellipsis (..., 0) slicing not supported yet by dask_awkward + slicer = tuple([slice(None) for i in range(trackParticles.ndim - 1)] + [0]) + return trackParticles[slicer] @property def caloClusters(self, _dask_array_=None): From 8ec38cfda7c374568ab06025c423aef0272caa9d Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Wed, 30 Aug 2023 15:32:00 +0200 Subject: [PATCH 17/75] fix column touching for _get_target_offsets --- src/coffea/nanoevents/methods/physlite.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 72ca50165..eefff7eb1 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -102,6 +102,11 @@ def _get_target_offsets(load_column, event_index): if isinstance(event_index, Number): return offsets[event_index] + # let the necessary column optimization know that we need to load this + # column to get the offsets + if awkward.backend(load_column) == "typetracer": + awkward.typetracer.touch_data(load_column) + # necessary to stick it into the `NumpyArray` constructor # if typetracer is passed through offsets = awkward.typetracer.length_zero_if_typetracer( From e6127d5e54ef12516760e51d1e4a229923bed2c2 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 15:24:29 +0200 Subject: [PATCH 18/75] make test actually fail --- tests/test_nanoevents_physlite.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py index 55293a164..95f58491d 100644 --- a/tests/test_nanoevents_physlite.py +++ b/tests/test_nanoevents_physlite.py @@ -1,5 +1,6 @@ import os +import dask import pytest from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema @@ -21,7 +22,8 @@ def events(): def test_load_single_field_of_linked(events): - events.Electrons.caloClusters.calE.compute() + with dask.config.set({"awkward.raise-failed-meta": True}): + events.Electrons.caloClusters.calE.compute() @pytest.mark.parametrize("do_slice", [False, True]) From c4385b14e41b5df770fe9bb1ca7bac51b6aa4a85 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 15:27:17 +0200 Subject: [PATCH 19/75] use layout._touch_data since public touch_data not yet available in ak 2.3.3 --- src/coffea/nanoevents/methods/physlite.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index eefff7eb1..f8147082b 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -105,7 +105,8 @@ def _get_target_offsets(load_column, event_index): # let the necessary column optimization know that we need to load this # column to get the offsets if awkward.backend(load_column) == "typetracer": - awkward.typetracer.touch_data(load_column) + # awkward.typetracer.touch_data(load_column) # available in awkward > 2.3.3 + load_column.layout._touch_data(recursive=True) # necessary to stick it into the `NumpyArray` constructor # if typetracer is passed through From e2dd3f03534e7cb166cb146ad2511a5906b2c289 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 15:29:05 +0200 Subject: [PATCH 20/75] try to avoid loading double-jagged columns for getting offsets in elementlink calculation --- src/coffea/nanoevents/methods/physlite.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index f8147082b..7b9f98580 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -122,7 +122,12 @@ def descend(layout, depth, **kwargs): def _get_global_index(target, eventindex, index): - load_column = target[target.fields[0]] + for field in target.fields: + # fetch first column to get offsets from + # (but try to avoid the double-jagged ones if possible) + load_column = target[field] + if load_column.ndim < 3: + break target_offsets = _get_target_offsets(load_column, eventindex) return target_offsets + index From e72d3731db814e24d56b3e3f3d37b3626dac75a7 Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Thu, 31 Aug 2023 09:56:50 -0500 Subject: [PATCH 21/75] add uproot_options to uporoot.dask in factory --- src/coffea/nanoevents/factory.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 38e06d601..fb9a6c4b5 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -232,7 +232,7 @@ def from_root( treepath="/Events", entry_start=None, entry_stop=None, - chunks_per_file=1, + chunks_per_file=None, runtime_cache=None, persistent_cache=None, schemaclass=NanoAODSchema, @@ -268,7 +268,7 @@ def from_root( metadata : dict, optional Arbitrary metadata to add to the `base.NanoEvents` object uproot_options : dict, optional - Any options to pass to ``uproot.open`` + Any options to pass to ``uproot.open`` or ``uproot.dask`` access_log : list, optional Pass a list instance to record which branches were lazily accessed by this instance use_ak_forth: @@ -326,6 +326,17 @@ def from_root( ak_add_doc=True, filter_branch=_remove_not_interpretable, steps_per_file=chunks_per_file, + **uproot_options, + ) + elif chunks_per_file is None: + opener = partial( + uproot.dask, + file, + full_paths=True, + open_files=False, + ak_add_doc=True, + filter_branch=_remove_not_interpretable, + **uproot_options, ) else: opener = partial( @@ -336,6 +347,7 @@ def from_root( ak_add_doc=True, filter_branch=_remove_not_interpretable, steps_per_file=chunks_per_file, + **uproot_options, ) return cls(map_schema, opener, None, cache=None, is_dask=True) elif permit_dask and not schemaclass.__dask_capable__: From f491a36c04e1fa2c3b63d14468345465d398afa6 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 17:20:00 +0200 Subject: [PATCH 22/75] allow for collections that contain non-list fields --- src/coffea/nanoevents/schemas/physlite.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 1b9b89205..3b6508e2e 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -118,14 +118,21 @@ def _build_collections(self, branch_forms): to_zip, objname, self.mixins.get(objname, None), - bypass=True, - ) - content = contents[objname]["content"] - content["parameters"] = dict( - content.get("parameters", {}), collection_name=objname + bypass=False, ) except NotImplementedError: warnings.warn(f"Can't zip collection {objname}") + if "content" in contents[objname]: + # in this case we were able to zip everything together to a ListOffsetArray(RecordArray) + assert "List" in contents[objname]["class"] + content = contents[objname]["content"] + else: + # in this case this was not possible (e.g. because we also had non-list fields) + assert contents[objname]["class"] == "RecordArray" + content = contents[objname] + content["parameters"] = dict( + content.get("parameters", {}), collection_name=objname + ) return contents @staticmethod From 79ae6d5d7e4a9eb018b5cf1efb57713ea3c96b34 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 17:34:35 +0200 Subject: [PATCH 23/75] skip empty records --- src/coffea/nanoevents/schemas/physlite.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 3b6508e2e..11446b7a2 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -79,6 +79,9 @@ def _build_collections(self, branch_forms): key_fields = key.split("/")[-1].split(".") top_key = key_fields[0] sub_key = ".".join(key_fields[1:]) + if ak_form["class"] == "RecordArray" and not ak_form["fields"]: + # skip empty records (e.g. the branches ending in "." only containing the base class) + continue objname = top_key.replace("Analysis", "").replace("AuxDyn", "") zip_groups[objname].append(((key, sub_key), ak_form)) From 94b648fd0926ad6150813d9d4f0cd289bde4515d Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 17:38:25 +0200 Subject: [PATCH 24/75] don't zip branches that are not grouped with anything else (e.g. index_ref in newer PHYSLITE) --- src/coffea/nanoevents/schemas/physlite.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 11446b7a2..52e3ac747 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -100,6 +100,10 @@ def _build_collections(self, branch_forms): # zip the forms contents = {} for objname, keys_and_form in zip_groups.items(): + if len(keys_and_form) == 1: + # don't zip if there is only one item + contents[objname] = keys_and_form[0][1] + continue to_zip = {} for (key, sub_key), form in keys_and_form: if "." in sub_key: From 0334acd8feb208602b0f173d473921e155215cd4 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 18:03:57 +0200 Subject: [PATCH 25/75] also remove Aux from branch names to zip them with AuxDyn and potentially non-aux branches --- src/coffea/nanoevents/schemas/physlite.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 52e3ac747..c45240d6a 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -82,7 +82,9 @@ def _build_collections(self, branch_forms): if ak_form["class"] == "RecordArray" and not ak_form["fields"]: # skip empty records (e.g. the branches ending in "." only containing the base class) continue - objname = top_key.replace("Analysis", "").replace("AuxDyn", "") + objname = ( + top_key.replace("Analysis", "").replace("AuxDyn", "").replace("Aux", "") + ) zip_groups[objname].append(((key, sub_key), ak_form)) From 881e4e23c5d00eba203956c80c2f758444ee96d0 Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Thu, 31 Aug 2023 11:29:55 -0500 Subject: [PATCH 26/75] leave chunks_per_file=1 for now --- src/coffea/nanoevents/factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index fb9a6c4b5..f429f04bd 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -232,7 +232,7 @@ def from_root( treepath="/Events", entry_start=None, entry_stop=None, - chunks_per_file=None, + chunks_per_file=1, runtime_cache=None, persistent_cache=None, schemaclass=NanoAODSchema, From 3fe091b12bc06b4a0cdc866ac0c4183dde422620 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Sep 2023 14:41:39 +0000 Subject: [PATCH 27/75] Bump actions/checkout from 3 to 4 Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9a387fcbc..628df33dd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,7 +26,7 @@ jobs: name: pre-commit runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v4 - uses: pre-commit/action@v3.0.0 with: @@ -45,7 +45,7 @@ jobs: name: test coffea (${{ matrix.os }}) - python ${{ matrix.python-version }}, JDK${{ matrix.java-version }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: @@ -135,7 +135,7 @@ jobs: name: test coffea-workqueue steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Conda uses: conda-incubator/setup-miniconda@v2 env: @@ -185,7 +185,7 @@ jobs: name: deploy release steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: From 88176f22a853032c519c2b2ef0cb4460bd9549a1 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Wed, 6 Sep 2023 10:09:24 -0500 Subject: [PATCH 28/75] repin to awkward 2.4.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 454ed3319..87b29d513 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ classifiers = [ "Topic :: Utilities", ] dependencies = [ - "awkward>=2.3.3", + "awkward>=2.4.1", "uproot>=5.0.10", "dask[array]>=2023.4.0", "dask-awkward>=2023.7.1,!=2023.8.0", From d20468aa5895444c89828815f6bf9b9db81df7f9 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Wed, 6 Sep 2023 14:59:14 -0500 Subject: [PATCH 29/75] repin to latest fixed awkward --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 454ed3319..f5b168c1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ classifiers = [ "Topic :: Utilities", ] dependencies = [ - "awkward>=2.3.3", + "awkward>=2.4.2", "uproot>=5.0.10", "dask[array]>=2023.4.0", "dask-awkward>=2023.7.1,!=2023.8.0", From 5dd6868d98534f960e04c973cba5d06a751534b7 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Wed, 6 Sep 2023 15:00:36 -0500 Subject: [PATCH 30/75] awkward 2.4.2 (just to not clobber main) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 87b29d513..f5b168c1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ classifiers = [ "Topic :: Utilities", ] dependencies = [ - "awkward>=2.4.1", + "awkward>=2.4.2", "uproot>=5.0.10", "dask[array]>=2023.4.0", "dask-awkward>=2023.7.1,!=2023.8.0", From 5527a02e94aa2685be59d6f6ca334d56c62d1a77 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Wed, 6 Sep 2023 16:10:38 -0500 Subject: [PATCH 31/75] use uproot._util.unset as default value to chunks_per_file to ensure correct behavior --- src/coffea/nanoevents/factory.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index f429f04bd..f1c8ebb64 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -232,7 +232,7 @@ def from_root( treepath="/Events", entry_start=None, entry_stop=None, - chunks_per_file=1, + chunks_per_file=uproot._util.unset, runtime_cache=None, persistent_cache=None, schemaclass=NanoAODSchema, @@ -327,17 +327,7 @@ def from_root( filter_branch=_remove_not_interpretable, steps_per_file=chunks_per_file, **uproot_options, - ) - elif chunks_per_file is None: - opener = partial( - uproot.dask, - file, - full_paths=True, - open_files=False, - ak_add_doc=True, - filter_branch=_remove_not_interpretable, - **uproot_options, - ) + ) else: opener = partial( uproot.dask, From 130a90342912311bfa2379c1aeaa4a064cb9817e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Sep 2023 21:11:24 +0000 Subject: [PATCH 32/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/coffea/nanoevents/factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index f1c8ebb64..58dd55ad5 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -327,7 +327,7 @@ def from_root( filter_branch=_remove_not_interpretable, steps_per_file=chunks_per_file, **uproot_options, - ) + ) else: opener = partial( uproot.dask, From 6abc42c768b0e51392c5ca275876f1d38d45cfd6 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 7 Sep 2023 08:43:44 +0200 Subject: [PATCH 33/75] go back to using public touch_data since we have ak 2.4.2 now --- src/coffea/nanoevents/methods/physlite.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 7b9f98580..751c5d03f 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -105,8 +105,7 @@ def _get_target_offsets(load_column, event_index): # let the necessary column optimization know that we need to load this # column to get the offsets if awkward.backend(load_column) == "typetracer": - # awkward.typetracer.touch_data(load_column) # available in awkward > 2.3.3 - load_column.layout._touch_data(recursive=True) + awkward.typetracer.touch_data(load_column) # necessary to stick it into the `NumpyArray` constructor # if typetracer is passed through From 1aaaa63c03af7ded0881cf40a1e35512d81b7361 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Sep 2023 14:29:31 +0000 Subject: [PATCH 34/75] Bump crazy-max/ghaction-github-pages from 3 to 4 Bumps [crazy-max/ghaction-github-pages](https://github.com/crazy-max/ghaction-github-pages) from 3 to 4. - [Release notes](https://github.com/crazy-max/ghaction-github-pages/releases) - [Commits](https://github.com/crazy-max/ghaction-github-pages/compare/v3...v4) --- updated-dependencies: - dependency-name: crazy-max/ghaction-github-pages dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 628df33dd..fe8453b8d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -119,7 +119,7 @@ jobs: touch build/html/.nojekyll - name: Deploy documentation if: github.event_name == 'push' && matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11 - uses: crazy-max/ghaction-github-pages@v3 + uses: crazy-max/ghaction-github-pages@v4 with: target_branch: gh-pages build_dir: docs/build/html From 7a64bb62777c30d50f04c50c3848e6c25111b3cf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Sep 2023 06:14:36 +0000 Subject: [PATCH 35/75] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/psf/black: 23.7.0 → 23.9.1](https://github.com/psf/black/compare/23.7.0...23.9.1) - [github.com/asottile/pyupgrade: v3.9.0 → v3.10.1](https://github.com/asottile/pyupgrade/compare/v3.9.0...v3.10.1) - [github.com/pycqa/flake8: 6.0.0 → 6.1.0](https://github.com/pycqa/flake8/compare/6.0.0...6.1.0) --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a4d511b07..1b3695665 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,7 @@ ci: repos: - repo: https://github.com/psf/black - rev: 23.7.0 + rev: 23.9.1 hooks: - id: black @@ -37,7 +37,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/asottile/pyupgrade - rev: v3.9.0 + rev: v3.10.1 hooks: - id: pyupgrade args: ["--py38-plus"] @@ -48,7 +48,7 @@ repos: - id: setup-cfg-fmt - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 exclude: coffea/processor/templates From 13ebcfc530def1ab6b324cd0edffd5f207d3b60d Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Fri, 15 Sep 2023 18:44:51 -0500 Subject: [PATCH 36/75] request dtype from np.arange and ak.zeros_like --- src/coffea/analysis_tools.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index 66b92fe2b..f8bbd21e2 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -610,13 +610,13 @@ def yieldhist(self): labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"] if not self._delayed_mode: h = hist.Hist(hist.axis.Integer(0, len(labels), name="N-1")) - h.fill(numpy.arange(len(labels)), weight=self._nev) + h.fill(numpy.arange(len(labels), dtype=int), weight=self._nev) else: h = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="N-1")) for i, weight in enumerate(self._masks, 1): h.fill(dask_awkward.full_like(weight, i, dtype=int), weight=weight) - h.fill(dask_awkward.zeros_like(weight)) + h.fill(dask_awkward.zeros_like(weight, dtype=int)) return h, labels @@ -712,7 +712,7 @@ def plot_vars( hist.axis.Integer(0, len(labels), name="N-1"), ) arr = awkward.flatten(var) - h.fill(arr, awkward.zeros_like(arr)) + h.fill(arr, awkward.zeros_like(arr, dtype=int)) for i, mask in enumerate(self.result().masks, 1): arr = awkward.flatten(var[mask]) h.fill(arr, awkward.full_like(arr, i, dtype=int)) @@ -725,7 +725,7 @@ def plot_vars( hist.axis.Integer(0, len(labels), name="N-1"), ) arr = dask_awkward.flatten(var) - h.fill(arr, dask_awkward.zeros_like(arr)) + h.fill(arr, dask_awkward.zeros_like(arr, dtype=int)) for i, mask in enumerate(self.result().masks, 1): arr = dask_awkward.flatten(var[mask]) h.fill(arr, dask_awkward.full_like(arr, i, dtype=int)) @@ -856,8 +856,8 @@ def yieldhist(self): honecut = hist.Hist(hist.axis.Integer(0, len(labels), name="onecut")) hcutflow = honecut.copy() hcutflow.axes.name = ("cutflow",) - honecut.fill(numpy.arange(len(labels)), weight=self._nevonecut) - hcutflow.fill(numpy.arange(len(labels)), weight=self._nevcutflow) + honecut.fill(numpy.arange(len(labels), dtype=int), weight=self._nevonecut) + hcutflow.fill(numpy.arange(len(labels), dtype=int), weight=self._nevcutflow) else: honecut = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="onecut")) @@ -868,12 +868,12 @@ def yieldhist(self): honecut.fill( dask_awkward.full_like(weight, i, dtype=int), weight=weight ) - honecut.fill(dask_awkward.zeros_like(weight)) + honecut.fill(dask_awkward.zeros_like(weight, dtype=int)) for i, weight in enumerate(self._maskscutflow, 1): hcutflow.fill( dask_awkward.full_like(weight, i, dtype=int), weight=weight ) - hcutflow.fill(dask_awkward.zeros_like(weight)) + hcutflow.fill(dask_awkward.zeros_like(weight, dtype=int)) return honecut, hcutflow, labels @@ -975,8 +975,8 @@ def plot_vars( hcutflow.axes.name = name, "cutflow" arr = awkward.flatten(var) - honecut.fill(arr, awkward.zeros_like(arr)) - hcutflow.fill(arr, awkward.zeros_like(arr)) + honecut.fill(arr, awkward.zeros_like(arr, dtype=int)) + hcutflow.fill(arr, awkward.zeros_like(arr, dtype=int)) for i, mask in enumerate(self.result().masksonecut, 1): arr = awkward.flatten(var[mask]) @@ -998,8 +998,8 @@ def plot_vars( hcutflow.axes.name = name, "cutflow" arr = dask_awkward.flatten(var) - honecut.fill(arr, dask_awkward.zeros_like(arr)) - hcutflow.fill(arr, dask_awkward.zeros_like(arr)) + honecut.fill(arr, dask_awkward.zeros_like(arr, dtype=int)) + hcutflow.fill(arr, dask_awkward.zeros_like(arr, dtype=int)) for i, mask in enumerate(self.result().masksonecut, 1): arr = dask_awkward.flatten(var[mask]) From a7434fe393418885cba80cfa2c57fb2dfcf8e223 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Fri, 15 Sep 2023 21:07:17 -0500 Subject: [PATCH 37/75] remove weirdly shadowed member variable from base schema --- src/coffea/nanoevents/schemas/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coffea/nanoevents/schemas/base.py b/src/coffea/nanoevents/schemas/base.py index 09812eee0..8a1f2251e 100644 --- a/src/coffea/nanoevents/schemas/base.py +++ b/src/coffea/nanoevents/schemas/base.py @@ -105,7 +105,6 @@ class BaseSchema: """ __dask_capable__ = True - behavior = {} def __init__(self, base_form, *args, **kwargs): params = dict(base_form.get("parameters", {})) From a65a3dfea35df64a778ee1fb1387d3fd6d35260e Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Sat, 16 Sep 2023 12:38:55 -0500 Subject: [PATCH 38/75] found a random typo along the way --- src/coffea/analysis_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index f8bbd21e2..e1176f95c 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -418,7 +418,7 @@ def variations(self): class NminusOneToNpz: - """Object to be returned by NmiusOne.to_npz()""" + """Object to be returned by NminusOne.to_npz()""" def __init__(self, file, labels, nev, masks, saver): self._file = file From acdb1d829db5437f863caa144853676a6ea1d04e Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Sat, 16 Sep 2023 14:07:20 -0500 Subject: [PATCH 39/75] prettier print statements and dask.compute reduction --- src/coffea/analysis_tools.py | 61 ++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index e1176f95c..6b2ebc77e 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -494,11 +494,17 @@ def maskscutflow(self): return self._maskscutflow def compute(self): - self._nevonecut = list(dask.compute(*self._nevonecut)) - self._nevcutflow = list(dask.compute(*self._nevcutflow)) - self._masksonecut = list(dask.compute(*self._masksonecut)) - self._maskscutflow = list(dask.compute(*self._maskscutflow)) - numpy.savez( + self._nevonecut, self._nevcutflow = dask.compute( + self._nevonecut, self._nevcutflow + ) + self._masksonecut, self._maskscutflow = dask.compute( + self._masksonecut, self._maskscutflow + ) + self._nevonecut = list(self._nevonecut) + self._nevcutflow = list(self._nevcutflow) + self._masksonecut = list(self._masksonecut) + self._maskscutflow = list(self._maskscutflow) + self._saver( self._file, labels=self._labels, nevonecut=self._nevonecut, @@ -581,21 +587,25 @@ def print(self): if self._delayed_mode: self._nev = list(dask.compute(*self._nev)) + nev = self._nev print("N-1 selection stats:") for i, name in enumerate(self._names): - print( - f"Ignoring {name:<20}: pass = {nev[i+1]:<20}\ - all = {nev[0]:<20}\ - -- eff = {nev[i+1]*100/nev[0]:.1f} %" + stats = ( + f"Ignoring {name:<20}" + f"pass = {nev[i+1]:<20}" + f"all = {nev[0]:<20}" + f"-- eff = {nev[i+1]*100/nev[0]:.1f} %" ) + print(stats) - if True: - print( - f"All cuts {'':<20}: pass = {nev[-1]:<20}\ - all = {nev[0]:<20}\ - -- eff = {nev[-1]*100/nev[0]:.1f} %" - ) + stats_all = ( + f"All cuts {'':<20}" + f"pass = {nev[-1]:<20}" + f"all = {nev[0]:<20}" + f"-- eff = {nev[-1]*100/nev[0]:.1f} %" + ) + print(stats_all) def yieldhist(self): """Returns the N-1 selection yields as a ``hist.Hist`` object @@ -824,19 +834,24 @@ def print(self): """Prints the statistics of the Cutflow""" if self._delayed_mode: - self._nevonecut = list(dask.compute(*self._nevonecut)) - self._nevcutflow = list(dask.compute(*self._nevcutflow)) + self._nevonecut, self._nevcutflow = dask.compute( + self._nevonecut, self._nevcutflow + ) + nevonecut = self._nevonecut nevcutflow = self._nevcutflow + print("Cutflow stats:") for i, name in enumerate(self._names): - print( - f"Cut {name:<20}: pass = {nevonecut[i+1]:<20}\ - cumulative pass = {nevcutflow[i+1]:<20}\ - all = {nevonecut[0]:<20}\ - -- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %\ - -- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %" + stats = ( + f"Cut {name:<20}:" + f"pass = {nevonecut[i+1]:<20}" + f"cumulative pass = {nevcutflow[i+1]:<20}" + f"all = {nevonecut[0]:<20}" + f"-- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %{'':<20}" + f"-- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %" ) + print(stats) def yieldhist(self): """Returns the cutflow yields as ``hist.Hist`` objects From 2fee783b79cee6c51ee1f89c244b1b272d1f6356 Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Sat, 16 Sep 2023 15:58:36 -0500 Subject: [PATCH 40/75] make the default to be compute=False for to_npz() --- src/coffea/analysis_tools.py | 8 ++++---- tests/test_analysis_tools.py | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index 6b2ebc77e..14fd170f3 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -544,7 +544,7 @@ def result(self): labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"] return NminusOneResult(labels, self._nev, self._masks) - def to_npz(self, file, compressed=False, compute=True): + def to_npz(self, file, compressed=False, compute=False): """Saves the results of the N-1 selection to a .npz file Parameters @@ -560,7 +560,7 @@ def to_npz(self, file, compressed=False, compute=True): compute : bool, optional Whether to immediately start writing or to return an object that the user can choose when to start writing by calling compute(). - Default is True. + Default is False. Returns ------- @@ -790,7 +790,7 @@ def result(self): self._maskscutflow, ) - def to_npz(self, file, compressed=False, compute=True): + def to_npz(self, file, compressed=False, compute=False): """Saves the results of the cutflow to a .npz file Parameters @@ -806,7 +806,7 @@ def to_npz(self, file, compressed=False, compute=True): compute : bool, optional Whether to immediately start writing or to return an object that the user can choose when to start writing by calling compute(). - Default is True. + Default is False. Returns ------- diff --git a/tests/test_analysis_tools.py b/tests/test_analysis_tools.py index 1e8c46ec1..bb3221432 100644 --- a/tests/test_analysis_tools.py +++ b/tests/test_analysis_tools.py @@ -513,14 +513,14 @@ def test_packed_selection_nminusone(): ): assert np.all(mask == truth) - nminusone.to_npz("nminusone.npz", compressed=False) + nminusone.to_npz("nminusone.npz", compressed=False).compute() with np.load("nminusone.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nev"] == nev) assert np.all(file["masks"] == masks) os.remove("nminusone.npz") - nminusone.to_npz("nminusone.npz", compressed=True) + nminusone.to_npz("nminusone.npz", compressed=True).compute() with np.load("nminusone.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nev"] == nev) @@ -619,7 +619,7 @@ def test_packed_selection_cutflow(): ): assert np.all(mask == truth) - cutflow.to_npz("cutflow.npz", compressed=False) + cutflow.to_npz("cutflow.npz", compressed=False).compute() with np.load("cutflow.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nevonecut"] == nevonecut) @@ -628,7 +628,7 @@ def test_packed_selection_cutflow(): assert np.all(file["maskscutflow"] == maskscutflow) os.remove("cutflow.npz") - cutflow.to_npz("cutflow.npz", compressed=True) + cutflow.to_npz("cutflow.npz", compressed=True).compute() with np.load("cutflow.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nevonecut"] == nevonecut) @@ -854,14 +854,14 @@ def test_packed_selection_nminusone_dak(optimization_enabled): ): assert np.all(mask.compute() == truth.compute()) - nminusone.to_npz("nminusone.npz", compressed=False) + nminusone.to_npz("nminusone.npz", compressed=False).compute() with np.load("nminusone.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nev"] == list(dask.compute(*nev))) assert np.all(file["masks"] == list(dask.compute(*masks))) os.remove("nminusone.npz") - nminusone.to_npz("nminusone.npz", compressed=True) + nminusone.to_npz("nminusone.npz", compressed=True).compute() with np.load("nminusone.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nev"] == list(dask.compute(*nev))) @@ -978,7 +978,7 @@ def test_packed_selection_cutflow_dak(optimization_enabled): ): assert np.all(mask.compute() == truth.compute()) - cutflow.to_npz("cutflow.npz", compressed=False) + cutflow.to_npz("cutflow.npz", compressed=False).compute() with np.load("cutflow.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut))) @@ -987,7 +987,7 @@ def test_packed_selection_cutflow_dak(optimization_enabled): assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow))) os.remove("cutflow.npz") - cutflow.to_npz("cutflow.npz", compressed=True) + cutflow.to_npz("cutflow.npz", compressed=True).compute() with np.load("cutflow.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut))) @@ -1109,14 +1109,14 @@ def test_packed_selection_nminusone_dak_uproot_only(optimization_enabled): ): assert np.all(mask.compute() == truth.compute()) - nminusone.to_npz("nminusone.npz", compressed=False) + nminusone.to_npz("nminusone.npz", compressed=False).compute() with np.load("nminusone.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nev"] == list(dask.compute(*nev))) assert np.all(file["masks"] == list(dask.compute(*masks))) os.remove("nminusone.npz") - nminusone.to_npz("nminusone.npz", compressed=True) + nminusone.to_npz("nminusone.npz", compressed=True).compute() with np.load("nminusone.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nev"] == list(dask.compute(*nev))) @@ -1233,7 +1233,7 @@ def test_packed_selection_cutflow_dak_uproot_only(optimization_enabled): ): assert np.all(mask.compute() == truth.compute()) - cutflow.to_npz("cutflow.npz", compressed=False) + cutflow.to_npz("cutflow.npz", compressed=False).compute() with np.load("cutflow.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut))) @@ -1242,7 +1242,7 @@ def test_packed_selection_cutflow_dak_uproot_only(optimization_enabled): assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow))) os.remove("cutflow.npz") - cutflow.to_npz("cutflow.npz", compressed=True) + cutflow.to_npz("cutflow.npz", compressed=True).compute() with np.load("cutflow.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut))) From 3bdff8c3d33e73b7fcb57edad830605b0eb27fae Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Sat, 16 Sep 2023 20:21:51 -0500 Subject: [PATCH 41/75] warn in print() when user is about to compute dask stuff --- src/coffea/analysis_tools.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index 14fd170f3..a68124d87 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -582,10 +582,16 @@ def to_npz(self, file, compressed=False, compute=False): else: return out - def print(self): + def print(self, compute=False): """Prints the statistics of the N-1 selection""" - if self._delayed_mode: + if self._delayed_mode and not compute: + warnings.warn( + "This will compute dask_awkward arrays. If you really want to do this now, call print(compute=True)" + ) + return + + if self._delayed_mode and compute: self._nev = list(dask.compute(*self._nev)) nev = self._nev @@ -830,10 +836,16 @@ def to_npz(self, file, compressed=False, compute=False): else: return out - def print(self): + def print(self, compute=False): """Prints the statistics of the Cutflow""" - if self._delayed_mode: + if self._delayed_mode and not compute: + warnings.warn( + "This will compute dask_awkward arrays. If you really want to do this now, call print(compute=True)" + ) + return + + if self._delayed_mode and compute: self._nevonecut, self._nevcutflow = dask.compute( self._nevonecut, self._nevcutflow ) From 8e6bb10d8917d59293fd0f613feba05edeb9b8a6 Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Mon, 18 Sep 2023 09:02:25 -0500 Subject: [PATCH 42/75] Revert "warn in print() when user is about to compute dask stuff" This reverts commit 3bdff8c3d33e73b7fcb57edad830605b0eb27fae. --- src/coffea/analysis_tools.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index a68124d87..14fd170f3 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -582,16 +582,10 @@ def to_npz(self, file, compressed=False, compute=False): else: return out - def print(self, compute=False): + def print(self): """Prints the statistics of the N-1 selection""" - if self._delayed_mode and not compute: - warnings.warn( - "This will compute dask_awkward arrays. If you really want to do this now, call print(compute=True)" - ) - return - - if self._delayed_mode and compute: + if self._delayed_mode: self._nev = list(dask.compute(*self._nev)) nev = self._nev @@ -836,16 +830,10 @@ def to_npz(self, file, compressed=False, compute=False): else: return out - def print(self, compute=False): + def print(self): """Prints the statistics of the Cutflow""" - if self._delayed_mode and not compute: - warnings.warn( - "This will compute dask_awkward arrays. If you really want to do this now, call print(compute=True)" - ) - return - - if self._delayed_mode and compute: + if self._delayed_mode: self._nevonecut, self._nevcutflow = dask.compute( self._nevonecut, self._nevcutflow ) From 522f38b036507b0b79d57cfacedb41a0822a7e5c Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Mon, 18 Sep 2023 09:07:50 -0500 Subject: [PATCH 43/75] only warn and not add compute argument in print --- src/coffea/analysis_tools.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index 14fd170f3..facf14e97 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -586,6 +586,9 @@ def print(self): """Prints the statistics of the N-1 selection""" if self._delayed_mode: + warnings.warn( + "Printing the N-1 selection statistics is going to compute dask_awkward objects." + ) self._nev = list(dask.compute(*self._nev)) nev = self._nev @@ -834,6 +837,9 @@ def print(self): """Prints the statistics of the Cutflow""" if self._delayed_mode: + warnings.warn( + "Printing the cutflow statistics is going to compute dask_awkward objects." + ) self._nevonecut, self._nevcutflow = dask.compute( self._nevonecut, self._nevcutflow ) From 9feea2b3ac5c2d5b14f5cfb52aa94ce86b26db2e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Sep 2023 05:39:03 +0000 Subject: [PATCH 44/75] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/pyupgrade: v3.10.1 → v3.11.0](https://github.com/asottile/pyupgrade/compare/v3.10.1...v3.11.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1b3695665..d535cd79e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,7 +37,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/asottile/pyupgrade - rev: v3.10.1 + rev: v3.11.0 hooks: - id: pyupgrade args: ["--py38-plus"] From dbf0b641b60cdb485dc6672be00419e5824d9f2f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Sep 2023 14:53:14 +0000 Subject: [PATCH 45/75] Bump amannn/action-semantic-pull-request from 5.2.0 to 5.3.0 Bumps [amannn/action-semantic-pull-request](https://github.com/amannn/action-semantic-pull-request) from 5.2.0 to 5.3.0. - [Release notes](https://github.com/amannn/action-semantic-pull-request/releases) - [Changelog](https://github.com/amannn/action-semantic-pull-request/blob/main/CHANGELOG.md) - [Commits](https://github.com/amannn/action-semantic-pull-request/compare/v5.2.0...v5.3.0) --- updated-dependencies: - dependency-name: amannn/action-semantic-pull-request dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 99d3f9e26..44dbedb0b 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -17,6 +17,6 @@ jobs: name: Validate PR title runs-on: ubuntu-latest steps: - - uses: amannn/action-semantic-pull-request@v5.2.0 + - uses: amannn/action-semantic-pull-request@v5.3.0 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 67d29034485b990665f1c82eeca0bd5b0640f673 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 07:10:05 +0000 Subject: [PATCH 46/75] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/pyupgrade: v3.11.0 → v3.14.0](https://github.com/asottile/pyupgrade/compare/v3.11.0...v3.14.0) - [github.com/asottile/setup-cfg-fmt: v2.4.0 → v2.5.0](https://github.com/asottile/setup-cfg-fmt/compare/v2.4.0...v2.5.0) - [github.com/codespell-project/codespell: v2.2.5 → v2.2.6](https://github.com/codespell-project/codespell/compare/v2.2.5...v2.2.6) --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d535cd79e..8060d85d9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,13 +37,13 @@ repos: - id: trailing-whitespace - repo: https://github.com/asottile/pyupgrade - rev: v3.11.0 + rev: v3.14.0 hooks: - id: pyupgrade args: ["--py38-plus"] - repo: https://github.com/asottile/setup-cfg-fmt - rev: v2.4.0 + rev: v2.5.0 hooks: - id: setup-cfg-fmt @@ -54,7 +54,7 @@ repos: exclude: coffea/processor/templates - repo: https://github.com/codespell-project/codespell - rev: v2.2.5 + rev: v2.2.6 hooks: - id: codespell args: ["--skip=*.ipynb","-L hist,Hist,nd,SubJet,subjet,Subjet,PTD,ptd,fPt,fpt,Ser,ser"] From b8fc7fe86eb2cf04b0e78e7965da32ad7d19aa77 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 3 Oct 2023 17:41:09 +0100 Subject: [PATCH 47/75] wip: initial commit --- src/coffea/nanoevents/factory.py | 66 +++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 38e06d601..c43b182ad 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -77,23 +77,48 @@ def __init__( self.metadata = metadata self.version = version - def extract_form_keys_base_columns(self, form_keys): - base_columns = [] - for form_key in form_keys: - base_columns.extend( + def keys_for_buffer_keys(self, buffer_keys): + base_columns = set() + for buffer_key in buffer_keys: + form_key, attribute = self.parse_buffer_key(buffer_key) + base_columns.update( [ acolumn for acolumn in urllib.parse.unquote(form_key).split(",") if not acolumn.startswith("!") ] ) - return list(set(base_columns)) + return base_columns + + def parse_buffer_key(self, buffer_key): + prefix, attribute, form_key = buffer_key.rsplit("/", maxsplit=2) + if attribute == "offsets": + return (form_key[: -len("%2C%21offsets")], attribute) + else: + return (form_key, attribute) + + @property + def buffer_key(self): + return partial(self._key_formatter, "") def _key_formatter(self, prefix, form_key, form, attribute): if attribute == "offsets": form_key += "%2C%21offsets" return prefix + f"/{attribute}/{form_key}" + # TODO: deprecate + def extract_form_keys_base_columns(self, form_keys): + base_columns = [] + for form_key in form_keys: + base_columns.extend( + [ + acolumn + for acolumn in urllib.parse.unquote(form_key).split(",") + if not acolumn.startswith("!") + ] + ) + return list(set(base_columns)) + class _map_schema_uproot(_map_schema_base): def __init__( @@ -125,7 +150,36 @@ def __call__(self, form): }, "form_key": None, } - return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form) + return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form), self + + def create_column_mapping(self, tree, keys, start, stop, interp_options): + from functools import partial + + from coffea.nanoevents.util import tuple_to_key + + partition_key = ( + str(tree.file.uuid), + tree.object_path, + f"{start}-{stop}", + ) + uuidpfn = {partition_key[0]: tree.file.file_path} + mapping = UprootSourceMapping( + TrivialUprootOpener(uuidpfn, interp_options), + start, + stop, + cache={}, + access_log=None, + use_ak_forth=True, + ) + mapping.preload_column_source(partition_key[0], partition_key[1], tree) + buffer_key = partial(self._key_formatter, tuple_to_key(partition_key)) + + class TranslateBufferKeys: + def __getitem__(this, key): + form_key, attribute = self.parse_buffer_key(key) + return mapping[buffer_key(form_key=form_key, attribute=attribute, form=None)] + + return TranslateBufferKeys() def create_column_mapping_and_key(self, tree, start, stop, interp_options): from functools import partial From 1b4bd50af71c7e06166a7d7d285e341e606c3ee4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 16:43:11 +0000 Subject: [PATCH 48/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/coffea/nanoevents/factory.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index c43b182ad..cc0eff0ad 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -150,7 +150,10 @@ def __call__(self, form): }, "form_key": None, } - return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form), self + return ( + awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form), + self, + ) def create_column_mapping(self, tree, keys, start, stop, interp_options): from functools import partial @@ -177,7 +180,9 @@ def create_column_mapping(self, tree, keys, start, stop, interp_options): class TranslateBufferKeys: def __getitem__(this, key): form_key, attribute = self.parse_buffer_key(key) - return mapping[buffer_key(form_key=form_key, attribute=attribute, form=None)] + return mapping[ + buffer_key(form_key=form_key, attribute=attribute, form=None) + ] return TranslateBufferKeys() From daa8529cfb7ea5027d0ae8606615c575f0119519 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 4 Oct 2023 10:18:45 +0100 Subject: [PATCH 49/75] fix: rename function --- src/coffea/nanoevents/factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index c43b182ad..559504c57 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -152,7 +152,7 @@ def __call__(self, form): } return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form), self - def create_column_mapping(self, tree, keys, start, stop, interp_options): + def load_buffers(self, tree, keys, start, stop, interp_options): from functools import partial from coffea.nanoevents.util import tuple_to_key From 66c8710c3ea1630a519a2de6eabd3af6327329d3 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 4 Oct 2023 11:17:43 +0100 Subject: [PATCH 50/75] fix: use report_necessary_buffers --- src/coffea/processor/executor.py | 6 +++--- tests/test_jetmet_tools.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coffea/processor/executor.py b/src/coffea/processor/executor.py index 618b1c741..9698fa296 100644 --- a/src/coffea/processor/executor.py +++ b/src/coffea/processor/executor.py @@ -1718,7 +1718,7 @@ def _work_function( import dask_awkward to_compute = processor_instance.process(events) - materialized = dask_awkward.necessary_columns(to_compute) + # materialized = dask_awkward.report_necessary_buffers(to_compute) out = dask.compute(to_compute, scheduler="single-threaded")[0] except Exception as e: raise Exception(f"Failed processing file: {item!r}") from e @@ -1734,11 +1734,11 @@ def _work_function( metrics = {} if isinstance(file, uproot.ReadOnlyDirectory): metrics["bytesread"] = file.file.source.num_requested_bytes + # metrics["data_and_shape_buffers"] = set(materialized) + # metrics["shape_only_buffers"] = set(materialized) if schema is not None and issubclass(schema, schemas.BaseSchema): - metrics["columns"] = set(materialized) metrics["entries"] = len(events) else: - metrics["columns"] = set(materialized) metrics["entries"] = events.size metrics["processtime"] = toc - tic return {"out": out, "metrics": metrics, "processed": {item}} diff --git a/tests/test_jetmet_tools.py b/tests/test_jetmet_tools.py index a7ef91385..b1375afa2 100644 --- a/tests/test_jetmet_tools.py +++ b/tests/test_jetmet_tools.py @@ -837,9 +837,9 @@ def test_corrected_jets_factory(optimization_enabled): **{name: evaluator[name] for name in jec_stack_names[5:6]} ) - print(dak.necessary_columns(jets.eta)) + print(dak.report_necessary_buffers(jets.eta)) print( - dak.necessary_columns( + dak.report_necessary_buffers( resosf.getScaleFactor( JetEta=jets.eta, ) From 2353a2306aae1ab57e72bb181b37d6e6f03f5e7d Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 4 Oct 2023 12:19:57 +0100 Subject: [PATCH 51/75] fix: properly parse form keys --- src/coffea/nanoevents/factory.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 9b25a6c6a..24b31feed 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -81,11 +81,16 @@ def keys_for_buffer_keys(self, buffer_keys): base_columns = set() for buffer_key in buffer_keys: form_key, attribute = self.parse_buffer_key(buffer_key) + operands = urllib.parse.unquote(form_key).split(",") + + it_operands = iter(operands) + next(it_operands) + base_columns.update( [ - acolumn - for acolumn in urllib.parse.unquote(form_key).split(",") - if not acolumn.startswith("!") + name + for name, maybe_transform in zip(operands, it_operands) + if maybe_transform == "!load" ] ) return base_columns From bd07d03fada9a61b1de1db726d560a917525aa44 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Oct 2023 14:05:25 +0100 Subject: [PATCH 52/75] hack: convert Content to array --- src/coffea/nanoevents/mapping/base.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/coffea/nanoevents/mapping/base.py b/src/coffea/nanoevents/mapping/base.py index c6a5e8e2e..f20b3bb2c 100644 --- a/src/coffea/nanoevents/mapping/base.py +++ b/src/coffea/nanoevents/mapping/base.py @@ -111,14 +111,18 @@ def __getitem__(self, key): if len(stack) != 1: raise RuntimeError(f"Syntax error in form key {nodes}") out = stack.pop() - try: - out = numpy.array(out) - except ValueError: - if self._debug: - print(out) - raise RuntimeError( - f"Left with non-bare array after evaluating form key {nodes}" - ) + import awkward + if isinstance(out, awkward.contents.Content): + out = awkward.to_numpy(out) + else: + try: + out = numpy.array(out) + except ValueError: + if self._debug: + print(out) + raise RuntimeError( + f"Left with non-bare array after evaluating form key {nodes}" + ) return out @abstractmethod From a6848a0824d94be45793f5ab58505bca60754fd2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Oct 2023 13:10:21 +0000 Subject: [PATCH 53/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/coffea/nanoevents/mapping/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coffea/nanoevents/mapping/base.py b/src/coffea/nanoevents/mapping/base.py index f20b3bb2c..3d87b410c 100644 --- a/src/coffea/nanoevents/mapping/base.py +++ b/src/coffea/nanoevents/mapping/base.py @@ -112,6 +112,7 @@ def __getitem__(self, key): raise RuntimeError(f"Syntax error in form key {nodes}") out = stack.pop() import awkward + if isinstance(out, awkward.contents.Content): out = awkward.to_numpy(out) else: From 9c90205c576b10d3334e3dc482c6fb8c6d374fa3 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Oct 2023 22:14:05 +0100 Subject: [PATCH 54/75] fix: ensure layout nodes converted to arrays --- src/coffea/nanoevents/transforms.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/coffea/nanoevents/transforms.py b/src/coffea/nanoevents/transforms.py index e969310b2..2985f9709 100644 --- a/src/coffea/nanoevents/transforms.py +++ b/src/coffea/nanoevents/transforms.py @@ -13,6 +13,15 @@ def to_layout(array): return array.layout +def ensure_array(arraylike): + if isinstance(arraylike, (awkward.contents.Content, awkward.Array)): + return awkward.to_numpy(arraylike) + elif isinstance(arraylike, awkward.index.Index): + return arraylike.data + else: + return numpy.asarray(arraylike) + + def data(stack): """Extract content from array (currently a noop, can probably take place of !content) @@ -96,7 +105,7 @@ def counts2offsets(stack): Signature: counts,!counts2offsets Outputs an array with length one larger than input """ - counts = numpy.array(stack.pop()) + counts = ensure_array(stack.pop()) offsets = numpy.empty(len(counts) + 1, dtype=numpy.int64) offsets[0] = 0 numpy.cumsum(counts, out=offsets[1:]) @@ -123,11 +132,11 @@ def local2global(stack): Signature: index,target_offsets,!local2global Outputs a content array with same shape as index content """ - target_offsets = numpy.asarray(stack.pop()) + target_offsets = ensure_array(stack.pop()) index = stack.pop() index = index.mask[index >= 0] + target_offsets[:-1] index = index.mask[index < target_offsets[1:]] - out = numpy.array(awkward.flatten(awkward.fill_none(index, -1), axis=None)) + out = ensure_array(awkward.flatten(awkward.fill_none(index, -1), axis=None)) if out.dtype != numpy.int64: raise RuntimeError stack.append(out) From 04b5a1a235a14ed80500054b59e7b921aefc335e Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 7 Oct 2023 09:13:07 -0500 Subject: [PATCH 55/75] adjust coffea pins to latest releases and pre-releases --- pyproject.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f5b168c1e..177cd9926 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,11 +37,11 @@ classifiers = [ "Topic :: Utilities", ] dependencies = [ - "awkward>=2.4.2", - "uproot>=5.0.10", + "awkward>=2.4.5", + "uproot>=5.1.0rc1", "dask[array]>=2023.4.0", - "dask-awkward>=2023.7.1,!=2023.8.0", - "dask-histogram>=2023.6.0", + "dask-awkward>=2023.10a1,!=2023.8.0", + "dask-histogram>=2023.7a0", "correctionlib>=2.0.0", "pyarrow>=6.0.0", "fsspec", From f19c11b17e8bdbcc6024f7456f515b96d9fc085c Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 7 Oct 2023 09:36:27 -0500 Subject: [PATCH 56/75] use pytorch-only triton image --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe8453b8d..234ac2e21 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -98,7 +98,7 @@ jobs: - name: Start triton server with example model if: matrix.os == 'ubuntu-latest' run: | - docker run -d --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${{ github.workspace }}/tests/samples/triton_models_test:/models nvcr.io/nvidia/tritonserver:23.04-py3 tritonserver --model-repository=/models + docker run -d --rm -p 8000:8000 -p 8001:8001 -p 8002:8002 -v ${{ github.workspace }}/tests/samples/triton_models_test:/models nvcr.io/nvidia/tritonserver:23.04-pyt-python-py3 tritonserver --model-repository=/models - name: Test with pytest run: | From 7051d2e40a8655b7d9aa86359a014d50bc9dd1a1 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 7 Oct 2023 09:40:30 -0500 Subject: [PATCH 57/75] streamline version requirements Co-authored-by: Angus Hollands --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 177cd9926..36fada2d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "awkward>=2.4.5", "uproot>=5.1.0rc1", "dask[array]>=2023.4.0", - "dask-awkward>=2023.10a1,!=2023.8.0", + "dask-awkward>=2023.10a1", "dask-histogram>=2023.7a0", "correctionlib>=2.0.0", "pyarrow>=6.0.0", From d14e4635011d970e86f9b81edf74d840e2ad22b7 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 7 Oct 2023 12:22:56 -0500 Subject: [PATCH 58/75] codespell --- src/coffea/processor/executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coffea/processor/executor.py b/src/coffea/processor/executor.py index 618b1c741..42df52eeb 100644 --- a/src/coffea/processor/executor.py +++ b/src/coffea/processor/executor.py @@ -694,7 +694,7 @@ class FuturesExecutor(ExecutorBase): An accumulator to collect the output of the function pool : concurrent.futures.Executor class or instance, optional The type of futures executor to use, defaults to ProcessPoolExecutor. - You can pass an instance instead of a class to re-use an executor + You can pass an instance instead of a class to reuse an executor workers : int, optional Number of parallel processes for futures (default 1) status : bool, optional From 33d2e681301c1c37b257dbbfe6d50ef7cf56c47a Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Sun, 8 Oct 2023 12:19:48 +0100 Subject: [PATCH 59/75] fix: don't import protocol --- src/coffea/nanoevents/factory.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 40d1bda53..8361eaaa2 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -11,7 +11,6 @@ import dask_awkward import fsspec import uproot -from dask_awkward import ImplementsFormTransformation from coffea.nanoevents.mapping import ( CachedMapping, @@ -68,7 +67,7 @@ def _key_formatter(prefix, form_key, form, attribute): return prefix + f"/{attribute}/{form_key}" -class _map_schema_base(ImplementsFormTransformation): +class _map_schema_base: # ImplementsFormMapping, ImplementsFormMappingInfo def __init__( self, schemaclass=BaseSchema, metadata=None, behavior=None, version=None ): From 9d94cb0b8d50e89d6e79ff3a4be3ba88602dc0e3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 8 Oct 2023 11:20:09 +0000 Subject: [PATCH 60/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/coffea/nanoevents/factory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 8361eaaa2..9b2557ac6 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -67,7 +67,7 @@ def _key_formatter(prefix, form_key, form, attribute): return prefix + f"/{attribute}/{form_key}" -class _map_schema_base: # ImplementsFormMapping, ImplementsFormMappingInfo +class _map_schema_base: # ImplementsFormMapping, ImplementsFormMappingInfo def __init__( self, schemaclass=BaseSchema, metadata=None, behavior=None, version=None ): From c451d60eea51c94b92e411cd0fe877bca9421505 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Oct 2023 20:10:17 +0000 Subject: [PATCH 61/75] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v4.4.0 → v4.5.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.4.0...v4.5.0) - [github.com/asottile/pyupgrade: v3.14.0 → v3.15.0](https://github.com/asottile/pyupgrade/compare/v3.14.0...v3.15.0) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8060d85d9..52f3c2023 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: args: ["--profile", "black", "--filter-files"] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-case-conflict - id: check-merge-conflict @@ -37,7 +37,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/asottile/pyupgrade - rev: v3.14.0 + rev: v3.15.0 hooks: - id: pyupgrade args: ["--py38-plus"] From 746bd422499291e4ab266068064200eaadb054d3 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Tue, 10 Oct 2023 08:46:39 -0500 Subject: [PATCH 62/75] fix title in bot config --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 52f3c2023..baa961304 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,7 +5,7 @@ ci: for more information, see https://pre-commit.ci autofix_prs: true autoupdate_branch: '' - autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' + autoupdate_commit_msg: 'ci(pre-commit): pre-commit autoupdate' autoupdate_schedule: weekly skip: [] submodules: false From 0d9c913c1be461d669b8313208f5ee52764a72a5 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Wed, 11 Oct 2023 02:17:32 -0500 Subject: [PATCH 63/75] remove deprecated interface definition --- src/coffea/nanoevents/factory.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 9b2557ac6..d82b434e4 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -110,19 +110,6 @@ def _key_formatter(self, prefix, form_key, form, attribute): form_key += "%2C%21offsets" return prefix + f"/{attribute}/{form_key}" - # TODO: deprecate - def extract_form_keys_base_columns(self, form_keys): - base_columns = [] - for form_key in form_keys: - base_columns.extend( - [ - acolumn - for acolumn in urllib.parse.unquote(form_key).split(",") - if not acolumn.startswith("!") - ] - ) - return list(set(base_columns)) - class _map_schema_uproot(_map_schema_base): def __init__( From bb4df59dc10f8aabcb979b51e18dba661aa5df9c Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 11 Oct 2023 08:28:51 +0100 Subject: [PATCH 64/75] Update tests/test_jetmet_tools.py --- tests/test_jetmet_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_jetmet_tools.py b/tests/test_jetmet_tools.py index b1375afa2..aace9b1bf 100644 --- a/tests/test_jetmet_tools.py +++ b/tests/test_jetmet_tools.py @@ -837,7 +837,7 @@ def test_corrected_jets_factory(optimization_enabled): **{name: evaluator[name] for name in jec_stack_names[5:6]} ) - print(dak.report_necessary_buffers(jets.eta)) + print(dak.report_necessary_columns(jets.eta)) print( dak.report_necessary_buffers( resosf.getScaleFactor( From e0694ad6c112b5a15a2aa2b976daced0d3a4da44 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 11 Oct 2023 08:29:04 +0100 Subject: [PATCH 65/75] Update tests/test_jetmet_tools.py --- tests/test_jetmet_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_jetmet_tools.py b/tests/test_jetmet_tools.py index aace9b1bf..8be3a97f3 100644 --- a/tests/test_jetmet_tools.py +++ b/tests/test_jetmet_tools.py @@ -839,7 +839,7 @@ def test_corrected_jets_factory(optimization_enabled): print(dak.report_necessary_columns(jets.eta)) print( - dak.report_necessary_buffers( + dak.report_necessary_columns( resosf.getScaleFactor( JetEta=jets.eta, ) From e7384f995853733faaef19514632399a109e5064 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Wed, 11 Oct 2023 03:00:13 -0500 Subject: [PATCH 66/75] remove further remnants of old remapping interface --- src/coffea/nanoevents/factory.py | 48 -------------------------------- 1 file changed, 48 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index d82b434e4..ee9e75d0b 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -177,29 +177,6 @@ def __getitem__(this, key): return TranslateBufferKeys() - def create_column_mapping_and_key(self, tree, start, stop, interp_options): - from functools import partial - - from coffea.nanoevents.util import tuple_to_key - - partition_key = ( - str(tree.file.uuid), - tree.object_path, - f"{start}-{stop}", - ) - uuidpfn = {partition_key[0]: tree.file.file_path} - mapping = UprootSourceMapping( - TrivialUprootOpener(uuidpfn, interp_options), - start, - stop, - cache={}, - access_log=None, - use_ak_forth=True, - ) - mapping.preload_column_source(partition_key[0], partition_key[1], tree) - - return mapping, partial(self._key_formatter, tuple_to_key(partition_key)) - class _map_schema_parquet(_map_schema_base): def __init__( @@ -224,31 +201,6 @@ def __call__(self, form): return awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form) - def create_column_mapping_and_key(self, columns, start, stop, interp_options): - from functools import partial - - from coffea.nanoevents.util import tuple_to_key - - uuid = "NO_UUID" - obj_path = "NO_OBJECT_PATH" - - partition_key = ( - str(uuid), - obj_path, - f"{start}-{stop}", - ) - uuidpfn = {uuid: columns} - mapping = PreloadedSourceMapping( - PreloadedOpener(uuidpfn), - start, - stop, - cache={}, - access_log=None, - ) - mapping.preload_column_source(partition_key[0], partition_key[1], columns) - - return mapping, partial(self._key_formatter, tuple_to_key(partition_key)) - class NanoEventsFactory: """A factory class to build NanoEvents objects""" From 92efdb20012675be136f18f17ed3cc0ed3e92044 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 11 Oct 2023 13:48:16 +0100 Subject: [PATCH 67/75] refactor: make key translation obvious --- src/coffea/nanoevents/factory.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index ee9e75d0b..9bed55ed2 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -111,6 +111,15 @@ def _key_formatter(self, prefix, form_key, form, attribute): return prefix + f"/{attribute}/{form_key}" +class _TranslatedMapping: + def __init__(self, func, mapping): + self._func = func + self._mapping = mapping + + def __getitem__(self, index): + return self._mapping[self._func(index)] + + class _map_schema_uproot(_map_schema_base): def __init__( self, schemaclass=BaseSchema, metadata=None, behavior=None, version=None @@ -168,14 +177,15 @@ def load_buffers(self, tree, keys, start, stop, interp_options): mapping.preload_column_source(partition_key[0], partition_key[1], tree) buffer_key = partial(self._key_formatter, tuple_to_key(partition_key)) - class TranslateBufferKeys: - def __getitem__(this, key): - form_key, attribute = self.parse_buffer_key(key) - return mapping[ - buffer_key(form_key=form_key, attribute=attribute, form=None) - ] + # The buffer-keys that dask-awkward knows about will not include the + # partition key. Therefore, we must translate the keys here. + def translate_key(index): + form_key, attribute = self.parse_buffer_key(index) + return mapping[ + buffer_key(form_key=form_key, attribute=attribute, form=None) + ] - return TranslateBufferKeys() + return _TranslatedMapping(translate_key, mapping) class _map_schema_parquet(_map_schema_base): From 8ae3cd5660bbf1acb6c2e7dd30683a1eda84bfbb Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Thu, 12 Oct 2023 02:35:06 -0500 Subject: [PATCH 68/75] fix typo from refactor --- src/coffea/nanoevents/factory.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 9bed55ed2..123f6a131 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -181,9 +181,7 @@ def load_buffers(self, tree, keys, start, stop, interp_options): # partition key. Therefore, we must translate the keys here. def translate_key(index): form_key, attribute = self.parse_buffer_key(index) - return mapping[ - buffer_key(form_key=form_key, attribute=attribute, form=None) - ] + return buffer_key(form_key=form_key, attribute=attribute, form=None) return _TranslatedMapping(translate_key, mapping) From 45a006004349ce5800e3c7221623004f2b4bf4f3 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 14 Oct 2023 10:22:42 -0500 Subject: [PATCH 69/75] update pins (note uncapped numpy and numba skooch) --- pyproject.toml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 36fada2d0..a12f82a74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,17 +37,17 @@ classifiers = [ "Topic :: Utilities", ] dependencies = [ - "awkward>=2.4.5", - "uproot>=5.1.0rc1", + "awkward>=2.4.6", + "uproot>=5.1.1", "dask[array]>=2023.4.0", - "dask-awkward>=2023.10a1", - "dask-histogram>=2023.7a0", - "correctionlib>=2.0.0", + "dask-awkward>=2023.10.0", + "dask-histogram>=2023.10.0", + "correctionlib>=2.3.3", "pyarrow>=6.0.0", "fsspec", "matplotlib>=3", - "numba>=0.57.0", - "numpy>=1.22.0,<1.25", # < 1.25 for numba 0.57 series + "numba>=0.58.0", + "numpy>=1.22.0", "scipy>=1.1.0", "tqdm>=4.27.0", "lz4", From 14d2cc2ed36fe07a2cacdef791574d6fffcfb65f Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 14 Oct 2023 11:02:51 -0500 Subject: [PATCH 70/75] try to convince pip to upgrade numpy upon installing coffea --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 234ac2e21..16aed3abc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,7 +69,7 @@ jobs: python -m pip install xgboost python -m pip install tritonclient[grpc,http] # install checked out coffea - python -m pip install -q -e '.[dev,parsl,dask,spark]' + python -m pip install -q -e '.[dev,parsl,dask,spark]' --upgrade python -m pip list java -version - name: Install dependencies (MacOS) @@ -80,7 +80,7 @@ jobs: python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu python -m pip install xgboost # install checked out coffea - python -m pip install -q -e '.[dev,dask,spark]' + python -m pip install -q -e '.[dev,dask,spark]' --upgrade python -m pip list java -version - name: Install dependencies (Windows) @@ -91,7 +91,7 @@ jobs: python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu python -m pip install xgboost # install checked out coffea - python -m pip install -q -e '.[dev,dask]' + python -m pip install -q -e '.[dev,dask]' --upgrade python -m pip list java -version From ab3599e5ef408788068da6b2d8dd82cb86b57ce7 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 14 Oct 2023 11:11:49 -0500 Subject: [PATCH 71/75] be more insistent --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 16aed3abc..ea61615a7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,7 +69,7 @@ jobs: python -m pip install xgboost python -m pip install tritonclient[grpc,http] # install checked out coffea - python -m pip install -q -e '.[dev,parsl,dask,spark]' --upgrade + python -m pip install -q -e '.[dev,parsl,dask,spark]' --upgrade --upgrade-strategy eager python -m pip list java -version - name: Install dependencies (MacOS) @@ -80,7 +80,7 @@ jobs: python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu python -m pip install xgboost # install checked out coffea - python -m pip install -q -e '.[dev,dask,spark]' --upgrade + python -m pip install -q -e '.[dev,dask,spark]' --upgrade --upgrade-strategy eager python -m pip list java -version - name: Install dependencies (Windows) @@ -91,7 +91,7 @@ jobs: python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu python -m pip install xgboost # install checked out coffea - python -m pip install -q -e '.[dev,dask]' --upgrade + python -m pip install -q -e '.[dev,dask]' --upgrade --upgrade-strategy eager python -m pip list java -version From faff41ec1bdbd33ac7ca2d1d8ddda3ac11a3f427 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Sat, 14 Oct 2023 11:36:56 -0500 Subject: [PATCH 72/75] numba 0.58 pins numpy from above < 1.26 nb: safer to repin in coffea for users because of numba's sliding window, very easy to get a mismatch --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a12f82a74..689e03ff0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ dependencies = [ "fsspec", "matplotlib>=3", "numba>=0.58.0", - "numpy>=1.22.0", + "numpy>=1.22.0,<1.26", # < 1.26 for numba 0.58 series "scipy>=1.1.0", "tqdm>=4.27.0", "lz4", From 864f7094e92fe11792dba736f093f8f3115b3ac0 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Tue, 17 Oct 2023 10:22:52 -0500 Subject: [PATCH 73/75] clean up usage of quoted ",!offsets" --- src/coffea/nanoevents/factory.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 123f6a131..e97f556f0 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -29,8 +29,9 @@ PHYSLITESchema, TreeMakerSchema, ) -from coffea.nanoevents.util import key_to_tuple, tuple_to_key +from coffea.nanoevents.util import quote, unquote, key_to_tuple, tuple_to_key +_offsets_label = quote(",!offsets") def _remove_not_interpretable(branch): if isinstance( @@ -63,7 +64,7 @@ def _remove_not_interpretable(branch): def _key_formatter(prefix, form_key, form, attribute): if attribute == "offsets": - form_key += "%2C%21offsets" + form_key += _offsets_label return prefix + f"/{attribute}/{form_key}" @@ -80,7 +81,7 @@ def keys_for_buffer_keys(self, buffer_keys): base_columns = set() for buffer_key in buffer_keys: form_key, attribute = self.parse_buffer_key(buffer_key) - operands = urllib.parse.unquote(form_key).split(",") + operands = unquote(form_key).split(",") it_operands = iter(operands) next(it_operands) @@ -97,7 +98,7 @@ def keys_for_buffer_keys(self, buffer_keys): def parse_buffer_key(self, buffer_key): prefix, attribute, form_key = buffer_key.rsplit("/", maxsplit=2) if attribute == "offsets": - return (form_key[: -len("%2C%21offsets")], attribute) + return (form_key[: -len(_offsets_label)], attribute) else: return (form_key, attribute) @@ -107,7 +108,7 @@ def buffer_key(self): def _key_formatter(self, prefix, form_key, form, attribute): if attribute == "offsets": - form_key += "%2C%21offsets" + form_key += _offsets_label return prefix + f"/{attribute}/{form_key}" From 9b96f7b8d3d1613e0ed22e111cf5c0221f03a024 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Oct 2023 15:23:12 +0000 Subject: [PATCH 74/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/coffea/nanoevents/factory.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index e97f556f0..66f3cb482 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -29,10 +29,11 @@ PHYSLITESchema, TreeMakerSchema, ) -from coffea.nanoevents.util import quote, unquote, key_to_tuple, tuple_to_key +from coffea.nanoevents.util import key_to_tuple, quote, tuple_to_key, unquote _offsets_label = quote(",!offsets") + def _remove_not_interpretable(branch): if isinstance( branch.interpretation, uproot.interpretation.identify.uproot.AsGrouped From 0a525d0340aa0afc240cd313cd0067578c970556 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Tue, 17 Oct 2023 10:24:32 -0500 Subject: [PATCH 75/75] flake8 lint --- src/coffea/nanoevents/factory.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 66f3cb482..b6656282f 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -1,6 +1,5 @@ import io import pathlib -import urllib.parse import warnings import weakref from functools import partial