From 9e832e0335bc121cab17c2fdbee5b8d09c208d96 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 27 Jul 2023 10:22:53 -0400 Subject: [PATCH 01/29] global index fetching working --- src/coffea/nanoevents/methods/physlite.py | 49 ++++++++- tests/test_nanoevents_physlite.py | 125 ++++++++++++---------- 2 files changed, 111 insertions(+), 63 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index c0efcdc39..0d3cc2898 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -3,6 +3,7 @@ import awkward import numpy +import dask_awkward from coffea.nanoevents.methods import base, vector @@ -64,7 +65,8 @@ def where(unique_keys): return out -def _get_target_offsets(offsets, event_index): +def _concrete_get_target_offsets(load_column, event_index): + offsets = awkward.typetracer.length_one_if_typetracer(load_column.layout.offsets.data) if isinstance(event_index, Number): return offsets[event_index] @@ -72,15 +74,52 @@ def descend(layout, depth, **kwargs): if layout.purelist_depth == 1: return awkward.contents.NumpyArray(offsets)[layout] - return awkward.transform(descend, event_index) + return awkward.transform(descend, event_index.layout) + + +def _dask_get_target_offsets(load_column, event_index): + return dask_awkward.map_partitions( + _concrete_get_target_offsets, + load_column, + event_index + ) + + +def _get_target_offsets(load_column, event_index): + # TODO check event_index as well + if isinstance(load_column, dask_awkward.Array): + return _dask_get_target_offsets(load_column, event_index) + return _concrete_get_target_offsets(load_column, event_index) def _get_global_index(target, eventindex, index): load_column = target[ target.fields[0] - ] # awkward is eager-mode now (will need to dask this) - target_offsets = _get_target_offsets(load_column.layout.offsets, eventindex) - return target_offsets + index + ] + target_offsets = _get_target_offsets(load_column, eventindex) + return target_offsets + index # here i get + + +# def _concrete_get_global_index(target, eventindex, index): +# load_column = target[ +# target.fields[0] +# ] +# target_offsets = _get_target_offsets(load_column.layout.offsets, eventindex) +# return target_offsets + index + +# def _dask_get_global_index(target, eventindex, index): +# return dask_awkward.map_partitions( +# _concrete_get_global_index, +# target, +# eventindex, +# index, +# ) + +# def _get_global_index(target, eventindex, index): +# # check target, eventindex, index all dak +# if isinstance(target, dask_awkward.Array): +# return _dask_get_global_index(target, eventindex, index) +# return _concrete_get_global_index(target, eventindex, index) @awkward.mixin_class(behavior) diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py index f82471198..488395183 100644 --- a/tests/test_nanoevents_physlite.py +++ b/tests/test_nanoevents_physlite.py @@ -5,6 +5,11 @@ from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema +from coffea.nanoevents.methods.physlite import _get_global_index + +import dask +dask.config.set({"awkward.optimization.enabled": False, "awkward.raise-failed-meta": True, "awkward.optimization.on-fail": "raise"}) + pytestmark = pytest.mark.skip(reason="uproot is upset with this file...") @@ -13,64 +18,68 @@ def _events(): factory = NanoEventsFactory.from_root( {path: "CollectionTree"}, schemaclass=PHYSLITESchema, - permit_dask=False, + permit_dask=True, + #permit_dask=False, ) return factory.events() - -@pytest.fixture(scope="module") -def events(): - return _events() - - -@pytest.mark.parametrize("do_slice", [False, True]) -def test_electron_track_links(events, do_slice): - if do_slice: - events = events[np.random.randint(2, size=len(events)).astype(bool)] - for event in events: - for electron in event.Electrons: - for link_index, link in enumerate(electron.trackParticleLinks): - track_index = link.m_persIndex - print(track_index) - print(event.GSFTrackParticles) - print(electron.trackParticleLinks) - print(electron.trackParticles) - - assert ( - event.GSFTrackParticles[track_index].z0 - == electron.trackParticles[link_index].z0 - ) - - -# from MetaData/EventFormat -_hash_to_target_name = { - 13267281: "TruthPhotons", - 342174277: "TruthMuons", - 368360608: "TruthNeutrinos", - 375408000: "TruthTaus", - 394100163: "TruthElectrons", - 614719239: "TruthBoson", - 660928181: "TruthTop", - 779635413: "TruthBottom", -} - - -def test_truth_links_toplevel(events): - children_px = events.TruthBoson.children.px - for i_event, event in enumerate(events): - for i_particle, particle in enumerate(event.TruthBoson): - for i_link, link in enumerate(particle.childLinks): - assert ( - event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px - == children_px[i_event][i_particle][i_link] - ) - - -def test_truth_links(events): - for i_event, event in enumerate(events): - for i_particle, particle in enumerate(event.TruthBoson): - for i_link, link in enumerate(particle.childLinks): - assert ( - event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px - == particle.children[i_link].px - ) +events = _events() + +gi = _get_global_index(events.GSFTrackParticles, events.Electrons._eventindex, events.Electrons.trackParticleLinks.m_persIndex) + +# @pytest.fixture(scope="module") +# def events(): +# return _events() + + +# @pytest.mark.parametrize("do_slice", [False, True]) +# def test_electron_track_links(events, do_slice): +# if do_slice: +# events = events[np.random.randint(2, size=len(events)).astype(bool)] +# for event in events: +# for electron in event.Electrons: +# for link_index, link in enumerate(electron.trackParticleLinks): +# track_index = link.m_persIndex +# print(track_index) +# print(event.GSFTrackParticles) +# print(electron.trackParticleLinks) +# print(electron.trackParticles) + +# assert ( +# event.GSFTrackParticles[track_index].z0 +# == electron.trackParticles[link_index].z0 +# ) + + +# # from MetaData/EventFormat +# _hash_to_target_name = { +# 13267281: "TruthPhotons", +# 342174277: "TruthMuons", +# 368360608: "TruthNeutrinos", +# 375408000: "TruthTaus", +# 394100163: "TruthElectrons", +# 614719239: "TruthBoson", +# 660928181: "TruthTop", +# 779635413: "TruthBottom", +# } + + +# def test_truth_links_toplevel(events): +# children_px = events.TruthBoson.children.px +# for i_event, event in enumerate(events): +# for i_particle, particle in enumerate(event.TruthBoson): +# for i_link, link in enumerate(particle.childLinks): +# assert ( +# event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px +# == children_px[i_event][i_particle][i_link] +# ) + + +# def test_truth_links(events): +# for i_event, event in enumerate(events): +# for i_particle, particle in enumerate(event.TruthBoson): +# for i_link, link in enumerate(particle.childLinks): +# assert ( +# event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px +# == particle.children[i_link].px +# ) From 29c359e026cb00d3b18f017975981f25489e2f17 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Fri, 28 Jul 2023 12:40:07 -0400 Subject: [PATCH 02/29] track particles working --- src/coffea/nanoevents/methods/physlite.py | 53 +++++++++++++---------- tests/test_nanoevents_physlite.py | 16 ++++++- 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 0d3cc2898..ddbd4b218 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -65,11 +65,24 @@ def where(unique_keys): return out -def _concrete_get_target_offsets(load_column, event_index): - offsets = awkward.typetracer.length_one_if_typetracer(load_column.layout.offsets.data) +def _get_target_offsets(load_column, event_index): + if isinstance(load_column, dask_awkward.Array): + # TODO check event_index as well + return dask_awkward.map_partitions( + _get_target_offsets, load_column, event_index + ) + + offsets = load_column.layout.offsets.data + if isinstance(event_index, Number): + # TODO i think this is not working yet in dask return offsets[event_index] + # nescessary to stick it into the `NumpyArray` constructor + offsets = awkward.typetracer.length_zero_if_typetracer( + load_column.layout.offsets.data + ) + def descend(layout, depth, **kwargs): if layout.purelist_depth == 1: return awkward.contents.NumpyArray(offsets)[layout] @@ -77,27 +90,10 @@ def descend(layout, depth, **kwargs): return awkward.transform(descend, event_index.layout) -def _dask_get_target_offsets(load_column, event_index): - return dask_awkward.map_partitions( - _concrete_get_target_offsets, - load_column, - event_index - ) - - -def _get_target_offsets(load_column, event_index): - # TODO check event_index as well - if isinstance(load_column, dask_awkward.Array): - return _dask_get_target_offsets(load_column, event_index) - return _concrete_get_target_offsets(load_column, event_index) - - def _get_global_index(target, eventindex, index): - load_column = target[ - target.fields[0] - ] + load_column = target[target.fields[0]] target_offsets = _get_target_offsets(load_column, eventindex) - return target_offsets + index # here i get + return target_offsets + index # here i get # def _concrete_get_global_index(target, eventindex, index): @@ -196,10 +192,21 @@ class Electron(Particle): """Electron collection, following `xAOD::Electron_v1 `_. """ - @property - def trackParticles(self): + def trackParticles(self, _dask_array_=None): + + if _dask_array_ is not None: + target = _dask_array_.behavior["__original_array__"]().GSFTrackParticles + links = _dask_array_.trackParticleLinks + return _element_link( + target, + _dask_array_._eventindex, + links.m_persIndex, + links.m_persKey, + ) + links = self.trackParticleLinks + return _element_link( self._events().GSFTrackParticles, self._eventindex, diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py index 488395183..6354f0376 100644 --- a/tests/test_nanoevents_physlite.py +++ b/tests/test_nanoevents_physlite.py @@ -5,9 +5,10 @@ from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema -from coffea.nanoevents.methods.physlite import _get_global_index +from coffea.nanoevents.methods.physlite import _get_global_index, _element_link import dask +import dask_awkward as dak dask.config.set({"awkward.optimization.enabled": False, "awkward.raise-failed-meta": True, "awkward.optimization.on-fail": "raise"}) pytestmark = pytest.mark.skip(reason="uproot is upset with this file...") @@ -25,7 +26,18 @@ def _events(): events = _events() -gi = _get_global_index(events.GSFTrackParticles, events.Electrons._eventindex, events.Electrons.trackParticleLinks.m_persIndex) +gi = _get_global_index( + events.GSFTrackParticles, + events.Electrons._eventindex, + events.Electrons.trackParticleLinks.m_persIndex +) + +el = _element_link( + events.GSFTrackParticles, + events.Electrons._eventindex, + events.Electrons.trackParticleLinks.m_persIndex, + events.Electrons.trackParticleLinks.m_persKey +) # @pytest.fixture(scope="module") # def events(): From 109e73ab02a706460dde8c953a1776b0f2dc8479 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Fri, 28 Jul 2023 13:29:24 -0400 Subject: [PATCH 03/29] trackParticle --- src/coffea/nanoevents/methods/physlite.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index ddbd4b218..1fa2fe013 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -192,9 +192,9 @@ class Electron(Particle): """Electron collection, following `xAOD::Electron_v1 `_. """ + @property def trackParticles(self, _dask_array_=None): - if _dask_array_ is not None: target = _dask_array_.behavior["__original_array__"]().GSFTrackParticles links = _dask_array_.trackParticleLinks @@ -215,7 +215,9 @@ def trackParticles(self, _dask_array_=None): ) @property - def trackParticle(self): + def trackParticle(self, _dask_array_=None): + if _dask_array_ is not None: + self = _dask_array_ # TODO: is this what i should be doing? trackParticles = self.trackParticles return self.trackParticles[ tuple([slice(None) for i in range(trackParticles.ndim - 1)] + [0]) From 2e16a9e6c57b64a1ebfc9129b2645a1ac28fb1a9 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Fri, 28 Jul 2023 18:21:25 -0400 Subject: [PATCH 04/29] cleanup and add caloclusters --- src/coffea/nanoevents/methods/physlite.py | 88 ++++++++++------------- src/coffea/nanoevents/schemas/physlite.py | 1 + 2 files changed, 40 insertions(+), 49 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 1fa2fe013..b6badb9d9 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -39,6 +39,25 @@ def _element_link(target_collection, eventindex, index, key): return target_collection._apply_global_index(global_index) +def _element_link_method(self, link_name, target_name, _dask_array_): + if _dask_array_ is not None: + target = _dask_array_.behavior["__original_array__"]()[target_name] + links = _dask_array_[link_name] + return _element_link( + target, + _dask_array_._eventindex, + links.m_persIndex, + links.m_persKey, + ) + links = self[link_name] + return _element_link( + self._events()[target_name], + self._eventindex, + links.m_persIndex, + links.m_persKey, + ) + + def _element_link_multiple(events, obj, link_field, with_name=None): link = obj[link_field] key = link.m_persKey @@ -66,8 +85,10 @@ def where(unique_keys): def _get_target_offsets(load_column, event_index): - if isinstance(load_column, dask_awkward.Array): - # TODO check event_index as well + if isinstance(load_column, dask_awkward.Array) and isinstance( + event_index, dask_awkward.Array + ): + # wrap in map_partitions if dask arrays return dask_awkward.map_partitions( _get_target_offsets, load_column, event_index ) @@ -75,10 +96,10 @@ def _get_target_offsets(load_column, event_index): offsets = load_column.layout.offsets.data if isinstance(event_index, Number): - # TODO i think this is not working yet in dask return offsets[event_index] # nescessary to stick it into the `NumpyArray` constructor + # if typetracer is passed through offsets = awkward.typetracer.length_zero_if_typetracer( load_column.layout.offsets.data ) @@ -93,29 +114,7 @@ def descend(layout, depth, **kwargs): def _get_global_index(target, eventindex, index): load_column = target[target.fields[0]] target_offsets = _get_target_offsets(load_column, eventindex) - return target_offsets + index # here i get - - -# def _concrete_get_global_index(target, eventindex, index): -# load_column = target[ -# target.fields[0] -# ] -# target_offsets = _get_target_offsets(load_column.layout.offsets, eventindex) -# return target_offsets + index - -# def _dask_get_global_index(target, eventindex, index): -# return dask_awkward.map_partitions( -# _concrete_get_global_index, -# target, -# eventindex, -# index, -# ) - -# def _get_global_index(target, eventindex, index): -# # check target, eventindex, index all dak -# if isinstance(target, dask_awkward.Array): -# return _dask_get_global_index(target, eventindex, index) -# return _concrete_get_global_index(target, eventindex, index) + return target_offsets + index @awkward.mixin_class(behavior) @@ -175,12 +174,12 @@ class Muon(Particle): """ @property - def trackParticle(self): - return _element_link( - self._events().CombinedMuonTrackParticles, - self._eventindex, - self["combinedTrackParticleLink.m_persIndex"], - self["combinedTrackParticleLink.m_persKey"], + def trackParticle(self, _dask_array_=None): + return _element_link_method( + self, + "combinedTrackParticleLink", + "CombinedMuonTrackParticles", + _dask_array_, ) @@ -195,23 +194,8 @@ class Electron(Particle): @property def trackParticles(self, _dask_array_=None): - if _dask_array_ is not None: - target = _dask_array_.behavior["__original_array__"]().GSFTrackParticles - links = _dask_array_.trackParticleLinks - return _element_link( - target, - _dask_array_._eventindex, - links.m_persIndex, - links.m_persKey, - ) - - links = self.trackParticleLinks - - return _element_link( - self._events().GSFTrackParticles, - self._eventindex, - links.m_persIndex, - links.m_persKey, + return _element_link_method( + self, "trackParticleLinks", "GSFTrackParticles", _dask_array_ ) @property @@ -223,6 +207,12 @@ def trackParticle(self, _dask_array_=None): tuple([slice(None) for i in range(trackParticles.ndim - 1)] + [0]) ] + @property + def caloClusters(self, _dask_array_=None): + return _element_link_method( + self, "caloClusterLinks", "CaloCalTopoClusters", _dask_array_ + ) + _set_repr_name("Electron") diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 1b9b89205..6a6aa8659 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -53,6 +53,7 @@ class PHYSLITESchema(BaseSchema): "GSFTrackParticles": "TrackParticle", "InDetTrackParticles": "TrackParticle", "MuonSpectrometerTrackParticles": "TrackParticle", + "CaloCalTopoClusters": "NanoCollection", } """Default configuration for mixin types, based on the collection name. From f4d66682cec0e3b62a3d7ec96a2b7ced42fa5c76 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Fri, 28 Jul 2023 18:46:56 -0400 Subject: [PATCH 05/29] comment about multiple elementlinks --- src/coffea/nanoevents/methods/physlite.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index b6badb9d9..979fff0a0 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -59,6 +59,10 @@ def _element_link_method(self, link_name, target_name, _dask_array_): def _element_link_multiple(events, obj, link_field, with_name=None): + # currently not working in dask because: + # - we don't know the resulting type beforehand + # - also not the targets, so no way to find out which columns to load? + # - could consider to treat the case of truth collections by just loading all truth columns link = obj[link_field] key = link.m_persKey index = link.m_persIndex From 4da309f4d745ccbefdcd913b755ee47fe74c8b82 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Fri, 28 Jul 2023 19:34:30 -0400 Subject: [PATCH 06/29] cleanup tests and add test for single field of linked collection --- tests/test_nanoevents_physlite.py | 93 ++++++------------------------- 1 file changed, 18 insertions(+), 75 deletions(-) diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py index 6354f0376..6b464cdb8 100644 --- a/tests/test_nanoevents_physlite.py +++ b/tests/test_nanoevents_physlite.py @@ -5,14 +5,6 @@ from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema -from coffea.nanoevents.methods.physlite import _get_global_index, _element_link - -import dask -import dask_awkward as dak -dask.config.set({"awkward.optimization.enabled": False, "awkward.raise-failed-meta": True, "awkward.optimization.on-fail": "raise"}) - -pytestmark = pytest.mark.skip(reason="uproot is upset with this file...") - def _events(): path = os.path.abspath("tests/samples/DAOD_PHYSLITE_21.2.108.0.art.pool.root") @@ -20,78 +12,29 @@ def _events(): {path: "CollectionTree"}, schemaclass=PHYSLITESchema, permit_dask=True, - #permit_dask=False, ) return factory.events() -events = _events() - -gi = _get_global_index( - events.GSFTrackParticles, - events.Electrons._eventindex, - events.Electrons.trackParticleLinks.m_persIndex -) - -el = _element_link( - events.GSFTrackParticles, - events.Electrons._eventindex, - events.Electrons.trackParticleLinks.m_persIndex, - events.Electrons.trackParticleLinks.m_persKey -) - -# @pytest.fixture(scope="module") -# def events(): -# return _events() - - -# @pytest.mark.parametrize("do_slice", [False, True]) -# def test_electron_track_links(events, do_slice): -# if do_slice: -# events = events[np.random.randint(2, size=len(events)).astype(bool)] -# for event in events: -# for electron in event.Electrons: -# for link_index, link in enumerate(electron.trackParticleLinks): -# track_index = link.m_persIndex -# print(track_index) -# print(event.GSFTrackParticles) -# print(electron.trackParticleLinks) -# print(electron.trackParticles) - -# assert ( -# event.GSFTrackParticles[track_index].z0 -# == electron.trackParticles[link_index].z0 -# ) - -# # from MetaData/EventFormat -# _hash_to_target_name = { -# 13267281: "TruthPhotons", -# 342174277: "TruthMuons", -# 368360608: "TruthNeutrinos", -# 375408000: "TruthTaus", -# 394100163: "TruthElectrons", -# 614719239: "TruthBoson", -# 660928181: "TruthTop", -# 779635413: "TruthBottom", -# } +@pytest.fixture(scope="module") +def events(): + return _events() -# def test_truth_links_toplevel(events): -# children_px = events.TruthBoson.children.px -# for i_event, event in enumerate(events): -# for i_particle, particle in enumerate(event.TruthBoson): -# for i_link, link in enumerate(particle.childLinks): -# assert ( -# event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px -# == children_px[i_event][i_particle][i_link] -# ) +def test_load_single_field_of_linked(events): + events.Electrons.caloClusters.calE.compute() -# def test_truth_links(events): -# for i_event, event in enumerate(events): -# for i_particle, particle in enumerate(event.TruthBoson): -# for i_link, link in enumerate(particle.childLinks): -# assert ( -# event[_hash_to_target_name[link.m_persKey]][link.m_persIndex].px -# == particle.children[i_link].px -# ) +@pytest.mark.parametrize("do_slice", [False, True]) +def test_electron_track_links(events, do_slice): + if do_slice: + events = events[::2] + trackParticles = events.Electrons.trackParticles.compute() + for i, event in enumerate(events[["Electrons", "GSFTrackParticles"]].compute()): + for j, electron in enumerate(event.Electrons): + for link_index, link in enumerate(electron.trackParticleLinks): + track_index = link.m_persIndex + assert ( + event.GSFTrackParticles[track_index].z0 + == trackParticles[i][j][link_index].z0 + ) From dbccef044c9e9ca3e98ba8dce5795206c2c98f9b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Jul 2023 23:59:34 +0000 Subject: [PATCH 07/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/coffea/nanoevents/methods/physlite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 979fff0a0..90a449004 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -2,8 +2,8 @@ from numbers import Number import awkward -import numpy import dask_awkward +import numpy from coffea.nanoevents.methods import base, vector From dbfadd8cb9841786c10b43e6f08b5663b690ddbe Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Fri, 28 Jul 2023 20:04:33 -0400 Subject: [PATCH 08/29] pylint --- src/coffea/nanoevents/methods/physlite.py | 2 +- tests/test_nanoevents_physlite.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 90a449004..5b1cbe50b 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -102,7 +102,7 @@ def _get_target_offsets(load_column, event_index): if isinstance(event_index, Number): return offsets[event_index] - # nescessary to stick it into the `NumpyArray` constructor + # necessary to stick it into the `NumpyArray` constructor # if typetracer is passed through offsets = awkward.typetracer.length_zero_if_typetracer( load_column.layout.offsets.data diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py index 6b464cdb8..55293a164 100644 --- a/tests/test_nanoevents_physlite.py +++ b/tests/test_nanoevents_physlite.py @@ -1,6 +1,5 @@ import os -import numpy as np import pytest from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema From ab5164bd9c0689a59678c87ff8024c077a518e4e Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Tue, 22 Aug 2023 14:25:51 +0200 Subject: [PATCH 09/29] flat calling structure for trackParticle(s) behavior methods --- src/coffea/nanoevents/methods/physlite.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 5b1cbe50b..72ca50165 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -204,12 +204,12 @@ def trackParticles(self, _dask_array_=None): @property def trackParticle(self, _dask_array_=None): - if _dask_array_ is not None: - self = _dask_array_ # TODO: is this what i should be doing? - trackParticles = self.trackParticles - return self.trackParticles[ - tuple([slice(None) for i in range(trackParticles.ndim - 1)] + [0]) - ] + trackParticles = _element_link_method( + self, "trackParticleLinks", "GSFTrackParticles", _dask_array_ + ) + # Ellipsis (..., 0) slicing not supported yet by dask_awkward + slicer = tuple([slice(None) for i in range(trackParticles.ndim - 1)] + [0]) + return trackParticles[slicer] @property def caloClusters(self, _dask_array_=None): From 8ec38cfda7c374568ab06025c423aef0272caa9d Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Wed, 30 Aug 2023 15:32:00 +0200 Subject: [PATCH 10/29] fix column touching for _get_target_offsets --- src/coffea/nanoevents/methods/physlite.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 72ca50165..eefff7eb1 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -102,6 +102,11 @@ def _get_target_offsets(load_column, event_index): if isinstance(event_index, Number): return offsets[event_index] + # let the necessary column optimization know that we need to load this + # column to get the offsets + if awkward.backend(load_column) == "typetracer": + awkward.typetracer.touch_data(load_column) + # necessary to stick it into the `NumpyArray` constructor # if typetracer is passed through offsets = awkward.typetracer.length_zero_if_typetracer( From e6127d5e54ef12516760e51d1e4a229923bed2c2 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 15:24:29 +0200 Subject: [PATCH 11/29] make test actually fail --- tests/test_nanoevents_physlite.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py index 55293a164..95f58491d 100644 --- a/tests/test_nanoevents_physlite.py +++ b/tests/test_nanoevents_physlite.py @@ -1,5 +1,6 @@ import os +import dask import pytest from coffea.nanoevents import NanoEventsFactory, PHYSLITESchema @@ -21,7 +22,8 @@ def events(): def test_load_single_field_of_linked(events): - events.Electrons.caloClusters.calE.compute() + with dask.config.set({"awkward.raise-failed-meta": True}): + events.Electrons.caloClusters.calE.compute() @pytest.mark.parametrize("do_slice", [False, True]) From c4385b14e41b5df770fe9bb1ca7bac51b6aa4a85 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 15:27:17 +0200 Subject: [PATCH 12/29] use layout._touch_data since public touch_data not yet available in ak 2.3.3 --- src/coffea/nanoevents/methods/physlite.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index eefff7eb1..f8147082b 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -105,7 +105,8 @@ def _get_target_offsets(load_column, event_index): # let the necessary column optimization know that we need to load this # column to get the offsets if awkward.backend(load_column) == "typetracer": - awkward.typetracer.touch_data(load_column) + # awkward.typetracer.touch_data(load_column) # available in awkward > 2.3.3 + load_column.layout._touch_data(recursive=True) # necessary to stick it into the `NumpyArray` constructor # if typetracer is passed through From e2dd3f03534e7cb166cb146ad2511a5906b2c289 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 15:29:05 +0200 Subject: [PATCH 13/29] try to avoid loading double-jagged columns for getting offsets in elementlink calculation --- src/coffea/nanoevents/methods/physlite.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index f8147082b..7b9f98580 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -122,7 +122,12 @@ def descend(layout, depth, **kwargs): def _get_global_index(target, eventindex, index): - load_column = target[target.fields[0]] + for field in target.fields: + # fetch first column to get offsets from + # (but try to avoid the double-jagged ones if possible) + load_column = target[field] + if load_column.ndim < 3: + break target_offsets = _get_target_offsets(load_column, eventindex) return target_offsets + index From f491a36c04e1fa2c3b63d14468345465d398afa6 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 17:20:00 +0200 Subject: [PATCH 14/29] allow for collections that contain non-list fields --- src/coffea/nanoevents/schemas/physlite.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 1b9b89205..3b6508e2e 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -118,14 +118,21 @@ def _build_collections(self, branch_forms): to_zip, objname, self.mixins.get(objname, None), - bypass=True, - ) - content = contents[objname]["content"] - content["parameters"] = dict( - content.get("parameters", {}), collection_name=objname + bypass=False, ) except NotImplementedError: warnings.warn(f"Can't zip collection {objname}") + if "content" in contents[objname]: + # in this case we were able to zip everything together to a ListOffsetArray(RecordArray) + assert "List" in contents[objname]["class"] + content = contents[objname]["content"] + else: + # in this case this was not possible (e.g. because we also had non-list fields) + assert contents[objname]["class"] == "RecordArray" + content = contents[objname] + content["parameters"] = dict( + content.get("parameters", {}), collection_name=objname + ) return contents @staticmethod From 79ae6d5d7e4a9eb018b5cf1efb57713ea3c96b34 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 17:34:35 +0200 Subject: [PATCH 15/29] skip empty records --- src/coffea/nanoevents/schemas/physlite.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 3b6508e2e..11446b7a2 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -79,6 +79,9 @@ def _build_collections(self, branch_forms): key_fields = key.split("/")[-1].split(".") top_key = key_fields[0] sub_key = ".".join(key_fields[1:]) + if ak_form["class"] == "RecordArray" and not ak_form["fields"]: + # skip empty records (e.g. the branches ending in "." only containing the base class) + continue objname = top_key.replace("Analysis", "").replace("AuxDyn", "") zip_groups[objname].append(((key, sub_key), ak_form)) From 94b648fd0926ad6150813d9d4f0cd289bde4515d Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 17:38:25 +0200 Subject: [PATCH 16/29] don't zip branches that are not grouped with anything else (e.g. index_ref in newer PHYSLITE) --- src/coffea/nanoevents/schemas/physlite.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 11446b7a2..52e3ac747 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -100,6 +100,10 @@ def _build_collections(self, branch_forms): # zip the forms contents = {} for objname, keys_and_form in zip_groups.items(): + if len(keys_and_form) == 1: + # don't zip if there is only one item + contents[objname] = keys_and_form[0][1] + continue to_zip = {} for (key, sub_key), form in keys_and_form: if "." in sub_key: From 0334acd8feb208602b0f173d473921e155215cd4 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 18:03:57 +0200 Subject: [PATCH 17/29] also remove Aux from branch names to zip them with AuxDyn and potentially non-aux branches --- src/coffea/nanoevents/schemas/physlite.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 52e3ac747..c45240d6a 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -82,7 +82,9 @@ def _build_collections(self, branch_forms): if ak_form["class"] == "RecordArray" and not ak_form["fields"]: # skip empty records (e.g. the branches ending in "." only containing the base class) continue - objname = top_key.replace("Analysis", "").replace("AuxDyn", "") + objname = ( + top_key.replace("Analysis", "").replace("AuxDyn", "").replace("Aux", "") + ) zip_groups[objname].append(((key, sub_key), ak_form)) From 6abc42c768b0e51392c5ca275876f1d38d45cfd6 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 7 Sep 2023 08:43:44 +0200 Subject: [PATCH 18/29] go back to using public touch_data since we have ak 2.4.2 now --- src/coffea/nanoevents/methods/physlite.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coffea/nanoevents/methods/physlite.py b/src/coffea/nanoevents/methods/physlite.py index 7b9f98580..751c5d03f 100644 --- a/src/coffea/nanoevents/methods/physlite.py +++ b/src/coffea/nanoevents/methods/physlite.py @@ -105,8 +105,7 @@ def _get_target_offsets(load_column, event_index): # let the necessary column optimization know that we need to load this # column to get the offsets if awkward.backend(load_column) == "typetracer": - # awkward.typetracer.touch_data(load_column) # available in awkward > 2.3.3 - load_column.layout._touch_data(recursive=True) + awkward.typetracer.touch_data(load_column) # necessary to stick it into the `NumpyArray` constructor # if typetracer is passed through From 1aaaa63c03af7ded0881cf40a1e35512d81b7361 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Sep 2023 14:29:31 +0000 Subject: [PATCH 19/29] Bump crazy-max/ghaction-github-pages from 3 to 4 Bumps [crazy-max/ghaction-github-pages](https://github.com/crazy-max/ghaction-github-pages) from 3 to 4. - [Release notes](https://github.com/crazy-max/ghaction-github-pages/releases) - [Commits](https://github.com/crazy-max/ghaction-github-pages/compare/v3...v4) --- updated-dependencies: - dependency-name: crazy-max/ghaction-github-pages dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 628df33dd..fe8453b8d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -119,7 +119,7 @@ jobs: touch build/html/.nojekyll - name: Deploy documentation if: github.event_name == 'push' && matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11 - uses: crazy-max/ghaction-github-pages@v3 + uses: crazy-max/ghaction-github-pages@v4 with: target_branch: gh-pages build_dir: docs/build/html From 7a64bb62777c30d50f04c50c3848e6c25111b3cf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Sep 2023 06:14:36 +0000 Subject: [PATCH 20/29] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/psf/black: 23.7.0 → 23.9.1](https://github.com/psf/black/compare/23.7.0...23.9.1) - [github.com/asottile/pyupgrade: v3.9.0 → v3.10.1](https://github.com/asottile/pyupgrade/compare/v3.9.0...v3.10.1) - [github.com/pycqa/flake8: 6.0.0 → 6.1.0](https://github.com/pycqa/flake8/compare/6.0.0...6.1.0) --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a4d511b07..1b3695665 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,7 @@ ci: repos: - repo: https://github.com/psf/black - rev: 23.7.0 + rev: 23.9.1 hooks: - id: black @@ -37,7 +37,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/asottile/pyupgrade - rev: v3.9.0 + rev: v3.10.1 hooks: - id: pyupgrade args: ["--py38-plus"] @@ -48,7 +48,7 @@ repos: - id: setup-cfg-fmt - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 exclude: coffea/processor/templates From 13ebcfc530def1ab6b324cd0edffd5f207d3b60d Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Fri, 15 Sep 2023 18:44:51 -0500 Subject: [PATCH 21/29] request dtype from np.arange and ak.zeros_like --- src/coffea/analysis_tools.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index 66b92fe2b..f8bbd21e2 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -610,13 +610,13 @@ def yieldhist(self): labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"] if not self._delayed_mode: h = hist.Hist(hist.axis.Integer(0, len(labels), name="N-1")) - h.fill(numpy.arange(len(labels)), weight=self._nev) + h.fill(numpy.arange(len(labels), dtype=int), weight=self._nev) else: h = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="N-1")) for i, weight in enumerate(self._masks, 1): h.fill(dask_awkward.full_like(weight, i, dtype=int), weight=weight) - h.fill(dask_awkward.zeros_like(weight)) + h.fill(dask_awkward.zeros_like(weight, dtype=int)) return h, labels @@ -712,7 +712,7 @@ def plot_vars( hist.axis.Integer(0, len(labels), name="N-1"), ) arr = awkward.flatten(var) - h.fill(arr, awkward.zeros_like(arr)) + h.fill(arr, awkward.zeros_like(arr, dtype=int)) for i, mask in enumerate(self.result().masks, 1): arr = awkward.flatten(var[mask]) h.fill(arr, awkward.full_like(arr, i, dtype=int)) @@ -725,7 +725,7 @@ def plot_vars( hist.axis.Integer(0, len(labels), name="N-1"), ) arr = dask_awkward.flatten(var) - h.fill(arr, dask_awkward.zeros_like(arr)) + h.fill(arr, dask_awkward.zeros_like(arr, dtype=int)) for i, mask in enumerate(self.result().masks, 1): arr = dask_awkward.flatten(var[mask]) h.fill(arr, dask_awkward.full_like(arr, i, dtype=int)) @@ -856,8 +856,8 @@ def yieldhist(self): honecut = hist.Hist(hist.axis.Integer(0, len(labels), name="onecut")) hcutflow = honecut.copy() hcutflow.axes.name = ("cutflow",) - honecut.fill(numpy.arange(len(labels)), weight=self._nevonecut) - hcutflow.fill(numpy.arange(len(labels)), weight=self._nevcutflow) + honecut.fill(numpy.arange(len(labels), dtype=int), weight=self._nevonecut) + hcutflow.fill(numpy.arange(len(labels), dtype=int), weight=self._nevcutflow) else: honecut = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="onecut")) @@ -868,12 +868,12 @@ def yieldhist(self): honecut.fill( dask_awkward.full_like(weight, i, dtype=int), weight=weight ) - honecut.fill(dask_awkward.zeros_like(weight)) + honecut.fill(dask_awkward.zeros_like(weight, dtype=int)) for i, weight in enumerate(self._maskscutflow, 1): hcutflow.fill( dask_awkward.full_like(weight, i, dtype=int), weight=weight ) - hcutflow.fill(dask_awkward.zeros_like(weight)) + hcutflow.fill(dask_awkward.zeros_like(weight, dtype=int)) return honecut, hcutflow, labels @@ -975,8 +975,8 @@ def plot_vars( hcutflow.axes.name = name, "cutflow" arr = awkward.flatten(var) - honecut.fill(arr, awkward.zeros_like(arr)) - hcutflow.fill(arr, awkward.zeros_like(arr)) + honecut.fill(arr, awkward.zeros_like(arr, dtype=int)) + hcutflow.fill(arr, awkward.zeros_like(arr, dtype=int)) for i, mask in enumerate(self.result().masksonecut, 1): arr = awkward.flatten(var[mask]) @@ -998,8 +998,8 @@ def plot_vars( hcutflow.axes.name = name, "cutflow" arr = dask_awkward.flatten(var) - honecut.fill(arr, dask_awkward.zeros_like(arr)) - hcutflow.fill(arr, dask_awkward.zeros_like(arr)) + honecut.fill(arr, dask_awkward.zeros_like(arr, dtype=int)) + hcutflow.fill(arr, dask_awkward.zeros_like(arr, dtype=int)) for i, mask in enumerate(self.result().masksonecut, 1): arr = dask_awkward.flatten(var[mask]) From a7434fe393418885cba80cfa2c57fb2dfcf8e223 Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Fri, 15 Sep 2023 21:07:17 -0500 Subject: [PATCH 22/29] remove weirdly shadowed member variable from base schema --- src/coffea/nanoevents/schemas/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coffea/nanoevents/schemas/base.py b/src/coffea/nanoevents/schemas/base.py index 09812eee0..8a1f2251e 100644 --- a/src/coffea/nanoevents/schemas/base.py +++ b/src/coffea/nanoevents/schemas/base.py @@ -105,7 +105,6 @@ class BaseSchema: """ __dask_capable__ = True - behavior = {} def __init__(self, base_form, *args, **kwargs): params = dict(base_form.get("parameters", {})) From a65a3dfea35df64a778ee1fb1387d3fd6d35260e Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Sat, 16 Sep 2023 12:38:55 -0500 Subject: [PATCH 23/29] found a random typo along the way --- src/coffea/analysis_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index f8bbd21e2..e1176f95c 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -418,7 +418,7 @@ def variations(self): class NminusOneToNpz: - """Object to be returned by NmiusOne.to_npz()""" + """Object to be returned by NminusOne.to_npz()""" def __init__(self, file, labels, nev, masks, saver): self._file = file From acdb1d829db5437f863caa144853676a6ea1d04e Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Sat, 16 Sep 2023 14:07:20 -0500 Subject: [PATCH 24/29] prettier print statements and dask.compute reduction --- src/coffea/analysis_tools.py | 61 ++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index e1176f95c..6b2ebc77e 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -494,11 +494,17 @@ def maskscutflow(self): return self._maskscutflow def compute(self): - self._nevonecut = list(dask.compute(*self._nevonecut)) - self._nevcutflow = list(dask.compute(*self._nevcutflow)) - self._masksonecut = list(dask.compute(*self._masksonecut)) - self._maskscutflow = list(dask.compute(*self._maskscutflow)) - numpy.savez( + self._nevonecut, self._nevcutflow = dask.compute( + self._nevonecut, self._nevcutflow + ) + self._masksonecut, self._maskscutflow = dask.compute( + self._masksonecut, self._maskscutflow + ) + self._nevonecut = list(self._nevonecut) + self._nevcutflow = list(self._nevcutflow) + self._masksonecut = list(self._masksonecut) + self._maskscutflow = list(self._maskscutflow) + self._saver( self._file, labels=self._labels, nevonecut=self._nevonecut, @@ -581,21 +587,25 @@ def print(self): if self._delayed_mode: self._nev = list(dask.compute(*self._nev)) + nev = self._nev print("N-1 selection stats:") for i, name in enumerate(self._names): - print( - f"Ignoring {name:<20}: pass = {nev[i+1]:<20}\ - all = {nev[0]:<20}\ - -- eff = {nev[i+1]*100/nev[0]:.1f} %" + stats = ( + f"Ignoring {name:<20}" + f"pass = {nev[i+1]:<20}" + f"all = {nev[0]:<20}" + f"-- eff = {nev[i+1]*100/nev[0]:.1f} %" ) + print(stats) - if True: - print( - f"All cuts {'':<20}: pass = {nev[-1]:<20}\ - all = {nev[0]:<20}\ - -- eff = {nev[-1]*100/nev[0]:.1f} %" - ) + stats_all = ( + f"All cuts {'':<20}" + f"pass = {nev[-1]:<20}" + f"all = {nev[0]:<20}" + f"-- eff = {nev[-1]*100/nev[0]:.1f} %" + ) + print(stats_all) def yieldhist(self): """Returns the N-1 selection yields as a ``hist.Hist`` object @@ -824,19 +834,24 @@ def print(self): """Prints the statistics of the Cutflow""" if self._delayed_mode: - self._nevonecut = list(dask.compute(*self._nevonecut)) - self._nevcutflow = list(dask.compute(*self._nevcutflow)) + self._nevonecut, self._nevcutflow = dask.compute( + self._nevonecut, self._nevcutflow + ) + nevonecut = self._nevonecut nevcutflow = self._nevcutflow + print("Cutflow stats:") for i, name in enumerate(self._names): - print( - f"Cut {name:<20}: pass = {nevonecut[i+1]:<20}\ - cumulative pass = {nevcutflow[i+1]:<20}\ - all = {nevonecut[0]:<20}\ - -- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %\ - -- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %" + stats = ( + f"Cut {name:<20}:" + f"pass = {nevonecut[i+1]:<20}" + f"cumulative pass = {nevcutflow[i+1]:<20}" + f"all = {nevonecut[0]:<20}" + f"-- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %{'':<20}" + f"-- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %" ) + print(stats) def yieldhist(self): """Returns the cutflow yields as ``hist.Hist`` objects From 2fee783b79cee6c51ee1f89c244b1b272d1f6356 Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Sat, 16 Sep 2023 15:58:36 -0500 Subject: [PATCH 25/29] make the default to be compute=False for to_npz() --- src/coffea/analysis_tools.py | 8 ++++---- tests/test_analysis_tools.py | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index 6b2ebc77e..14fd170f3 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -544,7 +544,7 @@ def result(self): labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"] return NminusOneResult(labels, self._nev, self._masks) - def to_npz(self, file, compressed=False, compute=True): + def to_npz(self, file, compressed=False, compute=False): """Saves the results of the N-1 selection to a .npz file Parameters @@ -560,7 +560,7 @@ def to_npz(self, file, compressed=False, compute=True): compute : bool, optional Whether to immediately start writing or to return an object that the user can choose when to start writing by calling compute(). - Default is True. + Default is False. Returns ------- @@ -790,7 +790,7 @@ def result(self): self._maskscutflow, ) - def to_npz(self, file, compressed=False, compute=True): + def to_npz(self, file, compressed=False, compute=False): """Saves the results of the cutflow to a .npz file Parameters @@ -806,7 +806,7 @@ def to_npz(self, file, compressed=False, compute=True): compute : bool, optional Whether to immediately start writing or to return an object that the user can choose when to start writing by calling compute(). - Default is True. + Default is False. Returns ------- diff --git a/tests/test_analysis_tools.py b/tests/test_analysis_tools.py index 1e8c46ec1..bb3221432 100644 --- a/tests/test_analysis_tools.py +++ b/tests/test_analysis_tools.py @@ -513,14 +513,14 @@ def test_packed_selection_nminusone(): ): assert np.all(mask == truth) - nminusone.to_npz("nminusone.npz", compressed=False) + nminusone.to_npz("nminusone.npz", compressed=False).compute() with np.load("nminusone.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nev"] == nev) assert np.all(file["masks"] == masks) os.remove("nminusone.npz") - nminusone.to_npz("nminusone.npz", compressed=True) + nminusone.to_npz("nminusone.npz", compressed=True).compute() with np.load("nminusone.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nev"] == nev) @@ -619,7 +619,7 @@ def test_packed_selection_cutflow(): ): assert np.all(mask == truth) - cutflow.to_npz("cutflow.npz", compressed=False) + cutflow.to_npz("cutflow.npz", compressed=False).compute() with np.load("cutflow.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nevonecut"] == nevonecut) @@ -628,7 +628,7 @@ def test_packed_selection_cutflow(): assert np.all(file["maskscutflow"] == maskscutflow) os.remove("cutflow.npz") - cutflow.to_npz("cutflow.npz", compressed=True) + cutflow.to_npz("cutflow.npz", compressed=True).compute() with np.load("cutflow.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nevonecut"] == nevonecut) @@ -854,14 +854,14 @@ def test_packed_selection_nminusone_dak(optimization_enabled): ): assert np.all(mask.compute() == truth.compute()) - nminusone.to_npz("nminusone.npz", compressed=False) + nminusone.to_npz("nminusone.npz", compressed=False).compute() with np.load("nminusone.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nev"] == list(dask.compute(*nev))) assert np.all(file["masks"] == list(dask.compute(*masks))) os.remove("nminusone.npz") - nminusone.to_npz("nminusone.npz", compressed=True) + nminusone.to_npz("nminusone.npz", compressed=True).compute() with np.load("nminusone.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nev"] == list(dask.compute(*nev))) @@ -978,7 +978,7 @@ def test_packed_selection_cutflow_dak(optimization_enabled): ): assert np.all(mask.compute() == truth.compute()) - cutflow.to_npz("cutflow.npz", compressed=False) + cutflow.to_npz("cutflow.npz", compressed=False).compute() with np.load("cutflow.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut))) @@ -987,7 +987,7 @@ def test_packed_selection_cutflow_dak(optimization_enabled): assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow))) os.remove("cutflow.npz") - cutflow.to_npz("cutflow.npz", compressed=True) + cutflow.to_npz("cutflow.npz", compressed=True).compute() with np.load("cutflow.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut))) @@ -1109,14 +1109,14 @@ def test_packed_selection_nminusone_dak_uproot_only(optimization_enabled): ): assert np.all(mask.compute() == truth.compute()) - nminusone.to_npz("nminusone.npz", compressed=False) + nminusone.to_npz("nminusone.npz", compressed=False).compute() with np.load("nminusone.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nev"] == list(dask.compute(*nev))) assert np.all(file["masks"] == list(dask.compute(*masks))) os.remove("nminusone.npz") - nminusone.to_npz("nminusone.npz", compressed=True) + nminusone.to_npz("nminusone.npz", compressed=True).compute() with np.load("nminusone.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nev"] == list(dask.compute(*nev))) @@ -1233,7 +1233,7 @@ def test_packed_selection_cutflow_dak_uproot_only(optimization_enabled): ): assert np.all(mask.compute() == truth.compute()) - cutflow.to_npz("cutflow.npz", compressed=False) + cutflow.to_npz("cutflow.npz", compressed=False).compute() with np.load("cutflow.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut))) @@ -1242,7 +1242,7 @@ def test_packed_selection_cutflow_dak_uproot_only(optimization_enabled): assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow))) os.remove("cutflow.npz") - cutflow.to_npz("cutflow.npz", compressed=True) + cutflow.to_npz("cutflow.npz", compressed=True).compute() with np.load("cutflow.npz") as file: assert np.all(file["labels"] == labels) assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut))) From 3bdff8c3d33e73b7fcb57edad830605b0eb27fae Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Sat, 16 Sep 2023 20:21:51 -0500 Subject: [PATCH 26/29] warn in print() when user is about to compute dask stuff --- src/coffea/analysis_tools.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index 14fd170f3..a68124d87 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -582,10 +582,16 @@ def to_npz(self, file, compressed=False, compute=False): else: return out - def print(self): + def print(self, compute=False): """Prints the statistics of the N-1 selection""" - if self._delayed_mode: + if self._delayed_mode and not compute: + warnings.warn( + "This will compute dask_awkward arrays. If you really want to do this now, call print(compute=True)" + ) + return + + if self._delayed_mode and compute: self._nev = list(dask.compute(*self._nev)) nev = self._nev @@ -830,10 +836,16 @@ def to_npz(self, file, compressed=False, compute=False): else: return out - def print(self): + def print(self, compute=False): """Prints the statistics of the Cutflow""" - if self._delayed_mode: + if self._delayed_mode and not compute: + warnings.warn( + "This will compute dask_awkward arrays. If you really want to do this now, call print(compute=True)" + ) + return + + if self._delayed_mode and compute: self._nevonecut, self._nevcutflow = dask.compute( self._nevonecut, self._nevcutflow ) From 8e6bb10d8917d59293fd0f613feba05edeb9b8a6 Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Mon, 18 Sep 2023 09:02:25 -0500 Subject: [PATCH 27/29] Revert "warn in print() when user is about to compute dask stuff" This reverts commit 3bdff8c3d33e73b7fcb57edad830605b0eb27fae. --- src/coffea/analysis_tools.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index a68124d87..14fd170f3 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -582,16 +582,10 @@ def to_npz(self, file, compressed=False, compute=False): else: return out - def print(self, compute=False): + def print(self): """Prints the statistics of the N-1 selection""" - if self._delayed_mode and not compute: - warnings.warn( - "This will compute dask_awkward arrays. If you really want to do this now, call print(compute=True)" - ) - return - - if self._delayed_mode and compute: + if self._delayed_mode: self._nev = list(dask.compute(*self._nev)) nev = self._nev @@ -836,16 +830,10 @@ def to_npz(self, file, compressed=False, compute=False): else: return out - def print(self, compute=False): + def print(self): """Prints the statistics of the Cutflow""" - if self._delayed_mode and not compute: - warnings.warn( - "This will compute dask_awkward arrays. If you really want to do this now, call print(compute=True)" - ) - return - - if self._delayed_mode and compute: + if self._delayed_mode: self._nevonecut, self._nevcutflow = dask.compute( self._nevonecut, self._nevcutflow ) From 522f38b036507b0b79d57cfacedb41a0822a7e5c Mon Sep 17 00:00:00 2001 From: iasonkrom Date: Mon, 18 Sep 2023 09:07:50 -0500 Subject: [PATCH 28/29] only warn and not add compute argument in print --- src/coffea/analysis_tools.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py index 14fd170f3..facf14e97 100644 --- a/src/coffea/analysis_tools.py +++ b/src/coffea/analysis_tools.py @@ -586,6 +586,9 @@ def print(self): """Prints the statistics of the N-1 selection""" if self._delayed_mode: + warnings.warn( + "Printing the N-1 selection statistics is going to compute dask_awkward objects." + ) self._nev = list(dask.compute(*self._nev)) nev = self._nev @@ -834,6 +837,9 @@ def print(self): """Prints the statistics of the Cutflow""" if self._delayed_mode: + warnings.warn( + "Printing the cutflow statistics is going to compute dask_awkward objects." + ) self._nevonecut, self._nevcutflow = dask.compute( self._nevonecut, self._nevcutflow ) From 9feea2b3ac5c2d5b14f5cfb52aa94ce86b26db2e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Sep 2023 05:39:03 +0000 Subject: [PATCH 29/29] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/pyupgrade: v3.10.1 → v3.11.0](https://github.com/asottile/pyupgrade/compare/v3.10.1...v3.11.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1b3695665..d535cd79e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,7 +37,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/asottile/pyupgrade - rev: v3.10.1 + rev: v3.11.0 hooks: - id: pyupgrade args: ["--py38-plus"]