From 57474ffb360ce53e6381fe538c3ffb9bbbd3555d Mon Sep 17 00:00:00 2001 From: Lindsey Gray Date: Thu, 9 Nov 2023 11:53:42 -0600 Subject: [PATCH] permit_dask -> delayed in NanoEventsFactory.from_X --- src/coffea/nanoevents/factory.py | 20 +++++++++++--------- src/coffea/processor/executor.py | 2 +- tests/test_analysis_tools.py | 2 +- tests/test_fix823.py | 1 - tests/test_jetmet_tools.py | 1 - tests/test_lookup_tools.py | 2 -- tests/test_nanoevents.py | 14 ++++++++------ tests/test_nanoevents_delphes.py | 4 +++- tests/test_nanoevents_pfnano.py | 4 +++- tests/test_nanoevents_physlite.py | 2 +- tests/test_nanoevents_treemaker.py | 2 +- 11 files changed, 29 insertions(+), 25 deletions(-) diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py index 7af50cd4c..bc7b27d05 100644 --- a/src/coffea/nanoevents/factory.py +++ b/src/coffea/nanoevents/factory.py @@ -245,7 +245,7 @@ def from_root( access_log=None, iteritems_options={}, use_ak_forth=True, - permit_dask=True, + delayed=True, ): """Quickly build NanoEvents from a root file @@ -277,8 +277,8 @@ def from_root( Pass a list instance to record which branches were lazily accessed by this instance use_ak_forth: Toggle using awkward_forth to interpret branches in root file. - permit_dask: - Allow nanoevents to use dask as a backend. + delayed: + Nanoevents will use dask as a backend to construct a delayed task graph representing your analysis. """ if treepath is not uproot._util.unset and not isinstance( @@ -291,7 +291,7 @@ def from_root( ) if ( - permit_dask + delayed and not isinstance(schemaclass, FunctionType) and schemaclass.__dask_capable__ ): @@ -326,7 +326,7 @@ def from_root( **uproot_options, ) return cls(map_schema, opener, None, cache=None, is_dask=True) - elif permit_dask and not schemaclass.__dask_capable__: + elif delayed and not schemaclass.__dask_capable__: warnings.warn( f"{schemaclass} is not dask capable despite allowing dask, generating non-dask nanoevents" ) @@ -380,7 +380,7 @@ def from_root( def from_parquet( cls, file, - treepath="/Events", + treepath=uproot._util.unset, entry_start=None, entry_stop=None, runtime_cache=None, @@ -390,7 +390,7 @@ def from_parquet( parquet_options={}, skyhook_options={}, access_log=None, - permit_dask=False, + delayed=True, ): """Quickly build NanoEvents from a parquet file @@ -419,6 +419,8 @@ def from_parquet( Any options to pass to ``pyarrow.parquet.ParquetFile`` access_log : list, optional Pass a list instance to record which branches were lazily accessed by this instance + delayed: + Nanoevents will use dask as a backend to construct a delayed task graph representing your analysis. """ import pyarrow import pyarrow.dataset as ds @@ -434,7 +436,7 @@ def from_parquet( ) if ( - permit_dask + delayed and not isinstance(schemaclass, FunctionType) and schemaclass.__dask_capable__ ): @@ -453,7 +455,7 @@ def from_parquet( else: raise TypeError("Invalid file type (%s)" % (str(type(file)))) return cls(map_schema, opener, None, cache=None, is_dask=True) - elif permit_dask and not schemaclass.__dask_capable__: + elif delayed and not schemaclass.__dask_capable__: warnings.warn( f"{schemaclass} is not dask capable despite allowing dask, generating non-dask nanoevents" ) diff --git a/src/coffea/processor/executor.py b/src/coffea/processor/executor.py index 18ab1c8c5..76080e007 100644 --- a/src/coffea/processor/executor.py +++ b/src/coffea/processor/executor.py @@ -1678,7 +1678,7 @@ def _work_function( schemaclass=schema, metadata=metadata, access_log=materialized, - permit_dask=True, + delayed=True, ) events = factory.events()[item.entrystart : item.entrystop] elif format == "parquet": diff --git a/tests/test_analysis_tools.py b/tests/test_analysis_tools.py index e7914a119..bc3ebf05a 100644 --- a/tests/test_analysis_tools.py +++ b/tests/test_analysis_tools.py @@ -12,7 +12,7 @@ {os.path.abspath(fname): "Events"}, schemaclass=NanoAODSchema, metadata={"dataset": "DYJets"}, - permit_dask=False, + delayed=False, ).events() dakevents = NanoEventsFactory.from_root( {os.path.abspath(fname): "Events"}, diff --git a/tests/test_fix823.py b/tests/test_fix823.py index fcfff3dad..55068dda0 100644 --- a/tests/test_fix823.py +++ b/tests/test_fix823.py @@ -11,7 +11,6 @@ def test_explicit_delete_after_assign(): {testfile: "Events"}, metadata={"dataset": "nano_dy"}, schemaclass=NanoAODSchema, - permit_dask=True, ).events() genpart = events["GenPart"] diff --git a/tests/test_jetmet_tools.py b/tests/test_jetmet_tools.py index 8be3a97f3..82c01e5ee 100644 --- a/tests/test_jetmet_tools.py +++ b/tests/test_jetmet_tools.py @@ -724,7 +724,6 @@ def test_corrected_jets_factory(optimization_enabled): events = NanoEventsFactory.from_root( {os.path.abspath("tests/samples/nano_dy.root"): "Events"}, metadata={}, - permit_dask=True, ).events() jec_stack_names = [ diff --git a/tests/test_lookup_tools.py b/tests/test_lookup_tools.py index 7fde363de..d3ee1593c 100644 --- a/tests/test_lookup_tools.py +++ b/tests/test_lookup_tools.py @@ -389,7 +389,6 @@ def test_rochester(): # test against nanoaod events = NanoEventsFactory.from_root( {os.path.abspath("tests/samples/nano_dimuon.root"): "Events"}, - permit_dask=True, ).events() data_k = rochester.kScaleDT( @@ -406,7 +405,6 @@ def test_rochester(): # test against mc events = NanoEventsFactory.from_root( {os.path.abspath("tests/samples/nano_dy.root"): "Events"}, - permit_dask=True, ).events() hasgen = ~np.isnan(ak.fill_none(events.Muon.matched_gen.pt, np.nan)) diff --git a/tests/test_nanoevents.py b/tests/test_nanoevents.py index f6b5da7b9..291d6f254 100644 --- a/tests/test_nanoevents.py +++ b/tests/test_nanoevents.py @@ -75,7 +75,7 @@ def test_read_nanomc(suffix): factory = getattr(NanoEventsFactory, f"from_{suffix}")( {path: "Events"}, schemaclass=nanoversion, - permit_dask=False, + delayed=False, ) events = factory.events() @@ -138,7 +138,7 @@ def test_read_from_uri(suffix): factory = getattr(NanoEventsFactory, f"from_{suffix}")( {path: "Events"}, schemaclass=nanoversion, - permit_dask=False, + delayed=False, ) events = factory.events() @@ -153,7 +153,7 @@ def test_read_nanodata(suffix): factory = getattr(NanoEventsFactory, f"from_{suffix}")( {path: "Events"}, schemaclass=nanoversion, - permit_dask=False, + delayed=False, ) events = factory.events() @@ -165,7 +165,7 @@ def test_missing_eventIds_error(): path = os.path.abspath("tests/samples/missing_luminosityBlock.root") + ":Events" with pytest.raises(RuntimeError): factory = NanoEventsFactory.from_root( - path, schemaclass=NanoAODSchema, permit_dask=False + path, schemaclass=NanoAODSchema, delayed=False ) factory.events() @@ -177,7 +177,7 @@ def test_missing_eventIds_warning(): ): NanoAODSchema.error_missing_event_ids = False factory = NanoEventsFactory.from_root( - path, schemaclass=NanoAODSchema, permit_dask=False + path, schemaclass=NanoAODSchema, delayed=False ) factory.events() @@ -186,6 +186,8 @@ def test_missing_eventIds_warning_dask(): path = os.path.abspath("tests/samples/missing_luminosityBlock.root") + ":Events" NanoAODSchema.error_missing_event_ids = False events = NanoEventsFactory.from_root( - path, schemaclass=NanoAODSchema, permit_dask=True + path, + schemaclass=NanoAODSchema, + delayed=True, ).events() events.Muon.pt.compute(scheduler="processes") diff --git a/tests/test_nanoevents_delphes.py b/tests/test_nanoevents_delphes.py index cf2e635e5..566634ba9 100644 --- a/tests/test_nanoevents_delphes.py +++ b/tests/test_nanoevents_delphes.py @@ -10,7 +10,9 @@ def _events(): path = os.path.abspath("tests/samples/delphes.root") factory = NanoEventsFactory.from_root( - {path: "Delphes"}, schemaclass=DelphesSchema, permit_dask=True + {path: "Delphes"}, + schemaclass=DelphesSchema, + delayed=True, ) return factory.events() diff --git a/tests/test_nanoevents_pfnano.py b/tests/test_nanoevents_pfnano.py index fc50e97c1..ef3d9af5b 100644 --- a/tests/test_nanoevents_pfnano.py +++ b/tests/test_nanoevents_pfnano.py @@ -9,7 +9,9 @@ def events(): path = os.path.abspath("tests/samples/pfnano.root") events = NanoEventsFactory.from_root( - {path: "Events"}, schemaclass=PFNanoAODSchema, permit_dask=True + {path: "Events"}, + schemaclass=PFNanoAODSchema, + delayed=True, ).events() return events diff --git a/tests/test_nanoevents_physlite.py b/tests/test_nanoevents_physlite.py index 95f58491d..eab0960a8 100644 --- a/tests/test_nanoevents_physlite.py +++ b/tests/test_nanoevents_physlite.py @@ -11,7 +11,7 @@ def _events(): factory = NanoEventsFactory.from_root( {path: "CollectionTree"}, schemaclass=PHYSLITESchema, - permit_dask=True, + delayed=True, ) return factory.events() diff --git a/tests/test_nanoevents_treemaker.py b/tests/test_nanoevents_treemaker.py index 790272474..2edf3449b 100644 --- a/tests/test_nanoevents_treemaker.py +++ b/tests/test_nanoevents_treemaker.py @@ -11,7 +11,7 @@ def events(): path = os.path.abspath("tests/samples/treemaker.root") events = NanoEventsFactory.from_root( - {path: "PreSelection"}, schemaclass=TreeMakerSchema, permit_dask=True + {path: "PreSelection"}, schemaclass=TreeMakerSchema, delayed=True ).events() return events