From 4e7609ee11ed7db08612f582abb9d115f44fa49e Mon Sep 17 00:00:00 2001 From: Oksana Shadura Date: Tue, 13 Feb 2024 16:28:19 +0100 Subject: [PATCH] Update tests accordingly --- tests/test_adl1.py | 38 -------------------- tests/test_dimuon.py | 3 +- tests/test_doublemuon.py | 76 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 39 deletions(-) delete mode 100644 tests/test_adl1.py create mode 100644 tests/test_doublemuon.py diff --git a/tests/test_adl1.py b/tests/test_adl1.py deleted file mode 100644 index f305b66..0000000 --- a/tests/test_adl1.py +++ /dev/null @@ -1,38 +0,0 @@ -import time - -import awkward as ak -import hist -import matplotlib.pyplot as plt -import numpy as np -from coffea.nanoevents import NanoEventsFactory, NanoAODSchema -from coffea import processor - -import dask -import dask_awkward as dak -import hist.dask as hda - -from distributed import Client -client=Client() - -# The opendata files are non-standard NanoAOD, so some optional data columns are missing -NanoAODSchema.warn_missing_crossrefs = False - -# The warning emitted below indicates steps_per_file is for initial data exploration -# and test. When running at scale there are better ways to specify processing chunks -# of files. -events = NanoEventsFactory.from_root( - "root://eospublic.cern.ch//eos/root-eos/Run2012B_SingleMu.root:Events", - steps_per_file=500, - metadata={"dataset": "SingleMu"} - ).events() - - -@pytest.mark.coffeacalver -def test_adl1(): - q1_hist = ( - hda.Hist.new.Reg(100, 0, 200, name="met", label="$E_{T}^{miss}$ [GeV]") - .Double() - .fill(events.MET.pt) - ) - q1_hist.compute().plot1d(flow="none") - dak.necessary_columns(q1_hist) diff --git a/tests/test_dimuon.py b/tests/test_dimuon.py index 97b7552..df95fcf 100644 --- a/tests/test_dimuon.py +++ b/tests/test_dimuon.py @@ -6,6 +6,7 @@ from coffea.nanoevents import NanoEventsFactory, BaseSchema from dask.distributed import Client +import pytest fileset = { 'DoubleMuon': [ @@ -61,7 +62,7 @@ def process(self, events): def postprocess(self, accumulator): pass -@pytest.mark.coffeav0 +@pytest.mark.v0 def test_processor_dimu_mass(): client = Client() iterative_run = processor.Runner(executor = processor.dask_executor, diff --git a/tests/test_doublemuon.py b/tests/test_doublemuon.py new file mode 100644 index 0000000..dc60409 --- /dev/null +++ b/tests/test_doublemuon.py @@ -0,0 +1,76 @@ +import hist +import dask +import awkward as ak +import hist.dask as hda +import dask_awkward as dak + +from coffea import processor +from coffea.nanoevents.methods import candidate +from coffea.nanoevents import NanoEventsFactory, BaseSchema + +from distributed import Client + +import pytest + +client = Client() + + +class MyProcessor(processor.ProcessorABC): + def __init__(self): + pass + + def process(self, events): + dataset = events.metadata['dataset'] + muons = ak.zip( + { + "pt": events.Muon_pt, + "eta": events.Muon_eta, + "phi": events.Muon_phi, + "mass": events.Muon_mass, + "charge": events.Muon_charge, + }, + with_name="PtEtaPhiMCandidate", + behavior=candidate.behavior, + ) + + h_mass = ( + hda.Hist.new + .StrCat(["opposite", "same"], name="sign") + .Log(1000, 0.2, 200., name="mass", label="$m_{\mu\mu}$ [GeV]") + .Int64() + ) + + cut = (ak.num(muons) == 2) & (ak.sum(muons.charge, axis=1) == 0) + # add first and second muon in every event together + dimuon = muons[cut][:, 0] + muons[cut][:, 1] + h_mass.fill(sign="opposite", mass=dimuon.mass) + + cut = (ak.num(muons) == 2) & (ak.sum(muons.charge, axis=1) != 0) + dimuon = muons[cut][:, 0] + muons[cut][:, 1] + h_mass.fill(sign="same", mass=dimuon.mass) + + return { + dataset: { + "entries": ak.num(events, axis=0), + "mass": h_mass, + } + } + + def postprocess(self, accumulator): + pass + +filename = "root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/Run2012B_DoubleMuParked.root" + +events = NanoEventsFactory.from_root( + {filename: "Events"}, + metadata={"dataset": "DoubleMuon"}, + schemaclass=BaseSchema, +).events() + + +@pytest.mark.calver +def test_adl1(): + p = MyProcessor() + out = p.process(events) + (computed,) = dask.compute(out) + print(computed)