diff --git a/src/coffea/nanoevents/__init__.py b/src/coffea/nanoevents/__init__.py
index 348ca0d8e..8e66d8044 100644
--- a/src/coffea/nanoevents/__init__.py
+++ b/src/coffea/nanoevents/__init__.py
@@ -4,8 +4,10 @@
 from coffea.nanoevents.factory import NanoEventsFactory
 from coffea.nanoevents.schemas import (
+    FCC,
     BaseSchema,
     DelphesSchema,
+    FCCSchema,
     NanoAODSchema,
     PDUNESchema,
     PFNanoAODSchema,
@@ -24,4 +26,6 @@
     "DelphesSchema",
     "PDUNESchema",
     "ScoutingNanoAODSchema",
+    "FCC",
+    "FCCSchema",
 ]
diff --git a/src/coffea/nanoevents/methods/fcc.py b/src/coffea/nanoevents/methods/fcc.py
new file mode 100644
index 000000000..adc4dc49c
--- /dev/null
+++ b/src/coffea/nanoevents/methods/fcc.py
@@ -0,0 +1,358 @@
+import awkward
+import numpy
+from dask_awkward.lib.core import dask_property
+
+from coffea.nanoevents.methods import base, vector
+
+behavior = {}
+behavior.update(base.behavior)
+
+
+class _FCCEvents(behavior["NanoEvents"]):
+    def __repr__(self):
+        return "FCC Events"
+
+
+behavior["NanoEvents"] = _FCCEvents
+
+
+def _set_repr_name(classname):
+    def namefcn(self):
+        return classname
+
+    behavior[classname].__repr__ = namefcn
+
+
+@awkward.mixin_class(behavior)
+class MomentumCandidate(vector.LorentzVector):
+    """A Lorentz vector with charge
+
+    This mixin class requires the parent class to provide items `px`, `py`, `pz`, `E`, and `charge`.
+    """
+
+    @awkward.mixin_class_method(numpy.add, {"MomentumCandidate"})
+    def add(self, other):
+        """Add two candidates together elementwise using `px`, `py`, `pz`, `E`, and `charge` components"""
+        return awkward.zip(
+            {
+                "px": self.px + other.px,
+                "py": self.py + other.py,
+                "pz": self.pz + other.pz,
+                "E": self.E + other.E,
+                "charge": self.charge + other.charge,
+            },
+            with_name="MomentumCandidate",
+            behavior=self.behavior,
+        )
+
+    def sum(self, axis=-1):
+        """Sum an array of vectors elementwise using `px`, `py`, `pz`, `E`, and `charge` components"""
+        return awkward.zip(
+            {
+                "px": awkward.sum(self.px, axis=axis),
+                "py": awkward.sum(self.py, axis=axis),
+                "pz": awkward.sum(self.pz, axis=axis),
+                "E": awkward.sum(self.E, axis=axis),
+                "charge": awkward.sum(self.charge, axis=axis),
+            },
+            with_name="MomentumCandidate",
+            behavior=self.behavior,
+        )
+
+    @property
+    def absolute_mass(self):
+        return numpy.sqrt(numpy.abs(self.mass2))
+
+
+behavior.update(
+    awkward._util.copy_behaviors(vector.LorentzVector, MomentumCandidate, behavior)
+)
+
+MomentumCandidateArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821
+MomentumCandidateArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821
+MomentumCandidateArray.ProjectionClass4D = vector.LorentzVectorArray  # noqa: F821
+MomentumCandidateArray.MomentumClass = MomentumCandidateArray  # noqa: F821
+
+
+@awkward.mixin_class(behavior)
+class MCParticle(MomentumCandidate, base.NanoCollection):
+    """Generated Monte Carlo particles"""
+
+    # Daughters
+    @dask_property
+    def get_daughters_index(self):
+        """
+        Obtain the global indices of the daughters of each and every MCParticle
+        - The output is a doubly nested awkward array
+        - Needs the presence of the Particleidx1 collection
+        - The Particleidx1.index contains info about the daughters
+        """
+        return self.daughters.Particleidx1_rangesG
+
+    @get_daughters_index.dask
+    def get_daughters_index(self, dask_array):
+        """
+        Obtain the global indices of the daughters of each and every MCParticle
+        - The output is a doubly nested awkward array
+        - Needs the presence of the Particleidx1 collection
+        - The Particleidx1.index contains info about the daughters
+        """
+        return dask_array.daughters.Particleidx1_rangesG
+ """ + return dask_array.daughters.Particleidx1_rangesG + + @dask_property + def get_daughters(self): + """ + Obtain the actual MCParticle daughters to each and every MCParticle + - The output is a doubly nested awkward array + - Needs the presence of Particleidx1 collection + - The Particleidx1.index contains info about the daughters + """ + return self._events().Particle._apply_global_index(self.get_daughters_index) + + @get_daughters.dask + def get_daughters(self, dask_array): + """ + Obtain the actual MCParticle daughters to each and every MCParticle + - The output is a doubly nested awkward array + - Needs the presence of Particleidx1 collection + - The Particleidx1.index contains info about the daughters + """ + return dask_array._events().Particle._apply_global_index( + dask_array.get_daughters_index + ) + + # Parents + @dask_property + def get_parents_index(self): + """ + Obtain the global indices of the parents of each and every MCParticle + - The output is a doubly nested awkward array + - Needs the presence of Particleidx0 collection + - The Particleidx0.index contains info about the parents + """ + return self.parents.Particleidx0_rangesG + + @get_parents_index.dask + def get_parents_index(self, dask_array): + """ + Obtain the indices of the parents of each and every MCParticle + - The output is a doubly nested awkward array + - Needs the presence of Particleidx0 collection + - The Particleidx0.index contains info about the parents + """ + return dask_array.parents.Particleidx0_rangesG + + @dask_property + def get_parents(self): + """ + Obtain the actual MCParticle parents to each and every MCParticle + - The output is a doubly nested awkward array + - Needs the presence of Particleidx0 collection + - The Particleidx0.index contains info about the parents + """ + return self._events().Particle._apply_global_index(self.get_parents_index) + + @get_parents.dask + def get_parents(self, dask_array): + """ + Obtain the actual MCParticle parents to each and every MCParticle + - The output is a doubly nested awkward array + - Needs the presence of Particleidx0 collection + - The Particleidx0.index contains info about the parents + """ + return dask_array._events().Particle._apply_global_index( + dask_array.get_parents_index + ) + + +_set_repr_name("MCParticle") +behavior.update(awkward._util.copy_behaviors(MomentumCandidate, MCParticle, behavior)) + +MCParticleArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821 +MCParticleArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821 +MCParticleArray.ProjectionClass4D = MCParticleArray # noqa: F821 +MCParticleArray.MomentumClass = vector.LorentzVectorArray # noqa: F821 + + +@awkward.mixin_class(behavior) +class ReconstructedParticle(MomentumCandidate, base.NanoCollection): + """Reconstructed particle""" + + def match_collection(self, idx): + """Returns matched particles; pass on an ObjectID array.""" + return self[idx.index] + + @dask_property + def match_muons(self): + """Returns matched muons; drops none values""" + m = self._events().ReconstructedParticles._apply_global_index( + self.Muonidx0_indexGlobal + ) + return awkward.drop_none(m, behavior=self.behavior) + + @match_muons.dask + def match_muons(self, dask_array): + """Returns matched muons; drops none values""" + m = dask_array._events().ReconstructedParticles._apply_global_index( + dask_array.Muonidx0_indexGlobal + ) + return awkward.drop_none(m, behavior=self.behavior) + + @dask_property + def match_electrons(self): + """Returns matched electrons; drops none 
values""" + e = self._events().ReconstructedParticles._apply_global_index( + self.Electronidx0_indexGlobal + ) + return awkward.drop_none(e, behavior=self.behavior) + + @match_electrons.dask + def match_electrons(self, dask_array): + """Returns matched electrons; drops none values""" + e = dask_array._events().ReconstructedParticles._apply_global_index( + dask_array.Electronidx0_indexGlobal + ) + return awkward.drop_none(e, behavior=self.behavior) + + @dask_property + def match_gen(self): + """Returns the Gen level (MC) particle corresponding to the ReconstructedParticle""" + prepared = self._events().Particle[self._events().MCRecoAssociations.mc.index] + return prepared._apply_global_index(self.MCRecoAssociationsidx0_indexGlobal) + + @match_gen.dask + def match_gen(self, dask_array): + """Returns the Gen level (MC) particle corresponding to the ReconstructedParticle""" + prepared = dask_array._events().Particle[ + dask_array._events().MCRecoAssociations.mc.index + ] + return prepared._apply_global_index( + dask_array.MCRecoAssociationsidx0_indexGlobal + ) + + +_set_repr_name("ReconstructedParticle") +behavior.update( + awkward._util.copy_behaviors(MomentumCandidate, ReconstructedParticle, behavior) +) + +ReconstructedParticleArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821 +ReconstructedParticleArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821 +ReconstructedParticleArray.ProjectionClass4D = ReconstructedParticleArray # noqa: F821 +ReconstructedParticleArray.MomentumClass = vector.LorentzVectorArray # noqa: F821 + + +@awkward.mixin_class(behavior) +class RecoMCParticleLink(base.NanoCollection): + """MCRecoParticleAssociation objects.""" + + @property + def reco_mc_index(self): + """ + Returns an array of indices mapping to generator particles for each reconstructed particle + """ + arr_reco = self.reco.index[:, :, numpy.newaxis] + arr_mc = self.mc.index[:, :, numpy.newaxis] + + joined_indices = awkward.concatenate((arr_reco, arr_mc), axis=2) + + return joined_indices + + @dask_property + def reco_mc(self): + """ + Returns an array of records mapping to generator particle record for each reconstructed particle record + - Needs 'ReconstructedParticles' and 'Particle' collections + """ + reco_index = self.reco.index + mc_index = self.mc.index + r = self._events().ReconstructedParticles[reco_index][:, :, numpy.newaxis] + m = self._events().Particle[mc_index][:, :, numpy.newaxis] + + return awkward.concatenate((r, m), axis=2) + + @reco_mc.dask + def reco_mc(self, dask_array): + """ + Returns an array of records mapping to generator particle record for each reconstructed particle record + - Needs 'ReconstructedParticles' and 'Particle' collections + """ + reco_index = dask_array.reco.index + mc_index = dask_array.mc.index + r = dask_array._events().ReconstructedParticles[reco_index][:, :, numpy.newaxis] + m = dask_array._events().Particle[mc_index][:, :, numpy.newaxis] + + return awkward.concatenate((r, m), axis=2) + + +_set_repr_name("RecoMCParticleLink") + +RecoMCParticleLinkArray.ProjectionClass2D = vector.TwoVectorArray # noqa: F821 +RecoMCParticleLinkArray.ProjectionClass3D = vector.ThreeVectorArray # noqa: F821 +RecoMCParticleLinkArray.ProjectionClass4D = RecoMCParticleLinkArray # noqa: F821 +RecoMCParticleLinkArray.MomentumClass = vector.LorentzVectorArray # noqa: F821 + + +@awkward.mixin_class(behavior) +class ParticleID(base.NanoCollection): + """ParticleID collection""" + + +_set_repr_name("ParticleID") + +ParticleIDArray.ProjectionClass2D = 
+
+
+@awkward.mixin_class(behavior)
+class RecoMCParticleLink(base.NanoCollection):
+    """MCRecoParticleAssociation objects."""
+
+    @property
+    def reco_mc_index(self):
+        """
+        Returns an array of indices mapping to generator particles for each reconstructed particle
+        """
+        arr_reco = self.reco.index[:, :, numpy.newaxis]
+        arr_mc = self.mc.index[:, :, numpy.newaxis]
+
+        joined_indices = awkward.concatenate((arr_reco, arr_mc), axis=2)
+
+        return joined_indices
+
+    @dask_property
+    def reco_mc(self):
+        """
+        Returns an array of records mapping each reconstructed particle record to its generator particle record
+        - Needs the 'ReconstructedParticles' and 'Particle' collections
+        """
+        reco_index = self.reco.index
+        mc_index = self.mc.index
+        r = self._events().ReconstructedParticles[reco_index][:, :, numpy.newaxis]
+        m = self._events().Particle[mc_index][:, :, numpy.newaxis]
+
+        return awkward.concatenate((r, m), axis=2)
+
+    @reco_mc.dask
+    def reco_mc(self, dask_array):
+        """
+        Returns an array of records mapping each reconstructed particle record to its generator particle record
+        - Needs the 'ReconstructedParticles' and 'Particle' collections
+        """
+        reco_index = dask_array.reco.index
+        mc_index = dask_array.mc.index
+        r = dask_array._events().ReconstructedParticles[reco_index][:, :, numpy.newaxis]
+        m = dask_array._events().Particle[mc_index][:, :, numpy.newaxis]
+
+        return awkward.concatenate((r, m), axis=2)
+
+
+_set_repr_name("RecoMCParticleLink")
+
+RecoMCParticleLinkArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821
+RecoMCParticleLinkArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821
+RecoMCParticleLinkArray.ProjectionClass4D = RecoMCParticleLinkArray  # noqa: F821
+RecoMCParticleLinkArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821
+
+
+@awkward.mixin_class(behavior)
+class ParticleID(base.NanoCollection):
+    """ParticleID collection"""
+
+
+_set_repr_name("ParticleID")
+
+ParticleIDArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821
+ParticleIDArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821
+ParticleIDArray.ProjectionClass4D = ParticleIDArray  # noqa: F821
+ParticleIDArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821
+
+
+@awkward.mixin_class(behavior)
+class ObjectID(base.NanoCollection):
+    """
+    Generic Object ID storage, pointing to another collection
+    - All the idx collections are assigned this mixin
+
+    Note: The hash-tagged '#' branches have the type podio::ObjectID
+    """
+
+
+_set_repr_name("ObjectID")
+
+ObjectIDArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821
+ObjectIDArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821
+ObjectIDArray.ProjectionClass4D = ObjectIDArray  # noqa: F821
+ObjectIDArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821
+
+
+@awkward.mixin_class(behavior)
+class Cluster(base.NanoCollection):
+    """
+    Clusters
+
+    Note: I could not find much info on this to build its methods
+    """
+
+
+_set_repr_name("Cluster")
+
+ClusterArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821
+ClusterArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821
+ClusterArray.ProjectionClass4D = ClusterArray  # noqa: F821
+ClusterArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821
+
+
+@awkward.mixin_class(behavior)
+class Track(base.NanoCollection):
+    """
+    Tracks
+
+    Note: I could not find much info on this to build its methods
+    """
+
+
+_set_repr_name("Track")
+
+TrackArray.ProjectionClass2D = vector.TwoVectorArray  # noqa: F821
+TrackArray.ProjectionClass3D = vector.ThreeVectorArray  # noqa: F821
+TrackArray.ProjectionClass4D = TrackArray  # noqa: F821
+TrackArray.MomentumClass = vector.LorentzVectorArray  # noqa: F821
diff --git a/src/coffea/nanoevents/schemas/__init__.py b/src/coffea/nanoevents/schemas/__init__.py
index 71f92825e..a598fbf26 100644
--- a/src/coffea/nanoevents/schemas/__init__.py
+++ b/src/coffea/nanoevents/schemas/__init__.py
@@ -1,5 +1,6 @@
 from .base import BaseSchema
 from .delphes import DelphesSchema
+from .fcc import FCC, FCCSchema
 from .nanoaod import NanoAODSchema, PFNanoAODSchema, ScoutingNanoAODSchema
 from .pdune import PDUNESchema
 from .physlite import PHYSLITESchema
@@ -14,4 +15,6 @@
     "DelphesSchema",
     "PDUNESchema",
     "ScoutingNanoAODSchema",
+    "FCC",
+    "FCCSchema",
 ]
diff --git a/src/coffea/nanoevents/schemas/fcc.py b/src/coffea/nanoevents/schemas/fcc.py
new file mode 100644
index 000000000..aa8e922b4
--- /dev/null
+++ b/src/coffea/nanoevents/schemas/fcc.py
@@ -0,0 +1,618 @@
+import copy
+import re
+
+from coffea.nanoevents import transforms
+from coffea.nanoevents.methods import vector
+from coffea.nanoevents.schemas.base import BaseSchema, zip_forms
+from coffea.nanoevents.util import concat
+
+# Collection Regex #
+# Any branch name with a forward slash '/'
+# Example: 'ReconstructedParticles/ReconstructedParticles.energy'
+_all_collections = re.compile(r".*[\/]+.*")
+
+# Any branch name with a trailing underscore followed by an integer between 0 and 9
+# Example: 'EFlowPhoton_1'
+_trailing_under = re.compile(r".*_[0-9]")
+
+# Any branch name with a hashtag '#'
+# Example: 'ReconstructedParticles#0/ReconstructedParticles#0.index'
+_idxs = re.compile(r".*[\#]+.*")
+
+# Any branch name with '[' and ']'
+# Example: 'ReconstructedParticles/ReconstructedParticles.covMatrix[10]'
+_square_braces = re.compile(r".*\[.*\]")
+
+
+__dask_capable__ = True
+
+
+def sort_dict(d):
+    """Sort a dictionary by key"""
+    return {k: d[k] for k in sorted(d)}
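As a quick sanity check of the four patterns above, here is an illustrative snippet that re-declares the same pattern strings and exercises them on the branch names quoted in the comments (editorial, not part of the diff):

```python
import re

_all_collections = re.compile(r".*[\/]+.*")
_trailing_under = re.compile(r".*_[0-9]")
_idxs = re.compile(r".*[\#]+.*")
_square_braces = re.compile(r".*\[.*\]")

assert _all_collections.match("ReconstructedParticles/ReconstructedParticles.energy")
assert _trailing_under.match("EFlowPhoton_1")
assert _idxs.match("ReconstructedParticles#0/ReconstructedParticles#0.index")
assert _square_braces.match("ReconstructedParticles/ReconstructedParticles.covMatrix[10]")
assert not _all_collections.match("EFlowPhoton_1")  # no '/', so a bare singleton name
```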
+
+
+class FCCSchema(BaseSchema):
+    """
+    Schema-builder for Future Circular Collider pregenerated samples.
+    https://fcc-physics-events.web.cern.ch/
+
+    This version is tested on the Spring2021 p8_ee_ZH_ecm240 sample
+    https://fcc-physics-events.web.cern.ch/FCCee/delphes/spring2021/idea/
+    /eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_ZH_ecm240/events_082532938.root
+    The FCC samples follow the edm4hep structure.
+    https://edm4hep.web.cern.ch/index.html
+
+    FCCSchema inherits from BaseSchema and returns all the collections as a base.NanoEvents record.
+    - Branches with vector components like
+      "ReconstructedParticles/ReconstructedParticles.referencePoint.x",
+      "ReconstructedParticles/ReconstructedParticles.referencePoint.y" and
+      "ReconstructedParticles/ReconstructedParticles.referencePoint.z"
+      are zipped together to form the "ReconstructedParticles/ReconstructedParticles.referencePoint"
+      subcollection (see FCCSchema._create_subcollections).
+      This is done for all such branches except the momentum.[x,y,z] branches.
+    - Branches like
+      "ReconstructedParticles/ReconstructedParticles.energy",
+      "ReconstructedParticles/ReconstructedParticles.charge",
+      "ReconstructedParticles/ReconstructedParticles.mass",
+      "ReconstructedParticles/ReconstructedParticles.referencePoint" (subcollection containing x, y, z),
+      etc.
+      are zipped together to form the "ReconstructedParticles" collection
+      (see FCCSchema._main_collections).
+      The momentum.[x,y,z] branches, along with the energy branch (if available), are used to
+      provide the vector.LorentzVector behavior to the collection.
+    - Branches with ObjectIDs (indices into another collection), for example
+      "ReconstructedParticles#0/ReconstructedParticles#0.index"
+      and
+      "ReconstructedParticles#0/ReconstructedParticles#0.collectionID",
+      are zipped together to form the "ReconstructedParticlesidx0" collection
+      (see FCCSchema._idx_collections).
+    - Branches with a trailing underscore followed by an integer, for example
+      "EFlowTrack_1/EFlowTrack_1.location",
+      "EFlowTrack_1/EFlowTrack_1.D0",
+      "EFlowTrack_1/EFlowTrack_1.phi",
+      etc.
+      are zipped together to form the "EFlowTrack_1" collection
+      (see FCCSchema._trailing_underscore_collections).
+    - Other unknown, empty, or faulty branches are handled by FCCSchema._unknown_collections
+      on a case-by-case basis.
+    """
+
+    __dask_capable__ = True
+
+    mixins_dictionary = {
+        "Electron": "ReconstructedParticle",
+        "Muon": "ReconstructedParticle",
+        "AllMuon": "ReconstructedParticle",
+        "EFlowNeutralHadron": "Cluster",
+        "Particle": "MCParticle",
+        "Photon": "ReconstructedParticle",
+        "ReconstructedParticles": "ReconstructedParticle",
+        "EFlowPhoton": "Cluster",
+        "MCRecoAssociations": "RecoMCParticleLink",
+        "MissingET": "ReconstructedParticle",
+        "ParticleIDs": "ParticleID",
+        "Jet": "ReconstructedParticle",
+        "EFlowTrack": "Track",
+        "*idx": "ObjectID",
+    }
+
+    _momentum_fields_e = {
+        "energy": "E",
+        "momentum.x": "px",
+        "momentum.y": "py",
+        "momentum.z": "pz",
+    }
+    _replacement = {**_momentum_fields_e}
+
+    _threevec_fields = {
+        "position": ["position.x", "position.y", "position.z"],
+        "directionError": ["directionError.x", "directionError.y", "directionError.z"],
+        "vertex": ["vertex.x", "vertex.y", "vertex.z"],
+        "endpoint": ["endpoint.x", "endpoint.y", "endpoint.z"],
+        "referencePoint": ["referencePoint.x", "referencePoint.y", "referencePoint.z"],
+        "momentumAtEndpoint": [
+            "momentumAtEndpoint.x",
+            "momentumAtEndpoint.y",
+            "momentumAtEndpoint.z",
+        ],
+        "spin": ["spin.x", "spin.y", "spin.z"],
+    }
+
+    # Cross-references : format: { <idx branch> : <target collection> }
+    all_cross_references = {
+        "MCRecoAssociations#1.index": "Particle",  # MC to Reco connection
+        "MCRecoAssociations#0.index": "ReconstructedParticles",  # Reco to MC connection
+        "Muon#0.index": "ReconstructedParticles",  # Matched Muons
+        "Electron#0.index": "ReconstructedParticles",  # Matched Electrons
+    }
+
+    mc_relations = {"parents": "Particle#0.index", "daughters": "Particle#1.index"}
+
+    def __init__(self, base_form, version="latest"):
+        super().__init__(base_form)
+        self._form["fields"], self._form["contents"] = self._build_collections(
+            self._form["fields"], self._form["contents"]
+        )
+
+    def _idx_collections(self, output, branch_forms, all_collections):
+        """
+        Groups the hash-tagged '#' branches into 'idx' collections (the string 'idx' replaces
+        '#' for a Python-friendly name)
+        - In general, there can be many such idx collections.
+        - Each idx collection has two branches --> index and collectionID
+        - They define the indices into another collection (original type: podio::ObjectID)
+        - The ObjectID mixin class is assigned to all idx collections
+        - Example:
+            "ReconstructedParticles#0/ReconstructedParticles#0.index"
+            and
+            "ReconstructedParticles#0/ReconstructedParticles#0.collectionID"
+            are zipped together to form the "ReconstructedParticlesidx0" collection.
+        Note: Since the individual idx collections (ReconstructedParticlesidx0,
+        ReconstructedParticlesidx1, etc.) do not have the same dimensions, I could not zip
+        them together into a parent branch named ReconstructedParticlesidx.
+        """
+        field_names = list(branch_forms.keys())
+
+        # Extract all the idx collection names
+        # Example: "Jet#0/Jet#0.index" --> "Jet#0"
+        idxs = {k.split("/")[0] for k in all_collections if _idxs.match(k)}
+
+        # Remove grouping branches which are generated from BaseSchema and contain no usable info
+        # Example: Along with "Jet#0/Jet#0.index" and "Jet#0/Jet#0.collectionID", BaseSchema may
+        # produce a "Jet#0" grouping branch. It is an empty branch and needs to be removed.
+        _grouping_branches = {
+            k: branch_forms.pop(k)
+            for k in field_names
+            if _idxs.match(k) and "/" not in k
+        }
+
+        for idx in idxs:
+            # Create a Python-friendly name
+            # Example: Jet#0 --> Jetidx0
+            repl = idx.replace("#", "idx")
+
+            # The content of the collection
+            # Example output: {'index': ..., 'collectionID': ...}
+            idx_content = {
+                k[2 * len(idx) + 2 :]: branch_forms.pop(k)
+                for k in field_names
+                if k.startswith(f"{idx}/{idx}.")
+            }
+
+            # Zip the index and collectionID and assign the collection name repl; Example: Jetidx0
+            output[repl] = zip_forms(
+                sort_dict(idx_content),
+                idx,
+                self.mixins_dictionary.get("*idx", "NanoCollection"),
+            )
+            output[repl]["content"]["parameters"].update(
+                {
+                    "collection_name": repl,
+                }
+            )
+
+        # The special MCRecoAssociationsidx indexes should be treated differently;
+        # they have the same dimensions.
+        # Prepare them so they can later be joined into the 'MCRecoAssociations' collection
+        # in the FCCSchema._build_collections function.
+        # Also see: https://github.com/HEP-FCC/FCCAnalyses/tree/master/examples/basics#association-between-recoparticles-and-montecarloparticles
+        if ("MCRecoAssociationsidx0" in output.keys()) and (
+            "MCRecoAssociationsidx1" in output.keys()
+        ):
+            branch_forms["MCRecoAssociations/MCRecoAssociations.reco"] = output.pop(
+                "MCRecoAssociationsidx0"
+            )
+            branch_forms["MCRecoAssociations/MCRecoAssociations.mc"] = output.pop(
+                "MCRecoAssociationsidx1"
+            )
+
+        return output, branch_forms
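The `k[2 * len(idx) + 2 :]` slice used above (and again in the methods below) simply strips the `name/name.` prefix from a branch name; a tiny illustrative check (editorial, not part of the diff):

```python
name = "ReconstructedParticles"
branch = f"{name}/{name}.energy"
# prefix is name + "/" + name + "." == 2 * len(name) + 2 characters
assert branch[2 * len(name) + 2 :] == "energy"
```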
+ """ + field_names = list(branch_forms.keys()) + + # Extract the regular collection names + # Example collections: {'Jet', 'ReconstructedParticles', 'MCRecoAssociations', ...} + collections = { + collection_name + for collection_name in all_collections + if not _idxs.match(collection_name) + and not _trailing_under.match(collection_name) + } + + # Zip the collections + # Example: 'ReconstructedParticles' + for name in collections: + # Get the mixin class for the collection, if available, otherwise "NanoCollection" by default + mixin = self.mixins_dictionary.get(name, "NanoCollection") + + # Content to be zipped together + # Example collection_content: {'type':, 'energy':, 'momentum.x': ...} + collection_content = { + k[(2 * len(name) + 2) :]: branch_forms.pop(k) + for k in field_names + if k.startswith(f"{name}/{name}.") + } + + # Change the name of momentum fields, to facilitate the vector.LorentzVector behavior + # 'energy' --> 'E' + # 'momentum.x' --> 'px' + # 'momentum.y' --> 'py' + # 'momentum.z' --> 'pz' + collection_content = { + (k.replace(k, self._replacement[k]) if k in self._replacement else k): v + for k, v in collection_content.items() + } + + output[name] = zip_forms(sort_dict(collection_content), name, mixin) + # Update some metadata + output[name]["content"]["parameters"].update( + { + "collection_name": name, + } + ) + + # Remove grouping branches which are generated from BaseSchema and contain no usable info + # Example: Along with the "Jet/Jet.type","Jet/Jet.energy",etc., BaseSchema may produce "Jet" grouping branch. + # It is an empty branch and needs to be removed + if name in field_names: + branch_forms.pop(name) + + return output, branch_forms + + def _trailing_underscore_collections(self, output, branch_forms, all_collections): + """ + Create collection with branches have a trailing underscore followed by a integer '*_[0-9]' + Example: + "EFlowTrack_1/EFlowTrack_1.location", + "EFlowTrack_1/EFlowTrack_1.D0", + "EFlowTrack_1/EFlowTrack_1.phi", + ... + etc + are zipped together to form the "EFlowTrack_1" collection + Note: - I do not understand how these branches are different from other branches except + for the obvious naming difference. + - I found most such branches to be empty..., at least in the test root file. + """ + # Gather all the collection names with trailing underscore followed by an integer + # Example: EFlowTrack_1, ParticleIDs_0, EFlowPhoton_0, EFlowPhoton_1, etc. 
+
+    def _trailing_underscore_collections(self, output, branch_forms, all_collections):
+        """
+        Create collections from branches that have a trailing underscore followed by an
+        integer ('*_[0-9]').
+        Example:
+            "EFlowTrack_1/EFlowTrack_1.location",
+            "EFlowTrack_1/EFlowTrack_1.D0",
+            "EFlowTrack_1/EFlowTrack_1.phi",
+            etc.
+            are zipped together to form the "EFlowTrack_1" collection.
+        Note: - I do not understand how these branches differ from the other branches, apart
+                from the obvious naming difference.
+              - I found most such branches to be empty, at least in the test ROOT file.
+        """
+        # Gather all the collection names with a trailing underscore followed by an integer
+        # Example: EFlowTrack_1, ParticleIDs_0, EFlowPhoton_0, EFlowPhoton_1, etc.
+        collections = {
+            collection_name
+            for collection_name in all_collections
+            if _trailing_under.match(collection_name)
+        }
+
+        # Collection names that end in an underscore followed by an integer but do not
+        # have any associated branches with '/', signifying that those collection names
+        # are actual singleton branches
+        singletons = {
+            collection_name
+            for collection_name in branch_forms.keys()
+            if _trailing_under.match(collection_name)
+            and not _all_collections.match(collection_name)
+        }
+
+        # Zip branches of a collection that are not singletons
+        for name in collections:
+            mixin = self.mixins_dictionary.get(name, "NanoCollection")
+
+            # Contents to be zipped
+            # Example content: {'type': ..., 'chi2': ..., 'ndf': ..., ...}
+            field_names = list(branch_forms.keys())
+            content = {
+                k[(2 * len(name) + 2) :]: branch_forms.pop(k)
+                for k in field_names
+                if k.startswith(f"{name}/{name}.")
+            }
+
+            output[name] = zip_forms(sort_dict(content), name, mixin)
+            # Update some metadata
+            output[name]["content"]["parameters"].update(
+                {
+                    "collection_name": name,
+                }
+            )
+
+        # Singleton branches can be assigned directly without zipping
+        for name in singletons:
+            output[name] = branch_forms.pop(name)
+
+        return output, branch_forms
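For example, given the two branch names below (both naming patterns occur in the bundled test samples), the set comprehensions above classify them as follows (illustrative, editorial, not part of the diff):

```python
import re

_trailing_under = re.compile(r".*_[0-9]")
branches = ["ParticleIDs_0/ParticleIDs_0.type", "EFlowTrack_2"]

collections = {b.split("/")[0] for b in branches if "/" in b and _trailing_under.match(b.split("/")[0])}
singletons = {b for b in branches if "/" not in b and _trailing_under.match(b)}
assert collections == {"ParticleIDs_0"} and singletons == {"EFlowTrack_2"}
```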
+
+    def _unknown_collections(self, output, branch_forms, all_collections):
+        """
+        Process all the unknown, empty, or faulty branches that remain
+        after creating all the other collections.
+        Should be called only after creating all the other relevant collections.
+
+        Note: It is not a neat implementation and needs more testing.
+        """
+        unlisted = copy.deepcopy(branch_forms)
+        for name, content in unlisted.items():
+            if content["class"] == "ListOffsetArray":
+                if content["content"]["class"] == "RecordArray":
+                    # Remove empty branches
+                    if len(content["content"]["fields"]) == 0:
+                        branch_forms.pop(name)
+                        continue
+                elif content["content"]["class"] == "RecordArray":
+                    # Remove empty branches
+                    if len(content["contents"]) == 0:
+                        continue
+                elif content["class"] == "RecordArray":
+                    # Remove empty branches
+                    if len(content["contents"]) == 0:
+                        continue
+                else:
+                    # If the branch is not empty, try to make a collection
+                    # assuming good behavior of the branch
+                    # Note: It's unlikely that such a branch exists
+
+                    # Extract the collection name from the branch
+                    record_name = name.split("/")[0]
+
+                    # Contents to be zipped
+                    contents = {
+                        k[2 * len(record_name) + 2 :]: branch_forms.pop(k)
+                        for k in unlisted.keys()
+                        if k.startswith(record_name + "/")
+                    }
+                    output[record_name] = zip_forms(
+                        sort_dict(contents),
+                        record_name,
+                        self.mixins_dictionary.get(record_name, "NanoCollection"),
+                    )
+            # If a branch is non-empty and is one of its kind (i.e. has no other associated
+            # branch), call it a singleton and assign it directly to the output
+            else:
+                output[name] = content
+
+        return output, branch_forms
+
+    def _create_subcollections(self, branch_forms, all_collections):
+        """
+        Creates 3-vectors,
+        zips _begin and _end branches and creates begin_end_counts and global indexes
+        for MC parents and daughters,
+        and zips colorFlow.a and colorFlow.b branches.
+        (Does not zip the momentum fields that are required for
+        the overall LorentzVector behavior of a collection.)
+        """
+        field_names = list(branch_forms.keys())
+
+        # Replace square braces in a name for a Python-friendly name; Example: covMatrix[n] --> covMatrix_n_
+        for name in field_names:
+            if _square_braces.match(name):
+                new_name = name.replace("[", "_")
+                new_name = new_name.replace("]", "_")
+                branch_forms[new_name] = branch_forms.pop(name)
+
+        # Zip _begin and _end branches
+        # Example: 'Jet/Jet.clusters_begin', 'Jet/Jet.clusters_end' --> 'Jet/Jet.clusters'
+        begin_end_collection = set({})
+        for fullname in field_names:
+            if fullname.endswith("_begin"):
+                begin_end_collection.add(fullname.split("_begin")[0])
+            elif fullname.endswith("_end"):
+                begin_end_collection.add(fullname.split("_end")[0])
+        for name in begin_end_collection:
+            begin_end_content = {
+                k[len(name) + 1 :]: branch_forms.pop(k)
+                for k in field_names
+                if k.startswith(name)
+            }
+            # Get the offset for this collection
+            offset_form = {
+                "class": "NumpyArray",
+                "itemsize": 8,
+                "format": "i",
+                "primitive": "int64",
+                "form_key": concat(
+                    begin_end_content[list(begin_end_content.keys())[0]]["form_key"],
+                    "!offsets",
+                ),
+            }
+
+            # Pick up the begin and end branches
+            begin = [
+                begin_end_content[k]
+                for k in begin_end_content.keys()
+                if k.endswith("begin")
+            ]
+            end = [
+                begin_end_content[k]
+                for k in begin_end_content.keys()
+                if k.endswith("end")
+            ]
+
+            # Create counts from begin and end by subtracting them
+            counts_content = {
+                "begin_end_counts": transforms.begin_and_end_to_counts_form(
+                    *begin, *end
+                )
+            }
+
+            # Generate the parents and daughters global indexers
+            ranges_content = {}
+            for key, target in self.mc_relations.items():
+                col_name = target.split(".")[0]
+                if name.endswith(key):
+                    range_name = f"{col_name.replace('#','idx')}_ranges"
+                    ranges_content[range_name + "G"] = transforms.index_range_form(
+                        *begin, *end, branch_forms[f"{col_name}/{target}"]
+                    )
+
+            to_zip = {**begin_end_content, **counts_content, **ranges_content}
+
+            branch_forms[name] = zip_forms(sort_dict(to_zip), name, offsets=offset_form)
+
+        # Zip colorFlow.a and colorFlow.b branches
+        # Example: 'Particle/Particle.colorFlow.a', 'Particle/Particle.colorFlow.b' --> 'Particle/Particle.colorFlow'
+        color_collection = set({})
+        for name in field_names:
+            if name.endswith("colorFlow.a"):
+                color_collection.add(name.split(".a")[0])
+            elif name.endswith("colorFlow.b"):
+                color_collection.add(name.split(".b")[0])
+        for name in color_collection:
+            color_content = {
+                k[len(name) + 1 :]: branch_forms.pop(k)
+                for k in field_names
+                if k.startswith(name)
+            }
+            branch_forms[name] = zip_forms(sort_dict(color_content), name)
+
+        # Create three-vectors
+        # Example: 'Jet/Jet.referencePoint.x', 'Jet/Jet.referencePoint.y', 'Jet/Jet.referencePoint.z' --> 'Jet/Jet.referencePoint'
+        for name in all_collections:
+            for threevec_name, subfields in self._threevec_fields.items():
+                if all(
+                    f"{name}/{name}.{subfield}" in field_names for subfield in subfields
+                ):
+                    content = {
+                        "x": branch_forms.pop(f"{name}/{name}.{threevec_name}.x"),
+                        "y": branch_forms.pop(f"{name}/{name}.{threevec_name}.y"),
+                        "z": branch_forms.pop(f"{name}/{name}.{threevec_name}.z"),
+                    }
+                    branch_forms[f"{name}/{name}.{threevec_name}"] = zip_forms(
+                        sort_dict(content), threevec_name, "ThreeVector"
+                    )
+        return branch_forms
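To make the begin/end bookkeeping concrete: the daughters (or parents) of particle `j` in an event occupy `target[begin[j]:end[j]]` in the flat index branch. Below is a small pure-awkward model of what `begin_and_end_to_counts` and `index_range` compute, with made-up numbers (editorial illustration, not part of the diff):

```python
import awkward as ak

begin = ak.Array([[0, 2]])       # first daughter slot per particle
end = ak.Array([[2, 3]])         # one-past-last daughter slot per particle
target = ak.Array([[5, 6, 7]])   # flat branch of daughter indices

counts = end - begin             # begin_and_end_to_counts -> [[2, 1]]
ranges = ak.Array([
    [tgt[b:e] for b, e in zip(bs, es)]
    for bs, es, tgt in zip(begin.tolist(), end.tolist(), target.tolist())
])                               # index_range (before globalisation) -> [[[5, 6], [7]]]
```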
branch_forms.pop(f"{name}/{name}.{threevec_name}.y"), + "z": branch_forms.pop(f"{name}/{name}.{threevec_name}.z"), + } + branch_forms[f"{name}/{name}.{threevec_name}"] = zip_forms( + sort_dict(content), threevec_name, "ThreeVector" + ) + return branch_forms + + def _global_indexers(self, branch_forms, all_collections): + """ + Create global indexers from cross-references + (except parent and daughter cross-references which are dealt in subcollection level) + """ + for cross_ref, target in self.all_cross_references.items(): + collection_name, index_name = cross_ref.split(".") + + # pick up the available fields from target collection to get an offset from + available_fields = [ + name + for name in branch_forms.keys() + if name.startswith(f"{target}/{target}.") + ] + + # By default the idxs have different shape at axis=1 in comparison to target + # So one needs to fill the empty spaces with -1 which could be removed later + compatible_index = transforms.grow_local_index_to_target_shape_form( + branch_forms[f"{collection_name}/{collection_name}.{index_name}"], + branch_forms[available_fields[0]], + ) + + # Pick up the offset from an available field + offset_form = { + "class": "NumpyArray", + "itemsize": 8, + "format": "i", + "primitive": "int64", + "form_key": concat( + *[ + branch_forms[available_fields[0]]["form_key"], + "!offsets", + ] + ), + } + + # Convert local indices to global indices + replaced_name = collection_name.replace("#", "idx") + branch_forms[f"{target}/{target}.{replaced_name}_{index_name}Global"] = ( + transforms.local2global_form(compatible_index, offset_form) + ) + + return branch_forms + + def _build_collections(self, field_names, input_contents): + """ + Builds all the collections with the necessary behaviors defined in the mixins dictionary + """ + branch_forms = {k: v for k, v in zip(field_names, input_contents)} + + # All collection names + # Example: ReconstructedParticles + all_collections = { + collection_name.split("/")[0] + for collection_name in field_names + if _all_collections.match(collection_name) + } + + output = {} + + # Create subcollections before creating collections + # Example: Jet.referencePoint.x, Jet.referencePoint.y, Jet.referencePoint.z --> Jet.referencePoint + branch_forms = self._create_subcollections(branch_forms, all_collections) + + # Create Global Indexers for all cross references + branch_forms = self._global_indexers(branch_forms, all_collections) + + # Process the Hash-Tagged '#' branches + output, branch_forms = self._idx_collections( + output, branch_forms, all_collections + ) + + # Process the trailing underscore followed by an integer branches '*_[0-9]' + output, branch_forms = self._trailing_underscore_collections( + output, branch_forms, all_collections + ) + + # Process all the other regular branches + output, branch_forms = self._main_collections( + output, branch_forms, all_collections + ) + + # Process all the other unknown/faulty/empty branches + output, branch_forms = self._unknown_collections( + output, branch_forms, all_collections + ) + + # sort the output by key + output = sort_dict(output) + + return output.keys(), output.values() + + @classmethod + def behavior(cls): + """Behaviors necessary to implement this schema""" + from coffea.nanoevents.methods import base, fcc + + behavior = {} + behavior.update(base.behavior) + behavior.update(vector.behavior) + behavior.update(fcc.behavior) + return behavior + + +class FCC: + """ + Class to choose the required variant of FCCSchema + Example: from coffea.nanoevents import 
+
+
+class FCC:
+    """
+    Class to choose the required variant of FCCSchema
+    Example:
+        from coffea.nanoevents import FCC
+        FCC.get_schema(version="latest")
+
+    Note: For now, only one variant is available, called the "latest" version, which points
+    to the fcc.FCCSchema class. This schema was written with the Spring2021 pre-generated
+    samples in mind. It's also tested with Winter2023 samples, using the
+    uproot_options={"filter_name": lambda x: "PARAMETERS" not in x} parameter when loading
+    the fileset. This removes the "PARAMETERS" branch, which, as far as I know, is unreadable
+    in uproot.
+    More schema variants could be added later.
+    """
+
+    def __init__(self, version="latest"):
+        self._version = version
+
+    @classmethod
+    def get_schema(cls, version="latest"):
+        if version == "latest":
+            return FCCSchema
+        else:
+            pass
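In practice, the Winter2023 workaround mentioned in the docstring looks like this, mirroring the Winter2023 test at the bottom of this diff (editorial illustration, not part of the diff):

```python
from coffea.nanoevents import FCC, NanoEventsFactory

events = NanoEventsFactory.from_root(
    {"tests/samples/test_FCC_Winter2023.root": "events"},
    schemaclass=FCC.get_schema(version="latest"),
    delayed=True,
    # drop the "PARAMETERS" branch, which uproot cannot read
    uproot_options={"filter_name": lambda x: "PARAMETERS" not in x},
).events()
print(events.ReconstructedParticles.eta.compute())
```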
diff --git a/src/coffea/nanoevents/transforms.py b/src/coffea/nanoevents/transforms.py
index 305c5232f..74f122dc5 100644
--- a/src/coffea/nanoevents/transforms.py
+++ b/src/coffea/nanoevents/transforms.py
@@ -142,6 +142,164 @@ def local2global(stack):
     stack.append(out)
 
 
+@numba.njit
+def _grow_local_index_to_target_shape_kernel(index, all_index, builder):
+    for i in range(len(all_index)):
+        builder.begin_list()
+        event_all_index = all_index[i]
+        event_index = index[i]
+        for all_index_value in event_all_index:
+            if all_index_value in event_index:
+                builder.integer(all_index_value)
+            else:
+                builder.integer(-1)
+        builder.end_list()
+
+    return builder
+
+
+def grow_local_index_to_target_shape_form(index, target):
+    if not index["class"].startswith("ListOffset"):
+        raise RuntimeError
+    if not target["class"].startswith("ListOffset"):
+        raise RuntimeError
+    form = copy.deepcopy(index)
+    form["content"]["form_key"] = concat(
+        index["content"]["form_key"],
+        target["content"][
+            "form_key"
+        ],  # , "!grow_local_index_to_target_shape", "!content"
+    )
+    form["form_key"] = concat(
+        index["form_key"], target["form_key"], "!grow_local_index_to_target_shape"
+    )
+    form["content"]["itemsize"] = 8
+    form["content"]["primitive"] = "int64"
+    return form
+
+
+def grow_local_index_to_target_shape(stack):
+    """Grow the local index to the size of the target by replacing unreferenced indices with -1
+
+    Signature: index,target,!grow_local_index_to_target_shape
+    Outputs a content array with the same shape as the target content
+    """
+    target = stack.pop()
+    index = stack.pop()
+    all_index = awkward.local_index(target, axis=1)
+
+    useable_index = awkward.Array(
+        _grow_local_index_to_target_shape_kernel(
+            awkward.Array(index), awkward.Array(all_index), awkward.ArrayBuilder()
+        ).snapshot()
+    )
+
+    stack.append(useable_index)
+
+
+def begin_and_end_to_counts_form(begin, end):
+    if not begin["class"].startswith("ListOffset"):
+        raise RuntimeError
+    if not end["class"].startswith("ListOffset"):
+        raise RuntimeError
+
+    form = copy.deepcopy(begin)
+
+    key = concat(begin["form_key"], end["form_key"], "!begin_and_end_to_counts")
+    form["content"]["form_key"] = concat(key, "!content")
+    return form
+
+
+def begin_and_end_to_counts(stack):
+    "Calculate the number of entries from begin to end (end - begin)"
+    end = stack.pop()
+    begin = stack.pop()
+
+    stack.append(end - begin)
+
+
+@numba.njit
+def _index_range_kernel(begin_end, target, builder):
+    for ev in range(len(begin_end)):
+        builder.begin_list()
+        for j in range(len(begin_end[ev])):
+            builder.begin_list()
+            for k in range(begin_end[ev][j][0], begin_end[ev][j][1]):
+                # builder.integer(k)
+                builder.integer(target[ev][k])
+            builder.end_list()
+        builder.end_list()
+    return builder
+
+
+def index_range_form(begin, end, target):
+    if not begin["class"].startswith("ListOffset"):
+        raise RuntimeError
+    if not end["class"].startswith("ListOffset"):
+        raise RuntimeError
+    if not target["class"].startswith("ListOffset"):
+        raise RuntimeError
+    form = {
+        "class": "ListOffsetArray",
+        "offsets": "i64",
+        "content": {
+            "class": "ListOffsetArray",
+            "offsets": "i64",
+            "content": {
+                "class": "NumpyArray",
+                "itemsize": 8,
+                "format": "i",
+                "primitive": "int64",
+                "form_key": concat(
+                    begin["form_key"],
+                    end["form_key"],
+                    target["form_key"],
+                    "!index_range",
+                    "!content",
+                ),
+            },
+            "form_key": concat(
+                begin["form_key"], end["form_key"], target["form_key"], "!index_range"
+            ),
+        },
+        "form_key": concat(begin["form_key"], end["form_key"], target["form_key"]),
+    }
+    return form
+
+
+def index_range(stack):
+    """
+    Takes in begin and end arrays and a target array. The process:
+    - Get the ranges (doubly nested) from begin to end
+    - Using those index ranges, pick up elements from the target array (which are also indices)
+    - Convert the resulting doubly nested indices into doubly nested global indices
+      (actually flattened across axis=1, because the unflattening is done in zip_forms)
+    """
+    target = stack.pop()
+    end = stack.pop()
+    begin = stack.pop()
+    begin_end = awkward.concatenate(
+        (begin[:, :, numpy.newaxis], end[:, :, numpy.newaxis]), axis=2
+    )
+
+    out = awkward.Array(
+        _index_range_kernel(begin_end, target, awkward.ArrayBuilder()).snapshot()
+    )
+
+    # Convert to global index
+    counts2 = awkward.flatten(awkward.num(out, axis=2), axis=1)
+
+    out = awkward.flatten(out, axis=2)
+    stack2 = [out, begin.layout.offsets.data]
+    local2global(stack2)
+    out = stack2.pop()
+
+    out = awkward.unflatten(out, counts2, axis=0)
+
+    stack.append(out)
+
+
 def counts2nestedindex_form(local_counts, target_offsets):
     if not local_counts["class"].startswith("ListOffset"):
         raise RuntimeError
diff --git a/tests/samples/test_FCC_Spring2021.root b/tests/samples/test_FCC_Spring2021.root
new file mode 100644
index 000000000..290247e39
Binary files /dev/null and b/tests/samples/test_FCC_Spring2021.root differ
diff --git a/tests/samples/test_FCC_Winter2023.root b/tests/samples/test_FCC_Winter2023.root
new file mode 100644
index 000000000..b309ece37
Binary files /dev/null and b/tests/samples/test_FCC_Winter2023.root differ
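A compact pure-Python model of what the `_grow_local_index_to_target_shape_kernel` added in transforms.py above produces, with made-up numbers (editorial illustration, not part of the diff):

```python
import awkward as ak

index = ak.Array([[0, 2]])                # referenced slots in the target
target = ak.Array([[10.0, 20.0, 30.0]])   # any branch of the target collection

grown = ak.Array([
    [i if i in set(idx) else -1 for i in range(len(tgt))]
    for idx, tgt in zip(index.tolist(), target.tolist())
])
assert grown.tolist() == [[0, -1, 2]]     # same shape as target, -1 where unreferenced
```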
+ "EFlowPhotonidx1", + "EFlowPhotonidx2", + "EFlowTrack", + "EFlowTrack_0", + "EFlowTrack_1", + "EFlowTrackidx0", + "EFlowTrackidx1", + "Electronidx0", + "Jet", + "Jetidx0", + "Jetidx1", + "Jetidx2", + "Jetidx3", + "Jetidx4", + "Jetidx5", + "MCRecoAssociations", + "MissingET", + "MissingETidx0", + "MissingETidx1", + "MissingETidx2", + "MissingETidx3", + "MissingETidx4", + "MissingETidx5", + "Muonidx0", + "Particle", + "ParticleIDs", + "ParticleIDs_0", + "Particleidx0", + "Particleidx1", + "Photonidx0", + "ReconstructedParticles", + "ReconstructedParticlesidx0", + "ReconstructedParticlesidx1", + "ReconstructedParticlesidx2", + "ReconstructedParticlesidx3", + "ReconstructedParticlesidx4", + "ReconstructedParticlesidx5", + ], +) +def test_field_is_present(eager_events, delayed_events, field): + eager_fields = eager_events.fields + delayed_fields = delayed_events.fields + assert field in eager_fields + assert field in delayed_fields + + +def test_lorentz_behavior(delayed_events): + assert delayed_events.Particle.behavior["LorentzVector"] == LorentzVectorRecord + assert ( + delayed_events.ReconstructedParticles.behavior["LorentzVector"] + == LorentzVectorRecord + ) + assert isinstance(delayed_events.Particle.eta.compute(), awkward.highlevel.Array) + assert isinstance( + delayed_events.ReconstructedParticles.eta.compute(), awkward.highlevel.Array + ) + + +def test_MC_daughters(delayed_events): + d = delayed_events.Particle.get_daughters.compute() + assert isinstance(d, awkward.highlevel.Array) + assert d.layout.branch_depth[1] == 3 + + +def test_MC_parents(delayed_events): + p = delayed_events.Particle.get_parents.compute() + assert isinstance(p, awkward.highlevel.Array) + assert p.layout.branch_depth[1] == 3 + + +def test_MCRecoAssociations(delayed_events): + mr = delayed_events.MCRecoAssociations.reco_mc.compute() + assert isinstance(mr, awkward.highlevel.Array) + assert mr.layout.branch_depth[1] == 3 + + +def test_KaonParent_to_PionDaughters_Loop(eager_events): + """Test to thoroughly check get_parents and get_daughters + - We look at the decay of Kaon $K_S^0 \\rightarrow pions $ + - Two decay modes: + $$ K_S^0 \\rightarrow \\pi^0 + \\pi^0 $$ + $$ K_S^0 \\rightarrow \\pi^+ + \\pi^- $$ + """ + PDG_IDs = {"K(S)0": 310, "pi+": 211, "pi-": -211, "pi0": 111} + mc = eager_events.Particle + + # Find Single K(S)0 + K_S0_cut = mc.PDG == PDG_IDs["K(S)0"] + K_S0 = mc[K_S0_cut] + single_K_S0_cut = awkward.num(K_S0, axis=1) == 1 + single_K_S0 = K_S0[single_K_S0_cut] + + # Daughter Test + # The Kaon K(S)0 must have only pions as the daughters + + # Find the daughters of Single K(S)0 + daughters_of_K_S0 = single_K_S0.get_daughters + + # Are these valid daughter particles (pi+ or pi- or pi0)? + flat_PDG = awkward.ravel(daughters_of_K_S0.PDG) + is_pi_0 = flat_PDG == PDG_IDs["pi0"] + is_pi_plus = flat_PDG == PDG_IDs["pi+"] + is_pi_minus = flat_PDG == PDG_IDs["pi-"] + names_valid = awkward.all(is_pi_0 | is_pi_plus | is_pi_minus) + assert names_valid + + # Do the daughters have valid charges (-1 or 0)? + nested_bool = awkward.prod(daughters_of_K_S0.charge, axis=2) <= 0 + charge_valid = awkward.all(awkward.ravel(nested_bool)) + assert charge_valid + + # Parent Test + # These pion daughters, just generated, must point back to the single parent K(S)0 + + p = daughters_of_K_S0.get_parents + + # Do the daughters have a single parent? 
diff --git a/tests/test_nanoevents_fcc_winter2023.py b/tests/test_nanoevents_fcc_winter2023.py
new file mode 100644
index 000000000..58ab4c3df
--- /dev/null
+++ b/tests/test_nanoevents_fcc_winter2023.py
@@ -0,0 +1,174 @@
+import os
+
+import awkward
+import pytest
+
+from coffea.nanoevents import FCC, NanoEventsFactory
+from coffea.nanoevents.methods.vector import LorentzVectorRecord
+
+
+def _events(**kwargs):
+    # Path to original sample: /eos/experiment/fcc/ee/generation/DelphesEvents/winter2023/IDEA/wzp6_ee_mumuH_Hbb_ecm240/events_159112833.root
+    path = os.path.abspath("tests/samples/test_FCC_Winter2023.root")
+    factory = NanoEventsFactory.from_root(
+        {path: "events"}, schemaclass=FCC.get_schema(version="latest"), **kwargs
+    )
+    return factory.events()
+
+
+@pytest.fixture(scope="module")
+def eager_events():
+    return _events(
+        delayed=False, uproot_options={"filter_name": lambda x: "PARAMETERS" not in x}
+    )
+
+
+@pytest.fixture(scope="module")
+def delayed_events():
+    return _events(
+        delayed=True, uproot_options={"filter_name": lambda x: "PARAMETERS" not in x}
+    )
+
+
+@pytest.mark.parametrize(
+    "field",
+    [
+        "CalorimeterHits",
+        "EFlowNeutralHadron",
+        "EFlowNeutralHadron_0",
+        "EFlowNeutralHadron_1",
+        "EFlowNeutralHadronidx0",
+        "EFlowNeutralHadronidx1",
+        "EFlowNeutralHadronidx2",
+        "EFlowPhoton",
+        "EFlowPhoton_0",
+        "EFlowPhoton_1",
+        "EFlowPhotonidx0",
+        "EFlowPhotonidx1",
+        "EFlowPhotonidx2",
+        "EFlowTrack",
+        "EFlowTrack_0",
+        "EFlowTrack_1",
+        "EFlowTrack_2",
+        "EFlowTrackidx0",
+        "EFlowTrackidx1",
+        "Electronidx0",
+        "Jet",
+        "Jetidx0",
+        "Jetidx1",
+        "Jetidx2",
+        "Jetidx3",
+        "Jetidx4",
+        "Jetidx5",
+        "MCRecoAssociations",
+        "MissingET",
+        "MissingETidx0",
+        "MissingETidx1",
+        "MissingETidx2",
+        "MissingETidx3",
+        "MissingETidx4",
+        "MissingETidx5",
+        "Muonidx0",
+        "Particle",
+        "ParticleIDs",
+        "ParticleIDs_0",
+        "Particleidx0",
+        "Particleidx1",
+        "Photonidx0",
+        "ReconstructedParticles",
+        "ReconstructedParticlesidx0",
+        "ReconstructedParticlesidx1",
+        "ReconstructedParticlesidx2",
+        "ReconstructedParticlesidx3",
+        "ReconstructedParticlesidx4",
+        "ReconstructedParticlesidx5",
+        "TrackerHits",
+        "TrackerHits_0",
+    ],
+)
+def test_field_is_present(eager_events, delayed_events, field):
+    eager_fields = eager_events.fields
+    delayed_fields = delayed_events.fields
+    assert field in eager_fields
+    assert field in delayed_fields
+
+
+def test_lorentz_behavior(delayed_events):
+    assert delayed_events.Particle.behavior["LorentzVector"] == LorentzVectorRecord
+    assert (
+        delayed_events.ReconstructedParticles.behavior["LorentzVector"]
+        == LorentzVectorRecord
+    )
+    assert isinstance(delayed_events.Particle.eta.compute(), awkward.highlevel.Array)
+    assert isinstance(
+        delayed_events.ReconstructedParticles.eta.compute(), awkward.highlevel.Array
+    )
+
+
+def test_MC_daughters(delayed_events):
+    d = delayed_events.Particle.get_daughters.compute()
+    assert isinstance(d, awkward.highlevel.Array)
+    assert d.layout.branch_depth[1] == 3
+
+
+def test_MC_parents(delayed_events):
+    p = delayed_events.Particle.get_parents.compute()
+    assert isinstance(p, awkward.highlevel.Array)
+    assert p.layout.branch_depth[1] == 3
+
+
+def test_MCRecoAssociations(delayed_events):
+    mr = delayed_events.MCRecoAssociations.reco_mc.compute()
+    assert isinstance(mr, awkward.highlevel.Array)
+    assert mr.layout.branch_depth[1] == 3
+
+
+def test_KaonParent_to_PionDaughters_Loop(eager_events):
+    """Test to thoroughly check get_parents and get_daughters
+    - We look at the decay of the kaon $K_S^0 \\rightarrow pions $
+    - Two decay modes:
+        $$ K_S^0 \\rightarrow \\pi^0 + \\pi^0 $$
+        $$ K_S^0 \\rightarrow \\pi^+ + \\pi^- $$
+    """
+    PDG_IDs = {"K(S)0": 310, "pi+": 211, "pi-": -211, "pi0": 111}
+    mc = eager_events.Particle
+
+    # Find Single K(S)0
+    K_S0_cut = mc.PDG == PDG_IDs["K(S)0"]
+    K_S0 = mc[K_S0_cut]
+    single_K_S0_cut = awkward.num(K_S0, axis=1) == 1
+    single_K_S0 = K_S0[single_K_S0_cut]
+
+    # Daughter Test
+    # The Kaon K(S)0 must have only pions as the daughters
+
+    # Find the daughters of Single K(S)0
+    daughters_of_K_S0 = single_K_S0.get_daughters
+
+    # Are these valid daughter particles (pi+ or pi- or pi0)?
+    flat_PDG = awkward.ravel(daughters_of_K_S0.PDG)
+    is_pi_0 = flat_PDG == PDG_IDs["pi0"]
+    is_pi_plus = flat_PDG == PDG_IDs["pi+"]
+    is_pi_minus = flat_PDG == PDG_IDs["pi-"]
+    names_valid = awkward.all(is_pi_0 | is_pi_plus | is_pi_minus)
+    assert names_valid
+
+    # Do the daughters have valid charges (-1 or 0)?
+    nested_bool = awkward.prod(daughters_of_K_S0.charge, axis=2) <= 0
+    charge_valid = awkward.all(awkward.ravel(nested_bool))
+    assert charge_valid
+
+    # Parent Test
+    # These pion daughters, just generated, must point back to the single parent K(S)0
+
+    p = daughters_of_K_S0.get_parents
+
+    # Do the daughters have a single parent?
+    nested_bool_daughter = awkward.num(p, axis=3) == 1
+    daughters_have_single_parent = awkward.all(awkward.ravel(nested_bool_daughter))
+    assert daughters_have_single_parent
+
+    # Is that parent K(S)0 ?
+    nested_bool_parent = p.PDG == PDG_IDs["K(S)0"]
+    daughters_have_K_S0_parent = awkward.all(awkward.ravel(nested_bool_parent))
+    assert daughters_have_K_S0_parent