Adding changes to edm4hep.py so that MCParticlesSkimmed are taken int…

…o the procedure. Added the class LorentzVectorM to vector.py so that vectors can be definedwith mass instead of energy.
scikit-hep · Jun 4, 2023 · 4fefeb5 · 4fefeb5
1 parent 09bb414
commit 4fefeb5
Show file tree

Hide file tree

Showing 2 changed files with 266 additions and 8 deletions.
diff --git a/src/coffea/nanoevents/methods/vector.py b/src/coffea/nanoevents/methods/vector.py
@@ -156,6 +156,7 @@ def add(self, other):
             "ThreeVector",
             "SphericalThreeVector",
             "LorentzVector",
+            "LorentzVectorM",
             "PtEtaPhiMLorentzVector",
             "PtEtaPhiELorentzVector",
         },
@@ -359,6 +360,7 @@ def add(self, other):
             "ThreeVector",
             "SphericalThreeVector",
             "LorentzVector",
+            "LorentzVectorM",
             "PtEtaPhiMLorentzVector",
             "PtEtaPhiELorentzVector",
         },
@@ -752,6 +754,247 @@ def nearest(
         return out
 
 
+@awkward.mixin_class(behavior)
+class LorentzVectorM(ThreeVector):
+    """A cartesian Lorentz vector specfied with mass instead of energy
+
+    A heavy emphasis towards a momentum vector interpretation is assumed.
+    (+, -, -, -) metric
+    This mixin class requires the parent class to provide items `x`, `y`, `z`, and `mass`.
+    """
+    @property
+    def t(self):
+        """Equivalent to `energy`"""
+        return numpy.sqrt(self.mass*self.mass+self.x*self.x+self.y*self.y+self.z*self.z)
+
+    @property
+    def energy(self):
+        """Alias for `t`"""
+        return self.t
+
+    @property
+    def eta(self):
+        r"""Pseudorapidity
+
+        :math:`-\ln[\tan(\theta/2)] = \text{arcsinh}(z/r)`
+        """
+        return numpy.arcsinh(self.z / self.r)
+
+    @awkward.mixin_class_method(numpy.absolute)
+    def absolute(self):
+        """Magnitude of this Lorentz vector
+
+        Alias for `mass`
+        """
+        return self.mass
+
+    @awkward.mixin_class_method(numpy.add, {"LorentzVectorM"})
+    def add(self, other):
+        """Add two vectors together elementwise using `x`, `y`, `z`, and `t` components"""
+        return awkward.zip(
+            {
+                "x": self.x + other.x,
+                "y": self.y + other.y,
+                "z": self.z + other.z,
+                "t": self.t + other.t,
+            },
+            with_name="LorentzVector",
+            behavior=self.behavior,
+        )
+
+    @awkward.mixin_class_method(numpy.subtract, {"LorentzVectorM"}, transpose=False)
+    def subtract(self, other):
+        """Subtract a vector from another elementwise using `x`, `y`, `z`, and `t` components"""
+        return awkward.zip(
+            {
+                "x": self.x - other.x,
+                "y": self.y - other.y,
+                "z": self.z - other.z,
+                "t": self.t - other.t,
+            },
+            with_name="LorentzVector",
+            behavior=self.behavior,
+        )
+
+    def sum(self, axis=-1):
+        """Sum an array of vectors elementwise using `x`, `y`, `z`, and `t` components"""
+        return awkward.zip(
+            {
+                "x": awkward.sum(self.x, axis=axis),
+                "y": awkward.sum(self.y, axis=axis),
+                "z": awkward.sum(self.z, axis=axis),
+                "t": awkward.sum(self.t, axis=axis),
+            },
+            with_name="LorentzVector",
+            behavior=self.behavior,
+        )
+
+    @awkward.mixin_class_method(numpy.multiply, {numbers.Number})
+    def multiply(self, other):
+        """Multiply this vector by a scalar elementwise using `x`, `y`, `z`, and `t` components"""
+        return awkward.zip(
+            {
+                "x": self.x * other,
+                "y": self.y * other,
+                "z": self.z * other,
+                "t": self.t * other,
+            },
+            with_name="LorentzVector",
+            behavior=self.behavior,
+        )
+
+    def delta_r2(self, other):
+        """Squared `delta_r`"""
+        deta = self.eta - other.eta
+        dphi = self.delta_phi(other)
+        return deta * deta + dphi * dphi
+
+    def delta_r(self, other):
+        r"""Distance between two Lorentz vectors in (eta,phi) plane
+
+        :math:`\sqrt{\Delta\eta^2 + \Delta\phi^2}`
+        """
+        return numpy.hypot(self.eta - other.eta, self.delta_phi(other))
+
+    @awkward.mixin_class_method(numpy.negative)
+    def negative(self):
+        """Returns the negative of the vector"""
+        return awkward.zip(
+            {"x": -self.x, "y": -self.y, "z": -self.z, "t": -self.t},
+            with_name="LorentzVector",
+            behavior=self.behavior,
+        )
+
+    @property
+    def pvec(self):
+        """The `x`, `y` and `z` components as a `ThreeVector`"""
+        return awkward.zip(
+            {"x": self.x, "y": self.y, "z": self.z},
+            with_name="ThreeVector",
+            behavior=self.behavior,
+        )
+
+    @property
+    def boostvec(self):
+        """The `x`, `y` and `z` components divided by `t` as a `ThreeVector`
+
+        This can be used for boosting. For cases where `|t| <= rho`, this
+        returns the unit vector.
+        """
+        rho = self.rho
+        t = self.t
+        with numpy.errstate(divide="ignore"):
+            out = self.pvec * awkward.where(
+                rho == 0, 0, awkward.where(abs(t) <= rho, 1 / rho, 1 / t)
+            )
+        return out
+
+    def boost(self, other):
+        """Apply a Lorentz boost given by the `ThreeVector` `other` and return it
+
+        Note that this follows the convention that, for example in order to boost
+        a vector into its own rest frame, one needs to use the negative of its `boostvec`
+        """
+        b2 = other.rho2
+        gamma = (1 - b2) ** (-0.5)
+        mask = b2 == 0
+        b2 = awkward.where(mask, 1, b2)
+        gamma2 = awkward.where(mask, 0, (gamma - 1) / b2)
+
+        bp = self.dot(other)
+        t = self.t
+        v = gamma2 * bp * other + t * gamma * other
+
+        return awkward.zip(
+            {
+                "x": self.x + v.x,
+                "y": self.y + v.y,
+                "z": self.z + v.z,
+                "t": gamma * (t + bp),
+            },
+            with_name="LorentzVector",
+            behavior=self.behavior,
+        )
+
+    def metric_table(
+        self,
+        other,
+        axis=1,
+        metric=lambda a, b: a.delta_r(b),
+        return_combinations=False,
+    ):
+        """Return a list of a metric evaluated between this object and another.
+
+        The two arrays should be broadcast-compatible on all axes other than the specified
+        axis, which will be used to form a cartesian product. If axis=None, broadcast arrays directly.
+        The return shape will be that of ``self`` with a new axis with shape of ``other`` appended
+        at the specified axis depths.
+
+        Parameters
+        ----------
+            other : awkward.Array
+                Another array with same shape in all but ``axis``
+            axis : int, optional
+                The axis to form the cartesian product (default 1). If None, the metric
+                is directly evaluated on the input arrays (i.e. they should broadcast)
+            metric : callable
+                A function of two arguments, returning a scalar. The default metric is `delta_r`.
+            return_combinations : bool
+                If True return the combinations of inputs as well as an unzipped tuple
+        """
+        if axis is None:
+            a, b = self, other
+        else:
+            a, b = awkward.unzip(
+                awkward.cartesian([self, other], axis=axis, nested=True)
+            )
+        mval = metric(a, b)
+        if return_combinations:
+            return mval, (a, b)
+        return mval
+
+    def nearest(
+        self,
+        other,
+        axis=1,
+        metric=lambda a, b: a.delta_r(b),
+        return_metric=False,
+        threshold=None,
+    ):
+        """Return nearest object to this one
+
+        Finds item in ``other`` satisfying ``min(metric(self, other))``.
+        The two arrays should be broadcast-compatible on all axes other than the specified
+        axis, which will be used to form a cartesian product. If axis=None, broadcast arrays directly.
+        The return shape will be that of ``self``.
+
+        Parameters
+        ----------
+            other : awkward.Array
+                Another array with same shape in all but ``axis``
+            axis : int, optional
+                The axis to form the cartesian product (default 1). If None, the metric
+                is directly evaluated on the input arrays (i.e. they should broadcast)
+            metric : callable
+                A function of two arguments, returning a scalar. The default metric is `delta_r`.
+            return_metric : bool, optional
+                If true, return both the closest object and its metric (default false)
+            threshold : Number, optional
+                If set, any objects with ``metric > threshold`` will be masked from the result
+        """
+        mval, (a, b) = self.metric_table(other, axis, metric, return_combinations=True)
+        if axis is None:
+            # NotImplementedError: awkward.firsts with axis=-1
+            axis = other.layout.purelist_depth - 2
+        mmin = awkward.argmin(mval, axis=axis + 1, keepdims=True)
+        out = awkward.firsts(b[mmin], axis=axis + 1)
+        metric = awkward.firsts(mval[mmin], axis=axis + 1)
+        if threshold is not None:
+            out = out.mask[metric <= threshold]
+        if return_metric:
+            return out, metric
+        return out
+
 @awkward.mixin_class(behavior)
 class PtEtaPhiMLorentzVector(LorentzVector, SphericalThreeVector):
     """A Lorentz vector using pseudorapidity and mass
@@ -999,6 +1242,7 @@ def negative(self):
     "ThreeVector",
     "SphericalThreeVector",
     "LorentzVector",
+    "LorentzVectorM",
     "PtEtaPhiMLorentzVector",
     "PtEtaPhiELorentzVector",
 ]
diff --git a/src/coffea/nanoevents/schemas/edm4hep.py b/src/coffea/nanoevents/schemas/edm4hep.py
@@ -41,7 +41,9 @@ class EDM4HEPSchema(BaseSchema):
 
     __dask_capable__ = True
 
-    _momentum_fields = {"energy", "momentum.x", "momentum.y", "momentum.z"}
+    # originally this was just _momentum_fields = {"energy", "momentum.x", "momentum.y", "momentum.z"}
+    _momentum_fields_e = {"energy", "momentum.x", "momentum.y", "momentum.z"}
+    _momentum_fields_m = {"mass", "momentum.x", "momentum.y", "momentum.z"} 
 
     def __init__(self, base_form, *args, **kwargs):
         super().__init__(base_form, *args, **kwargs)
@@ -64,32 +66,45 @@ def _build_collections(self, branch_forms):
             "Tracks": "LorentzVector"
         }
         for objname in composite_objects:
-            if objname != "PandoraPFOs":
+            if objname != "PandoraPFOs" and objname != "MCParticlesSkimmed": 
                 continue
-
             # grab the * from "objname/objname.*"
             components = {
                 k[2 * len(objname) + 2 :]
                 for k in branch_forms
                 if k.startswith(objname + "/")
             }
 
-            print(components)
+            print(components) 
 
-            if all(comp in components for comp in self._momentum_fields):
+            if all(comp in components for comp in self._momentum_fields_e):
                 form = zip_forms(
                     {
                         "x": branch_forms.pop(f"{objname}/{objname}.momentum.x"),
                         "y": branch_forms.pop(f"{objname}/{objname}.momentum.y"),
                         "z": branch_forms.pop(f"{objname}/{objname}.momentum.z"),
                         "t": branch_forms.pop(f"{objname}/{objname}.energy"),
                         "charge": branch_forms.pop(f"{objname}/{objname}.charge"),
-                        "pdgId": branch_forms.pop(f"{objname}/{objname}.type"),
+                        "pdgId": branch_forms.pop(f"{objname}/{objname}.type"),  
                     },
                     objname,
                     composite_behavior.get(objname, "LorentzVector"),
                 )
                 branch_forms[objname] = form
+            elif all(comp in components for comp in self._momentum_fields_m): 
+                form = zip_forms(
+                    {
+                        "x": branch_forms.pop(f"{objname}/{objname}.momentum.x"),
+                        "y": branch_forms.pop(f"{objname}/{objname}.momentum.y"),
+                        "z": branch_forms.pop(f"{objname}/{objname}.momentum.z"),
+                        "mass": branch_forms.pop(f"{objname}/{objname}.mass"), 
+                        "charge": branch_forms.pop(f"{objname}/{objname}.charge"),
+                        "pdgId": branch_forms.pop(f"{objname}/{objname}.PDG"),
+                    },
+                    objname,
+                    composite_behavior.get(objname, "LorentzVectorM"), 
+                )
+                branch_forms[objname] = form
             elif components == {
                 "fCoordinates.fX",
                 "fCoordinates.fY",
@@ -109,9 +124,8 @@ def _build_collections(self, branch_forms):
                 raise ValueError(
                     f"Unrecognized class with split branches: {components}"
                 )
-
         # Generating collection from branch name
-        collections = [k for k in branch_forms if k == "PandoraPFOs"]
+        collections = [k for k in branch_forms if k == "PandoraPFOs" or k == "MCParticlesSkimmed"] # added second case 
         collections = {
             "_".join(k.split("_")[:-1])
             for k in collections