Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: Schema for the old-style EDM4hep Future Circular Collider simulation samples #1182

Merged
merged 25 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
e2e9fa6
Schema for the oldstyle edm4hep Future Circular Collider simulation S…
prayagyadav Sep 11, 2024
1acf6c5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 12, 2024
d876d78
Merge branch 'master' into fcc-schema
prayagyadav Sep 12, 2024
d61109a
Merge branch 'master' into fcc-schema
lgray Sep 20, 2024
366f3aa
Merge branch 'CoffeaTeam:master' into fcc-schema
prayagyadav Sep 24, 2024
3de0383
Automated Commit from Fedora
prayagyadav Oct 7, 2024
eb541dc
Automated Commit from Fedora
prayagyadav Oct 10, 2024
9519219
parents daughters cross reference works
prayagyadav Oct 14, 2024
8db4c83
Merge branch 'CoffeaTeam:master' into review-fcc-schema
prayagyadav Oct 14, 2024
e3c62df
Merge branch 'CoffeaTeam:master' into fcc-schema
prayagyadav Oct 14, 2024
253198d
Cleaned
prayagyadav Oct 14, 2024
9d8035f
Merge branch 'review-fcc-schema' of https://github.com/prayagyadav/co…
prayagyadav Oct 14, 2024
d31bb12
Fixed all the cross references
prayagyadav Oct 14, 2024
a1b81a3
fix quoting issue
lgray Oct 14, 2024
10d76d7
Merge branch 'master' into fcc-schema
lgray Oct 14, 2024
0fc7362
fix accidental deletions of .replace
lgray Oct 14, 2024
be47625
Merge branch 'CoffeaTeam:master' into fcc-schema
prayagyadav Oct 15, 2024
a4324db
added parents daughters tests and winter2023 sample
prayagyadav Oct 15, 2024
09f6f61
Merge branch 'fcc-schema' of https://github.com/prayagyadav/coffea in…
prayagyadav Oct 15, 2024
b5b53f2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 15, 2024
8c2b2e8
Link to original samples
prayagyadav Oct 15, 2024
b22b2ad
Merge branch 'fcc-schema' of https://github.com/prayagyadav/coffea in…
prayagyadav Oct 15, 2024
8c2b222
Merge branch 'master' into fcc-schema
lgray Oct 15, 2024
e810809
fix mistakes in comments
prayagyadav Oct 16, 2024
27d492a
Merge branch 'master' into fcc-schema
lgray Oct 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 59 additions & 93 deletions src/coffea/nanoevents/methods/fcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,62 +79,6 @@ def map_index_to_array(array, index, axis=1):
raise AttributeError("Only axis = 1 or axis = 2 supported at the moment.")


# Function required to create a range array from a begin and end array
@numba.njit
def index_range_numba_wrap(begin_end, builder):
    # begin_end: doubly-jagged array whose innermost axis holds a
    # [begin, end) pair per particle (see index_range for how it is built).
    # builder: an awkward.ArrayBuilder that accumulates the output.
    # Returns the builder; the caller takes a snapshot.
    for ev in begin_end:  # one outer list per event
        builder.begin_list()
        for j in ev:  # one (begin, end) pair per particle
            builder.begin_list()
            for k in range(j[0], j[1]):  # expand the half-open range
                builder.integer(k)
            builder.end_list()
        builder.end_list()
    return builder


def index_range(begin, end):
    """Expand per-particle ``(begin, end)`` pairs into explicit index lists.

    Given two jagged arrays of matching shape holding the start and the
    one-past-the-end of an index range for every particle, produce a
    doubly-nested array where each particle's entry is the full list of
    integers in ``[begin, end)``.

    Example: if,
    begin = [
        [0, 2, 4, 3, ...],
        [1, 0, 4, 6, ...]
        ...
    ]
    end = [
        [1, 2, 5, 5, ...],
        [3, 1, 7, 6, ...]
        ...
    ]
    then, output is,
    output = [
        [[0], [], [4], [3,4], ...],
        [[1,2], [0], [4,5,6], [], ...]
        ...
    ]
    """
    # Stack begin/end along a new innermost axis so every particle carries
    # its own [begin, end) pair.
    paired = awkward.concatenate(
        (begin[:, :, numpy.newaxis], end[:, :, numpy.newaxis]), axis=2
    )

    backends = (awkward.backend(begin), awkward.backend(end))
    if "typetracer" in backends:
        # Dask compatibility: we cannot run the numba kernel on typetracer
        # (length-zero) data — it would yield the wrong layout. Touch the
        # inputs so dask records the dependency, then hand back a faked
        # typetracer layout with the required depth-3 structure [[[...]]].
        awkward.typetracer.length_zero_if_typetracer(begin)  # force touching
        awkward.typetracer.length_zero_if_typetracer(end)  # force touching
        fake = awkward.Array([[[0]]]).layout.to_typetracer(forget_length=True)
        return awkward.Array(fake)

    return index_range_numba_wrap(paired, awkward.ArrayBuilder()).snapshot()


@awkward.mixin_class(behavior)
class MomentumCandidate(vector.LorentzVector):
"""A Lorentz vector with charge
Expand Down Expand Up @@ -190,36 +134,66 @@ def absolute_mass(self):
class MCParticle(MomentumCandidate, base.NanoCollection):
"""Generated Monte Carlo particles"""

@property
def alt_get_daughters_index(self):
def _apply_nested_global_index(self, index, nested_counts, _dask_array_=None):
    """As _apply_global_index but expects one additional layer of nesting to get specified.

    Parameters:
    - index: flat global indices into the backing content (or a single int),
      possibly a dask_awkward.Array
    - nested_counts: per-particle counts used to regroup the flat indices
      into one extra level of nesting
    - _dask_array_: the dask collection bound to ``self`` when called
      through the dask dispatch machinery
    """
    # Single integer: plain record lookup, no nesting to apply.
    if isinstance(index, int):
        out = self._content()[index]
        return awkward.Record(out, behavior=self.behavior)

    def flat_take(layout):
        # Gather backing-content elements at the flat global indices;
        # negative indices are masked out (become None).
        idx = awkward.Array(layout)
        return self._content()[idx.mask[idx >= 0]]

    def descend(layout, depth, **kwargs):
        # awkward.transform visitor: act only at the flat index layer
        # (purelist_depth == 1); the implicit None return elsewhere lets
        # transform keep descending.
        if layout.purelist_depth == 1:
            return flat_take(layout)

    # When given dask collections, operate on their typetracer metadata
    # (._meta) so the eager computation below also produces the ``meta``
    # for the deferred map_partitions call.
    (index_out,) = awkward.broadcast_arrays(
        index._meta if isinstance(index, dask_awkward.Array) else index
    )
    nested_counts_out = (
        nested_counts._meta
        if isinstance(nested_counts, dask_awkward.Array)
        else nested_counts
    )
    # Add the extra nesting level: regroup the innermost flat indices by
    # the per-particle counts.
    index_out = awkward.unflatten(
        index_out, awkward.flatten(nested_counts_out), axis=-1
    )
    layout_out = awkward.transform(descend, index_out.layout, highlevel=False)
    out = awkward.Array(layout_out, behavior=self.behavior)

    if isinstance(index, dask_awkward.Array):
        # Dask path: defer the real work per partition, reusing the
        # typetracer result computed above as the metadata.
        return _dask_array_.map_partitions(
            base._ClassMethodFn("_apply_nested_global_index"),
            index,
            nested_counts,
            label="_apply_nested_global_index",
            meta=out,
        )
    return out

# Daughters
@dask_property
def get_daughters_index(self):
"""
Obtain the indexes of the daughters of each and every MCParticle
- The output is a doubly nested awkward array
- Needs the presence of Particleidx1 collection
- The Particleidx1.index contains info about the daughters
"""
ranges = index_range(self.daughters.begin, self.daughters.end)
# return map_index_to_array(self._events().Particleidx1.index, self.daughters.begin_end_ranges, axis=2)
return self.daughters.Particleidx1_rangesG

return awkward.values_astype(
map_index_to_array(self._events().Particleidx1.index, ranges, axis=2),
"int64",
)


# Daughters
@dask_property
def get_daughters_index(self):
@get_daughters_index.dask
def get_daughters_index(self, dask_array):
"""
Obtain the indexes of the daughters of each and every MCParticle
- The output is a doubly nested awkward array
- Needs the presence of Particleidx1 collection
- The Particleidx1.index contains info about the daughters
"""
ranges = index_range(self.daughters.begin, self.daughters.end)
return awkward.values_astype(
map_index_to_array(self._events().Particleidx1.index, ranges, axis=2),
"int64",
)
# return map_index_to_array(dask_array._events().Particleidx1.index, dask_array.daughters.begin_end_ranges, axis=2)
return dask_array.daughters.Particleidx1_rangesG

@dask_property
def get_daughters(self):
Expand All @@ -229,7 +203,8 @@ def get_daughters(self):
- Needs the presence of Particleidx1 collection
- The Particleidx1.index contains info about the daughters
"""
return map_index_to_array(self, self.get_daughters_index, axis=2)
# return map_index_to_array(self, self.get_daughters_index, axis=2)
return self._events().Particle._apply_global_index(self.get_daughters_index)

@get_daughters.dask
def get_daughters(self, dask_array):
Expand All @@ -239,7 +214,8 @@ def get_daughters(self, dask_array):
- Needs the presence of Particleidx1 collection
- The Particleidx1.index contains info about the daughters
"""
return map_index_to_array(dask_array, dask_array.get_daughters_index, axis=2)
# return map_index_to_array(dask_array, dask_array.get_daughters_index, axis=2)
return dask_array._events().Particle._apply_global_index(dask_array.get_daughters_index)

# Parents
@dask_property
Expand All @@ -250,13 +226,8 @@ def get_parents_index(self):
- Needs the presence of Particleidx0 collection
- The Particleidx0.index contains info about the parents
"""
ranges = index_range(self.parents.begin, self.parents.end)
# rangesG = index_range(self.parents.beginG, self.parents.endG)
# Explore how to map the global index to produces doubly nested output
return awkward.values_astype(
map_index_to_array(self._events().Particleidx0.index, ranges, axis=2),
"int64",
)
# return map_index_to_array(self._events().Particleidx0.index, self.parents.begin_end_ranges, axis=2)
return self.parents.Particleidx0_rangesG

@get_parents_index.dask
def get_parents_index(self, dask_array):
Expand All @@ -265,16 +236,9 @@ def get_parents_index(self, dask_array):
- The output is a doubly nested awkward array
- Needs the presence of Particleidx0 collection
- The Particleidx0.index contains info about the parents

Note: Seems like all the functions need to mapped manually
"""
ranges = dask_awkward.map_partitions(
index_range, dask_array.parents.begin, dask_array.parents.end
)
daughters = dask_awkward.map_partitions(
map_index_to_array, dask_array._events().Particleidx0.index, ranges, axis=2
)
return awkward.values_astype(daughters, "int32")
# return map_index_to_array(dask_array._events().Particleidx0.index, dask_array.parents.begin_end_ranges, axis=2)
return dask_array.parents.Particleidx0_rangesG

@dask_property
def get_parents(self):
Expand All @@ -284,7 +248,8 @@ def get_parents(self):
- Needs the presence of Particleidx0 collection
- The Particleidx0.index contains info about the parents
"""
return map_index_to_array(self, self.get_parents_index, axis=2)
# return map_index_to_array(self, self.get_parents_index, axis=2)
return self._events().Particle._apply_global_index(self.get_parents_index)

@get_parents.dask
def get_parents(self, dask_array):
Expand All @@ -294,7 +259,8 @@ def get_parents(self, dask_array):
- Needs the presence of Particleidx0 collection
- The Particleidx0.index contains info about the parents
"""
return map_index_to_array(dask_array, dask_array.get_parents_index, axis=2)
# return map_index_to_array(dask_array, dask_array.get_parents_index, axis=2)
return dask_array._events().Particle._apply_global_index(dask_array.get_parents_index)


_set_repr_name("MCParticle")
Expand Down
59 changes: 50 additions & 9 deletions src/coffea/nanoevents/schemas/fcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,17 @@ class FCCSchema(BaseSchema):
all_cross_references = {
"MCRecoAssociations#1.index": "Particle", #MC to Reco connection
"MCRecoAssociations#0.index": "ReconstructedParticles", #Reco to MC connection
"Particle#0.index":"Particle", #Parents
"Particle#1.index":"Particle", #Daughters
# "Particle#0.index":"Particle", #Parents
# "Particle#1.index":"Particle", #Daughters
"Muon#0.index":"ReconstructedParticles", #Matched Muons
"Electron#0.index":"ReconstructedParticles", #Matched Electrons
}

mc_relations = {
"parents" : "Particle#0.index",
"daughters" : "Particle#1.index"
}

def __init__(self, base_form, version="latest"):
super().__init__(base_form)
self._form["fields"], self._form["contents"] = self._build_collections(
Expand Down Expand Up @@ -391,6 +396,7 @@ def _create_subcollections(self, branch_forms, all_collections):
"""
field_names = list(branch_forms.keys())


# Replace square braces in a name for a Python-friendly name; Example: covMatrix[n] --> covMatrix_n_
for name in field_names:
if _square_braces.match(name):
Expand Down Expand Up @@ -419,11 +425,46 @@ def _create_subcollections(self, branch_forms, all_collections):
"primitive": "int64",
"form_key": concat(begin_end_content[list(begin_end_content.keys())[0]]["form_key"],"!offsets"),
}
begin_end_content_global = {
k+"G": transforms.local2global_form(begin_end_content[k], offset_form)

# begin_end_content_global = {
# k+"G": transforms.local2global_form(begin_end_content[k], offset_form)
# for k in begin_end_content.keys()
# }

begin = [
begin_end_content[k]
for k in begin_end_content.keys()
if k.endswith("begin")
]
end = [
begin_end_content[k]
for k in begin_end_content.keys()
if k.endswith("end")
]
counts_content = {
"begin_end_counts": transforms.begin_and_end_to_counts_form(*begin, *end)
}
branch_forms[name] = zip_forms(sort_dict({**begin_end_content,**begin_end_content_global}), name)
# Parents and Daughters
ranges_content = {}
for key, target in self.mc_relations.items():
col_name = target.split(".")[0]
if name.endswith(key):
range_name = f"{col_name.replace("#","idx")}_ranges"
ranges_content[range_name+"G"] = transforms.index_range_form(
*begin,
*end,
branch_forms[f"{col_name}/{target}"]
)

to_zip = {**begin_end_content, **counts_content, **ranges_content}

branch_forms[name] = zip_forms(
sort_dict(
to_zip
),
name,
offsets=offset_form
)

# Zip colorFlow.a and colorFlow.b branches
# Example: 'Particle/Particle.colorFlow.a', 'Particle/Particle.colorFlow.b' --> 'Particle/Particle.colorFlow'
Expand Down Expand Up @@ -465,28 +506,28 @@ def _global_indexers(self, branch_forms, all_collections):

#pick up the available fields from target collection to get an offset from
available_fields = [name for name in branch_forms.keys() if name.startswith(f"{target}/{target}.")]

# By default the idxs have different shape at axis=1 in comparison to target
# So one needs to fill the empty spaces with -1 which could be removed later
compatible_index = transforms.grow_local_index_to_target_shape_form(
branch_forms[f"{collection_name}/{collection_name}.{index_name}"],
branch_forms[available_fields[0]]
)

offset_form = {
"class": "NumpyArray",
"itemsize": 8,
"format": "i",
"primitive": "int64",
"form_key": concat(*[branch_forms[available_fields[0]]["form_key"],"!offsets",]),
}

replaced_name = collection_name.replace('#', 'idx')
branch_forms[f"{target}/{target}.{replaced_name}_{index_name}Global"] = transforms.local2global_form(
compatible_index,
offset_form
)

return branch_forms

def _build_collections(self, field_names, input_contents):
Expand Down
3 changes: 0 additions & 3 deletions src/coffea/nanoevents/schemas/nanoaod.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,11 +214,9 @@ def _build_collections(self, field_names, input_contents):
# Create offsets virtual arrays
for name in collections:
if "n" + name in branch_forms:
if name == 'Electron': print(branch_forms[name+'_phi'],"\n",branch_forms["n" + name])
branch_forms["o" + name] = transforms.counts2offsets_form(
branch_forms["n" + name]
)
if name == 'Electron': print(branch_forms["o" + name])

# Check the presence of the event_ids
missing_event_ids = [
Expand Down Expand Up @@ -261,7 +259,6 @@ def _build_collections(self, field_names, input_contents):
branch_forms[indexer + "G"] = transforms.local2global_form(
branch_forms[indexer], branch_forms["o" + target]
)
if indexer=="Electron_jetIdx" : print(branch_forms[indexer + "G"])

# Create nested indexer from Idx1, Idx2, ... arrays
for name, indexers in self.nested_items.items():
Expand Down
Loading