Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: Schema for the old-style EDM4hep Future Circular Collider simulation samples #1182

Merged
merged 25 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
e2e9fa6
Schema for the oldstyle edm4hep Future Circular Collider simulation S…
prayagyadav Sep 11, 2024
1acf6c5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 12, 2024
d876d78
Merge branch 'master' into fcc-schema
prayagyadav Sep 12, 2024
d61109a
Merge branch 'master' into fcc-schema
lgray Sep 20, 2024
366f3aa
Merge branch 'CoffeaTeam:master' into fcc-schema
prayagyadav Sep 24, 2024
3de0383
Automated Commit from Fedora
prayagyadav Oct 7, 2024
eb541dc
Automated Commit from Fedora
prayagyadav Oct 10, 2024
9519219
parents daughters cross reference works
prayagyadav Oct 14, 2024
8db4c83
Merge branch 'CoffeaTeam:master' into review-fcc-schema
prayagyadav Oct 14, 2024
e3c62df
Merge branch 'CoffeaTeam:master' into fcc-schema
prayagyadav Oct 14, 2024
253198d
Cleaned
prayagyadav Oct 14, 2024
9d8035f
Merge branch 'review-fcc-schema' of https://github.com/prayagyadav/co…
prayagyadav Oct 14, 2024
d31bb12
Fixed all the cross references
prayagyadav Oct 14, 2024
a1b81a3
fix quoting issue
lgray Oct 14, 2024
10d76d7
Merge branch 'master' into fcc-schema
lgray Oct 14, 2024
0fc7362
fix accidental deletions of .replace
lgray Oct 14, 2024
be47625
Merge branch 'CoffeaTeam:master' into fcc-schema
prayagyadav Oct 15, 2024
a4324db
added parents daughters tests and winter2023 sample
prayagyadav Oct 15, 2024
09f6f61
Merge branch 'fcc-schema' of https://github.com/prayagyadav/coffea in…
prayagyadav Oct 15, 2024
b5b53f2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 15, 2024
8c2b2e8
Link to original samples
prayagyadav Oct 15, 2024
b22b2ad
Merge branch 'fcc-schema' of https://github.com/prayagyadav/coffea in…
prayagyadav Oct 15, 2024
8c2b222
Merge branch 'master' into fcc-schema
lgray Oct 15, 2024
e810809
fix mistakes in comments
prayagyadav Oct 16, 2024
27d492a
Merge branch 'master' into fcc-schema
lgray Oct 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 59 additions & 93 deletions src/coffea/nanoevents/methods/fcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,62 +79,6 @@ def map_index_to_array(array, index, axis=1):
raise AttributeError("Only axis = 1 or axis = 2 supported at the moment.")


# Function required to create a range array from a begin and end array
@numba.njit
def index_range_numba_wrap(begin_end, builder):
    # begin_end: doubly-jagged array whose innermost axis holds a
    # [begin, end) pair per particle (see index_range for how it is built).
    # builder: an awkward.ArrayBuilder that accumulates the output.
    # Returns the builder; the caller takes a snapshot.
    for ev in begin_end:  # one outer list per event
        builder.begin_list()
        for j in ev:  # one (begin, end) pair per particle
            builder.begin_list()
            for k in range(j[0], j[1]):  # expand the half-open range
                builder.integer(k)
            builder.end_list()
        builder.end_list()
    return builder


def index_range(begin, end):
    """Expand per-particle ``(begin, end)`` pairs into explicit index lists.

    Given two jagged arrays of matching shape holding the start and the
    one-past-the-end of an index range for every particle, produce a
    doubly-nested array where each particle's entry is the full list of
    integers in ``[begin, end)``.

    Example: if,
    begin = [
        [0, 2, 4, 3, ...],
        [1, 0, 4, 6, ...]
        ...
    ]
    end = [
        [1, 2, 5, 5, ...],
        [3, 1, 7, 6, ...]
        ...
    ]
    then, output is,
    output = [
        [[0], [], [4], [3,4], ...],
        [[1,2], [0], [4,5,6], [], ...]
        ...
    ]
    """
    # Stack begin/end along a new innermost axis so every particle carries
    # its own [begin, end) pair.
    paired = awkward.concatenate(
        (begin[:, :, numpy.newaxis], end[:, :, numpy.newaxis]), axis=2
    )

    backends = (awkward.backend(begin), awkward.backend(end))
    if "typetracer" in backends:
        # Dask compatibility: we cannot run the numba kernel on typetracer
        # (length-zero) data — it would yield the wrong layout. Touch the
        # inputs so dask records the dependency, then hand back a faked
        # typetracer layout with the required depth-3 structure [[[...]]].
        awkward.typetracer.length_zero_if_typetracer(begin)  # force touching
        awkward.typetracer.length_zero_if_typetracer(end)  # force touching
        fake = awkward.Array([[[0]]]).layout.to_typetracer(forget_length=True)
        return awkward.Array(fake)

    return index_range_numba_wrap(paired, awkward.ArrayBuilder()).snapshot()


@awkward.mixin_class(behavior)
class MomentumCandidate(vector.LorentzVector):
"""A Lorentz vector with charge
Expand Down Expand Up @@ -190,36 +134,66 @@ def absolute_mass(self):
class MCParticle(MomentumCandidate, base.NanoCollection):
"""Generated Monte Carlo particles"""

@property
def alt_get_daughters_index(self):
def _apply_nested_global_index(self, index, nested_counts, _dask_array_=None):
    """As _apply_global_index but expects one additional layer of nesting to get specified.

    Parameters:
    - index: flat global indices into the backing content (or a single int),
      possibly a dask_awkward.Array
    - nested_counts: per-particle counts used to regroup the flat indices
      into one extra level of nesting
    - _dask_array_: the dask collection bound to ``self`` when called
      through the dask dispatch machinery
    """
    # Single integer: plain record lookup, no nesting to apply.
    if isinstance(index, int):
        out = self._content()[index]
        return awkward.Record(out, behavior=self.behavior)

    def flat_take(layout):
        # Gather backing-content elements at the flat global indices;
        # negative indices are masked out (become None).
        idx = awkward.Array(layout)
        return self._content()[idx.mask[idx >= 0]]

    def descend(layout, depth, **kwargs):
        # awkward.transform visitor: act only at the flat index layer
        # (purelist_depth == 1); the implicit None return elsewhere lets
        # transform keep descending.
        if layout.purelist_depth == 1:
            return flat_take(layout)

    # When given dask collections, operate on their typetracer metadata
    # (._meta) so the eager computation below also produces the ``meta``
    # for the deferred map_partitions call.
    (index_out,) = awkward.broadcast_arrays(
        index._meta if isinstance(index, dask_awkward.Array) else index
    )
    nested_counts_out = (
        nested_counts._meta
        if isinstance(nested_counts, dask_awkward.Array)
        else nested_counts
    )
    # Add the extra nesting level: regroup the innermost flat indices by
    # the per-particle counts.
    index_out = awkward.unflatten(
        index_out, awkward.flatten(nested_counts_out), axis=-1
    )
    layout_out = awkward.transform(descend, index_out.layout, highlevel=False)
    out = awkward.Array(layout_out, behavior=self.behavior)

    if isinstance(index, dask_awkward.Array):
        # Dask path: defer the real work per partition, reusing the
        # typetracer result computed above as the metadata.
        return _dask_array_.map_partitions(
            base._ClassMethodFn("_apply_nested_global_index"),
            index,
            nested_counts,
            label="_apply_nested_global_index",
            meta=out,
        )
    return out

# Daughters
@dask_property
def get_daughters_index(self):
"""
Obtain the indexes of the daughters of each and every MCParticle
- The output is a doubly nested awkward array
- Needs the presence of Particleidx1 collection
- The Particleidx1.index contains info about the daughters
"""
ranges = index_range(self.daughters.begin, self.daughters.end)
# return map_index_to_array(self._events().Particleidx1.index, self.daughters.begin_end_ranges, axis=2)
return self.daughters.Particleidx1_rangesG

return awkward.values_astype(
map_index_to_array(self._events().Particleidx1.index, ranges, axis=2),
"int64",
)


# Daughters
@dask_property
def get_daughters_index(self):
@get_daughters_index.dask
def get_daughters_index(self, dask_array):
"""
Obtain the indexes of the daughters of each and every MCParticle
- The output is a doubly nested awkward array
- Needs the presence of Particleidx1 collection
- The Particleidx1.index contains info about the daughters
"""
ranges = index_range(self.daughters.begin, self.daughters.end)
return awkward.values_astype(
map_index_to_array(self._events().Particleidx1.index, ranges, axis=2),
"int64",
)
# return map_index_to_array(dask_array._events().Particleidx1.index, dask_array.daughters.begin_end_ranges, axis=2)
return dask_array.daughters.Particleidx1_rangesG

@dask_property
def get_daughters(self):
Expand All @@ -229,7 +203,8 @@ def get_daughters(self):
- Needs the presence of Particleidx1 collection
- The Particleidx1.index contains info about the daughters
"""
return map_index_to_array(self, self.get_daughters_index, axis=2)
# return map_index_to_array(self, self.get_daughters_index, axis=2)
return self._events().Particle._apply_global_index(self.get_daughters_index)

@get_daughters.dask
def get_daughters(self, dask_array):
Expand All @@ -239,7 +214,8 @@ def get_daughters(self, dask_array):
- Needs the presence of Particleidx1 collection
- The Particleidx1.index contains info about the daughters
"""
return map_index_to_array(dask_array, dask_array.get_daughters_index, axis=2)
# return map_index_to_array(dask_array, dask_array.get_daughters_index, axis=2)
return dask_array._events().Particle._apply_global_index(dask_array.get_daughters_index)

# Parents
@dask_property
Expand All @@ -250,13 +226,8 @@ def get_parents_index(self):
- Needs the presence of Particleidx0 collection
- The Particleidx0.index contains info about the parents
"""
ranges = index_range(self.parents.begin, self.parents.end)
# rangesG = index_range(self.parents.beginG, self.parents.endG)
# Explore how to map the global index to produces doubly nested output
return awkward.values_astype(
map_index_to_array(self._events().Particleidx0.index, ranges, axis=2),
"int64",
)
# return map_index_to_array(self._events().Particleidx0.index, self.parents.begin_end_ranges, axis=2)
return self.parents.Particleidx0_rangesG

@get_parents_index.dask
def get_parents_index(self, dask_array):
Expand All @@ -265,16 +236,9 @@ def get_parents_index(self, dask_array):
- The output is a doubly nested awkward array
- Needs the presence of Particleidx0 collection
- The Particleidx0.index contains info about the parents

Note: Seems like all the functions need to mapped manually
"""
ranges = dask_awkward.map_partitions(
index_range, dask_array.parents.begin, dask_array.parents.end
)
daughters = dask_awkward.map_partitions(
map_index_to_array, dask_array._events().Particleidx0.index, ranges, axis=2
)
return awkward.values_astype(daughters, "int32")
# return map_index_to_array(dask_array._events().Particleidx0.index, dask_array.parents.begin_end_ranges, axis=2)
return dask_array.parents.Particleidx0_rangesG

@dask_property
def get_parents(self):
Expand All @@ -284,7 +248,8 @@ def get_parents(self):
- Needs the presence of Particleidx0 collection
- The Particleidx0.index contains info about the parents
"""
return map_index_to_array(self, self.get_parents_index, axis=2)
# return map_index_to_array(self, self.get_parents_index, axis=2)
return self._events().Particle._apply_global_index(self.get_parents_index)

@get_parents.dask
def get_parents(self, dask_array):
Expand All @@ -294,7 +259,8 @@ def get_parents(self, dask_array):
- Needs the presence of Particleidx0 collection
- The Particleidx0.index contains info about the parents
"""
return map_index_to_array(dask_array, dask_array.get_parents_index, axis=2)
# return map_index_to_array(dask_array, dask_array.get_parents_index, axis=2)
return dask_array._events().Particle._apply_global_index(dask_array.get_parents_index)


_set_repr_name("MCParticle")
Expand Down
59 changes: 50 additions & 9 deletions src/coffea/nanoevents/schemas/fcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,17 @@ class FCCSchema(BaseSchema):
all_cross_references = {
"MCRecoAssociations#1.index": "Particle", #MC to Reco connection
"MCRecoAssociations#0.index": "ReconstructedParticles", #Reco to MC connection
"Particle#0.index":"Particle", #Parents
"Particle#1.index":"Particle", #Daughters
# "Particle#0.index":"Particle", #Parents
# "Particle#1.index":"Particle", #Daughters
"Muon#0.index":"ReconstructedParticles", #Matched Muons
"Electron#0.index":"ReconstructedParticles", #Matched Electrons
}

mc_relations = {
"parents" : "Particle#0.index",
"daughters" : "Particle#1.index"
}

def __init__(self, base_form, version="latest"):
super().__init__(base_form)
self._form["fields"], self._form["contents"] = self._build_collections(
Expand Down Expand Up @@ -391,6 +396,7 @@ def _create_subcollections(self, branch_forms, all_collections):
"""
field_names = list(branch_forms.keys())


# Replace square braces in a name for a Python-friendly name; Example: covMatrix[n] --> covMatrix_n_
for name in field_names:
if _square_braces.match(name):
Expand Down Expand Up @@ -419,11 +425,46 @@ def _create_subcollections(self, branch_forms, all_collections):
"primitive": "int64",
"form_key": concat(begin_end_content[list(begin_end_content.keys())[0]]["form_key"],"!offsets"),
}
begin_end_content_global = {
k+"G": transforms.local2global_form(begin_end_content[k], offset_form)

# begin_end_content_global = {
# k+"G": transforms.local2global_form(begin_end_content[k], offset_form)
# for k in begin_end_content.keys()
# }

begin = [
begin_end_content[k]
for k in begin_end_content.keys()
if k.endswith("begin")
]
end = [
begin_end_content[k]
for k in begin_end_content.keys()
if k.endswith("end")
]
counts_content = {
"begin_end_counts": transforms.begin_and_end_to_counts_form(*begin, *end)
}
branch_forms[name] = zip_forms(sort_dict({**begin_end_content,**begin_end_content_global}), name)
# Parents and Daughters
ranges_content = {}
for key, target in self.mc_relations.items():
col_name = target.split(".")[0]
if name.endswith(key):
range_name = f"{col_name.replace("#","idx")}_ranges"
ranges_content[range_name+"G"] = transforms.index_range_form(
*begin,
*end,
branch_forms[f"{col_name}/{target}"]
)

to_zip = {**begin_end_content, **counts_content, **ranges_content}

branch_forms[name] = zip_forms(
sort_dict(
to_zip
),
name,
offsets=offset_form
)

# Zip colorFlow.a and colorFlow.b branches
# Example: 'Particle/Particle.colorFlow.a', 'Particle/Particle.colorFlow.b' --> 'Particle/Particle.colorFlow'
Expand Down Expand Up @@ -465,28 +506,28 @@ def _global_indexers(self, branch_forms, all_collections):

#pick up the available fields from target collection to get an offset from
available_fields = [name for name in branch_forms.keys() if name.startswith(f"{target}/{target}.")]

# By default the idxs have different shape at axis=1 in comparison to target
# So one needs to fill the empty spaces with -1 which could be removed later
compatible_index = transforms.grow_local_index_to_target_shape_form(
branch_forms[f"{collection_name}/{collection_name}.{index_name}"],
branch_forms[available_fields[0]]
)

offset_form = {
"class": "NumpyArray",
"itemsize": 8,
"format": "i",
"primitive": "int64",
"form_key": concat(*[branch_forms[available_fields[0]]["form_key"],"!offsets",]),
}

replaced_name = collection_name.replace('#', 'idx')
branch_forms[f"{target}/{target}.{replaced_name}_{index_name}Global"] = transforms.local2global_form(
compatible_index,
offset_form
)

return branch_forms

def _build_collections(self, field_names, input_contents):
Expand Down
3 changes: 0 additions & 3 deletions src/coffea/nanoevents/schemas/nanoaod.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,11 +214,9 @@ def _build_collections(self, field_names, input_contents):
# Create offsets virtual arrays
for name in collections:
if "n" + name in branch_forms:
if name == 'Electron': print(branch_forms[name+'_phi'],"\n",branch_forms["n" + name])
branch_forms["o" + name] = transforms.counts2offsets_form(
branch_forms["n" + name]
)
if name == 'Electron': print(branch_forms["o" + name])

# Check the presence of the event_ids
missing_event_ids = [
Expand Down Expand Up @@ -261,7 +259,6 @@ def _build_collections(self, field_names, input_contents):
branch_forms[indexer + "G"] = transforms.local2global_form(
branch_forms[indexer], branch_forms["o" + target]
)
if indexer=="Electron_jetIdx" : print(branch_forms[indexer + "G"])

# Create nested indexer from Idx1, Idx2, ... arrays
for name, indexers in self.nested_items.items():
Expand Down
Loading