From f491a36c04e1fa2c3b63d14468345465d398afa6 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 17:20:00 +0200 Subject: [PATCH 1/4] allow for collections that contain non-list fields --- src/coffea/nanoevents/schemas/physlite.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 1b9b89205..3b6508e2e 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -118,14 +118,21 @@ def _build_collections(self, branch_forms): to_zip, objname, self.mixins.get(objname, None), - bypass=True, - ) - content = contents[objname]["content"] - content["parameters"] = dict( - content.get("parameters", {}), collection_name=objname + bypass=False, ) except NotImplementedError: warnings.warn(f"Can't zip collection {objname}") + if "content" in contents[objname]: + # in this case we were able to zip everything together to a ListOffsetArray(RecordArray) + assert "List" in contents[objname]["class"] + content = contents[objname]["content"] + else: + # in this case this was not possible (e.g. because we also had non-list fields) + assert contents[objname]["class"] == "RecordArray" + content = contents[objname] + content["parameters"] = dict( + content.get("parameters", {}), collection_name=objname + ) return contents @staticmethod From 79ae6d5d7e4a9eb018b5cf1efb57713ea3c96b34 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 17:34:35 +0200 Subject: [PATCH 2/4] skip empty records --- src/coffea/nanoevents/schemas/physlite.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 3b6508e2e..11446b7a2 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -79,6 +79,9 @@ def _build_collections(self, branch_forms): key_fields = key.split("/")[-1].split(".") top_key = key_fields[0] sub_key = ".".join(key_fields[1:]) + if ak_form["class"] == "RecordArray" and not ak_form["fields"]: + # skip empty records (e.g. the branches ending in "." only containing the base class) + continue objname = top_key.replace("Analysis", "").replace("AuxDyn", "") zip_groups[objname].append(((key, sub_key), ak_form)) From 94b648fd0926ad6150813d9d4f0cd289bde4515d Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 17:38:25 +0200 Subject: [PATCH 3/4] don't zip branches that are not grouped with anything else (e.g. index_ref in newer PHYSLITE) --- src/coffea/nanoevents/schemas/physlite.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 11446b7a2..52e3ac747 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -100,6 +100,10 @@ def _build_collections(self, branch_forms): # zip the forms contents = {} for objname, keys_and_form in zip_groups.items(): + if len(keys_and_form) == 1: + # don't zip if there is only one item + contents[objname] = keys_and_form[0][1] + continue to_zip = {} for (key, sub_key), form in keys_and_form: if "." in sub_key: From 0334acd8feb208602b0f173d473921e155215cd4 Mon Sep 17 00:00:00 2001 From: Nikolai Hartmann Date: Thu, 31 Aug 2023 18:03:57 +0200 Subject: [PATCH 4/4] also remove Aux from branch names to zip them with AuxDyn and potentially non-aux branches --- src/coffea/nanoevents/schemas/physlite.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coffea/nanoevents/schemas/physlite.py b/src/coffea/nanoevents/schemas/physlite.py index 52e3ac747..c45240d6a 100644 --- a/src/coffea/nanoevents/schemas/physlite.py +++ b/src/coffea/nanoevents/schemas/physlite.py @@ -82,7 +82,9 @@ def _build_collections(self, branch_forms): if ak_form["class"] == "RecordArray" and not ak_form["fields"]: # skip empty records (e.g. the branches ending in "." only containing the base class) continue - objname = top_key.replace("Analysis", "").replace("AuxDyn", "") + objname = ( + top_key.replace("Analysis", "").replace("AuxDyn", "").replace("Aux", "") + ) zip_groups[objname].append(((key, sub_key), ak_form))