From 0104086e461737689e00630227106b970a971ea8 Mon Sep 17 00:00:00 2001 From: Manuel Huber Date: Wed, 20 Nov 2024 12:47:50 +0100 Subject: [PATCH] lh5concat: allow concatenating of group-like structs --- src/lgdo/cli.py | 32 ++++++++++++++++++++++++++++++-- tests/test_cli.py | 18 ++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/src/lgdo/cli.py b/src/lgdo/cli.py index 6563fd66..b1846a00 100644 --- a/src/lgdo/cli.py +++ b/src/lgdo/cli.py @@ -7,7 +7,7 @@ import logging import sys -from . import Array, Table, VectorOfVectors, __version__, lh5 +from . import Array, Scalar, Struct, Table, VectorOfVectors, __version__, lh5 from . import logging as lgdogging # eheheh log = logging.getLogger(__name__) @@ -212,6 +212,7 @@ def lh5concat(args=None): store = lh5.LH5Store() h5f0 = store.gimme_file(file0) lgdos = {} + lgdo_structs = {} # loop over object list in the first file for name in obj_list: # now loop over groups starting from root @@ -222,7 +223,7 @@ def lh5concat(args=None): if current in lgdos: break - # not even an LGDO! + # not even an LGDO (i.e. a plain HDF5 group)! if "datatype" not in h5f0[current].attrs: continue @@ -232,14 +233,30 @@ def lh5concat(args=None): # read all! obj, _ = store.read(current, h5f0) lgdos[current] = obj + elif isinstance(obj, Struct): + # structs might be used in a "group-like" fashion (i.e. they might only + # contain array-like objects). + # note: handle after handling tables, as tables also satisfy this check. + lgdo_structs[current] = obj.attrs["datatype"] + continue + elif isinstance(obj, Scalar): + msg = f"cannot concat scalar field {current}" + log.warning(msg) break msg = f"first-level, array-like objects: {lgdos.keys()}" log.debug(msg) + msg = f"nested structs: {lgdo_structs.keys()}" + log.debug(msg) h5f0.close() + if lgdos == {}: + msg = "did not find any field to concatenate, exit" + log.error(msg) + return + # 2. remove (nested) table fields based on obj_list def _inplace_table_filter(name, table, obj_list): @@ -298,3 +315,14 @@ def _inplace_table_filter(name, table, obj_list): _inplace_table_filter(name, obj, obj_list) store.write(obj, name, args.output, wo_mode="append") + + # 5. reset datatypes of the "group-like" structs + + if lgdo_structs != {}: + output_file = store.gimme_file(args.output, mode="a") + for struct, struct_dtype in lgdo_structs.items(): + msg = f"reset datatype of struct {struct} to {struct_dtype}" + log.debug(msg) + + output_file[struct].attrs["datatype"] = struct_dtype + output_file.close() diff --git a/tests/test_cli.py b/tests/test_cli.py index ac054854..50d6155d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -149,3 +149,21 @@ def test_lh5concat(lgnd_test_data, tmptestdir): assert tbl.packet_id[i] == tbl2.packet_id[i - 10] assert np.array_equal(tbl.tracelist[i], tbl2.tracelist[i - 10]) assert np.array_equal(tbl.waveform.values[i], tbl2.waveform.values[i - 10]) + + # test concatenating arrays in structs. + infile1 = f"{tmptestdir}/concat_test_struct_0.lh5" + tb1 = types.Table(col_dict={"col": types.Array(np.zeros(4))}) + struct1 = types.Struct({"x": tb1}) + store.write(struct1, "stp", infile1, wo_mode="overwrite_file") + + infile2 = f"{tmptestdir}/concat_test_struct_1.lh5" + tb2 = types.Table(col_dict={"col": types.Array(np.ones(7))}) + struct2 = types.Struct({"x": tb2}) + store.write(struct2, "stp", infile2, wo_mode="overwrite_file") + + outfile = f"{tmptestdir}/concat_test_struct_out.lh5" + cli.lh5concat(["--output", outfile, "--", infile1, infile2]) + + out_stp = store.read("stp", outfile)[0] + assert out_stp.attrs["datatype"] == "struct{x}" + assert np.all(out_stp.x["col"].nda == np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]))