Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lh5concat: allow concatenation of group-like structs #121

Merged
merged 1 commit into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions src/lgdo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import logging
import sys

from . import Array, Table, VectorOfVectors, __version__, lh5
from . import Array, Scalar, Struct, Table, VectorOfVectors, __version__, lh5
from . import logging as lgdogging # eheheh

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -212,6 +212,7 @@ def lh5concat(args=None):
store = lh5.LH5Store()
h5f0 = store.gimme_file(file0)
lgdos = {}
lgdo_structs = {}
# loop over object list in the first file
for name in obj_list:
# now loop over groups starting from root
Expand All @@ -222,7 +223,7 @@ def lh5concat(args=None):
if current in lgdos:
break

# not even an LGDO!
# not even an LGDO (i.e. a plain HDF5 group)!
if "datatype" not in h5f0[current].attrs:
continue

Expand All @@ -232,14 +233,30 @@ def lh5concat(args=None):
# read all!
obj, _ = store.read(current, h5f0)
lgdos[current] = obj
elif isinstance(obj, Struct):
# structs might be used in a "group-like" fashion (i.e. they might only
# contain array-like objects).
# note: handle after handling tables, as tables also satisfy this check.
lgdo_structs[current] = obj.attrs["datatype"]
continue
elif isinstance(obj, Scalar):
msg = f"cannot concat scalar field {current}"
log.warning(msg)

break

msg = f"first-level, array-like objects: {lgdos.keys()}"
log.debug(msg)
msg = f"nested structs: {lgdo_structs.keys()}"
log.debug(msg)

h5f0.close()

if lgdos == {}:
msg = "did not find any field to concatenate, exit"
log.error(msg)
return

# 2. remove (nested) table fields based on obj_list

def _inplace_table_filter(name, table, obj_list):
Expand Down Expand Up @@ -298,3 +315,14 @@ def _inplace_table_filter(name, table, obj_list):
_inplace_table_filter(name, obj, obj_list)

store.write(obj, name, args.output, wo_mode="append")

# 5. reset datatypes of the "group-like" structs

if lgdo_structs != {}:
output_file = store.gimme_file(args.output, mode="a")
for struct, struct_dtype in lgdo_structs.items():
msg = f"reset datatype of struct {struct} to {struct_dtype}"
log.debug(msg)

output_file[struct].attrs["datatype"] = struct_dtype
output_file.close()
18 changes: 18 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,21 @@ def test_lh5concat(lgnd_test_data, tmptestdir):
assert tbl.packet_id[i] == tbl2.packet_id[i - 10]
assert np.array_equal(tbl.tracelist[i], tbl2.tracelist[i - 10])
assert np.array_equal(tbl.waveform.values[i], tbl2.waveform.values[i - 10])

# test concatenating arrays in structs.
infile1 = f"{tmptestdir}/concat_test_struct_0.lh5"
tb1 = types.Table(col_dict={"col": types.Array(np.zeros(4))})
struct1 = types.Struct({"x": tb1})
store.write(struct1, "stp", infile1, wo_mode="overwrite_file")

infile2 = f"{tmptestdir}/concat_test_struct_1.lh5"
tb2 = types.Table(col_dict={"col": types.Array(np.ones(7))})
struct2 = types.Struct({"x": tb2})
store.write(struct2, "stp", infile2, wo_mode="overwrite_file")

outfile = f"{tmptestdir}/concat_test_struct_out.lh5"
cli.lh5concat(["--output", outfile, "--", infile1, infile2])

out_stp = store.read("stp", outfile)[0]
assert out_stp.attrs["datatype"] == "struct{x}"
assert np.all(out_stp.x["col"].nda == np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]))