diff --git a/src/lgdo/utils.py b/src/lgdo/utils.py index 782a3d3..4d9a061 100644 --- a/src/lgdo/utils.py +++ b/src/lgdo/utils.py @@ -49,6 +49,14 @@ def get_element_type(obj: object) -> str: return "complex" if kind in ["S", "U"]: return "string" + if ( + kind == "O" + and dt.metadata is not None + and dt.metadata.get("vlen", None) in (str, bytes) + ): + # variable length strings in HDF5 are read as numpy object arrays in h5py. + # see also h5py.check_vlen_dtype. + return "string" # couldn't figure it out msg = "cannot determine lgdo element_type for object of type" diff --git a/tests/test_lgdo_utils.py b/tests/test_lgdo_utils.py index 70a71e6..72d79a7 100644 --- a/tests/test_lgdo_utils.py +++ b/tests/test_lgdo_utils.py @@ -1,11 +1,15 @@ from __future__ import annotations +import h5py import numpy as np from lgdo import utils def test_get_element_type(): + # variable length HDF5 string datatype. + h5py_str_dtype = h5py.string_dtype(encoding="ascii", length=None) + objs = [ ("hi", "string"), (True, "bool"), @@ -16,6 +20,7 @@ def test_get_element_type(): (1 + 1j, "complex"), (b"hi", "string"), (np.array(["hi"]), "string"), + (np.array([b"hi"], h5py_str_dtype), "string"), ] for obj, name in objs: