Skip to content

Commit

Permalink
Merge pull request #24 from MoritzNeuberger/issue_3_lh5_store_refactor
Browse files Browse the repository at this point in the history
Refactor of LH5 I/O routines, deprecation of existing methods
  • Loading branch information
gipert authored Nov 24, 2023
2 parents 56298ed + ff8d627 commit 772fe17
Show file tree
Hide file tree
Showing 25 changed files with 2,586 additions and 2,304 deletions.
2 changes: 1 addition & 1 deletion docs/source/extensions/numbadoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def import_object(self) -> bool:
"""
success = super().import_object()
if success:
# Store away numba wrapper
# store away numba wrapper
self.jitobj = self.object
# And bend references to underlying python function
if hasattr(self.object, "py_func"):
Expand Down
44 changes: 21 additions & 23 deletions docs/source/notebooks/DataCompression.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@
"metadata": {},
"outputs": [],
"source": [
"store = lgdo.LH5Store()\n",
"store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
"store = lgdo.lh5.LH5Store()\n",
"store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
"lgdo.show(\"data.lh5\")"
]
},
Expand Down Expand Up @@ -110,7 +110,7 @@
"metadata": {},
"outputs": [],
"source": [
"lgdo.lh5_store.DEFAULT_HDF5_SETTINGS"
"lgdo.lh5.store.DEFAULT_HDF5_SETTINGS"
]
},
{
Expand All @@ -131,18 +131,18 @@
"outputs": [],
"source": [
"# use another built-in filter\n",
"lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"lzf\"}\n",
"lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"lzf\"}\n",
"\n",
"# specify filter name and options\n",
"lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"gzip\", \"compression_opts\": 7}\n",
"lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"gzip\", \"compression_opts\": 7}\n",
"\n",
"# specify a registered filter provided by hdf5plugin\n",
"import hdf5plugin\n",
"\n",
"lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"compression\": hdf5plugin.Blosc()}\n",
"lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"compression\": hdf5plugin.Blosc()}\n",
"\n",
"# shuffle bytes before compressing (typically better compression ratio with no performance penalty)\n",
"lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"shuffle\": True, \"compression\": \"lzf\"}"
"lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"shuffle\": True, \"compression\": \"lzf\"}"
]
},
{
Expand All @@ -166,7 +166,7 @@
"metadata": {},
"outputs": [],
"source": [
"store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
"store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
"show_h5ds_opts(\"data/col1\")"
]
},
Expand All @@ -175,7 +175,7 @@
"id": "f597a9e2",
"metadata": {},
"source": [
"Nice. Shuffling bytes before compressing significantly reduced size on disk. Last but not least, `create_dataset()` keyword arguments can be passed to `write_object()`. They will be forwarded as is, overriding default settings."
"Nice. Shuffling bytes before compressing significantly reduced size on disk. Last but not least, `create_dataset()` keyword arguments can be passed to `write()`. They will be forwarded as is, overriding default settings."
]
},
{
Expand All @@ -185,9 +185,7 @@
"metadata": {},
"outputs": [],
"source": [
"store.write_object(\n",
" data, \"data\", \"data.lh5\", wo_mode=\"of\", shuffle=True, compression=\"gzip\"\n",
")\n",
"store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\", shuffle=True, compression=\"gzip\")\n",
"show_h5ds_opts(\"data/col1\")"
]
},
Expand All @@ -207,7 +205,7 @@
"outputs": [],
"source": [
"data[\"col2\"].attrs[\"hdf5_settings\"] = {\"compression\": \"gzip\"}\n",
"store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
"store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
"\n",
"show_h5ds_opts(\"data/col1\")\n",
"show_h5ds_opts(\"data/col2\")"
Expand All @@ -221,7 +219,7 @@
"We are now storing table columns with different compression settings.\n",
"\n",
"<div class=\"alert alert-info\">\n",
"**Note:** since any [h5py.Group.create_dataset()](https://docs.h5py.org/en/stable/high/group.html#h5py.Group.create_dataset) keyword argument can be used in `write_object()` or set in the `hdf5_settings` attribute, other HDF5 dataset settings can be configured, like the chunk size.\n",
"**Note:** since any [h5py.Group.create_dataset()](https://docs.h5py.org/en/stable/high/group.html#h5py.Group.create_dataset) keyword argument can be used in `write()` or set in the `hdf5_settings` attribute, other HDF5 dataset settings can be configured, like the chunk size.\n",
"</div>"
]
},
Expand All @@ -232,7 +230,7 @@
"metadata": {},
"outputs": [],
"source": [
"store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\", chunks=2)"
"store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\", chunks=2)"
]
},
{
Expand All @@ -257,7 +255,7 @@
"from legendtestdata import LegendTestData\n",
"\n",
"ldata = LegendTestData()\n",
"wfs, n_rows = store.read_object(\n",
"wfs, n_rows = store.read(\n",
" \"geds/raw/waveform\",\n",
" ldata.get_path(\"lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5\"),\n",
")\n",
Expand Down Expand Up @@ -347,7 +345,7 @@
" t0=wfs.t0,\n",
" dt=wfs.dt,\n",
")\n",
"store.write_object(enc_wfs, \"waveforms\", \"data.lh5\", wo_mode=\"o\")\n",
"store.write(enc_wfs, \"waveforms\", \"data.lh5\", wo_mode=\"o\")\n",
"lgdo.show(\"data.lh5\", attrs=True)"
]
},
Expand All @@ -372,7 +370,7 @@
"metadata": {},
"outputs": [],
"source": [
"obj, _ = store.read_object(\"waveforms\", \"data.lh5\")\n",
"obj, _ = store.read(\"waveforms\", \"data.lh5\")\n",
"obj.values"
]
},
Expand All @@ -391,7 +389,7 @@
"metadata": {},
"outputs": [],
"source": [
"obj, _ = store.read_object(\"waveforms\", \"data.lh5\", decompress=False)\n",
"obj, _ = store.read(\"waveforms\", \"data.lh5\", decompress=False)\n",
"obj.values"
]
},
Expand Down Expand Up @@ -433,9 +431,9 @@
"from lgdo.compression import ULEB128ZigZagDiff\n",
"\n",
"wfs.values.attrs[\"compression\"] = ULEB128ZigZagDiff()\n",
"store.write_object(wfs, \"waveforms\", \"data.lh5\", wo_mode=\"of\")\n",
"store.write(wfs, \"waveforms\", \"data.lh5\", wo_mode=\"of\")\n",
"\n",
"obj, _ = store.read_object(\"waveforms\", \"data.lh5\", decompress=False)\n",
"obj, _ = store.read(\"waveforms\", \"data.lh5\", decompress=False)\n",
"obj.values.attrs[\"codec\"]"
]
},
Expand All @@ -447,8 +445,8 @@
"Further reading:\n",
"\n",
"- [Available waveform compression algorithms](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.compression.html)\n",
"- [read_object() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.read_object)\n",
"- [write_object() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.write_object)"
"- [read() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.store.LH5Store.read)\n",
"- [write() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.store.LH5Store.write)"
]
}
],
Expand Down
22 changes: 9 additions & 13 deletions docs/source/notebooks/LH5Files.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"id": "c136b537",
"metadata": {},
"source": [
"We can use `lgdo.lh5_store.ls()` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.ls) to inspect the file contents:"
"We can use `lgdo.lh5.ls()` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.ls) to inspect the file contents:"
]
},
{
Expand Down Expand Up @@ -131,7 +131,7 @@
"metadata": {},
"outputs": [],
"source": [
"store.read_object(\"geds/raw\", lh5_file)"
"store.read(\"geds/raw\", lh5_file)"
]
},
{
Expand All @@ -149,7 +149,7 @@
"metadata": {},
"outputs": [],
"source": [
"obj, n_rows = store.read_object(\"geds/raw/timestamp\", lh5_file)\n",
"obj, n_rows = store.read(\"geds/raw/timestamp\", lh5_file)\n",
"obj"
]
},
Expand All @@ -170,7 +170,7 @@
"metadata": {},
"outputs": [],
"source": [
"obj, n_rows = store.read_object(\"geds/raw/timestamp\", lh5_file, start_row=15, n_rows=10)\n",
"obj, n_rows = store.read(\"geds/raw/timestamp\", lh5_file, start_row=15, n_rows=10)\n",
"print(obj)"
]
},
Expand All @@ -189,7 +189,7 @@
"metadata": {},
"outputs": [],
"source": [
"obj, n_rows = store.read_object(\n",
"obj, n_rows = store.read(\n",
" \"geds/raw\", lh5_file, field_mask=(\"timestamp\", \"energy\"), idx=[1, 3, 7, 9, 10, 15]\n",
")\n",
"print(obj)"
Expand All @@ -200,7 +200,7 @@
"id": "b3f52d77",
"metadata": {},
"source": [
"As you might have noticed, `read()` loads all the requested data in memory at once. This can be a problem when dealing with large datasets. `LH5Iterator` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.iterator.LH5Iterator) makes it possible to handle data one chunk at a time (sequentially) to avoid running out of memory:"
"As you might have noticed, `read_object()` loads all the requested data in memory at once. This can be a problem when dealing with large datasets. `LH5Iterator` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.iterator.LH5Iterator) makes it possible to handle data one chunk at a time (sequentially) to avoid running out of memory:"
]
},
{
Expand Down Expand Up @@ -260,9 +260,7 @@
"source": [
"store = LH5Store()\n",
"\n",
"store.write_object(\n",
" scalar, name=\"message\", lh5_file=\"my_objects.lh5\", wo_mode=\"overwrite_file\"\n",
")"
"store.write(scalar, name=\"message\", lh5_file=\"my_objects.lh5\", wo_mode=\"overwrite_file\")"
]
},
{
Expand Down Expand Up @@ -300,10 +298,8 @@
"metadata": {},
"outputs": [],
"source": [
"store.write_object(array, name=\"numbers\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n",
"store.write_object(\n",
" wf_table, name=\"waveforms\", group=\"closet\", lh5_file=\"my_objects.lh5\"\n",
")\n",
"store.write(array, name=\"numbers\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n",
"store.write(wf_table, name=\"waveforms\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n",
"show(\"my_objects.lh5\")"
]
},
Expand Down
4 changes: 2 additions & 2 deletions src/lgdo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,11 @@
"VectorOfVectors",
"VectorOfEncodedVectors",
"WaveformTable",
"LH5Iterator",
"LH5Store",
"load_dfs",
"load_nda",
"ls",
"show",
"LH5Iterator",
"LH5Store",
"__version__",
]
2 changes: 1 addition & 1 deletion src/lgdo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


def lh5ls():
""":func:`.show` command line interface."""
""":func:`.lh5.show` command line interface."""
parser = argparse.ArgumentParser(
prog="lh5ls", description="Inspect LEGEND HDF5 (LH5) file contents"
)
Expand Down
Loading

0 comments on commit 772fe17

Please sign in to comment.