diff --git a/docs/source/conf.py b/docs/source/conf.py index b9980d88..ab8317e2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -51,6 +51,7 @@ intersphinx_mapping = { "python": ("https://docs.python.org/3", None), "numpy": ("https://numpy.org/doc/stable", None), + "awkward": ("https://awkward-array.org/doc/stable", None), "numba": ("https://numba.readthedocs.io/en/stable", None), "pandas": ("https://pandas.pydata.org/docs", None), "h5py": ("https://docs.h5py.org/en/stable", None), diff --git a/setup.cfg b/setup.cfg index ba37b8e9..db6a843d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ classifiers = [options] packages = find: install_requires = - awkward + awkward>=2 colorlog h5py>=3.2 hdf5plugin diff --git a/src/lgdo/types/array.py b/src/lgdo/types/array.py index cae36076..39c1f9db 100644 --- a/src/lgdo/types/array.py +++ b/src/lgdo/types/array.py @@ -141,15 +141,9 @@ def __repr__(self) -> str: + f", attrs={repr(self.attrs)})" ) - def convert( - self, fmt: str = "pandas.DataFrame", with_units: bool = True + def view_as( + self, fmt: str, with_units: bool = True ) -> pd.DataFrame | np.NDArray | ak.Array: - """Convert the data of the Array object to a third-party format. - Supported options are: - "pandas.DataFrame" - "numpy.ndarray" - "awkward.Array" - """ if fmt == "pandas.DataFrame": return pd.DataFrame(self.nda) elif fmt == "numpy.ndarray": diff --git a/src/lgdo/types/arrayofequalsizedarrays.py b/src/lgdo/types/arrayofequalsizedarrays.py index 9257cf6b..155c6b51 100644 --- a/src/lgdo/types/arrayofequalsizedarrays.py +++ b/src/lgdo/types/arrayofequalsizedarrays.py @@ -134,20 +134,7 @@ def to_vov(self, cumulative_length: np.ndarray = None) -> vov.VectorOfVectors: attrs=attrs, ) - def convert( - self, fmt: str = "pandas.DataFrame", with_units: bool = True + def view_as( + self, fmt: str, with_units: bool = True ) -> pd.DataFrame | np.NDArray | ak.Array: - """Convert the data of the ArrayOfEqualSizedArrays object to a third-party format. 
- Supported options are: - "pandas.DataFrame" - "numpy.ndarray" - "awkward.Array" - """ - if fmt == "pandas.DataFrame": - return pd.DataFrame(self.nda) - elif fmt == "numpy.ndarray": - return self.nda - elif fmt == "awkward.Array": - return ak.Array(self.nda) - else: - raise TypeError(f"{fmt} is not a supported third-party format.") + return super().view_as(fmt, with_units) diff --git a/src/lgdo/types/encoded.py b/src/lgdo/types/encoded.py index 6a2d05e0..1ad3e35d 100644 --- a/src/lgdo/types/encoded.py +++ b/src/lgdo/types/encoded.py @@ -227,11 +227,11 @@ def __repr__(self) -> str: np.set_printoptions(**npopt) return out - def convert( - self, fmt: str = "pandas.DataFrame", with_units: bool = True + def view_as( + self, fmt: str, with_units: bool = True ) -> pd.DataFrame | np.NDArray | ak.Array: raise NotImplementedError( - "'convert' not yet implemented for VectorOfEncodedVectors." + "'view_as' not yet implemented for VectorOfEncodedVectors." ) @@ -398,9 +398,9 @@ def __repr__(self) -> str: np.set_printoptions(**npopt) return out - def convert( - self, fmt: str = "pandas.DataFrame", with_units: bool = True + def view_as( + self, fmt: str, with_units: bool = True ) -> pd.DataFrame | np.NDArray | ak.Array: raise NotImplementedError( - "'convert' not yet implemented for ArrayOfEncodedEqualSizedArrays." + "'view_as' not yet implemented for ArrayOfEncodedEqualSizedArrays." ) diff --git a/src/lgdo/types/fixedsizearray.py b/src/lgdo/types/fixedsizearray.py index 87016729..30de9790 100644 --- a/src/lgdo/types/fixedsizearray.py +++ b/src/lgdo/types/fixedsizearray.py @@ -6,9 +6,7 @@ from typing import Any -import awkward as ak import numpy as np -import pandas as pd from .array import Array @@ -44,20 +42,5 @@ def __init__( def datatype_name(self) -> str: return "fixedsize_array" - def convert( - self, fmt: str = "pandas.DataFrame", with_units: bool = True - ) -> pd.DataFrame | np.NDArray | ak.Array: - """Convert the data of the FixedSizeArray object to a third-party format. 
- Supported options are: - "pandas.DataFrame" - "numpy.ndarray" - "awkward.Array" - """ - if fmt == "pandas.DataFrame": - return pd.DataFrame(self.nda) - elif fmt == "numpy.ndarray": - return self.nda - elif fmt == "awkward.Array": - return ak.Array(self.nda) - else: - raise TypeError(f"{fmt} is not a supported third-party format.") + def view_as(self, fmt: str, with_units: bool = True): + return super().view_as(fmt, with_units) diff --git a/src/lgdo/types/lgdo.py b/src/lgdo/types/lgdo.py index a8b10238..5227b5ec 100644 --- a/src/lgdo/types/lgdo.py +++ b/src/lgdo/types/lgdo.py @@ -35,10 +35,28 @@ def form_datatype(self) -> str: pass @abstractmethod - def convert( - self, fmt: str = "pandas.DataFrame", with_units: bool = True + def view_as( + self, library: str, with_units: bool = True ) -> pd.DataFrame | np.NDArray | ak.Array: - """Convert the data of the LGDO object to a third-party format.""" + """View the LGDO data object as a third-party format data structure. + + This is typically a zero-copy or nearly zero-copy operation unless + explicitly stated in the concrete LGDO documentation. + + Typical supported third-party formats are: + + - ``pd``: :mod:`pandas` + - ``np``: :mod:`numpy` + - ``ak``: :mod:`awkward` + + But the actual supported formats may vary depending on the concrete + LGDO class. + + Parameters + ---------- + library + format of the returned data view. + """ pass def getattrs(self, datatype: bool = False) -> dict: diff --git a/src/lgdo/types/scalar.py b/src/lgdo/types/scalar.py index 9db1da76..eb7aebff 100644 --- a/src/lgdo/types/scalar.py +++ b/src/lgdo/types/scalar.py @@ -5,9 +5,7 @@ import logging from typing import Any -import awkward as ak import numpy as np -import pandas as pd from ..
import utils as utils from .lgdo import LGDO @@ -44,6 +42,9 @@ def datatype_name(self) -> str: def form_datatype(self) -> str: return self.datatype_name() + def view_as(self, fmt: str, with_units: bool = True): + return self.value + def __eq__(self, other: Scalar) -> bool: if isinstance(other, Scalar): - return self.value == other.value and self.attrs == self.attrs + return self.value == other.value and self.attrs == other.attrs @@ -59,22 +60,3 @@ def __repr__(self) -> str: self.__class__.__name__ + f"(value={repr(self.value)}, attrs={repr(self.attrs)})" ) - - def convert( - self, fmt: str = "pandas.DataFrame", with_units: bool = True - ) -> pd.DataFrame | np.NDArray | ak.Array: - """Convert the data of the Scalar object to a third-party format. - Supported options are: - "pandas.DataFrame" - "numpy.ndarray" - "awkward.Array" - Not sure why you would need it though ... - """ - if fmt == "pandas.DataFrame": - return pd.DataFrame([self.value]) - elif fmt == "numpy.ndarray": - return np.array([self.value]) - elif fmt == "awkward.Array": - return ak.Array([self.value]) - else: - raise TypeError(f"{fmt} is not a supported third-party format.") diff --git a/src/lgdo/types/struct.py b/src/lgdo/types/struct.py index 070c97d0..e73d126a 100644 --- a/src/lgdo/types/struct.py +++ b/src/lgdo/types/struct.py @@ -109,18 +109,19 @@ def __repr__(self) -> str: np.set_printoptions(**npopt) return " ".join(out.replace("\n", " ").split()) - def convert( - self, fmt: str = "pandas.DataFrame", with_units: bool = True + def view_as( + self, fmt: str, with_units: bool = True ) -> pd.DataFrame | np.NDArray | ak.Array: """Convert the data of the Struct object to a third-party format. - Supported options are: - "pandas.DataFrame" - "numpy.ndarray" - "awkward.Array" - - Note: - - conversion to ndarray only works when the values are of the equal length, returns a dict containing "keys" and "values" keys for the corresponding NDArray - - conversion to awkward array only works when the key is a string and values are of equal length + Supported options are ...
+ + Note + ---- + - conversion to ndarray only works when the values are of equal + length, returns a dict containing "keys" and "values" keys for + the corresponding NDArray + - conversion to awkward array only works when the key is a string + and values are of equal length """ if fmt == "pandas.DataFrame": return pd.DataFrame(self) diff --git a/src/lgdo/types/table.py b/src/lgdo/types/table.py index 32b8f07c..c4001cd1 100644 --- a/src/lgdo/types/table.py +++ b/src/lgdo/types/table.py @@ -352,19 +352,19 @@ def __str__(self): return string - def convert( - self, fmt: str = "pandas.DataFrame", with_units: bool = True + def view_as( + self, fmt: str, with_units: bool = True ) -> pd.DataFrame | np.NDArray | ak.Array: """Convert the data of the Table object to a third-party format. - Supported options are: - "pandas.DataFrame" - "numpy.ndarray" - "awkward.Array" - - Note: - - conversion to ndarray only works when the values are of the equal length, returns a dict containing "keys" and "values" keys for the corresponding NDArray - - conversion to awkward array only works when the key is a string and values are of equal length - + Supported options are ...
+ + Note + ---- + - conversion to ndarray only works when the values are of equal + length, returns a dict containing "keys" and "values" keys for + the corresponding NDArray + - conversion to awkward array only works when the key is a string + and values are of equal length """ if fmt == "pandas.DataFrame": return pd.DataFrame(self) diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py index da4c372a..a0cf4658 100644 --- a/src/lgdo/types/vectorofvectors.py +++ b/src/lgdo/types/vectorofvectors.py @@ -421,19 +421,16 @@ def to_aoesa(self, preserve_dtype: bool = False) -> aoesa.ArrayOfEqualSizedArray return aoesa.ArrayOfEqualSizedArrays(nda=nda, attrs=self.getattrs()) - def convert( - self, fmt: str = "pandas.DataFrame", with_units: bool = True + def view_as( + self, fmt: str, with_units: bool = True ) -> pd.DataFrame | np.NDArray | ak.Array: - """Convert the data of the Table object to a third-party format. - Supported options are: - "pandas.DataFrame" - "numpy.ndarray" - "awkward.Array" + """Convert the data of the VectorOfVectors object to a third-party format. + Supported options are ...
""" if fmt == "pandas.DataFrame": - return self.to_aoesa().convert("pandas.DataFrame") + return self.to_aoesa().view_as("pandas.DataFrame") elif fmt == "numpy.ndarray": - return self.to_aoesa().convert("numpy.ndarray") + return self.to_aoesa().view_as("numpy.ndarray") elif fmt == "awkward.Array": lengths_of_individual_vectors = np.diff(self.cumulative_length, prepend=[0]) return ak.unflatten(self.flattened_data, lengths_of_individual_vectors) diff --git a/src/lgdo/types/waveform_table.py b/src/lgdo/types/waveform_table.py index 131e25d9..9be7db3a 100644 --- a/src/lgdo/types/waveform_table.py +++ b/src/lgdo/types/waveform_table.py @@ -265,7 +265,7 @@ def __str__(self): np.set_printoptions(**npopt) return string - def convert( - self, fmt: str = "pandas.DataFrame" + def view_as( + self, fmt: str, with_units: bool = True ) -> pd.DataFrame | np.NDArray | ak.Array: - raise NotImplementedError("'convert' not yet implemented for WaveformTable.") + raise NotImplementedError("'view_as' not yet implemented for WaveformTable.")