From 0a79c74cb1d6bf3e29b999188c0c6e4d9f95078b Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Thu, 15 Aug 2024 13:25:25 +0000 Subject: [PATCH] cleanup doctrings --- neural_lam/datastore/base.py | 14 +--- neural_lam/datastore/mllam.py | 95 +++++++------------------ neural_lam/datastore/multizarr/store.py | 51 +++---------- neural_lam/datastore/npyfiles/store.py | 40 +++-------- 4 files changed, 46 insertions(+), 154 deletions(-) diff --git a/neural_lam/datastore/base.py b/neural_lam/datastore/base.py index 480476fe..b19cbf23 100644 --- a/neural_lam/datastore/base.py +++ b/neural_lam/datastore/base.py @@ -12,17 +12,9 @@ class BaseDatastore(abc.ABC): - """Base class for weather - data used in the neural- - lam package. A datastore - defines the interface for - accessing weather data by - providing methods to - access the data in a - processed format that can - be used for training and - evaluation of neural - networks. + """Base class for weather data used in the neural- lam package. A datastore defines + the interface for accessing weather data by providing methods to access the data in + a processed format that can be used for training and evaluation of neural networks. NOTE: All methods return either primitive types, `numpy.ndarray`, `xarray.DataArray` or `xarray.Dataset` objects, not `pytorch.Tensor` diff --git a/neural_lam/datastore/mllam.py b/neural_lam/datastore/mllam.py index d22f041a..5e44837a 100644 --- a/neural_lam/datastore/mllam.py +++ b/neural_lam/datastore/mllam.py @@ -19,24 +19,11 @@ class MLLAMDatastore(BaseCartesianDatastore): """Datastore class for the MLLAM dataset.""" def __init__(self, config_path, n_boundary_points=30, reuse_existing=True): - """Construct a new - MLLAMDatastore from - the configuration file - at `config_path`. A - boundary mask is - created with - `n_boundary_points` - boundary points. If - `reuse_existing` is - True, the dataset is - loaded from a zarr - file if it exists - (unless the config has - been modified since - the zarr was created), - otherwise it is - created from the - configuration file. + """Construct a new MLLAMDatastore from the configuration file at `config_path`. + A boundary mask is created with `n_boundary_points` boundary points. If + `reuse_existing` is True, the dataset is loaded from a zarr file if it exists + (unless the config has been modified since the zarr was created), otherwise it + is created from the configuration file. Parameters ---------- @@ -74,6 +61,11 @@ def __init__(self, config_path, n_boundary_points=30, reuse_existing=True): self._ds.to_zarr(fp_ds) self._n_boundary_points = n_boundary_points + print("Training with the following features:") + for category in ["state", "forcing", "static"]: + if len(self.get_vars_names(category)) > 0: + print(f"{category}: {' '.join(self.get_vars_names(category))}") + @property def root_path(self) -> Path: """The root path of the dataset. @@ -166,24 +158,11 @@ def get_num_data_vars(self, category: str) -> int: return len(self.get_vars_names(category)) def get_dataarray(self, category: str, split: str) -> xr.DataArray: - """Return the - processed data (as a - single `xr.DataArray`) - for the given category - of data and - test/train/val-split - that covers all the - data (in space and - time) of a given - category (state/forcin - g/static). "state" is - the only required - category, for other - categories, the method - will return `None` if - the category is not - found in the - datastore. + """Return the processed data (as a single `xr.DataArray`) for the given category + of data and test/train/val-split that covers all the data (in space and time) of + a given category (state/forcin g/static). "state" is the only required category, + for other categories, the method will return `None` if the category is not found + in the datastore. The returned dataarray will at minimum have dimensions of `(grid_index, {category}_feature)` so that any spatial dimensions have been stacked @@ -236,23 +215,10 @@ def get_dataarray(self, category: str, split: str) -> xr.DataArray: return da_category.sel(time=slice(t_start, t_end)) def get_normalization_dataarray(self, category: str) -> xr.Dataset: - """Return the - normalization - dataarray for the - given category. This - should contain a - `{category}_mean` and - `{category}_std` - variable for each - variable in the - category. For - `category=="state"`, - the dataarray should - also contain a - `state_diff_mean` and - `state_diff_std` - variable for the one- - step differences of + """Return the normalization dataarray for the given category. This should + contain a `{category}_mean` and `{category}_std` variable for each variable in + the category. For `category=="state"`, the dataarray should also contain a + `state_diff_mean` and `state_diff_std` variable for the one- step differences of the state variables. Parameters @@ -283,24 +249,11 @@ def get_normalization_dataarray(self, category: str) -> xr.Dataset: @property def boundary_mask(self) -> xr.DataArray: - """Produce a 0/1 mask - for the boundary - points of the dataset, - these will sit at the - edges of the domain - (in x/y extent) and - will be used to mask - out the boundary - points from the loss - function and to - overwrite the boundary - points from the - prediction. For now - this is created when - the mask is requested, - but in the future this - could be saved to the - zarr file. + """Produce a 0/1 mask for the boundary points of the dataset, these will sit at + the edges of the domain (in x/y extent) and will be used to mask out the + boundary points from the loss function and to overwrite the boundary points from + the prediction. For now this is created when the mask is requested, but in the + future this could be saved to the zarr file. Returns ------- diff --git a/neural_lam/datastore/multizarr/store.py b/neural_lam/datastore/multizarr/store.py index 23b33fe2..ebcc65e8 100644 --- a/neural_lam/datastore/multizarr/store.py +++ b/neural_lam/datastore/multizarr/store.py @@ -18,19 +18,10 @@ class MultiZarrDatastore(BaseCartesianDatastore): DIMS_TO_KEEP = {"time", "grid_index", "variable_name"} def __init__(self, config_path): - """Create a multi-zarr - datastore from the - given configuration - file. The - configuration file - should be a YAML file, - the format of which is - should be inferred - from the example - configuration file in - `tests/datastore_examp - les/multizarr/data_con - fig.yml`. + """Create a multi-zarr datastore from the given configuration file. The + configuration file should be a YAML file, the format of which is should be + inferred from the example configuration file in `tests/datastore_examp + les/multizarr/data_con fig.yml`. Parameters ---------- @@ -390,33 +381,13 @@ def get_xy(self, category, stacked=True): @functools.lru_cache() def get_normalization_dataarray(self, category: str) -> xr.Dataset: - """Return the - normalization - dataarray for the - given category. This - should contain a - `{category}_mean` and - `{category}_std` - variable for each - variable in the - category. For - `category=="state"`, - the dataarray should - also contain a - `state_diff_mean` and - `state_diff_std` - variable for the one- - step differences of - the state variables. - The return dataarray - should at least have - dimensions of `({categ - ory}_feature)`, but - can also include for - example `grid_index` - (if the normalisation - is done per grid point - for example). + """Return the normalization dataarray for the given category. This should + contain a `{category}_mean` and `{category}_std` variable for each variable in + the category. For `category=="state"`, the dataarray should also contain a + `state_diff_mean` and `state_diff_std` variable for the one- step differences of + the state variables. The return dataarray should at least have dimensions of + `({categ ory}_feature)`, but can also include for example `grid_index` (if the + normalisation is done per grid point for example). Parameters ---------- diff --git a/neural_lam/datastore/npyfiles/store.py b/neural_lam/datastore/npyfiles/store.py index cff20043..ff43a626 100644 --- a/neural_lam/datastore/npyfiles/store.py +++ b/neural_lam/datastore/npyfiles/store.py @@ -281,21 +281,10 @@ def get_dataarray(self, category: str, split: str) -> DataArray: def _get_single_timeseries_dataarray( self, features: List[str], split: str, member: int = None ) -> DataArray: - """Get the data array - spanning the complete - time series for a - given set of features - and split of data. For - state features the - `member` argument - should be specified to - select the ensemble - member to load. The - data will be loaded - using dask.delayed, so - that the data isn't - actually loaded until - it's needed. + """Get the data array spanning the complete time series for a given set of + features and split of data. For state features the `member` argument should be + specified to select the ensemble member to load. The data will be loaded using + dask.delayed, so that the data isn't actually loaded until it's needed. Parameters ---------- @@ -614,23 +603,10 @@ def boundary_mask(self) -> xr.DataArray: return da_mask_stacked_xy def get_normalization_dataarray(self, category: str) -> xr.Dataset: - """Return the - normalization - dataarray for the - given category. This - should contain a - `{category}_mean` and - `{category}_std` - variable for each - variable in the - category. For - `category=="state"`, - the dataarray should - also contain a - `state_diff_mean` and - `state_diff_std` - variable for the one- - step differences of + """Return the normalization dataarray for the given category. This should + contain a `{category}_mean` and `{category}_std` variable for each variable in + the category. For `category=="state"`, the dataarray should also contain a + `state_diff_mean` and `state_diff_std` variable for the one- step differences of the state variables. Parameters