diff --git a/src/nwp_consumer/internal/entities/parameters.py b/src/nwp_consumer/internal/entities/parameters.py index 4bbc6126..a15fc0d2 100644 --- a/src/nwp_consumer/internal/entities/parameters.py +++ b/src/nwp_consumer/internal/entities/parameters.py @@ -85,7 +85,7 @@ def __str__(self) -> str: class Parameter(StrEnum): """Parameters of interest to OCF. - Inheriting from StrEnum and using `auto()` makes the values + Inheriting from StrEnum and using ``auto()`` makes the values of the enums equal to the lowercased enum name. See Also: diff --git a/src/nwp_consumer/internal/ports/repositories.py b/src/nwp_consumer/internal/ports/repositories.py index 6bfe5983..aea603a9 100644 --- a/src/nwp_consumer/internal/ports/repositories.py +++ b/src/nwp_consumer/internal/ports/repositories.py @@ -54,7 +54,7 @@ def fetch_init_data(self, it: dt.datetime) \ As per the typing, the return value is a generator of functions that may produce one or more xarray datasets. - The generator-of-functions approach (typed here as `Iterator[Callable...]`) + The generator-of-functions approach (typed here as ``Iterator[Callable...]``) is important, as it allows for lazy evaluation: by returning a generator of delayed objects, joblib can parallelize the download and the results can be accumulated in a low-memory fashion (see @@ -82,22 +82,23 @@ def fetch_init_data(self, it: dt.datetime) \ ... '''Download and convert a raw file to an xarray dataset.''' ... return Success([xr.open_dataset(file).to_dataarray()]) - .. important:: No downloading or processing should be done in this method*. All of that + .. warning:: No downloading or processing should be done in this method. All of that should be handled in the function that is yielded by the generator - ``_download_and_convert`` in the example above. This is to allow for parallelization of the download and processing. - *It is however, worth considering the most efficient way to download and process the data. 
- The above assumes that the data comes in many files, but there is a possibility of the - case where the source provides one large file with many underlying datasets within. - In this case, it may be more efficient to download the large file in the - `fetch_init_data` method and then process the datasets within via the yielded functions. - - TODO: For the moment, this returns a list of `xarray.DataArray` objects. It may be - TODO: more efficient to return a generator here to avoid reading all the datasets into - TODO: memory at once, however, often the source of these datasets is `cfgrib.open_datasets` - TODO: which has no option for returning a generator, hence the current choice of `list`. - TODO: This may be revisited in the future. + .. note:: It is, however, worth considering the most efficient way to download and process the data. + The above assumes that the data comes in many files, but there is a possibility of the + case where the source provides one large file with many underlying datasets within. + In this case, it may be more efficient to download the large file in the + ``fetch_init_data`` method and then process the datasets within via the yielded functions. + + .. note:: For the moment, this returns a list of ``xarray.DataArray`` objects. It may be + more efficient to return a generator here to avoid reading all the datasets into + memory at once, however, often the source of these datasets is ``cfgrib.open_datasets`` + which has no option for returning a generator, hence the current choice of ``list``. + This may be revisited in the future, for instance by recreating the ``open_datasets`` + function in a manner which returns a generator of datasets. Args: it: The initialization time for which to fetch data. 
diff --git a/src/nwp_consumer/internal/repositories/model_repositories/ecmwf_realtime.py b/src/nwp_consumer/internal/repositories/model_repositories/ecmwf_realtime.py index db8acfba..f726bcc8 100644 --- a/src/nwp_consumer/internal/repositories/model_repositories/ecmwf_realtime.py +++ b/src/nwp_consumer/internal/repositories/model_repositories/ecmwf_realtime.py @@ -14,7 +14,7 @@ - 8 digits representing the target time in the format mmddHHMM - 1 digit representing the file number(?) -So a file named `A2D10250000D10260100` would be for an initialization +So a file named ``A2D10250000D10260100`` would be for an initialization time of 2024-10-25 00:00 and a target time of 2024-10-26 01:00 (step of 25 hours). The file contents is specific to the order agreed with the data provider. @@ -225,7 +225,6 @@ def _convert(path: pathlib.Path) -> ResultE[list[xr.DataArray]]: """Convert a grib file to an xarray DataArray. Args: - model: Metadata of the model producing the data path: The path to the grib file. """ try: diff --git a/src/nwp_consumer/internal/repositories/model_repositories/metoffice_global.py b/src/nwp_consumer/internal/repositories/model_repositories/metoffice_global.py index 137acec0..320d6d58 100644 --- a/src/nwp_consumer/internal/repositories/model_repositories/metoffice_global.py +++ b/src/nwp_consumer/internal/repositories/model_repositories/metoffice_global.py @@ -11,7 +11,7 @@ `this PDF `_. For further details on the repository, see the -`CedaMetOfficeGlobalModelRepository.metadata` implementation. +`CedaMetOfficeGlobalModelRepository.repository` implementation. Data discrepancies and corrections ==================================