From b4fa84e1cd1e593188ceaafae50fe1f532364c52 Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Mon, 7 Aug 2023 15:07:14 +0200 Subject: [PATCH 01/16] Implement config, recipe loader & recipe runner. --- pyproject.toml | 2 + src/zampy/datasets/__init__.py | 8 +++ src/zampy/recipe.py | 126 +++++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+) create mode 100644 src/zampy/recipe.py diff --git a/pyproject.toml b/pyproject.toml index ad83aae..9de7f9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ classifiers = [ ] dependencies = [ "requests", + "pyyaml", "netcdf4", "numpy", "pandas", @@ -75,6 +76,7 @@ dev = [ "mypy", "types-requests", # type stubs for request lib "types-urllib3", # type stubs for url lib + "types-PyYAML", "pytest", "pytest-cov", "pre-commit", diff --git a/src/zampy/datasets/__init__.py b/src/zampy/datasets/__init__.py index e1b1724..ccf538e 100644 --- a/src/zampy/datasets/__init__.py +++ b/src/zampy/datasets/__init__.py @@ -6,3 +6,11 @@ __all__ = ["dataset_protocol", "validation", "EthCanopyHeight", "ERA5"] + + +# This object tracks which datasets are available. +DATASETS: dict[str, type[dataset_protocol.Dataset]] = { + # All lowercase key. 
+ "era5": ERA5, + "eth_canopy_height": EthCanopyHeight, +} diff --git a/src/zampy/recipe.py b/src/zampy/recipe.py new file mode 100644 index 0000000..5b4a05f --- /dev/null +++ b/src/zampy/recipe.py @@ -0,0 +1,126 @@ +""""All functionality to read and execute Zampy recipes.""" +from pathlib import Path +from typing import Any +import numpy as np +import yaml +from zampy.datasets import DATASETS +from zampy.datasets import converter +from zampy.datasets.dataset_protocol import Dataset +from zampy.datasets.dataset_protocol import SpatialBounds +from zampy.datasets.dataset_protocol import TimeBounds + + +def recipe_loader(recipe_filename: str) -> dict: + """Load the yaml recipe into a dictionary, and do some validation.""" + with open(recipe_filename) as f: + recipe: dict = yaml.safe_load(f) + + if not all(("name", "download", "convert" in recipe.keys())): + msg = ( + "One of the following items are missing from the recipe:\n" + "name, download, convert." + ) + raise ValueError(msg) + + if "datasets" not in recipe["download"].keys(): + msg = "No dataset entry found in the recipe." + raise ValueError(msg) + + if not all(("convention", "frequency", "resolution" in recipe["convert"].keys())): + msg = ( + "One of the following items are missing from the recipe:\n" + "name, download, convert." + ) + raise ValueError(msg) + + return recipe + + +def config_loader() -> dict: + """Load the zampty config and validate the contents.""" + config_path = Path.home() / ".config" / "zampy" / "zampy_config.yml" + + if not config_path.exists(): + msg = f"No config file was found at '{config_path}'" + raise FileNotFoundError(msg) + + with config_path.open() as f: + config: dict = yaml.safe_load(f) + + if "working_directory" not in config.keys(): + msg = "No `working_directory` key found in the config file." 
+ raise ValueError(msg) + + return config + + +class RecipeManager: + """The recipe manager is used to get the required info, and then run the recipe.""" + + def __init__(self, recipe_filename: str) -> None: + """Instantiate the recipe manager, using a prepared recipe.""" + # Load & parse recipe + recipe = recipe_loader(recipe_filename) + + start_year, end_year = recipe["download"]["years"] + self.timebounds = TimeBounds( + np.datetime64(f"{start_year}"), np.datetime64(f"{end_year}") + ) + self.spatialbounds = SpatialBounds(*recipe["download"]["bbox"]) + + self.datasets: dict[str, Any] = recipe["download"]["datasets"] + + self.convention = recipe["convert"]["convention"] + self.frequency = recipe["convert"]["frequency"] + self.resolution = recipe["convert"]["resolution"] + + # Load & parse config + config = config_loader() + self.download_dir = Path(config["working_directory"]) / "download" + self.ingest_dir = Path(config["working_directory"]) / "ingest" + self.data_dir = ( + Path(config["working_directory"]) / "output" / str(recipe["name"]) + ) # TODO: strip illegal chars from name. 
+ + # Create required directories if they do not exist yet: + for dir in [self.data_dir, self.download_dir, self.ingest_dir]: + dir.mkdir(parents=True, exist_ok=True) + + def run(self) -> None: + """Run the full recipe.""" + for dataset_name in self.datasets: + _dataset = DATASETS[dataset_name.lower()] + dataset: Dataset = _dataset() + variables: list[str] = self.datasets[dataset_name]["variables"] + + # Download datset + dataset.download( + download_dir=self.download_dir, + time_bounds=self.timebounds, + spatial_bounds=self.spatialbounds, + variable_names=variables, + ) + + dataset.ingest(self.download_dir, self.ingest_dir) + + ds = dataset.load( + ingest_dir=self.ingest_dir, + time_bounds=self.timebounds, + spatial_bounds=self.spatialbounds, + variable_names=variables, + resolution=self.resolution, + regrid_method="flox", + ) + + ds = converter.convert(ds, dataset, convention=self.convention) + + ds = ds.resample(time=self.frequency).mean() + + comp = dict(zlib=True, complevel=5) + encoding = {var: comp for var in ds.data_vars} + fname = ( # e.g. "era5_2010-2020.nc" + f"{dataset_name.lower()}_" + f"{self.timebounds.start}-{self.timebounds.end}" + ".nc" + ) + ds.to_netcdf(path=self.data_dir / fname, encoding=encoding) From febaad671055b19c79d85acf939c23f8bf1ca1d1 Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Mon, 7 Aug 2023 15:23:53 +0200 Subject: [PATCH 02/16] Add recipe running instructions --- docs/using_zampy.md | 54 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 docs/using_zampy.md diff --git a/docs/using_zampy.md b/docs/using_zampy.md new file mode 100644 index 0000000..7a534fc --- /dev/null +++ b/docs/using_zampy.md @@ -0,0 +1,54 @@ +# Using Zampy + +## Installing Zampy +Zampy can be installed by doing: +```sh +pip install zampy git+https://github.com/EcoExtreML/zampy +``` + +## Configuration +Zampy needs to be configured with a simple configuration file. 
+ +This file is created under your -*user's home*-/.config directory: + +`~/.config/zampy/zampy_config.yml` + +```yml +working_directory: /home/bart/Zampy + +``` + +## Formulating a recipe +Recipes have the following structure: + +```yml +name: "test_recipe" + +download: + years: [2019, 2020] + bbox: [54, 6, 50, 3] # NESW + + datasets: + era5: + variables: + - 10m_v_component_of_wind + - surface_pressure + +convert: + convention: ALMA + frequency: 1H # outputs at 1 hour frequency. Pandas-like freq-keyword. + resolution: 0.5 # output resolution in degrees. +``` + +You can specify multiple datasets and multiple variables per dataset. + +## Running a recipe +Save this recipe to disk and run the following code in Python (modifying the path to the file): + +```py +from zampy.recipe import RecipeManager +r = RecipeManager(recipe_filename="/home/username/path_to_file/simple_recipe.yml") +r.run() +``` + +This will execute the recipe (i.e. download, ingest, convert, resample and save the data). From 74bf2563a119343293cfee69bba28e1056b6728c Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Mon, 7 Aug 2023 15:25:06 +0200 Subject: [PATCH 03/16] Add mkdocs configuration --- mkdocs.yml | 42 ++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 14 ++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 mkdocs.yml diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..f54197d --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,42 @@ +site_name: Zampy Documentation + +theme: + name: material + features: + - navigation.instant + - navigation.tabs + - navigation.tabs.sticky + + palette: + # Palette toggle for light mode + - scheme: default + toggle: + icon: material/weather-sunny + name: Switch to dark mode + primary: light green + accent: green + + # Palette toggle for dark mode + - scheme: slate + toggle: + icon: material/weather-night + name: Switch to light mode + primary: blue grey + accent: teal + +plugins: + - mkdocs-jupyter: + include_source: 
True + - search + - mkdocstrings: + handlers: + python: + options: + docstring_style: google + docstring_options: + ignore_init_summary: no + merge_init_into_class: yes + show_submodules: no + +extra: + generator: false diff --git a/pyproject.toml b/pyproject.toml index 9de7f9f..32baa0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,13 @@ dev = [ "pytest-cov", "pre-commit", ] +docs = [ + "mkdocs", + "mkdocs-material", + "mkdocs-jupyter", + "mkdocstrings[python]", + "mkdocs-gen-files", +] [tool.hatch.envs.default] features = ["dev"] @@ -101,6 +108,13 @@ coverage = [ "pytest --cov --cov-report term --cov-report xml --junitxml=xunit-result.xml tests/", ] +[tool.hatch.envs.docs] +features = ["docs"] + +[tool.hatch.envs.docs.scripts] +build = ["mkdocs build"] +serve = ["mkdocs serve"] + # [tool.hatch.envs.conda] # type = "conda" # python = "3.10" From 83174f12bff289d01bae4d76d3ff4fac1f610a97 Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Mon, 7 Aug 2023 15:37:37 +0200 Subject: [PATCH 04/16] Add command line interface --- pyproject.toml | 3 +++ src/zampy/cli.py | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 src/zampy/cli.py diff --git a/pyproject.toml b/pyproject.toml index 32baa0a..dd58330 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,9 @@ dependencies = [ ] dynamic = ["version"] +[project.scripts] +zampy="zampy.cli:run_recipe" + [project.optional-dependencies] dev = [ "bump2version", diff --git a/src/zampy/cli.py b/src/zampy/cli.py new file mode 100644 index 0000000..a5f5f7b --- /dev/null +++ b/src/zampy/cli.py @@ -0,0 +1,20 @@ +"""Implements CLI interface for Zampy.""" +import click +from zampy.recipe import RecipeManager + + +@click.command() +@click.option( + "--filename", + prompt="Path to the recipe filename", + help="Path to the recipe filename.", +) +def run_recipe(filename: str) -> None: + """Run the recipe using the CLI.""" + click.echo(f"Executing recipe: {filename}") + rm = 
RecipeManager(filename) + rm.run() + + +if __name__ == "__main__": + run_recipe() From e0949e37a4749bf5d97bb44424e9b4350e2ff552 Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Mon, 7 Aug 2023 15:39:48 +0200 Subject: [PATCH 05/16] Update documentation with CLI --- docs/using_zampy.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/using_zampy.md b/docs/using_zampy.md index 7a534fc..f6d5bcc 100644 --- a/docs/using_zampy.md +++ b/docs/using_zampy.md @@ -43,12 +43,10 @@ convert: You can specify multiple datasets and multiple variables per dataset. ## Running a recipe -Save this recipe to disk and run the following code in Python (modifying the path to the file): +Save this recipe to disk and run the following code in your shell: -```py -from zampy.recipe import RecipeManager -r = RecipeManager(recipe_filename="/home/username/path_to_file/simple_recipe.yml") -r.run() +```sh +zampy --filename /home/username/path_to_file/simple_recipe.yml ``` This will execute the recipe (i.e. download, ingest, convert, resample and save the data). 
From dee6566db645ead2fc07b553da58bd6b8dcd9fd8 Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Tue, 8 Aug 2023 08:33:32 +0200 Subject: [PATCH 06/16] Fix era5/eth issues --- src/zampy/recipe.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/zampy/recipe.py b/src/zampy/recipe.py index 5b4a05f..19c1af8 100644 --- a/src/zampy/recipe.py +++ b/src/zampy/recipe.py @@ -62,9 +62,10 @@ def __init__(self, recipe_filename: str) -> None: # Load & parse recipe recipe = recipe_loader(recipe_filename) - start_year, end_year = recipe["download"]["years"] + self.start_year, self.end_year = recipe["download"]["years"] self.timebounds = TimeBounds( - np.datetime64(f"{start_year}"), np.datetime64(f"{end_year}") + np.datetime64(f"{self.start_year}-01-01T00:00"), + np.datetime64(f"{self.end_year}-12-13T23:59"), ) self.spatialbounds = SpatialBounds(*recipe["download"]["bbox"]) @@ -119,8 +120,11 @@ def run(self) -> None: comp = dict(zlib=True, complevel=5) encoding = {var: comp for var in ds.data_vars} fname = ( # e.g. "era5_2010-2020.nc" - f"{dataset_name.lower()}_" - f"{self.timebounds.start}-{self.timebounds.end}" - ".nc" + f"{dataset_name.lower()}_" f"{self.start_year}-{self.end_year}" ".nc" ) ds.to_netcdf(path=self.data_dir / fname, encoding=encoding) + + print( + "Finished running the recipe. Output data can be found at:\n" + f" {self.data_dir}" + ) From e49d0ded681cf09c8b44d9172886e4ae1f1ef5e7 Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Tue, 8 Aug 2023 08:40:46 +0200 Subject: [PATCH 07/16] Update docs, add syntax highlighting --- docs/index.md | 10 ++++++++++ docs/using_zampy.md | 14 ++++++++------ mkdocs.yml | 17 ++++++++++++++++- 3 files changed, 34 insertions(+), 7 deletions(-) create mode 100644 docs/index.md diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..f919fef --- /dev/null +++ b/docs/index.md @@ -0,0 +1,10 @@ +# Zampy + +A tool for downloading Land Surface Model input data. 
+ +### Name origin + +Named after *Zam*; [the Avestan language term for the Zoroastrian concept of "earth"](https://en.wikipedia.org/wiki/Zam). + +## How to use Zampy +See the section ["using Zampy"](using_zampy.md). diff --git a/docs/using_zampy.md b/docs/using_zampy.md index f6d5bcc..ca297b7 100644 --- a/docs/using_zampy.md +++ b/docs/using_zampy.md @@ -2,7 +2,7 @@ ## Installing Zampy Zampy can be installed by doing: -```sh +```bash pip install zampy git+https://github.com/EcoExtreML/zampy ``` @@ -13,19 +13,18 @@ This file is created under your -*user's home*-/.config directory: `~/.config/zampy/zampy_config.yml` -```yml +```yaml working_directory: /home/bart/Zampy - ``` ## Formulating a recipe Recipes have the following structure: -```yml +```yaml name: "test_recipe" download: - years: [2019, 2020] + years: [2020, 2020] bbox: [54, 6, 50, 3] # NESW datasets: @@ -33,6 +32,9 @@ download: variables: - 10m_v_component_of_wind - surface_pressure + eth_canopy_height: + variables: + - height_of_vegetation convert: convention: ALMA @@ -45,7 +47,7 @@ You can specify multiple datasets and multiple variables per dataset. 
## Running a recipe Save this recipe to disk and run the following code in your shell: -```sh +```bash zampy --filename /home/username/path_to_file/simple_recipe.yml ``` diff --git a/mkdocs.yml b/mkdocs.yml index f54197d..17d0134 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,11 +2,17 @@ site_name: Zampy Documentation theme: name: material + highlightjs: true + hljs_languages: + - yaml + - python + - bash features: - navigation.instant - navigation.tabs - navigation.tabs.sticky - + - content.code.copy + palette: # Palette toggle for light mode - scheme: default @@ -38,5 +44,14 @@ plugins: merge_init_into_class: yes show_submodules: no +markdown_extensions: + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences + extra: generator: false From 37dd2e2a79ebb419bdd8eda158a601620f6e9b96 Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Tue, 8 Aug 2023 11:22:47 +0200 Subject: [PATCH 08/16] Add integration test for recipe runner --- pyproject.toml | 1 + src/zampy/utils/regrid.py | 2 +- tests/test_recipes/generate_test_data.py | 72 ++++++++++++++++++++++ tests/test_recipes/recipes/era5_recipe.yml | 16 +++++ tests/test_recipes/test_simple_recipe.py | 44 +++++++++++++ 5 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 tests/test_recipes/generate_test_data.py create mode 100644 tests/test_recipes/recipes/era5_recipe.yml create mode 100644 tests/test_recipes/test_simple_recipe.py diff --git a/pyproject.toml b/pyproject.toml index dd58330..63f043f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,6 +82,7 @@ dev = [ "types-PyYAML", "pytest", "pytest-cov", + "pytest-mock", "pre-commit", ] docs = [ diff --git a/src/zampy/utils/regrid.py b/src/zampy/utils/regrid.py index 969b538..c52af19 100644 --- a/src/zampy/utils/regrid.py +++ b/src/zampy/utils/regrid.py @@ -105,7 +105,7 @@ def _groupby_regrid( ds_out = ds_out.swap_dims( {"latitude_bins": 
"latitude", "longitude_bins": "longitude"} ) - ds_out = ds_out.drop(["latitude_bins", "longitude_bins"]) + ds_out = ds_out.drop_vars(["latitude_bins", "longitude_bins"]) return ds_out.transpose("time", "latitude", "longitude", ...) diff --git a/tests/test_recipes/generate_test_data.py b/tests/test_recipes/generate_test_data.py new file mode 100644 index 0000000..46b9b96 --- /dev/null +++ b/tests/test_recipes/generate_test_data.py @@ -0,0 +1,72 @@ +"""Generates test data for running the recipe tests.""" +from pathlib import Path +import numpy as np +import pandas as pd +import xarray as xr +from zampy.datasets.dataset_protocol import SpatialBounds +from zampy.datasets.dataset_protocol import TimeBounds + + +def generate_era5_file( + varname: str, + time_bounds: TimeBounds, + spatial_bounds: SpatialBounds, + test_value: float, + resolution: float, + time_res="1H", +) -> xr.Dataset: + time_coords = pd.date_range( + start=time_bounds.start, end=time_bounds.end, freq=time_res, inclusive="left" + ) + lat_coords = np.arange( + start=np.round(spatial_bounds.south - 1), + stop=np.round(spatial_bounds.north + 1), + step=resolution, + ) + lon_coords = np.arange( + start=np.round(spatial_bounds.west - 1), + stop=np.round(spatial_bounds.east + 1), + step=resolution, + ) + data = np.zeros((len(lon_coords), len(lat_coords), len(time_coords))) + test_value + + ds = xr.Dataset( + data_vars={ERA5_LOOKUP[varname][1]: (("longitude", "latitude", "time"), data)}, + coords={ + "longitude": lon_coords, + "latitude": lat_coords, + "time": time_coords, + }, + ) + ds[ERA5_LOOKUP[varname][1]].attrs["units"] = ERA5_LOOKUP[varname][0] + ds["latitude"].attrs["units"] = "degrees_north" + ds["longitude"].attrs["units"] = "degrees_east" + + return ds + + +ERA5_LOOKUP = { # name: (unit, fname) + "10m_u_component_of_wind": ("m s**-1", "u10"), + "10m_v_component_of_wind": ("m s**-1", "v10"), + "surface_pressure": ("Pa", "sp"), +} + + +def generate_era5_files( + directory: Path, + variables: list[str], 
+ spatial_bounds: SpatialBounds, + time_bounds: TimeBounds, +) -> None: + data_dir_era5 = directory / "era5" + data_dir_era5.mkdir() + + for var in variables: + ds = generate_era5_file( + varname=var, + time_bounds=time_bounds, + spatial_bounds=spatial_bounds, + test_value=1.0, + resolution=0.25, + ) + ds.to_netcdf(path=data_dir_era5 / f"era5_{var}.nc") diff --git a/tests/test_recipes/recipes/era5_recipe.yml b/tests/test_recipes/recipes/era5_recipe.yml new file mode 100644 index 0000000..576fb15 --- /dev/null +++ b/tests/test_recipes/recipes/era5_recipe.yml @@ -0,0 +1,16 @@ +name: "era5_recipe" + +download: + years: [2020, 2020] + bbox: [51, 4, 50, 3] # NESW + + datasets: + era5: + variables: + - 10m_v_component_of_wind + - surface_pressure + +convert: + convention: ALMA + frequency: 1H # outputs at 1 hour frequency. Pandas-like freq-keyword. + resolution: 0.5 # output resolution in degrees. diff --git a/tests/test_recipes/test_simple_recipe.py b/tests/test_recipes/test_simple_recipe.py new file mode 100644 index 0000000..c5734c1 --- /dev/null +++ b/tests/test_recipes/test_simple_recipe.py @@ -0,0 +1,44 @@ +"""Testing a simple recipe.""" +from pathlib import Path +from unittest.mock import patch +import generate_test_data +import numpy as np +import xarray as xr +from zampy.datasets import DATASETS +from zampy.datasets.dataset_protocol import SpatialBounds +from zampy.datasets.dataset_protocol import TimeBounds +from zampy.datasets.dataset_protocol import write_properties_file +from zampy.recipe import RecipeManager + + +RECIPE_FILE = Path(__file__).parent / "recipes" / "era5_recipe.yml" + + +def test_recipe(tmp_path: Path, mocker): + with (patch.object(DATASETS["era5"], "download"),): + mocker.patch( + "zampy.recipe.config_loader", + return_value={"working_directory": str(tmp_path.absolute())}, + ) + rm = RecipeManager(str(RECIPE_FILE.absolute())) + + spatial_bounds = SpatialBounds(51, 4, 50, 3) + time_bounds = TimeBounds( + np.datetime64("2020-01-01T00:00"), 
np.datetime64("2020-12-31T23:59") + ) + variables = ["10m_v_component_of_wind", "surface_pressure"] + + generate_test_data.generate_era5_files( + directory=tmp_path / "download", + variables=variables, + spatial_bounds=spatial_bounds, + time_bounds=time_bounds, + ) + write_properties_file( + tmp_path / "download" / "era5", spatial_bounds, time_bounds, variables + ) + + rm.run() + + ds = xr.open_mfdataset(str(tmp_path / "output" / "era5_recipe" / "*.nc")) + assert all(var in ds.data_vars for var in ["Psurf", "Wind_N"]) From eebca29d6eb5e6655331a96483844a3e02bf5630 Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Tue, 8 Aug 2023 12:20:43 +0200 Subject: [PATCH 09/16] Add scipy to dependencies. Remove py3.8 add py3.11 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 63f043f..11b5773 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ name = "zampy" description = "python package for getting Land Surface Model input data." 
readme = "README.md" license = "Apache-2.0" -requires-python = ">=3.8, <3.11" +requires-python = ">=3.9, <3.12" authors = [ {email = "b.schilperoort@esciencecenter.nl"}, {name = "Bart Schilperoort, Yang Liu, Fakhereh Alidoost"} @@ -43,7 +43,6 @@ classifiers = [ "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", ] @@ -55,6 +54,7 @@ dependencies = [ "pandas", "matplotlib", "xarray", + "scipy", # required for xarray.interpolate "rioxarray", # required for TIFF files "tqdm", "dask[diagnostics]", From 090675fdefd0ac30bec869399bd7810da5bda9a4 Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Tue, 8 Aug 2023 12:46:06 +0200 Subject: [PATCH 10/16] Make the recipe path a positional argument in cli --- src/zampy/cli.py | 13 +++++-------- src/zampy/recipe.py | 8 ++++---- tests/test_recipes/test_simple_recipe.py | 2 +- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/zampy/cli.py b/src/zampy/cli.py index a5f5f7b..e7b4564 100644 --- a/src/zampy/cli.py +++ b/src/zampy/cli.py @@ -1,18 +1,15 @@ """Implements CLI interface for Zampy.""" +from pathlib import Path import click from zampy.recipe import RecipeManager @click.command() -@click.option( - "--filename", - prompt="Path to the recipe filename", - help="Path to the recipe filename.", -) -def run_recipe(filename: str) -> None: +@click.argument("recipe", type=click.Path(exists=True, path_type=Path)) +def run_recipe(recipe: Path) -> None: """Run the recipe using the CLI.""" - click.echo(f"Executing recipe: {filename}") - rm = RecipeManager(filename) + click.echo(f"Executing recipe: {recipe}") + rm = RecipeManager(recipe) rm.run() diff --git a/src/zampy/recipe.py b/src/zampy/recipe.py index 19c1af8..bb4e289 100644 --- a/src/zampy/recipe.py +++ b/src/zampy/recipe.py @@ -10,9 +10,9 @@ from 
zampy.datasets.dataset_protocol import TimeBounds -def recipe_loader(recipe_filename: str) -> dict: +def recipe_loader(recipe_path: Path) -> dict: """Load the yaml recipe into a dictionary, and do some validation.""" - with open(recipe_filename) as f: + with recipe_path.open() as f: recipe: dict = yaml.safe_load(f) if not all(("name", "download", "convert" in recipe.keys())): @@ -57,10 +57,10 @@ def config_loader() -> dict: class RecipeManager: """The recipe manager is used to get the required info, and then run the recipe.""" - def __init__(self, recipe_filename: str) -> None: + def __init__(self, recipe_path: Path) -> None: """Instantiate the recipe manager, using a prepared recipe.""" # Load & parse recipe - recipe = recipe_loader(recipe_filename) + recipe = recipe_loader(recipe_path) self.start_year, self.end_year = recipe["download"]["years"] self.timebounds = TimeBounds( diff --git a/tests/test_recipes/test_simple_recipe.py b/tests/test_recipes/test_simple_recipe.py index c5734c1..064c7b9 100644 --- a/tests/test_recipes/test_simple_recipe.py +++ b/tests/test_recipes/test_simple_recipe.py @@ -20,7 +20,7 @@ def test_recipe(tmp_path: Path, mocker): "zampy.recipe.config_loader", return_value={"working_directory": str(tmp_path.absolute())}, ) - rm = RecipeManager(str(RECIPE_FILE.absolute())) + rm = RecipeManager(RECIPE_FILE.absolute()) spatial_bounds = SpatialBounds(51, 4, 50, 3) time_bounds = TimeBounds( From 55dd844d0c36c12f64512c5a26f29bc9a67473f6 Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Tue, 8 Aug 2023 13:00:54 +0200 Subject: [PATCH 11/16] Corrections/improvements in docs: using zampy. 
--- docs/using_zampy.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/using_zampy.md b/docs/using_zampy.md index ca297b7..149e91f 100644 --- a/docs/using_zampy.md +++ b/docs/using_zampy.md @@ -9,16 +9,14 @@ pip install zampy git+https://github.com/EcoExtreML/zampy ## Configuration Zampy needs to be configured with a simple configuration file. -This file is created under your -*user's home*-/.config directory: - -`~/.config/zampy/zampy_config.yml` +You need to create this file under your -*user's home*-/.config directory: `~/.config/zampy/zampy_config.yml`, and it should contain the following: ```yaml -working_directory: /home/bart/Zampy +working_directory: /path_to_a_working_directory/ #for example: /home/bart/Zampy ``` ## Formulating a recipe -Recipes have the following structure: +A "recipe" is a file with a `yml` extension that has the following structure: ```yaml name: "test_recipe" From 5324fbee454d8512a5f647a02e10e9fe62ec0f85 Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Tue, 8 Aug 2023 13:02:24 +0200 Subject: [PATCH 12/16] Fix incorrect end time, concat filename string --- src/zampy/recipe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/zampy/recipe.py b/src/zampy/recipe.py index bb4e289..419a445 100644 --- a/src/zampy/recipe.py +++ b/src/zampy/recipe.py @@ -65,7 +65,7 @@ def __init__(self, recipe_path: Path) -> None: self.start_year, self.end_year = recipe["download"]["years"] self.timebounds = TimeBounds( np.datetime64(f"{self.start_year}-01-01T00:00"), - np.datetime64(f"{self.end_year}-12-13T23:59"), + np.datetime64(f"{self.end_year}-12-31T23:59"), ) self.spatialbounds = SpatialBounds(*recipe["download"]["bbox"]) @@ -81,7 +81,7 @@ def __init__(self, recipe_path: Path) -> None: self.ingest_dir = Path(config["working_directory"]) / "ingest" self.data_dir = ( Path(config["working_directory"]) / "output" / str(recipe["name"]) - ) # TODO: strip illegal chars from name. 
+ ) # Create required directories if they do not exist yet: for dir in [self.data_dir, self.download_dir, self.ingest_dir]: @@ -120,7 +120,7 @@ def run(self) -> None: comp = dict(zlib=True, complevel=5) encoding = {var: comp for var in ds.data_vars} fname = ( # e.g. "era5_2010-2020.nc" - f"{dataset_name.lower()}_" f"{self.start_year}-{self.end_year}" ".nc" + f"{dataset_name.lower()}_{self.start_year}-{self.end_year}.nc" ) ds.to_netcdf(path=self.data_dir / fname, encoding=encoding) From 3b54607018df53541db7e39fe2e3214d169ca8db Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Tue, 8 Aug 2023 13:03:24 +0200 Subject: [PATCH 13/16] remove py38 from CI, add py311 --- .github/workflows/build.yml | 2 +- sonar-project.properties | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3b5a87b..0807701 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ jobs: fail-fast: false matrix: os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.9', '3.10', '3.11'] env: MPLBACKEND: Agg # https://github.com/orgs/community/discussions/26434 steps: diff --git a/sonar-project.properties b/sonar-project.properties index c5a0059..8f280f2 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -10,4 +10,4 @@ sonar.links.ci=https://github.com/EcoExtreML/zampy/actions sonar.python.coverage.reportPaths=coverage.xml sonar.python.xunit.reportPath=xunit-result.xml sonar.python.pylint.reportPaths=pylint-report.txt -sonar.python.version=3.8, 3.9, 3.10 \ No newline at end of file +sonar.python.version=3.9, 3.10, 3.11 \ No newline at end of file From 56aec3fdd52f30e2909562b04c4ba1c05e711c3a Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Tue, 8 Aug 2023 13:20:38 +0200 Subject: [PATCH 14/16] Add recipe_loader tests, fix bugs --- src/zampy/recipe.py | 7 ++- 
tests/test_recipes/test_recipe_loader.py | 80 ++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 tests/test_recipes/test_recipe_loader.py diff --git a/src/zampy/recipe.py b/src/zampy/recipe.py index 419a445..7c43a70 100644 --- a/src/zampy/recipe.py +++ b/src/zampy/recipe.py @@ -15,7 +15,7 @@ def recipe_loader(recipe_path: Path) -> dict: with recipe_path.open() as f: recipe: dict = yaml.safe_load(f) - if not all(("name", "download", "convert" in recipe.keys())): + if not all(key in recipe.keys() for key in ["name", "download", "convert"]): msg = ( "One of the following items are missing from the recipe:\n" "name, download, convert." @@ -26,7 +26,10 @@ def recipe_loader(recipe_path: Path) -> dict: msg = "No dataset entry found in the recipe." raise ValueError(msg) - if not all(("convention", "frequency", "resolution" in recipe["convert"].keys())): + if not all( + key in recipe["convert"].keys() + for key in ["convention", "frequency", "resolution"] + ): msg = ( "One of the following items are missing from the recipe:\n" "name, download, convert." 
diff --git a/tests/test_recipes/test_recipe_loader.py b/tests/test_recipes/test_recipe_loader.py new file mode 100644 index 0000000..1f3d7cc --- /dev/null +++ b/tests/test_recipes/test_recipe_loader.py @@ -0,0 +1,80 @@ +"""Test the recipe loader.""" +import pytest +from zampy.recipe import recipe_loader + + +valid_recipe = """ +name: "Test recipe 2" +download: + years: [2020, 2020] + bbox: [54, 6, 50, 3] # NESW + datasets: + era5: + variables: + - 10m_v_component_of_wind + - surface_pressure +convert: + convention: ALMA + frequency: 1H + resolution: 0.5 +""" + +recipe_missing_datasets = """ +name: "Test recipe 2" +download: + years: [2020, 2020] + bbox: [54, 6, 50, 3] # NESW +convert: + convention: ALMA + frequency: 1H + resolution: 0.5 +""" + +recipe_missing_name = """ +download: + years: [2020, 2020] + bbox: [54, 6, 50, 3] # NESW + datasets: + era5: + variables: + - 10m_v_component_of_wind + - surface_pressure +convert: + convention: ALMA + frequency: 1H + resolution: 0.5 +""" + +recipe_missing_convention = """ +name: "Test recipe 2" +download: + years: [2020, 2020] + bbox: [54, 6, 50, 3] # NESW + datasets: + era5: + variables: + - 10m_v_component_of_wind + - surface_pressure +convert: + frequency: 1H + resolution: 0.5 +""" + + +def test_valid_recipe(tmp_path): + recipe_path = tmp_path / "valid_recipe.yml" + with recipe_path.open("w") as f: + f.write(valid_recipe) + recipe_loader(recipe_path) + + +@pytest.mark.parametrize( + "recipe", [recipe_missing_convention, recipe_missing_datasets, recipe_missing_name] +) +def test_invalid_recipes(tmp_path, recipe): + recipe_path = tmp_path / "invalid_recipe.yml" + with recipe_path.open("w") as f: + f.write(recipe) + + with pytest.raises(ValueError): + recipe_loader(recipe_path) From 09f4804b65bad8c35ef758abd58fcdcc34902b1b Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Tue, 8 Aug 2023 13:42:00 +0200 Subject: [PATCH 15/16] Add tests for the config loader --- src/zampy/recipe.py | 2 +- 
tests/test_recipes/test_config_loader.py | 41 ++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 tests/test_recipes/test_config_loader.py diff --git a/src/zampy/recipe.py b/src/zampy/recipe.py index 7c43a70..cac5274 100644 --- a/src/zampy/recipe.py +++ b/src/zampy/recipe.py @@ -50,7 +50,7 @@ def config_loader() -> dict: with config_path.open() as f: config: dict = yaml.safe_load(f) - if "working_directory" not in config.keys(): + if not isinstance(config, dict) or "working_directory" not in config.keys(): msg = "No `working_directory` key found in the config file." raise ValueError(msg) diff --git a/tests/test_recipes/test_config_loader.py b/tests/test_recipes/test_config_loader.py new file mode 100644 index 0000000..a3d34ad --- /dev/null +++ b/tests/test_recipes/test_config_loader.py @@ -0,0 +1,41 @@ +from pathlib import Path +import pytest +from zampy.recipe import config_loader + + +def test_valid_config(tmp_path: Path, mocker): + mocker.patch( + "pathlib.Path.home", + return_value=tmp_path, + ) + config_dir = tmp_path / ".config" / "zampy" + config_dir.mkdir(parents=True) + valid_config = f"working_directory: {tmp_path}\n" + with (config_dir / "zampy_config.yml").open("w") as f: + f.write(valid_config) + + config = config_loader() + assert config == {"working_directory": str(tmp_path)} + + +def test_missing_config(tmp_path: Path, mocker): + mocker.patch( + "pathlib.Path.home", + return_value=tmp_path, + ) + with pytest.raises(FileNotFoundError): + config_loader() + + +def test_missing_key(tmp_path: Path, mocker): + mocker.patch( + "pathlib.Path.home", + return_value=tmp_path, + ) + config_dir = tmp_path / ".config" / "zampy" + config_dir.mkdir(parents=True) + with (config_dir / "zampy_config.yml").open("w") as f: + f.write("nonsense") + + with pytest.raises(ValueError, match="No `working_directory` key"): + config_loader() From b1728b8f78aa227915b4d4997463407b5b79e57c Mon Sep 17 00:00:00 2001 From: Bart Schilperoort 
Date: Tue, 8 Aug 2023 14:38:56 +0200 Subject: [PATCH 16/16] Remove eth_canopy_height from example in docs --- docs/using_zampy.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/using_zampy.md b/docs/using_zampy.md index 149e91f..52bde20 100644 --- a/docs/using_zampy.md +++ b/docs/using_zampy.md @@ -30,9 +30,6 @@ download: variables: - 10m_v_component_of_wind - surface_pressure - eth_canopy_height: - variables: - - height_of_vegetation convert: convention: ALMA