From ad9183a0cfcf73b055b05fabe90dfeec29103fba Mon Sep 17 00:00:00 2001 From: Yang Date: Tue, 12 Sep 2023 15:38:47 +0200 Subject: [PATCH 01/21] dataset info land cover --- README.md | 1 + docs/available_datasets.md | 5 ++++ src/zampy/datasets/catalog.py | 2 ++ src/zampy/datasets/land_cover.py | 45 ++++++++++++++++++++++++++++++++ src/zampy/reference/variables.py | 2 ++ 5 files changed, 55 insertions(+) create mode 100644 src/zampy/datasets/land_cover.py diff --git a/README.md b/README.md index 7f9c930..f714470 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ To download the following datasets, users need access to CDS via cdsapi: - ERA5 - ERA5 land - LAI +- land cover First, you need to be a registered user on *CDS* via the [registration page](https://cds.climate.copernicus.eu/user/register?destination=%2F%23!%2Fhome). diff --git a/docs/available_datasets.md b/docs/available_datasets.md index 5b40124..bf47da7 100644 --- a/docs/available_datasets.md +++ b/docs/available_datasets.md @@ -39,3 +39,8 @@ You can add these yourself by creating a pull request, or open an issue to reque Note: model level is set to "60" and all steps are included for downloading. For more information, see [their webpage](https://ads.atmosphere.copernicus.eu/cdsapp#!/dataset/cams-global-ghg-reanalysis-egg4). + +=== "Land cover classification gridded maps" + - `land_cover` + + For more information, see [their webpage](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover). diff --git a/src/zampy/datasets/catalog.py b/src/zampy/datasets/catalog.py index 579835d..cf869c4 100644 --- a/src/zampy/datasets/catalog.py +++ b/src/zampy/datasets/catalog.py @@ -4,6 +4,7 @@ from zampy.datasets.era5 import ERA5 from zampy.datasets.era5 import ERA5Land from zampy.datasets.eth_canopy_height import EthCanopyHeight +from zampy.datasets.land_cover import LandCover from zampy.datasets.prism_dem import PrismDEM30 from zampy.datasets.prism_dem import PrismDEM90 @@ -17,4 +18,5 @@ "eth_canopy_height": EthCanopyHeight, "prism_dem_30": PrismDEM30, "prism_dem_90": PrismDEM90, + "land_cover": LandCover, } diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py new file mode 100644 index 0000000..5a3c9b0 --- /dev/null +++ b/src/zampy/datasets/land_cover.py @@ -0,0 +1,45 @@ +"""Land cover classification dataset.""" + +import numpy as np +from zampy.datasets.dataset_protocol import SpatialBounds +from zampy.datasets.dataset_protocol import TimeBounds +from zampy.datasets.dataset_protocol import Variable +from zampy.reference.variables import VARIABLE_REFERENCE_LOOKUP +from zampy.reference.variables import unit_registry + + +## Ignore missing class/method docstrings: they are implemented in the Dataset class. 
+# ruff: noqa: D102 + + +class LandCover: # noqa: D101 + """Land cover classification gridded maps.""" + name = "land-cover" + time_bounds = TimeBounds(np.datetime64("1992-01-01"), np.datetime64("2020-12-31")) + spatial_bounds = SpatialBounds(90, 180, -90, -180) + crs = "EPSG:4326" + + raw_variables = [ + Variable(name="lccs_class", unit=unit_registry.dimensionless), + ] + variable_names = ["land_cover"] + variables = [VARIABLE_REFERENCE_LOOKUP[var] for var in variable_names] + + license = "ESA CCI licence; licence-to-use-copernicus-products; VITO licence" + + bib = """ + @article{buchhorn2020copernicus, + title={Copernicus global land cover layers—collection 2}, + author={Buchhorn, Marcel and Lesiv, Myroslava and Tsendbazar, Nandin-Erdene and Herold, Martin and Bertels, Luc and Smets, Bruno}, + journal={Remote Sensing}, + volume={12}, + number={6}, + pages={1044}, + year={2020}, + publisher={MDPI} + } + """ + + data_url = "https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover?tab=overview" + + cds_dataset = "satellite-land-cover" diff --git a/src/zampy/reference/variables.py b/src/zampy/reference/variables.py index e84bb78..fd1fca6 100644 --- a/src/zampy/reference/variables.py +++ b/src/zampy/reference/variables.py @@ -17,6 +17,7 @@ def unit_registration() -> UnitRegistry: "kilogram_per_square_meter_second = kilogram/(meter**2*second)" ) unit_registry.define("milimeter_per_second = watt/meter**2") + unit_registry.define("dimensionless = []") return unit_registry @@ -53,6 +54,7 @@ def unit_registration() -> UnitRegistry: Variable("latitude", unit=unit_registry.degree_north), Variable("longitude", unit=unit_registry.degree_east), Variable("elevation", unit=unit_registry.meter), + Variable("land_cover", unit=unit_registry.dimensionless), ) VARIABLE_REFERENCE_LOOKUP = {var.name: var for var in VARIABLE_REFERENCE} From 313a14be5a776f195c1d8329d8358d6deec8d551 Mon Sep 17 00:00:00 2001 From: Yang Date: Wed, 13 Sep 2023 10:59:14 +0200 Subject: [PATCH 02/21] add downloading function and demo notebook --- demo/land_cover_dataset_demo.ipynb | 98 ++++++++++++++++++++++++++++++ src/zampy/datasets/cds_utils.py | 52 ++++++++++++++++ src/zampy/datasets/land_cover.py | 44 +++++++++++++- 3 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 demo/land_cover_dataset_demo.ipynb diff --git a/demo/land_cover_dataset_demo.ipynb b/demo/land_cover_dataset_demo.ipynb new file mode 100644 index 0000000..92f152d --- /dev/null +++ b/demo/land_cover_dataset_demo.ipynb @@ -0,0 +1,98 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Handle land cover dataset with Zampy\n", + "Demo notebook for developers." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/yangliu/mambaforge/envs/ecoextreml/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from zampy.datasets.catalog import LandCover\n", + "from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "work_dir = Path(\"/home/yangliu/EcoExtreML/temp\")\n", + "download_dir = work_dir / \"download\"\n", + "ingest_dir = work_dir / \"ingest\"\n", + "times = TimeBounds(np.datetime64(\"2010-01-01T00:00:00\"), np.datetime64(\"2011-01-31T23:00:00\"))\n", + "bbox_demo = SpatialBounds(54, 56, 1, 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 50%|█████ | 1/2 [00:00<00:00, 2.97it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File 'land-cover_LCCS_MAP_300m_2010.zip' already exists, skipping...\n" + ] + } + ], + "source": [ + "land_cover_dataset = LandCover()\n", + "land_cover_dataset.download(\n", + " download_dir=download_dir,\n", + " time_bounds=times,\n", + " spatial_bounds=bbox_demo,\n", + " variable_names=[\"land_cover\"],\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ecoextreml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/zampy/datasets/cds_utils.py b/src/zampy/datasets/cds_utils.py index d6aec09..e81f583 100644 --- a/src/zampy/datasets/cds_utils.py +++ b/src/zampy/datasets/cds_utils.py @@ -19,11 +19,13 @@ "reanalysis-era5-single-levels": "era5", "reanalysis-era5-land": "era5-land", "cams-global-ghg-reanalysis-egg4": "cams", + "satellite-land-cover": "land-cover", } SERVER_API = { "era5": "cdsapi", "era5-land": "cdsapi", "cams": "adsapi", + "land-cover": "cdsapi", } CONFIG_PATH = Path.home() / ".config" / "zampy" / "zampy_config.yml" @@ -87,6 +89,56 @@ def cds_request( ) +def cds_request_land_cover( + dataset: str, + time_bounds: TimeBounds, + path: Path, + overwrite: bool, +) -> None: + """Download land cover data via CDS API. + + To raise a request via CDS API using `zampy`, user needs to set up the + zampy configuration file `zampy_config.yml` following the instructions on + https://github.com/EcoExtreML/zampy/blob/main/README.md#instructions-for-cds-datasets-eg-era5. + + Args: + dataset: Dataset name for retrieval via `cdsapi`. + time_bounds: Zampy time bounds object. + path: File path to which the data should be saved. + overwrite: If an existing file (of the same size!) should be overwritten. 
+ """ + fname = PRODUCT_FNAME[dataset] + + url, api_key = cds_api_key(fname) + + c = cdsapi.Client( + url=url, + key=api_key, + verify=True, + quiet=True, + ) + + years_months = time_bounds_to_year_month(time_bounds) + years = {year for (year, _) in years_months} + + for year in tqdm(years): + if int(year) < 2016: + version = "v2.0.7cds" + else: + version = "v2.1.1" + r = c.retrieve( + dataset, + { + 'variable': "all", + 'format': "zip", + 'year': year, + 'version': version, + }, + ) + fpath = path / f"{fname}_LCCS_MAP_300m_{year}.zip" + _check_and_download(r, fpath, overwrite) + + def cds_api_key(product_name: str) -> tuple[str, str]: """Load url and CDS/ADS API key. diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index 5a3c9b0..725abb0 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -1,9 +1,14 @@ """Land cover classification dataset.""" +from pathlib import Path import numpy as np +from zampy.datasets import cds_utils +from zampy.datasets import validation from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds from zampy.datasets.dataset_protocol import Variable +from zampy.datasets.dataset_protocol import copy_properties_file +from zampy.datasets.dataset_protocol import write_properties_file from zampy.reference.variables import VARIABLE_REFERENCE_LOOKUP from zampy.reference.variables import unit_registry @@ -14,6 +19,7 @@ class LandCover: # noqa: D101 """Land cover classification gridded maps.""" + name = "land-cover" time_bounds = TimeBounds(np.datetime64("1992-01-01"), np.datetime64("2020-12-31")) spatial_bounds = SpatialBounds(90, 180, -90, -180) @@ -30,7 +36,7 @@ class LandCover: # noqa: D101 bib = """ @article{buchhorn2020copernicus, title={Copernicus global land cover layers—collection 2}, - author={Buchhorn, Marcel and Lesiv, Myroslava and Tsendbazar, Nandin-Erdene and Herold, Martin and Bertels, Luc and Smets, Bruno}, + author={Buchhorn, Marcel et al.}, journal={Remote Sensing}, volume={12}, number={6}, @@ -43,3 +49,39 @@ class LandCover: # noqa: D101 data_url = "https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover?tab=overview" cds_dataset = "satellite-land-cover" + + def __init__(self) -> None: + """Init.""" + pass + + def download( + self, + download_dir: Path, + time_bounds: TimeBounds, + spatial_bounds: SpatialBounds, + variable_names: list[str], + overwrite: bool = False, + ) -> bool: + validation.validate_download_request( + self, + download_dir, + time_bounds, + spatial_bounds, + variable_names, + ) + + download_folder = download_dir / self.name + download_folder.mkdir(parents=True, exist_ok=True) + + cds_utils.cds_request_land_cover( + dataset=self.cds_dataset, + time_bounds=time_bounds, + path=download_folder, + overwrite=overwrite, + ) + + write_properties_file( + download_folder, spatial_bounds, time_bounds, variable_names + ) + + return True From 1f592aa3bd52f622e8eceef9f1626601df541c58 Mon Sep 17 00:00:00 2001 From: Yang Date: Mon, 16 Oct 2023 17:17:06 +0200 Subject: [PATCH 03/21] add ingest function and unzip --- src/zampy/datasets/land_cover.py | 51 ++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index 725abb0..469f9a5 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -2,6 +2,7 @@ from pathlib import Path import numpy as np +from zipfile import ZipFile from zampy.datasets import 
cds_utils from zampy.datasets import validation from zampy.datasets.dataset_protocol import SpatialBounds @@ -85,3 +86,53 @@ def download( ) return True + + def ingest( + self, + download_dir: Path, + ingest_dir: Path, + overwrite: bool = False, + ) -> bool: + download_folder = download_dir / self.name + ingest_folder = ingest_dir / self.name + ingest_folder.mkdir(parents=True, exist_ok=True) + + archive_file_pattern = f"{self.name}_*.zip" + archive_files = list(download_folder.glob(archive_file_pattern)) + + for file in archive_files: + unzip_raw_to_netcdf( + ingest_folder, + file=file, + overwrite=overwrite, + ) + + copy_properties_file(download_folder, ingest_folder) + + return True + + +def unzip_raw_to_netcdf( + ingest_folder: Path, + file: Path, + overwrite: bool = False, +) -> None: + """Convert a downloaded zip netcdf file to a standard CF/Zampy netCDF file. + + Args: + ingest_folder: Folder where the files have to be written to. + file: Path to the land cover .zip archive. + overwrite: Overwrite all existing files. If False, file that already exist will + be skipped. + """ + ncfile = ingest_folder / file.with_suffix(".nc").name + if ncfile.exists() and not overwrite: + print(f"File '{ncfile.name}' already exists, skipping...") + else: + extract_netcdf_to_zampy(file, ingest_folder) + + +def extract_netcdf_to_zampy(file, ingest_folder): + with ZipFile(file, 'r') as zip_object: + zipped_file_name = zip_object.namelist()[0] + zip_object.extract(zipped_file_name, path = ingest_folder) From dcfd5d3bc75707a65f3d36bc7ab3ec2062ee9f8c Mon Sep 17 00:00:00 2001 From: Yang Date: Tue, 17 Oct 2023 12:06:23 +0200 Subject: [PATCH 04/21] coarsen land cover and finish ingest --- pyproject.toml | 1 + src/zampy/datasets/land_cover.py | 62 ++++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e787454..2c180a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,7 @@ dependencies = [ "pint-xarray", "flox", # For better groupby methods. "cdsapi", + "xarray-regrid", # for land cover data regridding ] dynamic = ["version"] diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index 469f9a5..cf7b571 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -1,8 +1,11 @@ """Land cover classification dataset.""" +import os from pathlib import Path -import numpy as np from zipfile import ZipFile +import numpy as np +import xarray as xr +import xarray_regrid from zampy.datasets import cds_utils from zampy.datasets import validation from zampy.datasets.dataset_protocol import SpatialBounds @@ -18,7 +21,7 @@ # ruff: noqa: D102 -class LandCover: # noqa: D101 +class LandCover: """Land cover classification gridded maps.""" name = "land-cover" @@ -129,10 +132,57 @@ def unzip_raw_to_netcdf( if ncfile.exists() and not overwrite: print(f"File '{ncfile.name}' already exists, skipping...") else: - extract_netcdf_to_zampy(file, ingest_folder) + ds = extract_netcdf_to_zampy(ingest_folder, file) + ds.to_netcdf(path=ncfile) + + +def extract_netcdf_to_zampy(ingest_folder: Path, file: Path) -> xr.Dataset: + """Extract zipped data and convert to zampy format. + Args: + ingest_folder: Folder where the files have to be written to. + file: Path to the land cover .zip archive. -def extract_netcdf_to_zampy(file, ingest_folder): - with ZipFile(file, 'r') as zip_object: + Returns: + Coarse land cover data satisfying zampy standard. 
+ """ + with ZipFile(file, "r") as zip_object: zipped_file_name = zip_object.namelist()[0] - zip_object.extract(zipped_file_name, path = ingest_folder) + zip_object.extract(zipped_file_name, path=ingest_folder) + + # only keep land cover class variable + ds = xr.open_dataset(ingest_folder / zipped_file_name) + var_list = [var for var in ds.data_vars] + raw_variable = "lccs_class" + var_list.remove(raw_variable) + ds = ds.drop_vars(var_list) + + # coarsen to fit into memory + ds = ds.sortby(["lat", "lon"]) + ds = ds.rename({"lat": "latitude", "lon": "longitude"}) + new_grid = xarray_regrid.Grid( + north=90, + east=180, + south=-90, + west=-180, + resolution_lat=0.25, + resolution_lon=0.25, + ) + + target_dataset = xarray_regrid.create_regridding_dataset(new_grid) + + ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9) + + # rename variable to follow the zampy convention + variable_name = "land_cover" + ds_regrid = ds_regrid.rename({raw_variable: variable_name}) + ds_regrid[variable_name].attrs["units"] = str( + VARIABLE_REFERENCE_LOOKUP[variable_name].unit + ) + ds_regrid[variable_name].attrs["description"] = VARIABLE_REFERENCE_LOOKUP[ + variable_name + ].desc + + os.remove(ingest_folder / zipped_file_name) + + return ds_regrid From ab853f77dd61897a3e8fad280146528068c77f47 Mon Sep 17 00:00:00 2001 From: Yang Date: Tue, 17 Oct 2023 13:03:42 +0200 Subject: [PATCH 05/21] add load method --- demo/land_cover_dataset_demo.ipynb | 553 ++++++++++++++++++++++++++++- src/zampy/datasets/cds_utils.py | 8 +- src/zampy/datasets/land_cover.py | 38 +- 3 files changed, 593 insertions(+), 6 deletions(-) diff --git a/demo/land_cover_dataset_demo.ipynb b/demo/land_cover_dataset_demo.ipynb index 92f152d..521266a 100644 --- a/demo/land_cover_dataset_demo.ipynb +++ b/demo/land_cover_dataset_demo.ipynb @@ -42,6 +42,13 @@ "bbox_demo = SpatialBounds(54, 56, 1, 3)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download dataset." + ] + }, { "cell_type": "code", "execution_count": 3, @@ -51,7 +58,7 @@ "name": "stderr", "output_type": "stream", "text": [ - " 50%|█████ | 1/2 [00:00<00:00, 2.97it/s]" + " 50%|█████ | 1/2 [00:00<00:00, 1.84it/s]" ] }, { @@ -60,6 +67,37 @@ "text": [ "File 'land-cover_LCCS_MAP_300m_2010.zip' already exists, skipping...\n" ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 2/2 [00:00<00:00, 2.30it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File 'land-cover_LCCS_MAP_300m_2011.zip' already exists, skipping...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -71,6 +109,519 @@ " variable_names=[\"land_cover\"],\n", ")" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data ingestion to the unified format in `zampy`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File 'land-cover_LCCS_MAP_300m_2011.nc' already exists, skipping...\n", + "File 'land-cover_LCCS_MAP_300m_2010.nc' already exists, skipping...\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# this step could take some time\n", + "land_cover_dataset.ingest(download_dir, ingest_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ds = land_cover_dataset.load(\n", + " ingest_dir=ingest_dir,\n", + " time_bounds=times,\n", + " spatial_bounds=bbox_demo,\n", + " variable_names=[\"land_cover\"],\n", + " resolution=1.0,\n", + " regrid_method=\"most_common\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:     (time: 2, latitude: 54, longitude: 54)\n",
+       "Coordinates:\n",
+       "  * time        (time) datetime64[ns] 2010-01-01 2011-01-01\n",
+       "  * latitude    (latitude) float64 1.0 2.0 3.0 4.0 5.0 ... 51.0 52.0 53.0 54.0\n",
+       "  * longitude   (longitude) float64 3.0 4.0 5.0 6.0 7.0 ... 53.0 54.0 55.0 56.0\n",
+       "Data variables:\n",
+       "    land_cover  (time, latitude, longitude) float32 210.0 210.0 ... 10.0 10.0\n",
+       "Attributes: (12/38)\n",
+       "    id:                         ESACCI-LC-L4-LCCS-Map-300m-P1Y-2010-v2.0.7cds\n",
+       "    title:                      Land Cover Map of ESA CCI brokered by CDS\n",
+       "    summary:                    This dataset characterizes the land cover of ...\n",
+       "    type:                       ESACCI-LC-L4-LCCS-Map-300m-P1Y\n",
+       "    project:                    Climate Change Initiative - European Space Ag...\n",
+       "    references:                 http://www.esa-landcover-cci.org/\n",
+       "    ...                         ...\n",
+       "    geospatial_lon_max:         180\n",
+       "    spatial_resolution:         300m\n",
+       "    geospatial_lat_units:       degrees_north\n",
+       "    geospatial_lat_resolution:  0.002778\n",
+       "    geospatial_lon_units:       degrees_east\n",
+       "    geospatial_lon_resolution:  0.002778
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2, latitude: 54, longitude: 54)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2010-01-01 2011-01-01\n", + " * latitude (latitude) float64 1.0 2.0 3.0 4.0 5.0 ... 51.0 52.0 53.0 54.0\n", + " * longitude (longitude) float64 3.0 4.0 5.0 6.0 7.0 ... 53.0 54.0 55.0 56.0\n", + "Data variables:\n", + " land_cover (time, latitude, longitude) float32 210.0 210.0 ... 10.0 10.0\n", + "Attributes: (12/38)\n", + " id: ESACCI-LC-L4-LCCS-Map-300m-P1Y-2010-v2.0.7cds\n", + " title: Land Cover Map of ESA CCI brokered by CDS\n", + " summary: This dataset characterizes the land cover of ...\n", + " type: ESACCI-LC-L4-LCCS-Map-300m-P1Y\n", + " project: Climate Change Initiative - European Space Ag...\n", + " references: http://www.esa-landcover-cci.org/\n", + " ... ...\n", + " geospatial_lon_max: 180\n", + " spatial_resolution: 300m\n", + " geospatial_lat_units: degrees_north\n", + " geospatial_lat_resolution: 0.002778\n", + " geospatial_lon_units: degrees_east\n", + " geospatial_lon_resolution: 0.002778" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds" + ] } ], "metadata": { diff --git a/src/zampy/datasets/cds_utils.py b/src/zampy/datasets/cds_utils.py index e81f583..ff03511 100644 --- a/src/zampy/datasets/cds_utils.py +++ b/src/zampy/datasets/cds_utils.py @@ -129,10 +129,10 @@ def cds_request_land_cover( r = c.retrieve( dataset, { - 'variable': "all", - 'format': "zip", - 'year': year, - 'version': version, + "variable": "all", + "format": "zip", + "year": year, + "version": version, }, ) fpath = path / f"{fname}_LCCS_MAP_300m_{year}.zip" diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index cf7b571..2b964a8 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -114,6 +114,42 @@ def ingest( return True + def load( + self, + ingest_dir: Path, + time_bounds: TimeBounds, + spatial_bounds: SpatialBounds, + resolution: float, + regrid_method: str, # Unused in land-cover dataset + variable_names: list[str], + ) -> xr.Dataset: + files: list[Path] = [] + for var in variable_names: + if var not in self.variable_names: + msg = ( + "One or more variables are not in this dataset.\n" + f"Please check input. 
Dataset: '{self.name}'\n" + f"Variables: '{variable_names}'" + ) + raise ValueError(msg) + files = list((ingest_dir / self.name).glob(f"{self.name}_*.nc")) + + ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200}) + ds = ds.sel(time=slice(time_bounds.start, time_bounds.end)) + new_grid = xarray_regrid.Grid( + north=spatial_bounds.north, + east=spatial_bounds.east, + south=spatial_bounds.south, + west=spatial_bounds.west, + resolution_lat=resolution, + resolution_lon=resolution, + ) + target_dataset = xarray_regrid.create_regridding_dataset(new_grid) + + ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9) + + return ds_regrid + def unzip_raw_to_netcdf( ingest_folder: Path, @@ -165,7 +201,7 @@ def extract_netcdf_to_zampy(ingest_folder: Path, file: Path) -> xr.Dataset: east=180, south=-90, west=-180, - resolution_lat=0.25, + resolution_lat=0.25, # same as resolution of ERA5, must be sufficient resolution_lon=0.25, ) From 15372f2912bf87e4c18a6b78101ec8817a076c38 Mon Sep 17 00:00:00 2001 From: Yang Date: Tue, 17 Oct 2023 13:22:28 +0200 Subject: [PATCH 06/21] add convert function and update ALMA --- demo/land_cover_dataset_demo.ipynb | 519 +++++++++++++++++++++++++++-- src/zampy/conventions/ALMA.json | 4 + src/zampy/datasets/land_cover.py | 25 ++ 3 files changed, 526 insertions(+), 22 deletions(-) diff --git a/demo/land_cover_dataset_demo.ipynb b/demo/land_cover_dataset_demo.ipynb index 521266a..fc6533f 100644 --- a/demo/land_cover_dataset_demo.ipynb +++ b/demo/land_cover_dataset_demo.ipynb @@ -58,30 +58,17 @@ "name": "stderr", "output_type": "stream", "text": [ - " 50%|█████ | 1/2 [00:00<00:00, 1.84it/s]" + "100%|██████████| 2/2 [00:00<00:00, 3.90it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "File 'land-cover_LCCS_MAP_300m_2011.zip' already exists, skipping...\n", "File 'land-cover_LCCS_MAP_300m_2010.zip' already exists, skipping...\n" ] }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 2/2 [00:00<00:00, 2.30it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File 'land-cover_LCCS_MAP_300m_2011.zip' already exists, skipping...\n" - ] - }, { "name": "stderr", "output_type": "stream", @@ -554,14 +541,14 @@ " geospatial_lat_units: degrees_north\n", " geospatial_lat_resolution: 0.002778\n", " geospatial_lon_units: degrees_east\n", - " geospatial_lon_resolution: 0.002778
  • " ], "text/plain": [ "\n", @@ -622,6 +609,494 @@ "source": [ "ds" ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "land_cover renamed to land_cover.\n", + "Conversion of dataset 'land-cover' following ALMA convention is complete!\n" + ] + } + ], + "source": [ + "from zampy.datasets import converter\n", + "\n", + "ds_convert = converter.convert(ds, land_cover_dataset, \"ALMA\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset>\n",
    +       "Dimensions:     (time: 2, latitude: 54, longitude: 54)\n",
    +       "Coordinates:\n",
    +       "  * time        (time) datetime64[ns] 2010-01-01 2011-01-01\n",
    +       "  * latitude    (latitude) float64 1.0 2.0 3.0 4.0 5.0 ... 51.0 52.0 53.0 54.0\n",
    +       "  * longitude   (longitude) float64 3.0 4.0 5.0 6.0 7.0 ... 53.0 54.0 55.0 56.0\n",
    +       "Data variables:\n",
    +       "    land_cover  (time, latitude, longitude) float32 210.0 210.0 ... 10.0 10.0\n",
    +       "Attributes: (12/38)\n",
    +       "    id:                         ESACCI-LC-L4-LCCS-Map-300m-P1Y-2010-v2.0.7cds\n",
    +       "    title:                      Land Cover Map of ESA CCI brokered by CDS\n",
    +       "    summary:                    This dataset characterizes the land cover of ...\n",
    +       "    type:                       ESACCI-LC-L4-LCCS-Map-300m-P1Y\n",
    +       "    project:                    Climate Change Initiative - European Space Ag...\n",
    +       "    references:                 http://www.esa-landcover-cci.org/\n",
    +       "    ...                         ...\n",
    +       "    geospatial_lon_max:         180\n",
    +       "    spatial_resolution:         300m\n",
    +       "    geospatial_lat_units:       degrees_north\n",
    +       "    geospatial_lat_resolution:  0.002778\n",
    +       "    geospatial_lon_units:       degrees_east\n",
    +       "    geospatial_lon_resolution:  0.002778
    " + ], + "text/plain": [ + "\n", + "Dimensions: (time: 2, latitude: 54, longitude: 54)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2010-01-01 2011-01-01\n", + " * latitude (latitude) float64 1.0 2.0 3.0 4.0 5.0 ... 51.0 52.0 53.0 54.0\n", + " * longitude (longitude) float64 3.0 4.0 5.0 6.0 7.0 ... 53.0 54.0 55.0 56.0\n", + "Data variables:\n", + " land_cover (time, latitude, longitude) float32 210.0 210.0 ... 10.0 10.0\n", + "Attributes: (12/38)\n", + " id: ESACCI-LC-L4-LCCS-Map-300m-P1Y-2010-v2.0.7cds\n", + " title: Land Cover Map of ESA CCI brokered by CDS\n", + " summary: This dataset characterizes the land cover of ...\n", + " type: ESACCI-LC-L4-LCCS-Map-300m-P1Y\n", + " project: Climate Change Initiative - European Space Ag...\n", + " references: http://www.esa-landcover-cci.org/\n", + " ... ...\n", + " geospatial_lon_max: 180\n", + " spatial_resolution: 300m\n", + " geospatial_lat_units: degrees_north\n", + " geospatial_lat_resolution: 0.002778\n", + " geospatial_lon_units: degrees_east\n", + " geospatial_lon_resolution: 0.002778" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_convert" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/src/zampy/conventions/ALMA.json b/src/zampy/conventions/ALMA.json index 2971ee0..f5c04ea 100644 --- a/src/zampy/conventions/ALMA.json +++ b/src/zampy/conventions/ALMA.json @@ -76,5 +76,9 @@ "co2_concentration": { "variable": "CO2air", "units": "kilogram/kilogram" + }, + "land_cover": { + "variable": "land_cover", + "units": "" } } \ No newline at end of file diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index 2b964a8..22a7c6f 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -2,11 +2,13 @@ import os from pathlib import Path +from typing import Union from zipfile import ZipFile import numpy as np import xarray as xr import xarray_regrid from zampy.datasets import cds_utils +from zampy.datasets import converter from zampy.datasets import validation from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds @@ -150,6 +152,29 @@ def load( return ds_regrid + def convert( + self, + ingest_dir: Path, + convention: Union[str, Path], + ) -> bool: + converter.check_convention(convention) + ingest_folder = ingest_dir / self.name + + data_file_pattern = "land-cover_LCCS_MAP_*.nc" + + data_files = list(ingest_folder.glob(data_file_pattern)) + + for file in data_files: + # start conversion process + print(f"Start processing file `{file.name}`.") + ds = xr.open_dataset(file) + ds = converter.convert(ds, dataset=self, convention=convention) + # TODO: support derived variables + # TODO: other calculations + # call ds.compute() + + return True + def unzip_raw_to_netcdf( ingest_folder: Path, From e659074a859fd517dae1f11182b7728b90348e51 Mon Sep 17 00:00:00 2001 From: Yang Date: Wed, 18 Oct 2023 15:00:56 +0200 Subject: [PATCH 07/21] add test for cds util request land cover --- tests/test_cds_utils.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/test_cds_utils.py b/tests/test_cds_utils.py index 2309071..c70f805 100644 --- a/tests/test_cds_utils.py +++ b/tests/test_cds_utils.py @@ -123,6 +123,37 @@ def test_cds_request_cams_co2(mock_retrieve, valid_path_config): ) +@patch("cdsapi.Client.retrieve") +def 
test_cds_request_land_cover(mock_retrieve, valid_path_config): + """ "Test cds request for downloading data from CDS server.""" + dataset = "satellite-land-cover" + time_bounds = TimeBounds( + np.datetime64("1996-01-01T00:00:00"), np.datetime64("1996-12-31T00:00:00") + ) + path = Path(__file__).resolve().parent + overwrite = True + + # create a dummy .cdsapirc + patching = patch("zampy.datasets.cds_utils.CONFIG_PATH", valid_path_config) + with patching: + cds_utils.cds_request_land_cover( + dataset, + time_bounds, + path, + overwrite, + ) + + mock_retrieve.assert_called_with( + dataset, + { + "variable": "all", + "format": "zip", + "year": "1996", + "version": "v2.0.7cds", + }, + ) + + def test_cds_api_key_config_exist(valid_path_config): """Test zampy config exists.""" patching = patch("zampy.datasets.cds_utils.CONFIG_PATH", valid_path_config) From ba32cc722519f4c8999afb0a5b0bfb57b4836454 Mon Sep 17 00:00:00 2001 From: Yang Date: Wed, 18 Oct 2023 15:49:39 +0200 Subject: [PATCH 08/21] use TemporaryDirectory for temporary unzipped files --- src/zampy/datasets/land_cover.py | 82 ++++++++++++++++---------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index 22a7c6f..ded6f90 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -1,7 +1,7 @@ """Land cover classification dataset.""" -import os from pathlib import Path +from tempfile import TemporaryDirectory from typing import Union from zipfile import ZipFile import numpy as np @@ -193,11 +193,11 @@ def unzip_raw_to_netcdf( if ncfile.exists() and not overwrite: print(f"File '{ncfile.name}' already exists, skipping...") else: - ds = extract_netcdf_to_zampy(ingest_folder, file) + ds = extract_netcdf_to_zampy(file) ds.to_netcdf(path=ncfile) -def extract_netcdf_to_zampy(ingest_folder: Path, file: Path) -> xr.Dataset: +def extract_netcdf_to_zampy(file: Path) -> xr.Dataset: """Extract zipped data and convert to zampy format. Args: @@ -207,43 +207,43 @@ def extract_netcdf_to_zampy(ingest_folder: Path, file: Path) -> xr.Dataset: Returns: Coarse land cover data satisfying zampy standard. 
""" - with ZipFile(file, "r") as zip_object: - zipped_file_name = zip_object.namelist()[0] - zip_object.extract(zipped_file_name, path=ingest_folder) - - # only keep land cover class variable - ds = xr.open_dataset(ingest_folder / zipped_file_name) - var_list = [var for var in ds.data_vars] - raw_variable = "lccs_class" - var_list.remove(raw_variable) - ds = ds.drop_vars(var_list) - - # coarsen to fit into memory - ds = ds.sortby(["lat", "lon"]) - ds = ds.rename({"lat": "latitude", "lon": "longitude"}) - new_grid = xarray_regrid.Grid( - north=90, - east=180, - south=-90, - west=-180, - resolution_lat=0.25, # same as resolution of ERA5, must be sufficient - resolution_lon=0.25, - ) - - target_dataset = xarray_regrid.create_regridding_dataset(new_grid) - - ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9) - - # rename variable to follow the zampy convention - variable_name = "land_cover" - ds_regrid = ds_regrid.rename({raw_variable: variable_name}) - ds_regrid[variable_name].attrs["units"] = str( - VARIABLE_REFERENCE_LOOKUP[variable_name].unit - ) - ds_regrid[variable_name].attrs["description"] = VARIABLE_REFERENCE_LOOKUP[ - variable_name - ].desc - - os.remove(ingest_folder / zipped_file_name) + with TemporaryDirectory() as temp_dir: + unzip_folder = Path(temp_dir) + with ZipFile(file, "r") as zip_object: + zipped_file_name = zip_object.namelist()[0] + zip_object.extract(zipped_file_name, path=unzip_folder) + + # only keep land cover class variable + ds = xr.open_dataset(unzip_folder / zipped_file_name) + var_list = [var for var in ds.data_vars] + raw_variable = "lccs_class" + var_list.remove(raw_variable) + ds = ds.drop_vars(var_list) + + # coarsen to fit into memory + ds = ds.sortby(["lat", "lon"]) + ds = ds.rename({"lat": "latitude", "lon": "longitude"}) + new_grid = xarray_regrid.Grid( + north=90, + east=180, + south=-90, + west=-180, + resolution_lat=0.25, # same as resolution of ERA5, must be sufficient + resolution_lon=0.25, + ) + + target_dataset = xarray_regrid.create_regridding_dataset(new_grid) + + ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9) + + # rename variable to follow the zampy convention + variable_name = "land_cover" + ds_regrid = ds_regrid.rename({raw_variable: variable_name}) + ds_regrid[variable_name].attrs["units"] = str( + VARIABLE_REFERENCE_LOOKUP[variable_name].unit + ) + ds_regrid[variable_name].attrs["description"] = VARIABLE_REFERENCE_LOOKUP[ + variable_name + ].desc return ds_regrid From fccc9afb2bd4540760c3dba07ac3b719719977ea Mon Sep 17 00:00:00 2001 From: Yang Date: Wed, 18 Oct 2023 15:59:39 +0200 Subject: [PATCH 09/21] add tests for download and ingest --- .../land-cover_LCCS_MAP_300m_1996.zip | Bin 0 -> 2515 bytes tests/test_data/land-cover/properties.json | 0 tests/test_datasets/test_land_cover.py | 94 ++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 tests/test_data/land-cover/land-cover_LCCS_MAP_300m_1996.zip create mode 100644 tests/test_data/land-cover/properties.json create mode 100644 tests/test_datasets/test_land_cover.py diff --git a/tests/test_data/land-cover/land-cover_LCCS_MAP_300m_1996.zip b/tests/test_data/land-cover/land-cover_LCCS_MAP_300m_1996.zip new file mode 100644 index 0000000000000000000000000000000000000000..24385604e5ea9bdc02fe94c6a9aa3d7467f0381b GIT binary patch literal 2515 zcmb7`cQo4zAI5)15TgiDHLt6-AV$qvLCg@;-lRqes&)}oE4618O;Ngxx~i?l>Q$>z zv2IZ-B`sds5Q>^_?|c7y&w2lNzt1_(bDn>m|GwvZ?5voW`2hf61LSYmJI`dE#V@b{ 
zfP?}7r~o2>se=&$VF9y7z^v7OIl=*E;~N1}Q&9QN_fjg% zSO*;Dr^Z)Q!6RQ3;BU6pMe8OJJFh$Z`FJ>Bk8|_Y>Sp}v{lvwNaGG{3cez0M$b{7T z6fZAlLu9rhN#-qS%MtMOQC57FeduOkK$-1hc-^_eWrJ-E%pbfwo12~LzC0ECKv0?w zo-?>J2v;`@U&+~!_PZj~o19Fi z*Tm8vq!rCu2IYmTj31XFb;6D2i!|Q2mC*rQ;dw?n(ng6e*gi?ZT!3Tl!~!hpLi6Ui zHzgl=k=C+bZ`LEaWHWLsOBDukOT(4DNLu2-*Vp5Y^}&5N*-sNb(bcaS2561j zZ;nqRVQ``1mj<<5_r_Oih)u}F=>*T{QR(tmya_fYAy2C?mb>oTP5cg~opIc&%a?Cf zdyiCvT2!B}!aDb2I1jW&hoh?INb%ave5*0nZK_(%na5KeQdn={DEvZNUbOVjz7>L$ zj8y9h8%!RI(*rct#mtiQ(+29e_QpMd9w2|92h( z;>dA=&353q{yZaz!5#dgHFEuo?SnOP-Hh$S1Ac@Amj=M|`z9-e8^o1XB`O?y&1@w^=D0qbhq?Q4)_q4|rO%hC2c@YNpgUq6$LQ2|9;XBqR?@}$d}NU)=`jCY%)=3T~BxvlD%urJ;=Lvjjg!&w${Hx0 z^f(Nhlh<8-P{G!Uzk20$Vfr25B$*n|DQksWEqw`bum6B;ulYy{(U70^H5ep=sA3}~ z!UfrOd&H9Jx{h_BC*B*3&cIaC%^&ZOlA}Ah|^V3y0HM6+Npp_@?@hUax zgg}bRPuyI*%D8X(i3W$f-Va#qUVM;46h$O%fT_2xb-|f+bmu~d2F0yUgM5(C)I6g} zQGddFm&K2rKLk^S70=6Cf)zJy55JGes4h-;c+aCK@+<4^J8T}a!W~zA90$JqxT4Jp z_JBm`sc})BWVr>-$^5nI)RT198^cI2*fuUrH{3~0*=u9I|9A?!E?ydG8uIm;C-W)2 z4YqfcvHbp0N=;R-jCj(UDRZAcB3fgq_#7k$@=gM{D$pgOIjvWc=Fagia8**s{(3vJ z{?(*kqvZ>%QUwb4Dwg+zyFb%iYY8XRK)c*NQY_O2mFwuz&e8GDtaP zah7d){NVla&w2nUrgRh!F?`#mnann_QRgleOk-HfPKGy#QjTcJV_#WZc5XH*aZ`u} zBWLJ~y}IE`H5Y*7?)K&GG#Q5Evrkd}tU&kav}UC!5S|#Kcb9!jId!+;H^G#Z47|cB z!>L@PH$uj64w=sd2E28S1`mJ7ZHOJmjR6shE`zt?p6g!~7p_VaqIPwwE02OzLLX~b zG8F{Vro~u)CPhaJ`Lk*r#;p9sThJpez9#w&I4}x1-g3j8>C;ZPAVYHuHAh`exD%ES zJ=VYxO8a!2mA-Sl=bv(o`G7s~iyx%7&C|4H52tsGh(Ef6#r24G+>nM)T67hAbiwrR z7457kE5O0K`fhJf3t;f;gIX$1&s?Pz*T<-8+79ByZ)i+24KIF<&*#)x73TX%PdZ`u zE2$Y!@!0jwX50(8es?4Fbl>2!GM(AH$Y~N?jC?UE%QonYdhT>Xxq9kG*Tf~T-|ni{iF z{5P9#Az5wC;$eX~Bt4>f|q%nibVsh1fX+oY^`qln+>!Y5+g8ES4H z8(ph|U*ZhvW{*X;^L@*_D4HmCl8F?4ec1NL+z>SdspFdDC$0aiVq;bShe?>>f=zBK zo0VX6_5vbQluJ2LcW!jfr>8|t>N)DmsVtQf#!(4(u_p9vr%cdZf{Qc+W0y-JZDY%A zt!uR>w7B%&k~En0G)8sAon;R6l)RQ$Y&^)>a?>BF;u}MJeGf+*e%z#OoPq4D7#PKv q{uf9I{sOFj?qTZR`Tt`T=pU^5FQl@w0 Date: Wed, 18 Oct 2023 16:11:41 +0200 Subject: [PATCH 10/21] add tests for load and convert --- tests/test_datasets/test_land_cover.py | 36 +++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/tests/test_datasets/test_land_cover.py b/tests/test_datasets/test_land_cover.py index 882ef15..34e3fdd 100644 --- a/tests/test_datasets/test_land_cover.py +++ b/tests/test_datasets/test_land_cover.py @@ -36,9 +36,7 @@ def test_download(self, mock_retrieve, valid_path_config, dummy_dir): """Test download functionality. Here we mock the downloading and save property file to a fake path. 
""" - time_bounds = TimeBounds( - np.datetime64("1996-01-01"), np.datetime64("1996-12-31") - ) + times = TimeBounds(np.datetime64("1996-01-01"), np.datetime64("1996-12-31")) bbox = SpatialBounds(54, 56, 1, 3) variable = ["land_cover"] download_dir = Path(dummy_dir, "download") @@ -49,7 +47,7 @@ def test_download(self, mock_retrieve, valid_path_config, dummy_dir): with patching: land_cover_dataset.download( download_dir=download_dir, - time_bounds=time_bounds, + time_bounds=times, spatial_bounds=bbox, variable_names=variable, overwrite=True, @@ -92,3 +90,33 @@ def test_ingest(self, dummy_dir): """Test ingest function.""" ds, _ = self.ingest_dummy_data(dummy_dir) assert isinstance(ds, xr.Dataset) + + def test_load(self, dummy_dir): + """Test load function.""" + times = TimeBounds(np.datetime64("1996-01-01"), np.datetime64("1996-12-31")) + bbox = SpatialBounds(39, -107, 37, -109) + variable = ["land_cover"] + + _, land_cover_dataset = self.ingest_dummy_data(dummy_dir) + + ds = land_cover_dataset.load( + ingest_dir=Path(dummy_dir), + time_bounds=times, + spatial_bounds=bbox, + variable_names=variable, + resolution=1.0, + regrid_method="most_common", + ) + + # we assert the regridded coordinates + expected_lat = [37.0, 38.0, 39.0] + expected_lon = [-109.0, -108.0, -107.0] + + np.testing.assert_allclose(ds.latitude.values, expected_lat) + np.testing.assert_allclose(ds.longitude.values, expected_lon) + + def test_convert(self, dummy_dir): + """Test convert function.""" + _, land_cover_dataset = self.ingest_dummy_data(dummy_dir) + land_cover_dataset.convert(ingest_dir=Path(dummy_dir), convention="ALMA") + # TODO: finish this test when the function is complete. From a42a3a9af63e092de8b662829beafd5ba4d25638 Mon Sep 17 00:00:00 2001 From: Yang Date: Wed, 18 Oct 2023 16:56:28 +0200 Subject: [PATCH 11/21] increase coverage --- src/zampy/datasets/land_cover.py | 3 +-- tests/test_datasets/test_land_cover.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index ded6f90..5131521 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -201,11 +201,10 @@ def extract_netcdf_to_zampy(file: Path) -> xr.Dataset: """Extract zipped data and convert to zampy format. Args: - ingest_folder: Folder where the files have to be written to. file: Path to the land cover .zip archive. Returns: - Coarse land cover data satisfying zampy standard. + Coarse land cover data in zampy format. """ with TemporaryDirectory() as temp_dir: unzip_folder = Path(temp_dir) diff --git a/tests/test_datasets/test_land_cover.py b/tests/test_datasets/test_land_cover.py index 34e3fdd..8b4e8c8 100644 --- a/tests/test_datasets/test_land_cover.py +++ b/tests/test_datasets/test_land_cover.py @@ -6,6 +6,7 @@ import numpy as np import pytest import xarray as xr +import zampy.datasets.land_cover from zampy.datasets.catalog import LandCover from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds @@ -120,3 +121,19 @@ def test_convert(self, dummy_dir): _, land_cover_dataset = self.ingest_dummy_data(dummy_dir) land_cover_dataset.convert(ingest_dir=Path(dummy_dir), convention="ALMA") # TODO: finish this test when the function is complete. 
+ + +def test_unzip_raw_to_netcdf(): + ds = zampy.datasets.land_cover.extract_netcdf_to_zampy( + data_folder / "land-cover/land-cover_LCCS_MAP_300m_1996.zip" + ) + assert isinstance(ds, xr.Dataset) + + +def test_extract_netcdf_to_zampy(dummy_dir): + zampy.datasets.land_cover.unzip_raw_to_netcdf( + Path(dummy_dir), + data_folder / "land-cover/land-cover_LCCS_MAP_300m_1996.zip", + ) + dataset_path = Path(dummy_dir) / "land-cover_LCCS_MAP_300m_1996.nc" + assert dataset_path.exists() From 4ca978d772f753c9795e75ebc3a138e76704101e Mon Sep 17 00:00:00 2001 From: Yang Date: Thu, 19 Oct 2023 12:40:08 +0200 Subject: [PATCH 12/21] drop py3.9 and fix unit tests for windows --- .github/workflows/build.yml | 2 +- pyproject.toml | 7 +-- src/zampy/datasets/land_cover.py | 83 +++++++++++++------------- tests/test_datasets/test_land_cover.py | 3 +- 4 files changed, 48 insertions(+), 47 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0807701..97efe94 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ jobs: fail-fast: false matrix: os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] - python-version: ['3.9', '3.10', '3.11'] + python-version: ['3.10', '3.11'] env: MPLBACKEND: Agg # https://github.com/orgs/community/discussions/26434 steps: diff --git a/pyproject.toml b/pyproject.toml index 2c180a4..83fbff0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ name = "zampy" description = "python package for getting Land Surface Model input data." readme = "README.md" license = "Apache-2.0" -requires-python = ">=3.9, <3.12" +requires-python = ">=3.10, <3.12" authors = [ {email = "b.schilperoort@esciencecenter.nl"}, {name = "Bart Schilperoort, Yang Liu, Fakhereh Alidoost"} @@ -43,7 +43,6 @@ classifiers = [ "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] @@ -137,11 +136,11 @@ testpaths = ["tests"] [tool.mypy] ignore_missing_imports = true disallow_untyped_defs = true -python_version = "3.9" +python_version = "3.10" [tool.black] line-length = 88 -target-version = ['py39', 'py310', 'py311'] +target-version = ['py310', 'py311'] include = '\.pyi?$' [tool.ruff] diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index 5131521..500d6d9 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -1,7 +1,7 @@ """Land cover classification dataset.""" +import os from pathlib import Path -from tempfile import TemporaryDirectory from typing import Union from zipfile import ZipFile import numpy as np @@ -193,56 +193,57 @@ def unzip_raw_to_netcdf( if ncfile.exists() and not overwrite: print(f"File '{ncfile.name}' already exists, skipping...") else: - ds = extract_netcdf_to_zampy(file) + ds = extract_netcdf_to_zampy(ingest_folder, file) ds.to_netcdf(path=ncfile) -def extract_netcdf_to_zampy(file: Path) -> xr.Dataset: +def extract_netcdf_to_zampy(ingest_folder: Path, file: Path) -> xr.Dataset: """Extract zipped data and convert to zampy format. Args: + ingest_folder: Folder where the files have to be written to. file: Path to the land cover .zip archive. Returns: Coarse land cover data in zampy format. 
""" - with TemporaryDirectory() as temp_dir: - unzip_folder = Path(temp_dir) - with ZipFile(file, "r") as zip_object: - zipped_file_name = zip_object.namelist()[0] - zip_object.extract(zipped_file_name, path=unzip_folder) - - # only keep land cover class variable - ds = xr.open_dataset(unzip_folder / zipped_file_name) - var_list = [var for var in ds.data_vars] - raw_variable = "lccs_class" - var_list.remove(raw_variable) - ds = ds.drop_vars(var_list) - - # coarsen to fit into memory - ds = ds.sortby(["lat", "lon"]) - ds = ds.rename({"lat": "latitude", "lon": "longitude"}) - new_grid = xarray_regrid.Grid( - north=90, - east=180, - south=-90, - west=-180, - resolution_lat=0.25, # same as resolution of ERA5, must be sufficient - resolution_lon=0.25, - ) - - target_dataset = xarray_regrid.create_regridding_dataset(new_grid) - - ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9) - - # rename variable to follow the zampy convention - variable_name = "land_cover" - ds_regrid = ds_regrid.rename({raw_variable: variable_name}) - ds_regrid[variable_name].attrs["units"] = str( - VARIABLE_REFERENCE_LOOKUP[variable_name].unit - ) - ds_regrid[variable_name].attrs["description"] = VARIABLE_REFERENCE_LOOKUP[ - variable_name - ].desc + with ZipFile(file, "r") as zip_object: + zipped_file_name = zip_object.namelist()[0] + zip_object.extract(zipped_file_name, path=ingest_folder) + + # only keep land cover class variable + ds = xr.open_dataset(ingest_folder / zipped_file_name) + var_list = [var for var in ds.data_vars] + raw_variable = "lccs_class" + var_list.remove(raw_variable) + ds = ds.drop_vars(var_list) + + # coarsen to fit into memory + ds = ds.sortby(["lat", "lon"]) + ds = ds.rename({"lat": "latitude", "lon": "longitude"}) + new_grid = xarray_regrid.Grid( + north=90, + east=180, + south=-90, + west=-180, + resolution_lat=0.25, # same as resolution of ERA5, must be sufficient + resolution_lon=0.25, + ) + + target_dataset = xarray_regrid.create_regridding_dataset(new_grid) + + ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9) + + # rename variable to follow the zampy convention + variable_name = "land_cover" + ds_regrid = ds_regrid.rename({raw_variable: variable_name}) + ds_regrid[variable_name].attrs["units"] = str( + VARIABLE_REFERENCE_LOOKUP[variable_name].unit + ) + ds_regrid[variable_name].attrs["description"] = VARIABLE_REFERENCE_LOOKUP[ + variable_name + ].desc + + os.remove(ingest_folder / zipped_file_name) return ds_regrid diff --git a/tests/test_datasets/test_land_cover.py b/tests/test_datasets/test_land_cover.py index 8b4e8c8..19e44a9 100644 --- a/tests/test_datasets/test_land_cover.py +++ b/tests/test_datasets/test_land_cover.py @@ -123,8 +123,9 @@ def test_convert(self, dummy_dir): # TODO: finish this test when the function is complete. -def test_unzip_raw_to_netcdf(): +def test_unzip_raw_to_netcdf(dummy_dir): ds = zampy.datasets.land_cover.extract_netcdf_to_zampy( + dummy_dir, data_folder / "land-cover/land-cover_LCCS_MAP_300m_1996.zip" ) assert isinstance(ds, xr.Dataset) From bf94de839228aa192227cf18bbd6867e8ba052c8 Mon Sep 17 00:00:00 2001 From: Yang Date: Thu, 19 Oct 2023 12:47:41 +0200 Subject: [PATCH 13/21] Revert "drop py3.9 and fix unit tests for windows" This reverts commit 4ca978d772f753c9795e75ebc3a138e76704101e. 
--- .github/workflows/build.yml | 2 +- pyproject.toml | 7 ++- src/zampy/datasets/land_cover.py | 83 +++++++++++++------------- tests/test_datasets/test_land_cover.py | 3 +- 4 files changed, 47 insertions(+), 48 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 97efe94..0807701 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ jobs: fail-fast: false matrix: os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] - python-version: ['3.10', '3.11'] + python-version: ['3.9', '3.10', '3.11'] env: MPLBACKEND: Agg # https://github.com/orgs/community/discussions/26434 steps: diff --git a/pyproject.toml b/pyproject.toml index 83fbff0..2c180a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ name = "zampy" description = "python package for getting Land Surface Model input data." readme = "README.md" license = "Apache-2.0" -requires-python = ">=3.10, <3.12" +requires-python = ">=3.9, <3.12" authors = [ {email = "b.schilperoort@esciencecenter.nl"}, {name = "Bart Schilperoort, Yang Liu, Fakhereh Alidoost"} @@ -43,6 +43,7 @@ classifiers = [ "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] @@ -136,11 +137,11 @@ testpaths = ["tests"] [tool.mypy] ignore_missing_imports = true disallow_untyped_defs = true -python_version = "3.10" +python_version = "3.9" [tool.black] line-length = 88 -target-version = ['py310', 'py311'] +target-version = ['py39', 'py310', 'py311'] include = '\.pyi?$' [tool.ruff] diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index 500d6d9..5131521 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -1,7 +1,7 @@ """Land cover classification dataset.""" -import os from pathlib import Path +from tempfile import TemporaryDirectory from typing import Union from zipfile import ZipFile import numpy as np @@ -193,57 +193,56 @@ def unzip_raw_to_netcdf( if ncfile.exists() and not overwrite: print(f"File '{ncfile.name}' already exists, skipping...") else: - ds = extract_netcdf_to_zampy(ingest_folder, file) + ds = extract_netcdf_to_zampy(file) ds.to_netcdf(path=ncfile) -def extract_netcdf_to_zampy(ingest_folder: Path, file: Path) -> xr.Dataset: +def extract_netcdf_to_zampy(file: Path) -> xr.Dataset: """Extract zipped data and convert to zampy format. Args: - ingest_folder: Folder where the files have to be written to. file: Path to the land cover .zip archive. Returns: Coarse land cover data in zampy format. 
""" - with ZipFile(file, "r") as zip_object: - zipped_file_name = zip_object.namelist()[0] - zip_object.extract(zipped_file_name, path=ingest_folder) - - # only keep land cover class variable - ds = xr.open_dataset(ingest_folder / zipped_file_name) - var_list = [var for var in ds.data_vars] - raw_variable = "lccs_class" - var_list.remove(raw_variable) - ds = ds.drop_vars(var_list) - - # coarsen to fit into memory - ds = ds.sortby(["lat", "lon"]) - ds = ds.rename({"lat": "latitude", "lon": "longitude"}) - new_grid = xarray_regrid.Grid( - north=90, - east=180, - south=-90, - west=-180, - resolution_lat=0.25, # same as resolution of ERA5, must be sufficient - resolution_lon=0.25, - ) - - target_dataset = xarray_regrid.create_regridding_dataset(new_grid) - - ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9) - - # rename variable to follow the zampy convention - variable_name = "land_cover" - ds_regrid = ds_regrid.rename({raw_variable: variable_name}) - ds_regrid[variable_name].attrs["units"] = str( - VARIABLE_REFERENCE_LOOKUP[variable_name].unit - ) - ds_regrid[variable_name].attrs["description"] = VARIABLE_REFERENCE_LOOKUP[ - variable_name - ].desc - - os.remove(ingest_folder / zipped_file_name) + with TemporaryDirectory() as temp_dir: + unzip_folder = Path(temp_dir) + with ZipFile(file, "r") as zip_object: + zipped_file_name = zip_object.namelist()[0] + zip_object.extract(zipped_file_name, path=unzip_folder) + + # only keep land cover class variable + ds = xr.open_dataset(unzip_folder / zipped_file_name) + var_list = [var for var in ds.data_vars] + raw_variable = "lccs_class" + var_list.remove(raw_variable) + ds = ds.drop_vars(var_list) + + # coarsen to fit into memory + ds = ds.sortby(["lat", "lon"]) + ds = ds.rename({"lat": "latitude", "lon": "longitude"}) + new_grid = xarray_regrid.Grid( + north=90, + east=180, + south=-90, + west=-180, + resolution_lat=0.25, # same as resolution of ERA5, must be sufficient + resolution_lon=0.25, + ) + + target_dataset = xarray_regrid.create_regridding_dataset(new_grid) + + ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9) + + # rename variable to follow the zampy convention + variable_name = "land_cover" + ds_regrid = ds_regrid.rename({raw_variable: variable_name}) + ds_regrid[variable_name].attrs["units"] = str( + VARIABLE_REFERENCE_LOOKUP[variable_name].unit + ) + ds_regrid[variable_name].attrs["description"] = VARIABLE_REFERENCE_LOOKUP[ + variable_name + ].desc return ds_regrid diff --git a/tests/test_datasets/test_land_cover.py b/tests/test_datasets/test_land_cover.py index 19e44a9..8b4e8c8 100644 --- a/tests/test_datasets/test_land_cover.py +++ b/tests/test_datasets/test_land_cover.py @@ -123,9 +123,8 @@ def test_convert(self, dummy_dir): # TODO: finish this test when the function is complete. 
 
 
-def test_unzip_raw_to_netcdf(dummy_dir):
+def test_unzip_raw_to_netcdf():
     ds = zampy.datasets.land_cover.extract_netcdf_to_zampy(
-        dummy_dir,
         data_folder / "land-cover/land-cover_LCCS_MAP_300m_1996.zip"
     )
    assert isinstance(ds, xr.Dataset)
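As an aside to the patch above: the coarsening step leans entirely on xarray-regrid's "most common value" regridder. The following standalone sketch is not part of the patch series; it uses synthetic data and assumes the same `Grid`, `create_regridding_dataset` and `.regrid.most_common` API that the diff itself calls, so treat it as an illustration of the idea rather than zampy code.

```python
# Sketch only: coarsen a categorical map the way extract_netcdf_to_zampy does.
import numpy as np
import xarray as xr
import xarray_regrid  # registers the .regrid accessor on xarray objects

# Synthetic 0.1 degree "land cover" field with integer class labels.
lat = np.arange(-89.95, 90.0, 0.1)
lon = np.arange(-179.95, 180.0, 0.1)
classes = np.random.default_rng(0).integers(0, 5, size=(1, lat.size, lon.size))
ds = xr.Dataset(
    {"lccs_class": (("time", "latitude", "longitude"), classes)},
    coords={
        "time": [np.datetime64("1996-01-01")],
        "latitude": lat,
        "longitude": lon,
    },
)

# Target grid at 0.25 degree, matching the values used in the patch.
grid = xarray_regrid.Grid(
    north=90, east=180, south=-90, west=-180,
    resolution_lat=0.25, resolution_lon=0.25,
)
target = xarray_regrid.create_regridding_dataset(grid)

# "Most common" regridding keeps the dominant class per coarse cell, which is
# the right reduction for categorical data (a mean of class codes is meaningless).
coarse = ds.regrid.most_common(target, time_dim="time", max_mem=1e9)
print(coarse["lccs_class"].sizes)
```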
From c4cb0966f905423c89bad03c88b40a90b42cf5bc Mon Sep 17 00:00:00 2001
From: Yang
Date: Thu, 19 Oct 2023 12:51:17 +0200
Subject: [PATCH 14/21] drop py39

---
 .github/workflows/build.yml | 2 +-
 pyproject.toml              | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 0807701..97efe94 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -17,7 +17,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ['ubuntu-latest', 'macos-latest', 'windows-latest']
-        python-version: ['3.9', '3.10', '3.11']
+        python-version: ['3.10', '3.11']
     env:
       MPLBACKEND: Agg  # https://github.com/orgs/community/discussions/26434
     steps:
diff --git a/pyproject.toml b/pyproject.toml
index 2c180a4..1dc6d30 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,7 +22,7 @@ name = "zampy"
 description = "python package for getting Land Surface Model input data."
 readme = "README.md"
 license = "Apache-2.0"
-requires-python = ">=3.9, <3.12"
+requires-python = ">=3.10, <3.12"
 authors = [
     {email = "b.schilperoort@esciencecenter.nl"},
     {name = "Bart Schilperoort, Yang Liu, Fakhereh Alidoost"}
@@ -43,7 +43,6 @@ classifiers = [
     "Operating System :: OS Independent",
     "Programming Language :: Python",
     "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
@@ -141,7 +140,7 @@ python_version = "3.9"
 
 [tool.black]
 line-length = 88
-target-version = ['py39', 'py310', 'py311']
+target-version = ['py310', 'py311']
 include = '\.pyi?$'
 
 [tool.ruff]
@@ -172,7 +171,7 @@ line-length = 88
 exclude = ["docs", "build"]
 # Allow unused variables when underscore-prefixed.
 dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
-target-version = "py39"
+target-version = "py310"
 
 [tool.ruff.per-file-ignores]
 "tests/**" = ["D"]

From abf11dabad7e0fef3f76046efddc810365abcb7d Mon Sep 17 00:00:00 2001
From: Yang
Date: Thu, 19 Oct 2023 13:15:34 +0200
Subject: [PATCH 15/21] close dataset to fix tests on windows

---
 src/zampy/datasets/land_cover.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py
index 5131521..d579513 100644
--- a/src/zampy/datasets/land_cover.py
+++ b/src/zampy/datasets/land_cover.py
@@ -235,6 +235,7 @@ def extract_netcdf_to_zampy(file: Path) -> xr.Dataset:
 
         ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9)
 
+        ds.close()
         # rename variable to follow the zampy convention
         variable_name = "land_cover"
         ds_regrid = ds_regrid.rename({raw_variable: variable_name})

From ed70294feab3d367626e23f966f359e79467e7d7 Mon Sep 17 00:00:00 2001
From: Yang
Date: Thu, 19 Oct 2023 13:28:29 +0200
Subject: [PATCH 16/21] Revert "close dataset to fix tests on windows"

This reverts commit abf11dabad7e0fef3f76046efddc810365abcb7d.
---
 src/zampy/datasets/land_cover.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py
index d579513..5131521 100644
--- a/src/zampy/datasets/land_cover.py
+++ b/src/zampy/datasets/land_cover.py
@@ -235,7 +235,6 @@ def extract_netcdf_to_zampy(file: Path) -> xr.Dataset:
 
         ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9)
 
-        ds.close()
         # rename variable to follow the zampy convention
         variable_name = "land_cover"
         ds_regrid = ds_regrid.rename({raw_variable: variable_name})

From 5c2d2ccd315ea9f953d70c0e0b78a3cd757a9b32 Mon Sep 17 00:00:00 2001
From: Yang
Date: Thu, 19 Oct 2023 13:36:07 +0200
Subject: [PATCH 17/21] fix linter after dropping py39

---
 pyproject.toml                          | 2 +-
 src/zampy/datasets/converter.py         | 7 ++-----
 src/zampy/datasets/dataset_protocol.py  | 3 +--
 src/zampy/datasets/ecmwf_dataset.py     | 3 +--
 src/zampy/datasets/eth_canopy_height.py | 7 +++----
 src/zampy/datasets/land_cover.py        | 3 +--
 src/zampy/datasets/prism_dem.py         | 5 ++---
 src/zampy/datasets/utils.py             | 6 ++----
 8 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1dc6d30..08bd5e2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -136,7 +136,7 @@ testpaths = ["tests"]
 [tool.mypy]
 ignore_missing_imports = true
 disallow_untyped_defs = true
-python_version = "3.9"
+python_version = "3.10"
 
 [tool.black]
 line-length = 88
diff --git a/src/zampy/datasets/converter.py b/src/zampy/datasets/converter.py
index f641950..6692be8 100644
--- a/src/zampy/datasets/converter.py
+++ b/src/zampy/datasets/converter.py
@@ -2,7 +2,6 @@
 import json
 import warnings
 from pathlib import Path
-from typing import Union
 import cf_xarray.units  # noqa: F401
 import pint_xarray  # noqa: F401
 import xarray as xr
@@ -14,7 +13,7 @@
 conventions_path = Path(__file__).resolve().parents[1] / "conventions"
 
 
-def check_convention(convention: Union[str, Path]) -> None:
+def check_convention(convention: str | Path) -> None:
     """Check if the given convention is supported."""
     if isinstance(convention, str):
         if convention.upper() not in CONVENTIONS:
@@ -33,9 +32,7 @@ def check_convention(convention: Union[str, Path]) -> None:
     print(f"Starting data conversion to the convention defined in '{convention}'")
 
 
-def convert(
-    data: xr.Dataset, dataset: Dataset, convention: Union[str, Path]
-) -> xr.Dataset:
+def convert(data: xr.Dataset, dataset: Dataset, convention: str | Path) -> xr.Dataset:
     """Convert a loaded dataset to the specified convention.
 
     Args:
diff --git a/src/zampy/datasets/dataset_protocol.py b/src/zampy/datasets/dataset_protocol.py
index 1b1de29..097abdc 100644
--- a/src/zampy/datasets/dataset_protocol.py
+++ b/src/zampy/datasets/dataset_protocol.py
@@ -4,7 +4,6 @@
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any
-from typing import Optional
 from typing import Protocol
 import numpy as np
 import xarray as xr
@@ -19,7 +18,7 @@ class Variable:
 
     name: str
     unit: Any  # pint unit. typing has issues with pint 0.21
-    desc: Optional[str] = ""
+    desc: str | None = ""
 
 
 @dataclass
diff --git a/src/zampy/datasets/ecmwf_dataset.py b/src/zampy/datasets/ecmwf_dataset.py
index 4ceb1e7..2254ae5 100644
--- a/src/zampy/datasets/ecmwf_dataset.py
+++ b/src/zampy/datasets/ecmwf_dataset.py
@@ -1,7 +1,6 @@
 """Base module for datasets available on CDS."""
 
 from pathlib import Path
-from typing import Union
 import xarray as xr
 from zampy.datasets import cds_utils
 from zampy.datasets import converter
@@ -129,7 +128,7 @@ def load(
     def convert(
         self,
         ingest_dir: Path,
-        convention: Union[str, Path],
+        convention: str | Path,
     ) -> bool:
         converter.check_convention(convention)
         ingest_folder = ingest_dir / self.name
diff --git a/src/zampy/datasets/eth_canopy_height.py b/src/zampy/datasets/eth_canopy_height.py
index 7845651..c82e686 100644
--- a/src/zampy/datasets/eth_canopy_height.py
+++ b/src/zampy/datasets/eth_canopy_height.py
@@ -1,7 +1,6 @@
 """ETH canopy height dataset."""
 
 import gzip
 from pathlib import Path
-from typing import Union
 import numpy as np
 import xarray as xr
 from zampy.datasets import converter
@@ -109,7 +108,7 @@ def ingest(
         sd_files = list(download_folder.glob(sd_file_pattern))
 
         is_sd_file = len(data_files) * [False] + len(sd_files) * [True]
-        for file, sd_file in zip(data_files + sd_files, is_sd_file):
+        for file, sd_file in zip(data_files + sd_files, is_sd_file, strict=True):
             convert_tiff_to_netcdf(
                 ingest_folder,
                 file=file,
@@ -144,7 +143,7 @@ def load(
     def convert(
         self,
         ingest_dir: Path,
-        convention: Union[str, Path],
+        convention: str | Path,
     ) -> bool:
         converter.check_convention(convention)
         ingest_folder = ingest_dir / self.name
@@ -188,7 +187,7 @@ def get_filenames(bounds: SpatialBounds, sd_file: bool = False) -> list[str]:
 
     fnames = [""] * len(lats)
 
-    for i, (lat, lon) in enumerate(zip(lats, lons)):
+    for i, (lat, lon) in enumerate(zip(lats, lons, strict=True)):
         lat_ = int(lat // step * step)
         lon_ = int(lon // step * step)
 
diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py
index 5131521..cab2661 100644
--- a/src/zampy/datasets/land_cover.py
+++ b/src/zampy/datasets/land_cover.py
@@ -2,7 +2,6 @@
 
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Union
 from zipfile import ZipFile
 import numpy as np
 import xarray as xr
@@ -155,7 +154,7 @@ def load(
     def convert(
         self,
         ingest_dir: Path,
-        convention: Union[str, Path],
+        convention: str | Path,
     ) -> bool:
         converter.check_convention(convention)
         ingest_folder = ingest_dir / self.name
diff --git a/src/zampy/datasets/prism_dem.py b/src/zampy/datasets/prism_dem.py
index 17f47b2..3d37e7d 100644
--- a/src/zampy/datasets/prism_dem.py
+++ b/src/zampy/datasets/prism_dem.py
@@ -3,7 +3,6 @@
 import tarfile
 from pathlib import Path
 from typing import Literal
-from typing import Union
 import numpy as np
 import xarray as xr
 from rasterio.io import MemoryFile
@@ -153,7 +152,7 @@ def preproc(ds: xr.Dataset) -> xr.Dataset:
     def convert(
         self,
         ingest_dir: Path,
-        convention: Union[str, Path],
+        convention: str | Path,
     ) -> bool:
         converter.check_convention(convention)
         ingest_folder = ingest_dir / self.name
@@ -301,7 +300,7 @@ def get_archive_filenames(
     else:
         raise ValueError("Unknown glo_number.")
 
-    for i, (lat, lon) in enumerate(zip(lats, lons)):
+    for i, (lat, lon) in enumerate(zip(lats, lons, strict=True)):
         lat_ = int(lat // step * step)
         lon_ = int(lon // step * step)
 
diff --git a/src/zampy/datasets/utils.py b/src/zampy/datasets/utils.py
index 1b27975..ee15680 100644
--- a/src/zampy/datasets/utils.py
+++ b/src/zampy/datasets/utils.py
@@ -1,8 +1,6 @@
 """Shared utilities from datasets."""
 
 import urllib.request
 from pathlib import Path
-from typing import Optional
-from typing import Union
 import requests
 from tqdm import tqdm
 
@@ -11,8 +9,8 @@ class TqdmUpdate(tqdm):
     """Wrap a tqdm progress bar to be updateable by urllib.request.urlretrieve."""
 
     def update_to(
-        self, b: int = 1, bsize: int = 1, tsize: Optional[int] = None
-    ) -> Union[bool, None]:
+        self, b: int = 1, bsize: int = 1, tsize: int | None = None
+    ) -> bool | None:
         """Update the progress bar.
 
         Args:
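Two Python 3.10+ idioms carry most of this linter cleanup: the built-in `X | Y` union syntax from PEP 604, which replaces `typing.Union` and `typing.Optional` in annotations, and `zip(..., strict=True)`, which raises instead of silently truncating when its iterables differ in length. A tiny standalone illustration (not part of the patch; the names here are made up):

```python
from pathlib import Path


def check(convention: str | Path) -> None:  # previously: Union[str, Path]
    """Accept either a convention name or a path to a convention file."""
    print(f"convention: {convention}")


files = ["a.nc", "b.nc"]
flags = [True, False]
# strict=True turns a silent length mismatch into a loud ValueError.
pairs = list(zip(files, flags, strict=True))

check("EXAMPLE")
check(Path("my_convention.json"))
print(pairs)
```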
From 703a01c48b1bf03f07227ef763f26ea8e169936e Mon Sep 17 00:00:00 2001
From: Yang
Date: Fri, 20 Oct 2023 09:57:55 +0200
Subject: [PATCH 18/21] try context manager to fix tests on windows

---
 src/zampy/datasets/land_cover.py | 42 +++++++++++++++++---------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py
index cab2661..ce1dc2f 100644
--- a/src/zampy/datasets/land_cover.py
+++ b/src/zampy/datasets/land_cover.py
@@ -205,34 +205,36 @@ def extract_netcdf_to_zampy(file: Path) -> xr.Dataset:
     Returns:
        Coarse land cover data in zampy format.
     """
-    with TemporaryDirectory() as temp_dir:
+    with TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
         unzip_folder = Path(temp_dir)
         with ZipFile(file, "r") as zip_object:
             zipped_file_name = zip_object.namelist()[0]
             zip_object.extract(zipped_file_name, path=unzip_folder)
 
         # only keep land cover class variable
-        ds = xr.open_dataset(unzip_folder / zipped_file_name)
-        var_list = [var for var in ds.data_vars]
-        raw_variable = "lccs_class"
-        var_list.remove(raw_variable)
-        ds = ds.drop_vars(var_list)
-
-        # coarsen to fit into memory
-        ds = ds.sortby(["lat", "lon"])
-        ds = ds.rename({"lat": "latitude", "lon": "longitude"})
-        new_grid = xarray_regrid.Grid(
-            north=90,
-            east=180,
-            south=-90,
-            west=-180,
-            resolution_lat=0.25,  # same as resolution of ERA5, must be sufficient
-            resolution_lon=0.25,
-        )
+        with xr.open_dataset(unzip_folder / zipped_file_name) as ds:
+            var_list = [var for var in ds.data_vars]
+            raw_variable = "lccs_class"
+            var_list.remove(raw_variable)
+            ds = ds.drop_vars(var_list)  # noqa: PLW2901
+
+            # coarsen to fit into memory
+            ds = ds.sortby(["lat", "lon"])  # noqa: PLW2901
+            ds = ds.rename({"lat": "latitude", "lon": "longitude"})  # noqa: PLW2901
+            new_grid = xarray_regrid.Grid(
+                north=90,
+                east=180,
+                south=-90,
+                west=-180,
+                resolution_lat=0.25,  # same as resolution of ERA5, must be sufficient
+                resolution_lon=0.25,
+            )
 
-        target_dataset = xarray_regrid.create_regridding_dataset(new_grid)
+            target_dataset = xarray_regrid.create_regridding_dataset(new_grid)
 
-        ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9)
+            ds_regrid = ds.regrid.most_common(
+                target_dataset, time_dim="time", max_mem=1e9
+            )
 
         # rename variable to follow the zampy convention
         variable_name = "land_cover"
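The Windows-only test failures that motivate this commit (and the earlier close/revert pair) come from file locking: as long as the extracted netCDF file is still held open by xarray, Windows refuses to delete it, so the `TemporaryDirectory` cleanup fails. A small self-contained sketch of the pattern the patch adopts follows; it is not zampy code, the file name is made up, and it assumes a netCDF backend (e.g. netcdf4) is installed.

```python
from pathlib import Path
from tempfile import TemporaryDirectory

import xarray as xr


def roundtrip_example() -> int:
    """Sketch: open a file inside a TemporaryDirectory without locking it."""
    with TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
        tmp_file = Path(temp_dir) / "example.nc"
        xr.Dataset({"lccs_class": ("x", [1, 2, 2])}).to_netcdf(tmp_file)

        # Without the context manager, the handle returned by open_dataset can
        # stay open until garbage collection; on Windows that blocks deleting
        # temp_dir and the cleanup raises PermissionError.
        with xr.open_dataset(tmp_file) as ds:
            n_classes = int(ds["lccs_class"].max())
        # The handle is closed here, before the directory cleanup runs;
        # ignore_cleanup_errors=True (Python 3.10+) is a fallback in case
        # something else still holds a file open.
    return n_classes


print(roundtrip_example())
```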
From 5a890adda9c6c5a35a2b87e32461cb5bebd2b240 Mon Sep 17 00:00:00 2001
From: Yang
Date: Tue, 24 Oct 2023 13:09:59 +0200
Subject: [PATCH 19/21] address comments

---
 src/zampy/datasets/land_cover.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py
index ce1dc2f..f780c75 100644
--- a/src/zampy/datasets/land_cover.py
+++ b/src/zampy/datasets/land_cover.py
@@ -199,13 +199,18 @@ def unzip_raw_to_netcdf(
 def extract_netcdf_to_zampy(file: Path) -> xr.Dataset:
     """Extract zipped data and convert to zampy format.
 
+    Since the original land cover field is generally too large to
+    fit into memory, in this function the loaded land cover data
+    are regridded to a resolution of 0.25 degrees, the same as the
+    native resolution of the ERA5 data.
+
     Args:
         file: Path to the land cover .zip archive.
 
     Returns:
        Coarse land cover data in zampy format.
     """
-    with TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
+    with TemporaryDirectory() as temp_dir:
         unzip_folder = Path(temp_dir)
         with ZipFile(file, "r") as zip_object:
             zipped_file_name = zip_object.namelist()[0]
             zip_object.extract(zipped_file_name, path=unzip_folder)
@@ -226,7 +231,7 @@ def extract_netcdf_to_zampy(file: Path) -> xr.Dataset:
                 east=180,
                 south=-90,
                 west=-180,
-                resolution_lat=0.25,  # same as resolution of ERA5, must be sufficient
+                resolution_lat=0.25,  # same as resolution of ERA5
                 resolution_lon=0.25,
             )

From 002859549eb8b16ccf081524dc32f12b17e90388 Mon Sep 17 00:00:00 2001
From: Yang
Date: Wed, 15 Nov 2023 10:07:36 +0100
Subject: [PATCH 20/21] Apply suggestions from code review

Co-authored-by: Bart Schilperoort
---
 src/zampy/datasets/land_cover.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py
index f780c75..c399d82 100644
--- a/src/zampy/datasets/land_cover.py
+++ b/src/zampy/datasets/land_cover.py
@@ -164,13 +164,9 @@ def convert(
 
         data_files = list(ingest_folder.glob(data_file_pattern))
         for file in data_files:
-            # start conversion process
             print(f"Start processing file `{file.name}`.")
             ds = xr.open_dataset(file)
             ds = converter.convert(ds, dataset=self, convention=convention)
-            # TODO: support derived variables
-            # TODO: other calculations
-            # call ds.compute()
 
         return True
 
@@ -231,8 +227,8 @@ def extract_netcdf_to_zampy(file: Path) -> xr.Dataset:
                 east=180,
                 south=-90,
                 west=-180,
-                resolution_lat=0.25,  # same as resolution of ERA5
-                resolution_lon=0.25,
+                resolution_lat=0.05,
+                resolution_lon=0.05,
             )
 
             target_dataset = xarray_regrid.create_regridding_dataset(new_grid)

From aefa03bb235a8377e8561edafc0756e69c46d470 Mon Sep 17 00:00:00 2001
From: Yang
Date: Wed, 15 Nov 2023 12:30:58 +0100
Subject: [PATCH 21/21] address review comments

---
 src/zampy/datasets/land_cover.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py
index c399d82..66e8f13 100644
--- a/src/zampy/datasets/land_cover.py
+++ b/src/zampy/datasets/land_cover.py
@@ -195,10 +195,9 @@ def unzip_raw_to_netcdf(
 def extract_netcdf_to_zampy(file: Path) -> xr.Dataset:
     """Extract zipped data and convert to zampy format.
 
-    Since the original land cover field is generally too large to
-    fit into memory, in this function the loaded land cover data
-    are regridded to a resolution of 0.25 degrees, the same as the
-    native resolution of the ERA5 data.
+    Since the native resolution of the land cover field is
+    generally too high, in this function the loaded land cover
+    data are regridded to a resolution of 0.05 degrees.
 
     Args:
         file: Path to the land cover .zip archive.
@@ -219,7 +218,6 @@ def extract_netcdf_to_zampy(file: Path) -> xr.Dataset:
             var_list.remove(raw_variable)
             ds = ds.drop_vars(var_list)  # noqa: PLW2901
 
-            # coarsen to fit into memory
             ds = ds.sortby(["lat", "lon"])  # noqa: PLW2901
             ds = ds.rename({"lat": "latitude", "lon": "longitude"})  # noqa: PLW2901
             new_grid = xarray_regrid.Grid(