diff --git a/demo/land_cover_dataset_demo.ipynb b/demo/land_cover_dataset_demo.ipynb new file mode 100644 index 0000000..92f152d --- /dev/null +++ b/demo/land_cover_dataset_demo.ipynb @@ -0,0 +1,98 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Handle land cover dataset with Zampy\n", + "Demo notebook for developers." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/yangliu/mambaforge/envs/ecoextreml/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from zampy.datasets.catalog import LandCover\n", + "from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "work_dir = Path(\"/home/yangliu/EcoExtreML/temp\")\n", + "download_dir = work_dir / \"download\"\n", + "ingest_dir = work_dir / \"ingest\"\n", + "times = TimeBounds(np.datetime64(\"2010-01-01T00:00:00\"), np.datetime64(\"2011-01-31T23:00:00\"))\n", + "bbox_demo = SpatialBounds(54, 56, 1, 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 50%|█████ | 1/2 [00:00<00:00, 2.97it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File 'land-cover_LCCS_MAP_300m_2010.zip' already exists, skipping...\n" + ] + } + ], + "source": [ + "land_cover_dataset = LandCover()\n", + "land_cover_dataset.download(\n", + " download_dir=download_dir,\n", + " time_bounds=times,\n", + " spatial_bounds=bbox_demo,\n", + " 
variable_names=[\"land_cover\"],\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ecoextreml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/zampy/datasets/cds_utils.py b/src/zampy/datasets/cds_utils.py index d6aec09..e81f583 100644 --- a/src/zampy/datasets/cds_utils.py +++ b/src/zampy/datasets/cds_utils.py @@ -19,11 +19,13 @@ "reanalysis-era5-single-levels": "era5", "reanalysis-era5-land": "era5-land", "cams-global-ghg-reanalysis-egg4": "cams", + "satellite-land-cover": "land-cover", } SERVER_API = { "era5": "cdsapi", "era5-land": "cdsapi", "cams": "adsapi", + "land-cover": "cdsapi", } CONFIG_PATH = Path.home() / ".config" / "zampy" / "zampy_config.yml" @@ -87,6 +89,56 @@ def cds_request( ) +def cds_request_land_cover( + dataset: str, + time_bounds: TimeBounds, + path: Path, + overwrite: bool, +) -> None: + """Download land cover data via CDS API. + + To make a request via the CDS API using `zampy`, the user needs to set up the + zampy configuration file `zampy_config.yml` following the instructions on + https://github.com/EcoExtreML/zampy/blob/main/README.md#instructions-for-cds-datasets-eg-era5. + + Args: + dataset: Dataset name for retrieval via `cdsapi`. + time_bounds: Zampy time bounds object. + path: Directory in which the downloaded files should be saved. + overwrite: Whether an existing file (of the same size!) should be overwritten. 
+ """ + fname = PRODUCT_FNAME[dataset] + + url, api_key = cds_api_key(fname) + + c = cdsapi.Client( + url=url, + key=api_key, + verify=True, + quiet=True, + ) + + years_months = time_bounds_to_year_month(time_bounds) + years = {year for (year, _) in years_months} + + for year in tqdm(years): + if int(year) < 2016: + version = "v2.0.7cds" + else: + version = "v2.1.1" + r = c.retrieve( + dataset, + { + 'variable': "all", + 'format': "zip", + 'year': year, + 'version': version, + }, + ) + fpath = path / f"{fname}_LCCS_MAP_300m_{year}.zip" + _check_and_download(r, fpath, overwrite) + + def cds_api_key(product_name: str) -> tuple[str, str]: """Load url and CDS/ADS API key. diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index 5a3c9b0..725abb0 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -1,9 +1,14 @@ """Land cover classification dataset.""" +from pathlib import Path import numpy as np +from zampy.datasets import cds_utils +from zampy.datasets import validation from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds from zampy.datasets.dataset_protocol import Variable +from zampy.datasets.dataset_protocol import copy_properties_file +from zampy.datasets.dataset_protocol import write_properties_file from zampy.reference.variables import VARIABLE_REFERENCE_LOOKUP from zampy.reference.variables import unit_registry @@ -14,6 +19,7 @@ class LandCover: # noqa: D101 """Land cover classification gridded maps.""" + name = "land-cover" time_bounds = TimeBounds(np.datetime64("1992-01-01"), np.datetime64("2020-12-31")) spatial_bounds = SpatialBounds(90, 180, -90, -180) @@ -30,7 +36,7 @@ class LandCover: # noqa: D101 bib = """ @article{buchhorn2020copernicus, title={Copernicus global land cover layers—collection 2}, - author={Buchhorn, Marcel and Lesiv, Myroslava and Tsendbazar, Nandin-Erdene and Herold, Martin and Bertels, Luc and Smets, Bruno}, + 
author={Buchhorn, Marcel et al.}, journal={Remote Sensing}, volume={12}, number={6}, @@ -43,3 +49,39 @@ class LandCover: # noqa: D101 data_url = "https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover?tab=overview" cds_dataset = "satellite-land-cover" + + def __init__(self) -> None: + """Init.""" + pass + + def download( + self, + download_dir: Path, + time_bounds: TimeBounds, + spatial_bounds: SpatialBounds, + variable_names: list[str], + overwrite: bool = False, + ) -> bool: + validation.validate_download_request( + self, + download_dir, + time_bounds, + spatial_bounds, + variable_names, + ) + + download_folder = download_dir / self.name + download_folder.mkdir(parents=True, exist_ok=True) + + cds_utils.cds_request_land_cover( + dataset=self.cds_dataset, + time_bounds=time_bounds, + path=download_folder, + overwrite=overwrite, + ) + + write_properties_file( + download_folder, spatial_bounds, time_bounds, variable_names + ) + + return True