Skip to content

Commit

Permalink
add downloading function and demo notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
Yang committed Sep 13, 2023
1 parent ad9183a commit 313a14b
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 1 deletion.
98 changes: 98 additions & 0 deletions demo/land_cover_dataset_demo.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Handle land cover dataset with Zampy\n",
"Demo notebook for developers."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/yangliu/mambaforge/envs/ecoextreml/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import numpy as np\n",
"from zampy.datasets.catalog import LandCover\n",
"from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds\n",
"from pathlib import Path"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"work_dir = Path(\"/home/yangliu/EcoExtreML/temp\")\n",
"download_dir = work_dir / \"download\"\n",
"ingest_dir = work_dir / \"ingest\"\n",
"times = TimeBounds(np.datetime64(\"2010-01-01T00:00:00\"), np.datetime64(\"2011-01-31T23:00:00\"))\n",
"bbox_demo = SpatialBounds(54, 56, 1, 3)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 50%|█████ | 1/2 [00:00<00:00, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File 'land-cover_LCCS_MAP_300m_2010.zip' already exists, skipping...\n"
]
}
],
"source": [
"land_cover_dataset = LandCover()\n",
"land_cover_dataset.download(\n",
" download_dir=download_dir,\n",
" time_bounds=times,\n",
" spatial_bounds=bbox_demo,\n",
" variable_names=[\"land_cover\"],\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ecoextreml",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
52 changes: 52 additions & 0 deletions src/zampy/datasets/cds_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@
"reanalysis-era5-single-levels": "era5",
"reanalysis-era5-land": "era5-land",
"cams-global-ghg-reanalysis-egg4": "cams",
"satellite-land-cover": "land-cover",
}
# Name of the API service associated with each product short name. The keys
# are the short names used as values in the product-name mapping above.
# NOTE(review): presumably consumed when loading the url/key pair for a
# product (see `cds_api_key`) -- confirm against that function.
SERVER_API = {
"era5": "cdsapi",
"era5-land": "cdsapi",
"cams": "adsapi",
"land-cover": "cdsapi",
}
# Location of the zampy configuration file (`zampy_config.yml`) inside the
# user's home directory.
CONFIG_PATH = Path.home() / ".config" / "zampy" / "zampy_config.yml"

Expand Down Expand Up @@ -87,6 +89,56 @@ def cds_request(
)


def cds_request_land_cover(
    dataset: str,
    time_bounds: TimeBounds,
    path: Path,
    overwrite: bool,
) -> None:
    """Download land cover data via CDS API.

    To raise a request via CDS API using `zampy`, the user needs to set up the
    zampy configuration file `zampy_config.yml` following the instructions on
    https://github.com/EcoExtreML/zampy/blob/main/README.md#instructions-for-cds-datasets-eg-era5.

    One request is issued per calendar year covered by `time_bounds`, in
    ascending year order. Each year is stored in `path` as
    `<product>_LCCS_MAP_300m_<year>.zip`, where `<product>` is the short name
    looked up in `PRODUCT_FNAME`.

    Args:
        dataset: Dataset name for retrieval via `cdsapi`.
        time_bounds: Zampy time bounds object.
        path: Directory in which the yearly zip files are saved.
        overwrite: If an existing file (of the same size!) should be overwritten.

    Raises:
        KeyError: If `dataset` has no entry in `PRODUCT_FNAME`.
    """
    fname = PRODUCT_FNAME[dataset]

    url, api_key = cds_api_key(fname)

    c = cdsapi.Client(
        url=url,
        key=api_key,
        verify=True,
        quiet=True,
    )

    years_months = time_bounds_to_year_month(time_bounds)
    # The data is requested per year, so collapse the (year, month) pairs to
    # unique years. Sort them so the download order (and therefore the set of
    # files present after an interrupted run) is the same on every run; plain
    # set iteration order is not guaranteed to be stable.
    years = sorted({year for (year, _) in years_months})

    for year in tqdm(years):
        # Years before 2016 are requested as version "v2.0.7cds", all later
        # years as "v2.1.1".
        version = "v2.0.7cds" if int(year) < 2016 else "v2.1.1"
        r = c.retrieve(
            dataset,
            {
                "variable": "all",
                "format": "zip",
                "year": year,
                "version": version,
            },
        )
        fpath = path / f"{fname}_LCCS_MAP_300m_{year}.zip"
        _check_and_download(r, fpath, overwrite)


def cds_api_key(product_name: str) -> tuple[str, str]:
"""Load url and CDS/ADS API key.
Expand Down
44 changes: 43 additions & 1 deletion src/zampy/datasets/land_cover.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
"""Land cover classification dataset."""

from pathlib import Path
import numpy as np
from zampy.datasets import cds_utils
from zampy.datasets import validation
from zampy.datasets.dataset_protocol import SpatialBounds
from zampy.datasets.dataset_protocol import TimeBounds
from zampy.datasets.dataset_protocol import Variable
from zampy.datasets.dataset_protocol import copy_properties_file
from zampy.datasets.dataset_protocol import write_properties_file
from zampy.reference.variables import VARIABLE_REFERENCE_LOOKUP
from zampy.reference.variables import unit_registry

Expand All @@ -14,6 +19,7 @@

class LandCover: # noqa: D101
"""Land cover classification gridded maps."""

name = "land-cover"
time_bounds = TimeBounds(np.datetime64("1992-01-01"), np.datetime64("2020-12-31"))
spatial_bounds = SpatialBounds(90, 180, -90, -180)
Expand All @@ -30,7 +36,7 @@ class LandCover: # noqa: D101
bib = """
@article{buchhorn2020copernicus,
title={Copernicus global land cover layers—collection 2},
author={Buchhorn, Marcel and Lesiv, Myroslava and Tsendbazar, Nandin-Erdene and Herold, Martin and Bertels, Luc and Smets, Bruno},
author={Buchhorn, Marcel et al.},
journal={Remote Sensing},
volume={12},
number={6},
Expand All @@ -43,3 +49,39 @@ class LandCover: # noqa: D101
data_url = "https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover?tab=overview"

cds_dataset = "satellite-land-cover"

def __init__(self) -> None:
    """Create the dataset object; no instance state is set up here."""

def download(
    self,
    download_dir: Path,
    time_bounds: TimeBounds,
    spatial_bounds: SpatialBounds,
    variable_names: list[str],
    overwrite: bool = False,
) -> bool:
    """Download the land cover zip files for the requested period.

    The request is first passed to `validation.validate_download_request`.
    The files are then fetched into `<download_dir>/<self.name>` (created if
    missing) and a properties file describing the request is written next to
    them.

    Only `time_bounds` is forwarded to the CDS request. `spatial_bounds` and
    `variable_names` are validated and recorded in the properties file, but
    are not used to narrow the download itself.

    Args:
        download_dir: Root download directory; a subfolder named after this
            dataset is used for the files.
        time_bounds: Time span for which data is requested.
        spatial_bounds: Requested spatial extent.
        variable_names: Names of the requested variables.
        overwrite: Forwarded to the CDS request helper to control whether
            existing files are overwritten.

    Returns:
        True once the download and properties file have been completed.
    """
    request = (download_dir, time_bounds, spatial_bounds, variable_names)
    validation.validate_download_request(self, *request)

    target_dir = download_dir / self.name
    target_dir.mkdir(parents=True, exist_ok=True)

    cds_utils.cds_request_land_cover(
        self.cds_dataset, time_bounds, target_dir, overwrite
    )

    write_properties_file(
        target_dir,
        spatial_bounds,
        time_bounds,
        variable_names,
    )
    return True

0 comments on commit 313a14b

Please sign in to comment.