Skip to content

Commit

Permalink
First code for downloading canopy height data
Browse files Browse the repository at this point in the history
  • Loading branch information
BSchilperoort committed Apr 14, 2023
1 parent 966d3da commit 38bc761
Show file tree
Hide file tree
Showing 8 changed files with 205 additions and 9 deletions.
18 changes: 10 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,13 @@ classifiers = [
"Programming Language :: Python :: 3.10",
]
dependencies = [
"requests",
"netcdf4",
"numpy",
"pandas",
"matplotlib",
"xarray",
"scikit-learn",
"tqdm",
]
dynamic = ["version"]

Expand All @@ -62,9 +63,10 @@ dev = [
"bump2version",
"hatch",
"ruff",
"isort",
"black",
"mypy",
"types-requests", # type stubs for the requests library
"types-urllib3", # type stubs for the urllib3 library
"pytest",
"pytest-cov",
]
Expand All @@ -74,13 +76,12 @@ features = ["dev"]

[tool.hatch.envs.default.scripts]
lint = [
"ruff check .",
"mypy .",
"isort --check-only --diff .",
"black --check --diff .",
"ruff check src/ tests/",
"mypy src/",
"black --check --diff src/ tests/",
]
format = ["isort .", "black .", "lint",]
test = ["pytest ./lilio/ ./tests/ --doctest-modules",]
format = ["black src/ tests/", "ruff check src/ tests/ --fix", "lint",]
test = ["pytest ./src/zampy/ ./tests/ --doctest-modules",]
coverage = [
"pytest --cov --cov-report term --cov-report xml --junitxml=xunit-result.xml tests/",
]
Expand All @@ -90,6 +91,7 @@ testpaths = ["tests"]

[tool.mypy]
ignore_missing_imports = true
disallow_untyped_defs = true

[tool.black]
line-length = 88
Expand Down
5 changes: 5 additions & 0 deletions src/zampy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
"""zampy: top-level package, exposing the ``datasets`` subpackage."""
from . import datasets


# Package metadata.
__author__ = "Bart Schilperoort"
__email__ = "[email protected]"
__version__ = "0.1.0"


# Names exported by ``from zampy import *``.
__all__ = ["datasets"]
Binary file added src/zampy/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
7 changes: 6 additions & 1 deletion src/zampy/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
"""datasets implementations."""
"""Dataset implementations and the protocol they follow."""
from .dataset_protocol import Dataset
from .eth_canopy_height import EthCanopyHeight


# Public names of the ``zampy.datasets`` subpackage.
__all__ = ["Dataset", "EthCanopyHeight"]
Binary file not shown.
60 changes: 60 additions & 0 deletions src/zampy/datasets/dataset_protocol.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Outline of the dataset protocol."""
from dataclasses import dataclass
from pathlib import Path
from typing import Protocol
from typing import List
from typing import Tuple
import numpy as np
import xarray as xr


@dataclass
class Variable:
    """A named variable together with the unit it is expressed in."""

    name: str  # identifier of the variable, e.g. "LAI"
    unit: str  # unit of the variable as a string, e.g. "-"


@dataclass
class SpatialBounds:
    """Rectangular spatial extent described by its four edges.

    The fields are declared in the order north, east, south, west, which is
    also the positional order of the generated constructor.
    """

    # NOTE(review): presumably all four values are in degrees (latitude for
    # north/south, longitude for east/west) -- confirm against the callers.
    north: float  # northern edge
    east: float  # eastern edge
    south: float  # southern edge
    west: float  # western edge


class Dataset(Protocol):
    """Structural interface that zampy dataset classes are expected to provide.

    A dataset exposes descriptive metadata as attributes and implements
    ``download`` to fetch the data and ``load`` to open it with xarray.
    """

    name: str  # dataset identifier
    start_time: np.datetime64  # start of the period covered by the dataset
    end_time: np.datetime64  # end of the period covered by the dataset
    bounds: SpatialBounds  # spatial extent covered by the dataset
    crs: str  # coordinate reference system, e.g. "EPSG:4326"
    license: str  # license identifier, e.g. "cc-by-4.0"
    bib: str  # literature reference for citing the dataset

    def __init__(self) -> None:
        """Create the dataset object; takes no arguments."""
        ...

    def download(
        self,
        download_dir: Path,
        spatial_bounds: SpatialBounds,
        temporal_bounds: Tuple[np.datetime64, np.datetime64],
        variables: List[Variable],
    ) -> bool:
        """Download the data.

        Args:
            download_dir: Directory the data should be downloaded to.
            spatial_bounds: Spatial extent to download.
            temporal_bounds: Pair of timestamps bounding the requested period.
            variables: Variables to download.

        Returns:
            Download success
        """
        ...

    def load(self) -> xr.Dataset:
        """Get the dataset as an xarray Dataset."""
        ...
111 changes: 111 additions & 0 deletions src/zampy/datasets/eth_canopy_height.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""ETH canopy height dataset."""
import gzip
from pathlib import Path
from typing import List
from typing import Tuple
import numpy as np
import requests
from tqdm import tqdm
from .dataset_protocol import Dataset
from .dataset_protocol import SpatialBounds
from .dataset_protocol import Variable


# Location of the gzip-compressed tile-name listing in the ``assets`` folder
# next to this module.
# NOTE(review): get_valid_filenames() rebuilds this same path locally instead
# of using this constant -- consider sharing it.
VALID_NAME_FILE = (
    Path(__file__).parent / "assets" / "h_canopy_filenames_compressed.txt.gz"
)


class EthCanopyHeight(Dataset):
    """The ETH canopy height dataset.

    The class attributes hold the dataset metadata; ``download`` fetches the
    tiles that ``get_filenames`` selects for the requested spatial bounds
    from ``data_url``.
    """

    name = "ETH_canopy_height"
    start_time = np.datetime64("2020-01-01")
    end_time = np.datetime64("2020-12-31")
    bounds = SpatialBounds(90, 180, -90, -180)
    crs = "EPSG:4326"

    # NOTE(review): "LAI"/"LAI_SD" look copied from a leaf-area-index dataset;
    # presumably these should describe canopy height -- confirm before relying
    # on them.
    variables = (
        Variable(name="LAI", unit="-"),
        Variable(name="LAI_SD", unit="-"),
    )

    license = "cc-by-4.0"
    # BibTeX fields must be comma-separated; the entry previously lacked the
    # comma after the ``doi`` field, which makes it unparsable.
    bib = """
    @article{lang2022,
        title={A high-resolution canopy height model of the Earth},
        author={Lang, Nico and Jetz, Walter and Schindler, Konrad and Wegner, Jan Dirk},
        journal={arXiv preprint arXiv:2204.08322},
        doi={10.48550/arXiv.2204.08322},
        year={2022}
    }
    """

    data_url = (
        "https://share.phys.ethz.ch/~pf/nlangdata/"
        "ETH_GlobalCanopyHeight_10m_2020_version1/3deg_cogs/"
    )

    def download(
        self,
        download_dir: Path,
        spatial_bounds: SpatialBounds,
        temporal_bounds: Tuple[np.datetime64, np.datetime64],
        variables: List[Variable],
    ) -> bool:
        """Download the ETH tiles to the download directory.

        The tiles are written to ``download_dir / self.name``, which is
        created if it does not exist yet.

        Args:
            download_dir: Base directory for downloaded data.
            spatial_bounds: Extent used to select which tiles are fetched.
            temporal_bounds: Accepted for protocol compatibility; not used.
            variables: Accepted for protocol compatibility; not used.

        Returns:
            True once every selected tile has been written.

        Raises:
            requests.HTTPError: If the server answers a tile request with an
                error status.
        """
        download_folder = download_dir / self.name
        download_files = get_filenames(spatial_bounds)

        download_folder.mkdir(parents=True, exist_ok=True)
        for fname in tqdm(
            download_files,
            desc="Downloading canopy height files",
            unit="files",
        ):
            # The timeout bounds the wait for the server to respond, so a
            # stalled connection cannot hang the download forever.
            response = requests.get(self.data_url + fname, timeout=60)
            # Fail loudly instead of saving an HTML error page as a ".tif".
            response.raise_for_status()
            # write_bytes opens, writes and closes the file in one call, so no
            # file handle is left open.
            (download_folder / fname).write_bytes(response.content)
        return True


def get_filenames(bounds: SpatialBounds) -> List[str]:
    """Build the ETH canopy height tile names for the given spatial bounds.

    Sample points are laid out every ``step`` (3) units, starting at the
    south/west bound and running up to one step past the north/east bound.
    Each sample point is snapped down to a multiple of ``step`` and turned
    into a tile name with an N/S latitude tag and an E/W longitude tag. The
    candidates are then filtered through ``get_valid_filenames``.

    Args:
        bounds: Spatial extent to generate tile names for.

    Returns:
        The candidate tile names accepted by ``get_valid_filenames``.
    """
    step = 3

    lat_axis = np.arange(start=bounds.south, stop=bounds.north + step, step=step)
    lon_axis = np.arange(start=bounds.west, stop=bounds.east + step, step=step)
    lat_grid, lon_grid = np.meshgrid(lat_axis, lon_axis)

    candidates = []
    for lat, lon in zip(lat_grid.flatten(), lon_grid.flatten()):
        # Snap the sample point down to the tile grid.
        tile_lat = int(lat // step * step)
        tile_lon = int(lon // step * step)

        # Hemisphere letter followed by the zero-padded absolute coordinate.
        lat_tag = f"{'N' if tile_lat >= 0 else 'S'}{abs(tile_lat):02d}"
        lon_tag = f"{'E' if tile_lon >= 0 else 'W'}{abs(tile_lon):03d}"

        candidates.append(
            f"ETH_GlobalCanopyHeight_10m_2020_{lat_tag}{lon_tag}_Map.tif"
        )
    return get_valid_filenames(candidates)


def get_valid_filenames(filenames: List[str]) -> List[str]:
    """Keep only the tile names that occur in the bundled listing of valid names.

    The listing is the gzip-compressed, UTF-8 encoded text file
    ``assets/h_canopy_filenames_compressed.txt.gz`` next to this module. A
    name is kept when it occurs as a substring of the decoded listing text.

    Args:
        filenames: Candidate tile names.

    Returns:
        The accepted names, in their original order.
    """
    listing_path = Path(__file__).parent.joinpath(
        "assets", "h_canopy_filenames_compressed.txt.gz"
    )
    known_names = gzip.decompress(listing_path.read_bytes()).decode("utf-8")

    return [candidate for candidate in filenames if candidate in known_names]
13 changes: 13 additions & 0 deletions test_lib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Manual smoke test: download ETH canopy height tiles for a small area.

Run as a script. The download directory can be passed as the first
command-line argument; it defaults to the original hard-coded location.
"""
import sys
from pathlib import Path

import zampy
from zampy.datasets.dataset_protocol import SpatialBounds


def main() -> None:
    """Download the tiles for the area between 50-60 north and 0-10 east."""
    download_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("/home/bart/Zampy")

    bounds = SpatialBounds(north=60, east=10, south=50, west=0)
    dataset = zampy.datasets.EthCanopyHeight()

    dataset.download(
        download_dir=download_dir,
        spatial_bounds=bounds,
        # Pass values of the annotated types instead of None; the dataset's
        # own metadata is the natural "everything" selection.
        temporal_bounds=(dataset.start_time, dataset.end_time),
        variables=list(dataset.variables),
    )


# Guard the entry point so importing this file (or having a test runner
# collect it because of its ``test_`` prefix) does not start a download.
if __name__ == "__main__":
    main()

0 comments on commit 38bc761

Please sign in to comment.