-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for land cover data (#73)
* Add LAI to global data selection, tests * Correct test data generation * Add instructions, download script & parsing nb * Move global data operations to separate folder * Modify nearest_non_nan to check max_distance * Fix typing issue * Start split of global data, add verification * Finish refactoring global data * Fix bugs * Add checks for tile existance for DEM and h_canopy * Compress txt assets * Improve err msgs. Add tests for raises * Fix linter issues * Add landcover table, download+parse scripts * Add land cover data support & test data * Add checks, cleanup landcover parse notebook * Apply suggestions from Sarah's code review Co-authored-by: SarahAlidoost <[email protected]> --------- Co-authored-by: SarahAlidoost <[email protected]>
- Loading branch information
1 parent
4d5ec56
commit a5be424
Showing
11 changed files
with
481 additions
and
31,784 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
"lccs_class","IGBP_STEMMUS_SCOPE" | ||
0,"No vegetation" | ||
10,"Croplands" | ||
20,"Croplands" | ||
30,"Cropland mosaics" | ||
40,"Cropland mosaics" | ||
50,"Evergreen Broadleaf Forest" | ||
60,"Deciduous Broadleaf Forests" | ||
70,"Evergreen Needleleaf Forests" | ||
80,"Deciduous needleleaf forests" | ||
90,"Mixed Forests" | ||
100,"Woody savannas" | ||
110,"Savannas" | ||
120,"Closed shrublands" | ||
130,"Grasslands" | ||
140,"Not available in IGBP" | ||
150,"Open shrublands" | ||
160,"Permanent Wetlands" | ||
170,"Permanent Wetlands" | ||
180,"Permanent Wetlands" | ||
190,"Urban and built-up lands" | ||
200,"Barren" | ||
210,"Water bodies" | ||
220,"Snow and ice" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
"""Module for loading and validating the ESA CCI land cover dataset.""" | ||
from pathlib import Path | ||
from typing import Dict | ||
from typing import List | ||
from typing import Tuple | ||
from typing import Union | ||
import numpy as np | ||
import pandas as pd | ||
import xarray as xr | ||
from PyStemmusScope.global_data import utils | ||
|
||
|
||
RESOLUTION_CCI = 1 / 360 # Resolution of the dataset in degrees | ||
FILEPATH_LANDCOVER_TABLE = Path(__file__).parent / "assets" / "lccs_to_igbp_table.csv" | ||
|
||
|
||
def retrieve_landcover_data( | ||
global_data_dir: Path, | ||
latlon: Union[Tuple[int, int], Tuple[float, float]], | ||
time_range: Tuple[np.datetime64, np.datetime64], | ||
timestep: str, | ||
) -> Dict[str, np.ndarray]: | ||
"""Get the land cover data from the CCI netCDF files. | ||
Args: | ||
global_data_dir: Path to the directory containing the global datasets. | ||
latlon: Latitude and longitude of the site. | ||
time_range: Start and end time of the model run. | ||
timestep: Desired timestep of the model, this is derived from the forcing data. | ||
In a pandas-timedelta compatible format. For example: "1800S" | ||
Returns: | ||
Dictionary containing IGBP and LCCS land cover classes. | ||
""" | ||
files_cci = list((global_data_dir / "landcover").glob("*.nc")) | ||
|
||
if len(files_cci) == 0: | ||
raise FileNotFoundError( | ||
f"No netCDF files found in the folder '{global_data_dir / 'landcover'}'" | ||
) | ||
|
||
return extract_landcover_data( | ||
files_cci=files_cci, | ||
latlon=latlon, | ||
time_range=time_range, | ||
timestep=timestep, | ||
) | ||
|
||
|
||
def extract_landcover_data( | ||
files_cci: List[Path], | ||
latlon: Union[Tuple[int, int], Tuple[float, float]], | ||
time_range: Tuple[np.datetime64, np.datetime64], | ||
timestep: str, | ||
) -> Dict[str, np.ndarray]: | ||
"""Extract the land cover data from the CCI netCDF files. | ||
Args: | ||
files_cci: List of CCI land cover files. | ||
latlon: Latitude and longitude of the site. | ||
time_range: Start and end time of the model run. | ||
timestep: Desired timestep of the model, this is derived from the forcing data. | ||
In a pandas-timedelta compatible format. For example: "1800S" | ||
Returns: | ||
Dictionary containing IGBP and LCCS land cover classes. | ||
""" | ||
cci_dataset = xr.open_mfdataset(files_cci) | ||
|
||
check_cci_dataset(cci_dataset, latlon, time_range) # Assert spatial/temporal bounds | ||
|
||
lat_bounds = cci_dataset["lat_bounds"].load() # Load so that they are not | ||
lon_bounds = cci_dataset["lon_bounds"].load() # dask arrays | ||
lat_idx = np.logical_and( # type: ignore | ||
lat_bounds.isel(bounds=0) >= latlon[0], lat_bounds.isel(bounds=1) < latlon[0] | ||
).argmax(dim="lat") | ||
lon_idx = np.logical_and( # type: ignore | ||
lon_bounds.isel(bounds=0) <= latlon[1], lon_bounds.isel(bounds=1) > latlon[1] | ||
).argmax(dim="lon") | ||
|
||
lccs_id = cci_dataset.isel(lat=lat_idx, lon=lon_idx)["lccs_class"] | ||
|
||
# If time is size 1, interp fails. Adding an extra datapoint prevents this. | ||
if lccs_id["time"].size == 1: | ||
data_copy = lccs_id.copy() | ||
data_copy["time"] = lccs_id["time"] + np.timedelta64(1, "D") | ||
lccs_id = xr.concat((lccs_id, data_copy), dim="time") | ||
|
||
lccs_id = lccs_id.interp( | ||
time=pd.date_range(time_range[0], time_range[1], freq=timestep), | ||
method="nearest", | ||
kwargs={"fill_value": "extrapolate", "bounds_error": False}, | ||
) | ||
|
||
landcover_lookup_table = get_landcover_table(cci_dataset) | ||
igbp_lookup_table = get_lccs_to_igbp_table() | ||
|
||
return { | ||
"LCCS_landcover": np.array( | ||
[landcover_lookup_table[_id] for _id in lccs_id.to_numpy()] | ||
), | ||
"IGBP_veg_long": np.array( | ||
[igbp_lookup_table[_id] for _id in lccs_id.to_numpy()] | ||
), | ||
} | ||
|
||
|
||
def get_lccs_to_igbp_table() -> Dict[int, str]: | ||
"""Read the land cover translation table, and turn it into a lookup dictionary.""" | ||
df = pd.read_csv(FILEPATH_LANDCOVER_TABLE, index_col="lccs_class") | ||
return df.to_dict()["IGBP_STEMMUS_SCOPE"] | ||
|
||
|
||
def get_landcover_table(cci_dataset: xr.Dataset) -> Dict[int, str]: | ||
"""Get the lookup table to convert the flag values to a land cover name. | ||
The lookup table for the land cover classes is contained in the netCDF file, under | ||
the lcc_class attributes. This function extracts it and turns it into a (dict) | ||
lookup table. | ||
Args: | ||
cci_dataset: The CCI dataset netCDF file. | ||
Returns: | ||
The landcover class lookup table | ||
""" | ||
flag_meanings = cci_dataset["lccs_class"].attrs["flag_meanings"].split(" ") | ||
flag_values = cci_dataset["lccs_class"].attrs["flag_values"] | ||
return dict(zip(flag_values, flag_meanings)) | ||
|
||
|
||
def check_cci_dataset( | ||
cci_dataset: xr.Dataset, | ||
latlon: Union[Tuple[int, int], Tuple[float, float]], | ||
time_range: Tuple[np.datetime64, np.datetime64], | ||
) -> None: | ||
"""Validate the cci dataset for spatial and temporal bounds.""" | ||
# Assert spatial bounds | ||
if ( | ||
latlon[0] > cci_dataset["lat"].max() or latlon[0] < cci_dataset["lat"].min() | ||
) or (latlon[1] > cci_dataset["lon"].max() or latlon[1] < cci_dataset["lon"].min()): | ||
raise utils.MissingDataError( | ||
f"\nThe specified location {latlon} was not within bounds of the CCI land" | ||
f"\ncover dataset." | ||
f"\nPlease check the netCDF files or select a different location" | ||
) | ||
|
||
# Assert temporal bounds | ||
first_year_cci = pd.to_datetime(cci_dataset["time"].min().to_numpy()).year | ||
last_year_cci = pd.to_datetime(cci_dataset["time"].max().to_numpy()).year | ||
first_year_range = pd.to_datetime(time_range[0]).year | ||
last_year_range = pd.to_datetime(time_range[-1]).year | ||
# As the data is yearly, allow some leeway with the time bounds | ||
if (first_year_range + 1 < first_year_cci) or (last_year_range - 1 > last_year_cci): | ||
raise utils.MissingDataError( | ||
f"\nThe specified time range {time_range} was not within the range of the" | ||
f"\nCCI land cover dataset:" | ||
f"\n({cci_dataset['time'].min(), cci_dataset['time'].max()})" | ||
f"\nPlease check the netCDF files or select a different location" | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
123 changes: 123 additions & 0 deletions
123
global_data/data_analysis_notebooks/parse_landcover.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
"""Download land cover data using the cdsapi.""" | ||
from pathlib import Path | ||
import cdsapi | ||
|
||
|
||
with (Path.home() / ".cdsloginrc").open(encoding="utf8") as f: | ||
uid = f.readline().strip() | ||
api_key = f.readline().strip() | ||
|
||
|
||
c = cdsapi.Client( | ||
url="https://cds.climate.copernicus.eu/api/v2", | ||
key=f"{uid}:{api_key}", | ||
verify=True, | ||
) | ||
|
||
|
||
years = [2013] | ||
|
||
|
||
for year in years: | ||
c.retrieve( | ||
"satellite-land-cover", | ||
{ | ||
"variable": "all", | ||
"format": "zip", | ||
"year": f"{year}", | ||
"version": "v2.0.7cds", | ||
}, | ||
f"cds_landcover_{year}.zip", | ||
) |
Oops, something went wrong.