Skip to content

Commit

Permalink
coarsen land cover and finish ingest
Browse files Browse the repository at this point in the history
  • Loading branch information
Yang committed Oct 17, 2023
1 parent 1f592aa commit dcfd5d3
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 6 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ dependencies = [
"pint-xarray",
"flox", # For better groupby methods.
"cdsapi",
"xarray-regrid", # for land cover data regridding
]
dynamic = ["version"]

Expand Down
62 changes: 56 additions & 6 deletions src/zampy/datasets/land_cover.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
"""Land cover classification dataset."""

import os
from pathlib import Path
import numpy as np
from zipfile import ZipFile
import numpy as np
import xarray as xr
import xarray_regrid
from zampy.datasets import cds_utils
from zampy.datasets import validation
from zampy.datasets.dataset_protocol import SpatialBounds
Expand All @@ -18,7 +21,7 @@
# ruff: noqa: D102


class LandCover: # noqa: D101
class LandCover:
"""Land cover classification gridded maps."""

name = "land-cover"
Expand Down Expand Up @@ -129,10 +132,57 @@ def unzip_raw_to_netcdf(
if ncfile.exists() and not overwrite:
print(f"File '{ncfile.name}' already exists, skipping...")
else:
extract_netcdf_to_zampy(file, ingest_folder)
ds = extract_netcdf_to_zampy(ingest_folder, file)
ds.to_netcdf(path=ncfile)


def extract_netcdf_to_zampy(ingest_folder: Path, file: Path) -> xr.Dataset:
    """Extract zipped land cover data and convert it to the zampy format.

    The first member of the archive is extracted into ``ingest_folder``, reduced
    to the land cover class variable, regridded onto a global 0.25 degree grid
    with the regridder's ``most_common`` method, and renamed to follow the zampy
    convention. The extracted file is removed again before returning.

    Args:
        ingest_folder: Folder where the files have to be written to.
        file: Path to the land cover .zip archive.

    Returns:
        Coarse land cover data satisfying zampy standard.

    Raises:
        ValueError: If the extracted dataset has no "lccs_class" variable.
    """
    raw_variable = "lccs_class"
    variable_name = "land_cover"

    with ZipFile(file, "r") as zip_object:
        zipped_file_name = zip_object.namelist()[0]
        zip_object.extract(zipped_file_name, path=ingest_folder)
    extracted_file = ingest_folder / zipped_file_name

    try:
        # Open via a context manager so the file handle is released before the
        # extracted file is deleted (deleting an open file fails on some
        # platforms, and the handle would otherwise leak).
        with xr.open_dataset(extracted_file) as ds:
            if raw_variable not in ds.data_vars:
                raise ValueError(
                    f"Variable '{raw_variable}' not found in '{zipped_file_name}'."
                )

            # only keep land cover class variable
            unused_vars = [var for var in ds.data_vars if var != raw_variable]
            ds_raw = ds.drop_vars(unused_vars)

            # coarsen to fit into memory
            ds_raw = ds_raw.sortby(["lat", "lon"])
            ds_raw = ds_raw.rename({"lat": "latitude", "lon": "longitude"})
            new_grid = xarray_regrid.Grid(
                north=90,
                east=180,
                south=-90,
                west=-180,
                resolution_lat=0.25,
                resolution_lon=0.25,
            )
            target_dataset = xarray_regrid.create_regridding_dataset(new_grid)
            ds_regrid = ds_raw.regrid.most_common(
                target_dataset, time_dim="time", max_mem=1e9
            )
            # Make sure the (small) regridded result lives in memory and no
            # longer depends on the file that is about to be closed and removed.
            ds_regrid = ds_regrid.load()
    finally:
        # Always clean up the large extracted file, also when processing fails.
        os.remove(extracted_file)

    # rename variable to follow the zampy convention
    ds_regrid = ds_regrid.rename({raw_variable: variable_name})
    ds_regrid[variable_name].attrs["units"] = str(
        VARIABLE_REFERENCE_LOOKUP[variable_name].unit
    )
    ds_regrid[variable_name].attrs["description"] = VARIABLE_REFERENCE_LOOKUP[
        variable_name
    ].desc

    return ds_regrid

0 comments on commit dcfd5d3

Please sign in to comment.