Skip to content

Commit

Permalink
Move to xarray-regrid for all regridding.
Browse files Browse the repository at this point in the history
  • Loading branch information
BSchilperoort committed Jan 29, 2024
1 parent 7ad91da commit 61ac1e8
Show file tree
Hide file tree
Showing 28 changed files with 47 additions and 565 deletions.
1 change: 0 additions & 1 deletion demo/cams_co2_dataset_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@
" spatial_bounds=bbox_demo,\n",
" variable_names=[\"co2_concentration\"],\n",
" resolution=1.0,\n",
" regrid_method=\"flox\",\n",
")"
]
},
Expand Down
1 change: 0 additions & 1 deletion demo/era5-land_dataset_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@
" spatial_bounds=bbox_demo,\n",
" variable_names=[\"air_temperature\", \"dewpoint_temperature\"],\n",
" resolution=1.0,\n",
" regrid_method=\"flox\",\n",
")"
]
},
Expand Down
1 change: 0 additions & 1 deletion demo/era5_dataset_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@
" spatial_bounds=bbox_demo,\n",
" variable_names=[\"eastward_component_of_wind\"],\n",
" resolution=1.0,\n",
" regrid_method=\"flox\",\n",
")"
]
},
Expand Down
1 change: 0 additions & 1 deletion demo/eth_dataset_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@
" spatial_bounds=bbox_demo,\n",
" variable_names=[\"height_of_vegetation\"],\n",
" resolution=0.05,\n",
" regrid_method=\"flox\",\n",
")"
]
},
Expand Down
3 changes: 1 addition & 2 deletions demo/land_cover_dataset_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,6 @@
" spatial_bounds=bbox_demo,\n",
" variable_names=[\"land_cover\"],\n",
" resolution=1.0,\n",
" regrid_method=\"most_common\",\n",
")"
]
},
Expand Down Expand Up @@ -1115,7 +1114,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.10.12"
},
"orig_nbformat": 4
},
Expand Down
1 change: 0 additions & 1 deletion demo/prism_dem_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@
" spatial_bounds=bbox_demo,\n",
" variable_names=[\"elevation\"],\n",
" resolution=0.01,\n",
" regrid_method=\"flox\",\n",
")"
]
},
Expand Down
20 changes: 0 additions & 20 deletions environment.yml

This file was deleted.

1 change: 0 additions & 1 deletion example_recipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ convert:
standard: ALMA-PLUMBER2
frequency: 1H # outputs at 1 hour frequency. Pandas-like freq-keyword.
resolution: 0.01 # output resolution in degrees.
conversion-method: "flox" # Either flox or xesmf. xesmf requires conda + linux.

additional_variables: # Possible future addition
saturation_vapor_pressure:
Expand Down
15 changes: 2 additions & 13 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ disable = true # Requires confirmation when publishing to pypi.
name = "zampy"
description = "python package for getting Land Surface Model input data."
readme = "README.md"
license = "Apache-2.0"
license = "Apache 2.0"
requires-python = ">=3.10, <3.12"
authors = [
{email = "[email protected]"},
Expand Down Expand Up @@ -62,9 +62,8 @@ dependencies = [
"pint",
"cf_xarray", # required to auto-pint CF compliant datasets.
"pint-xarray",
"flox", # For better groupby methods.
"cdsapi",
"xarray-regrid", # for land cover data regridding
"xarray-regrid", # for regridding
]
dynamic = ["version"]

Expand Down Expand Up @@ -120,16 +119,6 @@ features = ["docs"]
build = ["mkdocs build"]
serve = ["mkdocs serve"]

# [tool.hatch.envs.conda]
# type = "conda"
# python = "3.10"
# command = "micromamba"
# environment-file = "environment.yml"
# extra-dependencies = ["pytest", "pytest-cov"]

# [tool.hatch.envs.conda.scripts]
# test = ["pytest ./tests/",]

[tool.pytest.ini_options]
testpaths = ["tests"]

Expand Down
4 changes: 0 additions & 4 deletions src/zampy/datasets/dataset_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ def load(
time_bounds: TimeBounds,
spatial_bounds: SpatialBounds,
resolution: float,
regrid_method: str,
variable_names: list[str],
) -> xr.Dataset:
"""Get the dataset as an xarray Dataset.
Expand All @@ -142,9 +141,6 @@ def load(
loaded.
resolution: The desired resolution of the loaded data. The ingested data
will be regridded to match this resolution.
regrid_method: Which routines to use to resample. Either "flox" (default) or
"esmf". Of these two, esmf is the more robust and accurate regridding
method, however it can be difficult to install.
variable_names: Which variables should be loaded.
Expand Down
10 changes: 7 additions & 3 deletions src/zampy/datasets/ecmwf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from pathlib import Path
import xarray as xr
import xarray_regrid # noqa: F401
from zampy.datasets import cds_utils
from zampy.datasets import converter
from zampy.datasets import validation
Expand All @@ -10,7 +11,7 @@
from zampy.datasets.dataset_protocol import Variable
from zampy.datasets.dataset_protocol import copy_properties_file
from zampy.datasets.dataset_protocol import write_properties_file
from zampy.utils import regrid
from zampy.datasets.utils import make_grid


## Ignore missing class/method docstrings: they are implemented in the Dataset class.
Expand Down Expand Up @@ -111,7 +112,6 @@ def load(
time_bounds: TimeBounds,
spatial_bounds: SpatialBounds,
resolution: float,
regrid_method: str,
variable_names: list[str],
) -> xr.Dataset:
files: list[Path] = []
Expand All @@ -121,7 +121,11 @@ def load(

ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200})
ds = ds.sel(time=slice(time_bounds.start, time_bounds.end))
ds = regrid.regrid_data(ds, spatial_bounds, resolution, regrid_method)

grid = xarray_regrid.create_regridding_dataset(
make_grid(spatial_bounds, resolution)
)
ds = ds.regrid.linear(grid)

return ds

Expand Down
9 changes: 6 additions & 3 deletions src/zampy/datasets/eth_canopy_height.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pathlib import Path
import numpy as np
import xarray as xr
import xarray_regrid
from zampy.datasets import converter
from zampy.datasets import utils
from zampy.datasets import validation
Expand All @@ -13,7 +14,6 @@
from zampy.datasets.dataset_protocol import write_properties_file
from zampy.reference.variables import VARIABLE_REFERENCE_LOOKUP
from zampy.reference.variables import unit_registry
from zampy.utils import regrid


VALID_NAME_FILE = (
Expand Down Expand Up @@ -126,7 +126,6 @@ def load(
time_bounds: TimeBounds,
spatial_bounds: SpatialBounds,
resolution: float,
regrid_method: str,
variable_names: list[str],
) -> xr.Dataset:
files: list[Path] = []
Expand All @@ -137,7 +136,11 @@ def load(

ds = xr.open_mfdataset(files, chunks={"latitude": 2000, "longitude": 2000})
ds = ds.sel(time=slice(time_bounds.start, time_bounds.end))
ds = regrid.regrid_data(ds, spatial_bounds, resolution, regrid_method)

grid = xarray_regrid.create_regridding_dataset(
utils.make_grid(spatial_bounds, resolution)
)
ds = ds.regrid.linear(grid)
return ds

def convert(
Expand Down
31 changes: 5 additions & 26 deletions src/zampy/datasets/fapar_lai.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from tqdm import tqdm
from zampy.datasets import cds_utils
from zampy.datasets import converter
from zampy.datasets import utils
from zampy.datasets import validation
from zampy.datasets.dataset_protocol import SpatialBounds
from zampy.datasets.dataset_protocol import TimeBounds
Expand Down Expand Up @@ -141,16 +142,17 @@ def load(
time_bounds: TimeBounds,
spatial_bounds: SpatialBounds,
resolution: float,
regrid_method: str, # should be deprecated.
variable_names: list[str],
) -> xr.Dataset:
files = list((ingest_dir / self.name).glob("*.nc"))

ds = xr.open_mfdataset(files, parallel=True)
ds = ds.sel(time=slice(time_bounds.start, time_bounds.end))

target_dataset = create_regridding_ds(spatial_bounds, resolution)
ds = ds.regrid.linear(target_dataset)
grid = xarray_regrid.create_regridding_dataset(
utils.make_grid(spatial_bounds, resolution)
)
ds = ds.regrid.linear(grid)

return ds

Expand All @@ -175,29 +177,6 @@ def convert( # Will be removed, see issue #43.
return True


def create_regridding_ds(
    spatial_bounds: SpatialBounds, resolution: float
) -> xr.Dataset:
    """Build the target dataset that xarray-regrid regrids onto.

    Args:
        spatial_bounds: Bounding box supplying the north, east, south and west
            edges of the target grid.
        resolution: Grid spacing, used for both the latitude and the longitude
            resolution.

    Returns:
        The dataset produced by ``xarray_regrid.create_regridding_dataset`` for
        the requested grid.
    """
    # The same resolution value is applied along both horizontal axes.
    return xarray_regrid.create_regridding_dataset(
        xarray_regrid.Grid(
            north=spatial_bounds.north,
            east=spatial_bounds.east,
            south=spatial_bounds.south,
            west=spatial_bounds.west,
            resolution_lat=resolution,
            resolution_lon=resolution,
        )
    )


def get_year_month_pairs(time_bounds: TimeBounds) -> list[tuple[int, int]]:
"""Get the year and month pairs covering the input time bounds."""
start = pd.to_datetime(time_bounds.start)
Expand Down
18 changes: 6 additions & 12 deletions src/zampy/datasets/land_cover.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import xarray_regrid
from zampy.datasets import cds_utils
from zampy.datasets import converter
from zampy.datasets import utils
from zampy.datasets import validation
from zampy.datasets.dataset_protocol import SpatialBounds
from zampy.datasets.dataset_protocol import TimeBounds
Expand Down Expand Up @@ -121,7 +122,6 @@ def load(
time_bounds: TimeBounds,
spatial_bounds: SpatialBounds,
resolution: float,
regrid_method: str, # Unused in land-cover dataset
variable_names: list[str],
) -> xr.Dataset:
files: list[Path] = []
Expand All @@ -137,19 +137,13 @@ def load(

ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200})
ds = ds.sel(time=slice(time_bounds.start, time_bounds.end))
new_grid = xarray_regrid.Grid(
north=spatial_bounds.north,
east=spatial_bounds.east,
south=spatial_bounds.south,
west=spatial_bounds.west,
resolution_lat=resolution,
resolution_lon=resolution,
)
target_dataset = xarray_regrid.create_regridding_dataset(new_grid)

ds_regrid = ds.regrid.most_common(target_dataset, time_dim="time", max_mem=1e9)
grid = xarray_regrid.create_regridding_dataset(
utils.make_grid(spatial_bounds, resolution)
)
ds = ds.regrid.most_common(grid, time_dim="time", max_mem=1e9)

return ds_regrid
return ds

def convert(
self,
Expand Down
9 changes: 6 additions & 3 deletions src/zampy/datasets/prism_dem.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Literal
import numpy as np
import xarray as xr
import xarray_regrid
from rasterio.io import MemoryFile
from zampy.datasets import converter
from zampy.datasets import utils
Expand All @@ -16,7 +17,6 @@
from zampy.datasets.dataset_protocol import write_properties_file
from zampy.reference.variables import VARIABLE_REFERENCE_LOOKUP
from zampy.reference.variables import unit_registry
from zampy.utils import regrid


VALID_NAME_FILES = [
Expand Down Expand Up @@ -127,7 +127,6 @@ def load(
time_bounds: TimeBounds, # Unused in PrismDEM
spatial_bounds: SpatialBounds,
resolution: float,
regrid_method: str,
variable_names: list[str],
) -> xr.Dataset:
for var in variable_names:
Expand All @@ -145,7 +144,11 @@ def preproc(ds: xr.Dataset) -> xr.Dataset:
return ds.isel(latitude=slice(None, -1), longitude=slice(None, -1))

ds = xr.open_mfdataset(files, preprocess=preproc)
ds = regrid.regrid_data(ds, spatial_bounds, resolution, regrid_method)

grid = xarray_regrid.create_regridding_dataset(
utils.make_grid(spatial_bounds, resolution)
)
ds = ds.regrid.linear(grid)

return ds

Expand Down
14 changes: 14 additions & 0 deletions src/zampy/datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import urllib.request
from pathlib import Path
import requests
import xarray_regrid
from tqdm import tqdm
from zampy.datasets.dataset_protocol import SpatialBounds


class TqdmUpdate(tqdm):
Expand Down Expand Up @@ -52,3 +54,15 @@ def get_file_size(fpath: Path) -> int:
return 0
else:
return fpath.stat().st_size


def make_grid(spatial_bounds: SpatialBounds, resolution: float) -> xarray_regrid.Grid:
    """Make the grid description that is passed on to xarray-regrid.

    Args:
        spatial_bounds: Bounding box supplying the north, east, south and west
            edges of the grid.
        resolution: Grid spacing, used for both the latitude and the longitude
            resolution.

    Returns:
        An ``xarray_regrid.Grid`` covering the bounding box at the given
        resolution.
    """
    # Read the four edges in the same order the Grid constructor receives them.
    edges = {
        side: getattr(spatial_bounds, side)
        for side in ("north", "east", "south", "west")
    }
    return xarray_regrid.Grid(
        **edges,
        resolution_lat=resolution,
        resolution_lon=resolution,
    )
1 change: 0 additions & 1 deletion src/zampy/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ def run(self) -> None:
spatial_bounds=self.spatialbounds,
variable_names=variables,
resolution=self.resolution,
regrid_method="flox",
)

ds = converter.convert(ds, dataset, convention=self.convention)
Expand Down
Loading

0 comments on commit 61ac1e8

Please sign in to comment.