Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/add spatial disaggregation recipe #57

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
11 changes: 11 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,14 @@ Groupers
DAY_GROUPER
MONTH_GROUPER
PaddedDOYGrouper

Spatial Models
=================

Xarray Wrappers
~~~~~~~~~~~~~~~

.. autosummary::
:toctree: generated/

SpatialDisaggregator
48 changes: 47 additions & 1 deletion docs/roadmap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ Components
1. `pointwise_models`: a collection of linear models that are intended to be
applied point-by-point. These may be sklearn Pipelines or custom sklearn-like
models (e.g. BCSDTemperature).
2. `spatial_models`: a collection of spatial models where the models have spatial
dependence.
2. `global_models`: (not implemented) concept space for deep learning-based
models.
3. `metrics`: (not implemented) concept space for a benchmarking suite
Expand All @@ -82,6 +84,50 @@ Pointwise models

Other methods, like LOCA, MACA, BCCA, etc, should also be possible.

Spatial models
~~~~~~~~~~~~~~~~

This category of methods includes models that have spatial dependence.
Thus far only the BCSD method (see Thrasher et al., 2012) for spatial disaggregation has
been implemented (for temperature and precipitation). Because the method involves regridding,
which depends on the xESMF package, a recipe is provided herein versus a full implementation
in scikit-downscale.

.. code-block:: Python

from skdownscale.spatial_models import SpatialDisaggregator, apply_weights
import xesmf as xe
import xarray as xr

# da_climo_fine: xarray.DataArray (daily multi-decade climatology)
# da_obs : xarray.DataArray (daily obs data)
# da_model_train: xarray.DataArray (daily training data)
# ds_sd: xarray.Dataset (daily spatially disaggregated data)

# regrid climo to model resolution
obs_to_mod_weights = "filename"
regridder_obs_to_mod = xe.Regridder(da_obs.isel(time=0), da_model_train.isel(time=0),
'bilinear', filename=obs_to_mod_weights, reuse_weights=True)
climo_regrid = xr.map_blocks(apply_weights, regridder_obs_to_mod,
args=[da_climo_fine])
climo_coarse = climo_regrid.compute()

# fit the scaling factor model
sfc = SpatialDisaggregator.fit(da_model_train, climo_coarse, var_name='tasmax')

# regrid scale factor
mod_to_obs_weights = "filename"
regridder_mod_to_obs = xe.Regridder(da_model_train.isel(time=0),
da_obs.isel(time=0), 'bilinear',
filename=mod_to_obs_weights, reuse_weights=True)
sff_regrid = xr.map_blocks(apply_weights, regridder_mod_to_obs, args=[sfc])
sff = sff_regrid.compute()

# predict using scale factor
ds_varname = 'scale_factor_fine'
sff_ds = sff.to_dataset(name=ds_varname)
ds_sd = SpatialDisaggregator.predict(sff_ds, da_climo_fine, var_name=ds_varname)

Global models
~~~~~~~~~~~~~

Expand Down Expand Up @@ -110,7 +156,7 @@ Dependencies
------------

- Core: Xarray, Pandas, Dask, Scikit-learn, Numpy, Scipy
- Optional: Statsmodels, Keras, PyTorch, Tensorflow, etc.
- Optional: Statsmodels, Keras, PyTorch, Tensorflow, xESMF, etc.

Related projects
----------------
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ select=B,C,E,F,W,T4,B9

[isort]
known_first_party=skdownscale
known_third_party = click,matplotlib,numpy,pandas,pkg_resources,probscale,pytest,scipy,seaborn,setuptools,sklearn,xarray,xsd
known_third_party = click,matplotlib,numpy,pandas,pkg_resources,probscale,pytest,scipy,seaborn,setuptools,sklearn,xarray,xesmf,xsd
multi_line_output=3
include_trailing_comma=True
line_length=100
Expand Down
6 changes: 3 additions & 3 deletions skdownscale/pointwise_models/arrm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


def arrm_breakpoints(X, y, window_width, max_breakpoints):
'''Calculate breakpoints in x and y
"""Calculate breakpoints in x and y

Parameters
----------
Expand All @@ -26,7 +26,7 @@ def arrm_breakpoints(X, y, window_width, max_breakpoints):
Returns
-------
breakpoints : ndarray
'''
"""
min_width = 10

npoints = len(X)
Expand Down Expand Up @@ -97,7 +97,7 @@ def arrm_breakpoints(X, y, window_width, max_breakpoints):


class PiecewiseLinearRegression(RegressorMixin, BaseEstimator):
""" Piecewise Linear Regression
"""Piecewise Linear Regression

Parameters
----------
Expand Down
8 changes: 4 additions & 4 deletions skdownscale/pointwise_models/gard.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class AnalogBase(RegressorMixin, BaseEstimator):
_fit_attributes = ['kdtree_', 'X_', 'y_', 'k_']

def fit(self, X, y):
""" Fit Analog model using a KDTree
"""Fit Analog model using a KDTree

Parameters
----------
Expand Down Expand Up @@ -54,7 +54,7 @@ def fit(self, X, y):


class AnalogRegression(AnalogBase):
""" AnalogRegression
"""AnalogRegression

Parameters
----------
Expand Down Expand Up @@ -82,7 +82,7 @@ def __init__(self, n_analogs=200, kdtree_kwargs=None, query_kwargs=None, lr_kwar
self.lr_kwargs = lr_kwargs

def predict(self, X):
""" Predict using the AnalogRegression model
"""Predict using the AnalogRegression model

Parameters
----------
Expand Down Expand Up @@ -130,7 +130,7 @@ def _predict_one_step(self, lr_model, X):


class PureAnalog(AnalogBase):
""" PureAnalog
"""PureAnalog

Attributes
----------
Expand Down
8 changes: 4 additions & 4 deletions skdownscale/pointwise_models/grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


class GroupedRegressor:
""" Grouped Regressor
"""Grouped Regressor

Wrapper supporting fitting seperate estimators distinct groups

Expand Down Expand Up @@ -45,7 +45,7 @@ def __init__(
self.predict_grouper_kwargs = predict_grouper_kwargs

def fit(self, X, y, **fit_kwargs):
""" Fit the grouped regressor
"""Fit the grouped regressor

Parameters
----------
Expand Down Expand Up @@ -76,7 +76,7 @@ def fit(self, X, y, **fit_kwargs):
return self

def predict(self, X):
""" Predict estimator target for X
"""Predict estimator target for X

Parameters
----------
Expand All @@ -100,7 +100,7 @@ def predict(self, X):


class PaddedDOYGrouper:
""" Grouper to group an Index by day-of-year +/ pad
"""Grouper to group an Index by day-of-year +/ pad

Parameters
----------
Expand Down
12 changes: 6 additions & 6 deletions skdownscale/pointwise_models/quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@


def plotting_positions(n, alpha=0.4, beta=0.4):
'''Returns a monotonic array of plotting positions.
"""Returns a monotonic array of plotting positions.

Parameters
----------
Expand All @@ -35,7 +35,7 @@ def plotting_positions(n, alpha=0.4, beta=0.4):
See Also
--------
scipy.stats.mstats.plotting_positions
'''
"""
return (np.arange(1, n + 1) - alpha) / (n + 1.0 - alpha - beta)


Expand Down Expand Up @@ -182,10 +182,10 @@ def fit(self, X, y, **kwargs):
self : object
"""
X = check_array(
X, dtype='numeric', ensure_min_samples=2 * self.n_endpoints + 1, ensure_2d=True
X, dtype='numeric', ensure_min_samples=2 * self.n_endpoints + 1, ensure_2d=True,
)
y = check_array(
y, dtype='numeric', ensure_min_samples=2 * self.n_endpoints + 1, ensure_2d=False
y, dtype='numeric', ensure_min_samples=2 * self.n_endpoints + 1, ensure_2d=False,
)

X = check_max_features(X, n=1)
Expand Down Expand Up @@ -287,9 +287,9 @@ def predict(self, X, **kwargs):
return y_hat

def _calc_extrapolated_cdf(
self, data, sort=True, extrapolate=None, pp_min=SYNTHETIC_MIN, pp_max=SYNTHETIC_MAX
self, data, sort=True, extrapolate=None, pp_min=SYNTHETIC_MIN, pp_max=SYNTHETIC_MAX,
):
""" Calculate a new extrapolated cdf
"""Calculate a new extrapolated cdf

The goal of this function is to create a CDF with bounds outside the [0, 1] range.
This allows for quantile mapping beyond observed data points.
Expand Down
6 changes: 3 additions & 3 deletions skdownscale/pointwise_models/zscore.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


class ZScoreRegressor(TimeSynchronousDownscaler):
""" Z Score Regressor bias correction model wrapper
"""Z Score Regressor bias correction model wrapper

Apply a scikit-learn model (e.g. Pipeline) point-by-point. The pipeline
must implement the fit and predict methods.
Expand All @@ -28,7 +28,7 @@ def __init__(self, window_width=31):
self.window_width = window_width

def fit(self, X, y):
""" Fit Z-Score Model finds the shift and scale parameters
"""Fit Z-Score Model finds the shift and scale parameters
to inform bias correction.

Parameters
Expand Down Expand Up @@ -65,7 +65,7 @@ def fit(self, X, y):
return self

def predict(self, X):
""" Predict performs the z-score bias correction
"""Predict performs the z-score bias correction
on the future model dataset, X.

Parameters
Expand Down
2 changes: 2 additions & 0 deletions skdownscale/spatial_models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .regridding import apply_weights
from .sd import SpatialDisaggregator
8 changes: 8 additions & 0 deletions skdownscale/spatial_models/regridding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import xesmf as xe


def apply_weights(regridder, input_data):
regridder._grid_in = None
regridder._grid_out = None
result = regridder(input_data)
return result
124 changes: 124 additions & 0 deletions skdownscale/spatial_models/sd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import numpy as np
import pandas as pd
import xarray as xr


class SpatialDisaggregator:
"""
Spatial disaggregation model class
Apply spatial disaggregation algorithm to an xarray Dataset with fit
and predict methods using NASA-NEX method for spatial disaggregation
(see Thrasher et al, 2012).

Parameters
----------
var : str
specifies the variable being downscaled. Default is
temperature and other option is precipitation.
Comment on lines +16 to +17
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minor nit but these are over-indented (same for all docstrings here)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is fixed now.

"""

def __init__(self, var='temperature'):
self._var = var

if var == 'temperature':
pass
elif var == 'precipitation':
pass
else:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's write this as:

Suggested change
if var == 'temperature':
pass
elif var == 'precipitation':
pass
else:
if var not in ['temperature', 'precipitation'] :

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

raise NotImplementedError(
'functionality for spatial disaggregation' ' of %s has not yet been added' % var
)

def fit(self, ds_bc, climo_coarse, var_name, lat_name='lat', lon_name='lon'):
"""
Fit the scaling factor used in spatial disaggregation

Parameters
-----------
ds_bc : xarray.Dataset
Daily bias corrected data at the model resolution
climo_coarse : xarray.DataArray or xarray.Dataset
Observed climatology that has been regridded (coarsened)
to the model resolution
var_name : str
Specifies the data variable name within ds_bc of the
daily bias corrected data
lat_name : str
Name of the latitude dimension of ds_bc and climo_coarse,
default is 'lat'.
lon_name : str
Name of the longitude dimension of ds_bc and climo_coarse,
default is 'lon'.
"""

# check that climo has been regridded to model res
if not np.array_equal(ds_bc[lat_name], climo_coarse[lat_name]):
raise ValueError('climo latitude dimension does not match model res')
if not np.array_equal(ds_bc[lon_name], climo_coarse[lon_name]):
raise ValueError('climo longitude dimension does not match model res')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe something like this:

Suggested change
if not np.array_equal(ds_bc[lat_name], climo_coarse[lat_name]):
raise ValueError('climo latitude dimension does not match model res')
if not np.array_equal(ds_bc[lon_name], climo_coarse[lon_name]):
raise ValueError('climo longitude dimension does not match model res')
if not ds_bc[lat_name].equals(climo_coarse[lat_name]):
raise ValueError('climo latitude coordinate does not match model res')
if not ds_bc[lon_name].equals(climo_coarse[lon_name]):
raise ValueError('climo longitude coordinate does not match model res')


scf = self._calculate_scaling_factor(ds_bc, climo_coarse, var_name, self._var)

return scf

def predict(self, scf, climo_fine, var_name, lat_name='lat', lon_name='lon'):
"""
Predict (apply) the scaling factor to the observed climatology.

Parameters
-----------
scf : xarray.Dataset
Scale factor that has been regridded to the resolution of
the observed data.
climo_fine : xarray.DataArray or xarray.Dataset
Observed climatology at its native resolution
var_name : str
Specifies the data variable name within ds_bc of the
daily bias corrected data
lat_name : str
Name of the latitude dimension of ds_bc and climo_coarse,
default is 'lat'.
lon_name : str
Name of the longitude dimension of ds_bc and climo_coarse,
default is 'lon'
"""

# check that scale factor has been regridded to obs res
if not np.array_equal(scf[lat_name], climo_fine[lat_name]):
raise ValueError('scale factor latitude dimension does not match obs res')
if not np.array_equal(scf[lon_name], climo_fine[lon_name]):
raise ValueError('scale factor longitude dimension does not match obs res')

downscaled = self._apply_scaling_factor(scf, climo_fine, var_name, self._var)

return downscaled

def _calculate_scaling_factor(self, ds_bc, climo, var_name, var):
"""
compute scaling factor
"""
# Necessary workaround to xarray's check with zero dimensions
# https://github.com/pydata/xarray/issues/3575
da = ds_bc[var_name]
if sum(da.shape) == 0:
return da
groupby_type = ds_bc.time.dt.dayofyear
gb = da.groupby(groupby_type)

if var == 'temperature':
return gb - climo
elif var == 'precipitation':
return gb / climo

def _apply_scaling_factor(self, scf, climo, var_name, var):
"""
apply scaling factor
"""
groupby_type = scf.time.dt.dayofyear
da = scf[var_name]
sff_daily = da.groupby(groupby_type)

if var == 'temperature':
return sff_daily + climo
elif var == 'precipitation':
return sff_daily * climo