diff --git a/docs/api.rst b/docs/api.rst index f3cd4aa..de159c6 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -59,3 +59,14 @@ Groupers DAY_GROUPER MONTH_GROUPER PaddedDOYGrouper + +Spatial Models +================= + +Xarray Wrappers +~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + SpatialDisaggregator diff --git a/docs/roadmap.rst b/docs/roadmap.rst index a234c42..60ae181 100644 --- a/docs/roadmap.rst +++ b/docs/roadmap.rst @@ -59,6 +59,8 @@ Components 1. `pointwise_models`: a collection of linear models that are intended to be applied point-by-point. These may be sklearn Pipelines or custom sklearn-like models (e.g. BCSDTemperature). +2. `spatial_models`: a collection of spatial models where the models have spatial + dependence. 2. `global_models`: (not implemented) concept space for deep learning-based models. 3. `metrics`: (not implemented) concept space for a benchmarking suite @@ -82,6 +84,50 @@ Pointwise models Other methods, like LOCA, MACA, BCCA, etc, should also be possible. +Spatial models +~~~~~~~~~~~~~~~~ + +This category of methods includes models that have spatial dependence. +Thus far only the BCSD method (see Thrasher et al., 2012) for spatial disaggregation has +been implemented (for temperature and precipitation). Because the method involves regridding, +which depends on the xESMF package, a recipe is provided herein versus a full implementation +in scikit-downscale. + +.. code-block:: Python + + from skdownscale.spatial_models import SpatialDisaggregator, apply_weights + import xesmf as xe + import xarray as xr + + # da_climo_fine: xarray.DataArray (daily multi-decade climatology) + # da_obs : xarray.DataArray (daily obs data) + # da_model_train: xarray.DataArray (daily training data) + # ds_sd: xarray.Dataset (daily spatially disaggregated data) + + # regrid climo to model resolution + obs_to_mod_weights = "filename" + regridder_obs_to_mod = xe.Regridder(da_obs.isel(time=0), da_model_train.isel(time=0), + 'bilinear', filename=obs_to_mod_weights, reuse_weights=True) + climo_regrid = xr.map_blocks(apply_weights, regridder_obs_to_mod, + args=[da_climo_fine]) + climo_coarse = climo_regrid.compute() + + # fit the scaling factor model + sfc = SpatialDisaggregator.fit(da_model_train, climo_coarse, var_name='tasmax') + + # regrid scale factor + mod_to_obs_weights = "filename" + regridder_mod_to_obs = xe.Regridder(da_model_train.isel(time=0), + da_obs.isel(time=0), 'bilinear', + filename=mod_to_obs_weights, reuse_weights=True) + sff_regrid = xr.map_blocks(apply_weights, regridder_mod_to_obs, args=[sfc]) + sff = sff_regrid.compute() + + # predict using scale factor + ds_varname = 'scale_factor_fine' + sff_ds = sff.to_dataset(name=ds_varname) + ds_sd = SpatialDisaggregator.predict(sff_ds, da_climo_fine, var_name=ds_varname) + Global models ~~~~~~~~~~~~~ @@ -110,7 +156,7 @@ Dependencies ------------ - Core: Xarray, Pandas, Dask, Scikit-learn, Numpy, Scipy -- Optional: Statsmodels, Keras, PyTorch, Tensorflow, etc. +- Optional: Statsmodels, Keras, PyTorch, Tensorflow, xESMF, etc. Related projects ---------------- diff --git a/setup.cfg b/setup.cfg index 8118e19..9515b4b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,7 +11,7 @@ select=B,C,E,F,W,T4,B9 [isort] known_first_party=skdownscale -known_third_party = click,matplotlib,numpy,pandas,pkg_resources,probscale,pytest,scipy,seaborn,setuptools,sklearn,xarray,xsd +known_third_party = click,matplotlib,numpy,pandas,pkg_resources,probscale,pytest,scipy,seaborn,setuptools,sklearn,xarray,xesmf,xsd multi_line_output=3 include_trailing_comma=True line_length=100 diff --git a/skdownscale/pointwise_models/arrm.py b/skdownscale/pointwise_models/arrm.py index a1c1190..bf16871 100644 --- a/skdownscale/pointwise_models/arrm.py +++ b/skdownscale/pointwise_models/arrm.py @@ -12,7 +12,7 @@ def arrm_breakpoints(X, y, window_width, max_breakpoints): - '''Calculate breakpoints in x and y + """Calculate breakpoints in x and y Parameters ---------- @@ -26,7 +26,7 @@ def arrm_breakpoints(X, y, window_width, max_breakpoints): Returns ------- breakpoints : ndarray - ''' + """ min_width = 10 npoints = len(X) @@ -97,7 +97,7 @@ def arrm_breakpoints(X, y, window_width, max_breakpoints): class PiecewiseLinearRegression(RegressorMixin, BaseEstimator): - """ Piecewise Linear Regression + """Piecewise Linear Regression Parameters ---------- diff --git a/skdownscale/pointwise_models/gard.py b/skdownscale/pointwise_models/gard.py index ebe4642..cb1ea41 100644 --- a/skdownscale/pointwise_models/gard.py +++ b/skdownscale/pointwise_models/gard.py @@ -22,7 +22,7 @@ class AnalogBase(RegressorMixin, BaseEstimator): _fit_attributes = ['kdtree_', 'X_', 'y_', 'k_'] def fit(self, X, y): - """ Fit Analog model using a KDTree + """Fit Analog model using a KDTree Parameters ---------- @@ -54,7 +54,7 @@ def fit(self, X, y): class AnalogRegression(AnalogBase): - """ AnalogRegression + """AnalogRegression Parameters ---------- @@ -82,7 +82,7 @@ def __init__(self, n_analogs=200, kdtree_kwargs=None, query_kwargs=None, lr_kwar self.lr_kwargs = lr_kwargs def predict(self, X): - """ Predict using the AnalogRegression model + """Predict using the AnalogRegression model Parameters ---------- @@ -130,7 +130,7 @@ def _predict_one_step(self, lr_model, X): class PureAnalog(AnalogBase): - """ PureAnalog + """PureAnalog Attributes ---------- diff --git a/skdownscale/pointwise_models/grouping.py b/skdownscale/pointwise_models/grouping.py index 8df4025..4f0cb16 100644 --- a/skdownscale/pointwise_models/grouping.py +++ b/skdownscale/pointwise_models/grouping.py @@ -5,7 +5,7 @@ class GroupedRegressor: - """ Grouped Regressor + """Grouped Regressor Wrapper supporting fitting seperate estimators distinct groups @@ -45,7 +45,7 @@ def __init__( self.predict_grouper_kwargs = predict_grouper_kwargs def fit(self, X, y, **fit_kwargs): - """ Fit the grouped regressor + """Fit the grouped regressor Parameters ---------- @@ -76,7 +76,7 @@ def fit(self, X, y, **fit_kwargs): return self def predict(self, X): - """ Predict estimator target for X + """Predict estimator target for X Parameters ---------- @@ -100,7 +100,7 @@ def predict(self, X): class PaddedDOYGrouper: - """ Grouper to group an Index by day-of-year +/ pad + """Grouper to group an Index by day-of-year +/ pad Parameters ---------- diff --git a/skdownscale/pointwise_models/quantile.py b/skdownscale/pointwise_models/quantile.py index 15f2c6f..04187a3 100644 --- a/skdownscale/pointwise_models/quantile.py +++ b/skdownscale/pointwise_models/quantile.py @@ -17,7 +17,7 @@ def plotting_positions(n, alpha=0.4, beta=0.4): - '''Returns a monotonic array of plotting positions. + """Returns a monotonic array of plotting positions. Parameters ---------- @@ -35,7 +35,7 @@ def plotting_positions(n, alpha=0.4, beta=0.4): See Also -------- scipy.stats.mstats.plotting_positions - ''' + """ return (np.arange(1, n + 1) - alpha) / (n + 1.0 - alpha - beta) @@ -182,10 +182,10 @@ def fit(self, X, y, **kwargs): self : object """ X = check_array( - X, dtype='numeric', ensure_min_samples=2 * self.n_endpoints + 1, ensure_2d=True + X, dtype='numeric', ensure_min_samples=2 * self.n_endpoints + 1, ensure_2d=True, ) y = check_array( - y, dtype='numeric', ensure_min_samples=2 * self.n_endpoints + 1, ensure_2d=False + y, dtype='numeric', ensure_min_samples=2 * self.n_endpoints + 1, ensure_2d=False, ) X = check_max_features(X, n=1) @@ -287,9 +287,9 @@ def predict(self, X, **kwargs): return y_hat def _calc_extrapolated_cdf( - self, data, sort=True, extrapolate=None, pp_min=SYNTHETIC_MIN, pp_max=SYNTHETIC_MAX + self, data, sort=True, extrapolate=None, pp_min=SYNTHETIC_MIN, pp_max=SYNTHETIC_MAX, ): - """ Calculate a new extrapolated cdf + """Calculate a new extrapolated cdf The goal of this function is to create a CDF with bounds outside the [0, 1] range. This allows for quantile mapping beyond observed data points. diff --git a/skdownscale/pointwise_models/zscore.py b/skdownscale/pointwise_models/zscore.py index 144d7a8..a1c965f 100644 --- a/skdownscale/pointwise_models/zscore.py +++ b/skdownscale/pointwise_models/zscore.py @@ -7,7 +7,7 @@ class ZScoreRegressor(TimeSynchronousDownscaler): - """ Z Score Regressor bias correction model wrapper + """Z Score Regressor bias correction model wrapper Apply a scikit-learn model (e.g. Pipeline) point-by-point. The pipeline must implement the fit and predict methods. @@ -28,7 +28,7 @@ def __init__(self, window_width=31): self.window_width = window_width def fit(self, X, y): - """ Fit Z-Score Model finds the shift and scale parameters + """Fit Z-Score Model finds the shift and scale parameters to inform bias correction. Parameters @@ -65,7 +65,7 @@ def fit(self, X, y): return self def predict(self, X): - """ Predict performs the z-score bias correction + """Predict performs the z-score bias correction on the future model dataset, X. Parameters diff --git a/skdownscale/spatial_models/__init__.py b/skdownscale/spatial_models/__init__.py new file mode 100644 index 0000000..3e2aa05 --- /dev/null +++ b/skdownscale/spatial_models/__init__.py @@ -0,0 +1,2 @@ +from .regridding import apply_weights +from .sd import SpatialDisaggregator diff --git a/skdownscale/spatial_models/regridding.py b/skdownscale/spatial_models/regridding.py new file mode 100644 index 0000000..422618f --- /dev/null +++ b/skdownscale/spatial_models/regridding.py @@ -0,0 +1,8 @@ +import xesmf as xe + + +def apply_weights(regridder, input_data): + regridder._grid_in = None + regridder._grid_out = None + result = regridder(input_data) + return result diff --git a/skdownscale/spatial_models/sd.py b/skdownscale/spatial_models/sd.py new file mode 100644 index 0000000..49f7900 --- /dev/null +++ b/skdownscale/spatial_models/sd.py @@ -0,0 +1,121 @@ +import numpy as np +import pandas as pd +import xarray as xr + + +class SpatialDisaggregator: + """ + Spatial disaggregation model class + Apply spatial disaggregation algorithm to an xarray Dataset with fit + and predict methods using NASA-NEX method for spatial disaggregation + (see Thrasher et al, 2012). + + Parameters + ---------- + var : str + specifies the variable being downscaled. Default is + temperature and other option is precipitation. + """ + + def __init__(self, var="temperature"): + self._var = var + + if var not in ['temperature', 'precipitation']: + raise NotImplementedError( + "functionality for spatial disaggregation" + " of %s has not yet been added" % var + ) + + def fit(self, ds_bc, climo_coarse, var_name, lat_name="lat", lon_name="lon"): + """ + Fit the scaling factor used in spatial disaggregation + + Parameters + ----------- + ds_bc : xarray.Dataset + Daily bias corrected data at the model resolution + climo_coarse : xarray.DataArray or xarray.Dataset + Observed climatology that has been regridded (coarsened) + to the model resolution + var_name : str + Specifies the data variable name within ds_bc of the + daily bias corrected data + lat_name : str + Name of the latitude dimension of ds_bc and climo_coarse, + default is 'lat'. + lon_name : str + Name of the longitude dimension of ds_bc and climo_coarse, + default is 'lon'. + """ + + # check that climo has been regridded to model res + if not ds_bc[lat_name].equals(climo_coarse[lat_name]): + raise ValueError("climo latitude dimension does not match model res") + if not ds_bc[lon_name].equals(climo_coarse[lon_name]): + raise ValueError("climo longitude dimension does not match model res") + + scf = self._calculate_scaling_factor(ds_bc, climo_coarse, var_name, self._var) + + return scf + + def predict(self, scf, climo_fine, var_name, lat_name="lat", lon_name="lon"): + """ + Predict (apply) the scaling factor to the observed climatology. + + Parameters + ----------- + scf : xarray.Dataset + Scale factor that has been regridded to the resolution of + the observed data. + climo_fine : xarray.DataArray or xarray.Dataset + Observed climatology at its native resolution + var_name : str + Specifies the data variable name within ds_bc of the + daily bias corrected data + lat_name : str + Name of the latitude dimension of ds_bc and climo_coarse, + default is 'lat'. + lon_name : str + Name of the longitude dimension of ds_bc and climo_coarse, + default is 'lon' + """ + + # check that scale factor has been regridded to obs res + if not np.array_equal(scf[lat_name], climo_fine[lat_name]): + raise ValueError("scale factor latitude dimension does not match obs res") + if not np.array_equal(scf[lon_name], climo_fine[lon_name]): + raise ValueError("scale factor longitude dimension does not match obs res") + + downscaled = self._apply_scaling_factor(scf, climo_fine, var_name, self._var) + + return downscaled + + def _calculate_scaling_factor(self, ds_bc, climo, var_name, var): + """ + compute scaling factor + """ + # Necessary workaround to xarray's check with zero dimensions + # https://github.com/pydata/xarray/issues/3575 + da = ds_bc[var_name] + if sum(da.shape) == 0: + return da + groupby_type = ds_bc.time.dt.dayofyear + gb = da.groupby(groupby_type) + + if var == "temperature": + return gb - climo + elif var == "precipitation": + return gb / climo + + def _apply_scaling_factor(self, scf, climo, var_name, var): + """ + apply scaling factor + """ + groupby_type = scf.time.dt.dayofyear + da = scf[var_name] + sff_daily = da.groupby(groupby_type) + + if var == "temperature": + return sff_daily + climo + elif var == "precipitation": + return sff_daily * climo diff --git a/skdownscale/test/test_spatial_models.py b/skdownscale/test/test_spatial_models.py new file mode 100644 index 0000000..15e5564 --- /dev/null +++ b/skdownscale/test/test_spatial_models.py @@ -0,0 +1,86 @@ +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +from skdownscale.spatial_models import SpatialDisaggregator + + +@pytest.fixture +def test_dataset(request): + n = 3650 + x = np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 0.5 + start_time = "1950-01-01" + time = xr.cftime_range( + start=start_time, freq="D", periods=len(x), calendar="standard" + ) + ds = xr.Dataset( + {request.param: (["time", "lon", "lat"], x[:, np.newaxis, np.newaxis])}, + coords={ + "index": time, + "time": time, + "lon": (["lon"], [1.0]), + "lat": (["lat"], [1.0]), + }, + ) + return ds + + +@pytest.mark.parametrize( + "test_dataset, var_name", + [ + pytest.param("temperature", "temperature"), + pytest.param("precipitation", "precipitation"), + ], + indirect=["test_dataset"], +) +def test_spatialdisaggregator_fit(test_dataset, var_name): + time = pd.date_range(start="2017-01-01", end="2020-01-01") + data_X = np.linspace(0, 1, len(time)) + + X = xr.DataArray(data_X, name=var_name, dims=["time"], coords={"time": time}) + # groupby_type = X.time.dt.dayofyear + climo = test_dataset[var_name].groupby("time.dayofyear").mean() + + spatial_disaggregator = SpatialDisaggregator(var_name) + scale_factor = spatial_disaggregator.fit(test_dataset, climo, var_name) + + if var_name == "temperature": + np.testing.assert_allclose( + scale_factor, test_dataset[var_name].groupby("time.dayofyear") - climo + ) + elif var_name == "precipitation": + np.testing.assert_allclose( + scale_factor, test_dataset[var_name].groupby("time.dayofyear") / climo + ) + + +@pytest.mark.parametrize( + "test_dataset, var_name", + [ + pytest.param("temperature", "temperature"), + pytest.param("precipitation", "precipitation"), + ], + indirect=["test_dataset"], +) +def test_spatialdisaggregator_predict(test_dataset, var_name): + time = pd.date_range(start="2017-01-01", end="2020-01-01") + data_X = np.linspace(0, 1, len(time)) + + scale_factor = xr.DataArray( + data_X, name=var_name, dims=["time"], coords={"time": time} + ) + # groupby_type = scale_factor.time.dt.dayofyear + climo = test_dataset[var_name].groupby("time.dayofyear").mean() + + spatial_disaggregator = SpatialDisaggregator(var_name) + data_downscaled = spatial_disaggregator.predict(test_dataset, climo, var_name) + + if var_name == "temperature": + np.testing.assert_allclose( + data_downscaled, test_dataset[var_name].groupby("time.dayofyear") + climo + ) + elif var_name == "precipitation": + np.testing.assert_allclose( + data_downscaled, test_dataset[var_name].groupby("time.dayofyear") * climo + )