From 104f1acbb5e43997838a4fd61e4685f2b1749c0b Mon Sep 17 00:00:00 2001 From: Diana Date: Thu, 10 Dec 2020 09:52:23 -0800 Subject: [PATCH 01/10] add first part of recipe --- docs/api.rst | 12 ++++++++++++ docs/roadmap.rst | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/docs/api.rst b/docs/api.rst index 7343714..cad6aea 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -48,3 +48,15 @@ Groupers DAY_GROUPER MONTH_GROUPER PaddedDOYGrouper + +Spatial Models +================= + +Xarray Wrappers +~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + SpatialDisaggregator + diff --git a/docs/roadmap.rst b/docs/roadmap.rst index a234c42..3ccd8f6 100644 --- a/docs/roadmap.rst +++ b/docs/roadmap.rst @@ -59,6 +59,8 @@ Components 1. `pointwise_models`: a collection of linear models that are intended to be applied point-by-point. These may be sklearn Pipelines or custom sklearn-like models (e.g. BCSDTemperature). +2. `spatial_models`: a collection of spatial models where the models have spatial + dependence. 2. `global_models`: (not implemented) concept space for deep learning-based models. 3. `metrics`: (not implemented) concept space for a benchmarking suite @@ -82,6 +84,13 @@ Pointwise models Other methods, like LOCA, MACA, BCCA, etc, should also be possible. +Spatial models +~~~~~~~~~~~~~~~~ + +This category of methods includes models that have spatial dependence. +This far only the BCSD method (see Thrasher et al., 2012) for spatial disaggregation has +been implemented (for temperature and precipitation). + Global models ~~~~~~~~~~~~~ From e599e753a4c42c3c687ff93c0350ba72784a1596 Mon Sep 17 00:00:00 2001 From: Diana Date: Wed, 13 Jan 2021 10:17:50 -0800 Subject: [PATCH 02/10] add remainder of recipe and code for spatialdisaggregator class --- docs/roadmap.rst | 43 +++++++- skdownscale/spatial_models/__init__.py | 2 + skdownscale/spatial_models/regridding.py | 7 ++ skdownscale/spatial_models/sd.py | 124 +++++++++++++++++++++++ 4 files changed, 173 insertions(+), 3 deletions(-) create mode 100644 skdownscale/spatial_models/__init__.py create mode 100644 skdownscale/spatial_models/regridding.py create mode 100644 skdownscale/spatial_models/sd.py diff --git a/docs/roadmap.rst b/docs/roadmap.rst index 3ccd8f6..1fdcdbc 100644 --- a/docs/roadmap.rst +++ b/docs/roadmap.rst @@ -88,8 +88,45 @@ Spatial models ~~~~~~~~~~~~~~~~ This category of methods includes models that have spatial dependence. -This far only the BCSD method (see Thrasher et al., 2012) for spatial disaggregation has -been implemented (for temperature and precipitation). +Thus far only the BCSD method (see Thrasher et al., 2012) for spatial disaggregation has +been implemented (for temperature and precipitation). Because the method involves regridding, +which depends on the xESMF package, a recipe is provided herein versus a full implementation +in scikit-downscale. + +.. code-block:: Python + + from skdownscale.spatial_models import SpatialDisaggregator, apply_weights + import xesmf as xe + import xarray as xr + + # da_climo_fine: xarray.DataArray (daily multi-decade climatology) + # da_obs : xarray.DataArray (daily obs data) + # da_model_train: xarray.DataArray (daily training data) + # ds_sd: xarray.Dataset (daily spatially disaggregated data) + + # regrid climo to model resolution + obs_to_mod_weights = "filename" + regridder_obs_to_mod = xe.Regridder(da_obs.isel(time=0), da_model_train.isel(time=0), + 'bilinear', filename=obs_to_mod_weights, reuse_weights=True) + climo_regrid = xr.map_blocks(apply_weights, regridder_obs_to_mod, + args=[da_climo_fine]) + climo_coarse = climo_regrid.compute() + + # fit the scaling factor model + sfc = SpatialDisaggregator.fit(da_model_train, climo_coarse, var_name='tasmax') + + # regrid scale factor + mod_to_obs_weights = "filename" + regridder_mod_to_obs = xe.Regridder(da_model_train.isel(time=0), + da_obs.isel(time=0), 'bilinear', + filename=mod_to_obs_weights, reuse_weights=True) + sff_regrid = xr.map_blocks(apply_weights, regridder_mod_to_obs, args=[sfc]) + sff = sff_regrid.compute() + + # predict using scale factor + ds_varname = 'scale_factor_fine' + sff_ds = sff.to_dataset(name=ds_varname) + ds_sd = SpatialDisaggregator.predict(sff_ds, da_climo_fine, var_name=ds_varname) Global models ~~~~~~~~~~~~~ @@ -119,7 +156,7 @@ Dependencies ------------ - Core: Xarray, Pandas, Dask, Scikit-learn, Numpy, Scipy -- Optional: Statsmodels, Keras, PyTorch, Tensorflow, etc. +- Optional: Statsmodels, Keras, PyTorch, Tensorflow, xESMF, etc. Related projects ---------------- diff --git a/skdownscale/spatial_models/__init__.py b/skdownscale/spatial_models/__init__.py new file mode 100644 index 0000000..3e2aa05 --- /dev/null +++ b/skdownscale/spatial_models/__init__.py @@ -0,0 +1,2 @@ +from .regridding import apply_weights +from .sd import SpatialDisaggregator diff --git a/skdownscale/spatial_models/regridding.py b/skdownscale/spatial_models/regridding.py new file mode 100644 index 0000000..0625889 --- /dev/null +++ b/skdownscale/spatial_models/regridding.py @@ -0,0 +1,7 @@ +import xesmf as xe + +def apply_weights(regridder, input_data): + regridder._grid_in = None + regridder._grid_out = None + result = regridder(input_data) + return result diff --git a/skdownscale/spatial_models/sd.py b/skdownscale/spatial_models/sd.py new file mode 100644 index 0000000..f947413 --- /dev/null +++ b/skdownscale/spatial_models/sd.py @@ -0,0 +1,124 @@ +import numpy as np +import pandas as pd +import xarray as xr + +class SpatialDisaggregator: + """ + Spatial disaggregation model class + Apply spatial disaggregation algorithm to an xarray Dataset with fit + and predict methods using NASA-NEX method for spatial disaggregation + (see Thrasher et al, 2012). + + Parameters + ---------- + var : str + specifies the variable being downscaled. Default is + temperature and other option is precipitation. + """ + + def __init__(self, var='temperature'): + self._var = var + + if var == "temperature": + pass + elif var == "precipitation": + pass + else: + raise NotImplementedError("functionality for spatial disaggregation" + " of %s has not yet been added" %var) + + def fit(self, ds_bc, climo_coarse, var_name, + lat_name='lat', lon_name='lon'): + """ + Fit the scaling factor used in spatial disaggregation + + Parameters + ----------- + ds_bc : xarray.Dataset + Daily bias corrected data at the model resolution + climo_coarse : xarray.DataArray or xarray.Dataset + Observed climatology that has been regridded (coarsened) + to the model resolution + var_name : str + Specifies the data variable name within ds_bc of the + daily bias corrected data + lat_name : str + Name of the latitude dimension of ds_bc and climo_coarse, + default is 'lat'. + lon_name : str + Name of the longitude dimension of ds_bc and climo_coarse, + default is 'lon'. + """ + + # check that climo has been regridded to model res + if not np.array_equal(ds_bc[lat_name], climo_coarse[lat_name]): + raise ValueError("climo latitude dimension does not match model res") + if not np.array_equal(ds_bc[lon_name], climo_coarse[lon_name]): + raise ValueError("climo longitude dimension does not match model res") + + scf = self._calculate_scaling_factor(ds_bc, climo_coarse, var_name, self._var) + + return scf + + def predict(self, scf, climo_fine, var_name, + lat_name='lat', lon_name='lon'): + """ + Predict (apply) the scaling factor to the observed climatology. + + Parameters + ----------- + scf : xarray.Dataset + Scale factor that has been regridded to the resolution of + the observed data. + climo_fine : xarray.DataArray or xarray.Dataset + Observed climatology at its native resolution + var_name : str + Specifies the data variable name within ds_bc of the + daily bias corrected data + lat_name : str + Name of the latitude dimension of ds_bc and climo_coarse, + default is 'lat'. + lon_name : str + Name of the longitude dimension of ds_bc and climo_coarse, + default is 'lon' + """ + + # check that scale factor has been regridded to obs res + if not np.array_equal(scf[lat_name], climo_fine[lat_name]): + raise ValueError("scale factor latitude dimension does not match obs res") + if not np.array_equal(scf[lon_name], climo_fine[lon_name]): + raise ValueError("scale factor longitude dimension does not match obs res") + + downscaled = self._apply_scaling_factor(scf, climo_fine, var_name, self._var) + + return downscaled + + def _calculate_scaling_factor(self, ds_bc, climo, var_name, var): + ''' + compute scaling factor + ''' + # Necessary workaround to xarray's check with zero dimensions + # https://github.com/pydata/xarray/issues/3575 + da = ds_bc[var_name] + if sum(da.shape) == 0: + return da + groupby_type = ds_bc.time.dt.dayofyear + gb = da.groupby(groupby_type) + + if var == 'temperature': + return gb - climo + elif var == 'precipitation': + return gb / climo + + def _apply_scaling_factor(self, scf, climo, var_name, var): + ''' + apply scaling factor + ''' + groupby_type = scf.time.dt.dayofyear + da = scf[var_name] + sff_daily = da.groupby(groupby_type) + + if var == 'temperature': + return sff_daily + climo + elif var == 'precipitation': + return sff_daily * climo From aa690d0a90eaf67229470bce2c53a9238757b6ec Mon Sep 17 00:00:00 2001 From: Diana Date: Wed, 13 Jan 2021 10:37:04 -0800 Subject: [PATCH 03/10] fix formatting --- skdownscale/spatial_models/regridding.py | 1 + skdownscale/spatial_models/sd.py | 35 ++++++++++++------------ 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/skdownscale/spatial_models/regridding.py b/skdownscale/spatial_models/regridding.py index 0625889..422618f 100644 --- a/skdownscale/spatial_models/regridding.py +++ b/skdownscale/spatial_models/regridding.py @@ -1,5 +1,6 @@ import xesmf as xe + def apply_weights(regridder, input_data): regridder._grid_in = None regridder._grid_out = None diff --git a/skdownscale/spatial_models/sd.py b/skdownscale/spatial_models/sd.py index f947413..5d2a42e 100644 --- a/skdownscale/spatial_models/sd.py +++ b/skdownscale/spatial_models/sd.py @@ -1,6 +1,7 @@ import numpy as np -import pandas as pd -import xarray as xr +import pandas as pd +import xarray as xr + class SpatialDisaggregator: """ @@ -16,7 +17,7 @@ class SpatialDisaggregator: temperature and other option is precipitation. """ - def __init__(self, var='temperature'): + def __init__(self, var="temperature"): self._var = var if var == "temperature": @@ -24,11 +25,12 @@ def __init__(self, var='temperature'): elif var == "precipitation": pass else: - raise NotImplementedError("functionality for spatial disaggregation" - " of %s has not yet been added" %var) + raise NotImplementedError( + "functionality for spatial disaggregation" + " of %s has not yet been added" % var + ) - def fit(self, ds_bc, climo_coarse, var_name, - lat_name='lat', lon_name='lon'): + def fit(self, ds_bc, climo_coarse, var_name, lat_name="lat", lon_name="lon"): """ Fit the scaling factor used in spatial disaggregation @@ -60,8 +62,7 @@ def fit(self, ds_bc, climo_coarse, var_name, return scf - def predict(self, scf, climo_fine, var_name, - lat_name='lat', lon_name='lon'): + def predict(self, scf, climo_fine, var_name, lat_name="lat", lon_name="lon"): """ Predict (apply) the scaling factor to the observed climatology. @@ -94,9 +95,9 @@ def predict(self, scf, climo_fine, var_name, return downscaled def _calculate_scaling_factor(self, ds_bc, climo, var_name, var): - ''' + """ compute scaling factor - ''' + """ # Necessary workaround to xarray's check with zero dimensions # https://github.com/pydata/xarray/issues/3575 da = ds_bc[var_name] @@ -105,20 +106,20 @@ def _calculate_scaling_factor(self, ds_bc, climo, var_name, var): groupby_type = ds_bc.time.dt.dayofyear gb = da.groupby(groupby_type) - if var == 'temperature': + if var == "temperature": return gb - climo - elif var == 'precipitation': + elif var == "precipitation": return gb / climo def _apply_scaling_factor(self, scf, climo, var_name, var): - ''' + """ apply scaling factor - ''' + """ groupby_type = scf.time.dt.dayofyear da = scf[var_name] sff_daily = da.groupby(groupby_type) - if var == 'temperature': + if var == "temperature": return sff_daily + climo - elif var == 'precipitation': + elif var == "precipitation": return sff_daily * climo From 0b2659281bba430b41e6521f4bac64a02f482287 Mon Sep 17 00:00:00 2001 From: Diana Date: Fri, 15 Jan 2021 16:02:11 -0800 Subject: [PATCH 04/10] formatting cleanup --- docs/api.rst | 1 - docs/roadmap.rst | 34 ++++++++++++++++---------------- skdownscale/spatial_models/sd.py | 29 +++++++++++++-------------- 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 921004d..de159c6 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -70,4 +70,3 @@ Xarray Wrappers :toctree: generated/ SpatialDisaggregator - diff --git a/docs/roadmap.rst b/docs/roadmap.rst index 1fdcdbc..60ae181 100644 --- a/docs/roadmap.rst +++ b/docs/roadmap.rst @@ -59,8 +59,8 @@ Components 1. `pointwise_models`: a collection of linear models that are intended to be applied point-by-point. These may be sklearn Pipelines or custom sklearn-like models (e.g. BCSDTemperature). -2. `spatial_models`: a collection of spatial models where the models have spatial - dependence. +2. `spatial_models`: a collection of spatial models where the models have spatial + dependence. 2. `global_models`: (not implemented) concept space for deep learning-based models. 3. `metrics`: (not implemented) concept space for a benchmarking suite @@ -84,14 +84,14 @@ Pointwise models Other methods, like LOCA, MACA, BCCA, etc, should also be possible. -Spatial models +Spatial models ~~~~~~~~~~~~~~~~ -This category of methods includes models that have spatial dependence. -Thus far only the BCSD method (see Thrasher et al., 2012) for spatial disaggregation has -been implemented (for temperature and precipitation). Because the method involves regridding, -which depends on the xESMF package, a recipe is provided herein versus a full implementation -in scikit-downscale. +This category of methods includes models that have spatial dependence. +Thus far only the BCSD method (see Thrasher et al., 2012) for spatial disaggregation has +been implemented (for temperature and precipitation). Because the method involves regridding, +which depends on the xESMF package, a recipe is provided herein versus a full implementation +in scikit-downscale. .. code-block:: Python @@ -104,26 +104,26 @@ in scikit-downscale. # da_model_train: xarray.DataArray (daily training data) # ds_sd: xarray.Dataset (daily spatially disaggregated data) - # regrid climo to model resolution - obs_to_mod_weights = "filename" - regridder_obs_to_mod = xe.Regridder(da_obs.isel(time=0), da_model_train.isel(time=0), + # regrid climo to model resolution + obs_to_mod_weights = "filename" + regridder_obs_to_mod = xe.Regridder(da_obs.isel(time=0), da_model_train.isel(time=0), 'bilinear', filename=obs_to_mod_weights, reuse_weights=True) - climo_regrid = xr.map_blocks(apply_weights, regridder_obs_to_mod, + climo_regrid = xr.map_blocks(apply_weights, regridder_obs_to_mod, args=[da_climo_fine]) climo_coarse = climo_regrid.compute() - # fit the scaling factor model + # fit the scaling factor model sfc = SpatialDisaggregator.fit(da_model_train, climo_coarse, var_name='tasmax') - # regrid scale factor + # regrid scale factor mod_to_obs_weights = "filename" - regridder_mod_to_obs = xe.Regridder(da_model_train.isel(time=0), - da_obs.isel(time=0), 'bilinear', + regridder_mod_to_obs = xe.Regridder(da_model_train.isel(time=0), + da_obs.isel(time=0), 'bilinear', filename=mod_to_obs_weights, reuse_weights=True) sff_regrid = xr.map_blocks(apply_weights, regridder_mod_to_obs, args=[sfc]) sff = sff_regrid.compute() - # predict using scale factor + # predict using scale factor ds_varname = 'scale_factor_fine' sff_ds = sff.to_dataset(name=ds_varname) ds_sd = SpatialDisaggregator.predict(sff_ds, da_climo_fine, var_name=ds_varname) diff --git a/skdownscale/spatial_models/sd.py b/skdownscale/spatial_models/sd.py index 5d2a42e..3a73b22 100644 --- a/skdownscale/spatial_models/sd.py +++ b/skdownscale/spatial_models/sd.py @@ -17,20 +17,19 @@ class SpatialDisaggregator: temperature and other option is precipitation. """ - def __init__(self, var="temperature"): + def __init__(self, var='temperature'): self._var = var - if var == "temperature": + if var == 'temperature': pass - elif var == "precipitation": + elif var == 'precipitation': pass else: raise NotImplementedError( - "functionality for spatial disaggregation" - " of %s has not yet been added" % var + 'functionality for spatial disaggregation' ' of %s has not yet been added' % var ) - def fit(self, ds_bc, climo_coarse, var_name, lat_name="lat", lon_name="lon"): + def fit(self, ds_bc, climo_coarse, var_name, lat_name='lat', lon_name='lon'): """ Fit the scaling factor used in spatial disaggregation @@ -54,15 +53,15 @@ def fit(self, ds_bc, climo_coarse, var_name, lat_name="lat", lon_name="lon"): # check that climo has been regridded to model res if not np.array_equal(ds_bc[lat_name], climo_coarse[lat_name]): - raise ValueError("climo latitude dimension does not match model res") + raise ValueError('climo latitude dimension does not match model res') if not np.array_equal(ds_bc[lon_name], climo_coarse[lon_name]): - raise ValueError("climo longitude dimension does not match model res") + raise ValueError('climo longitude dimension does not match model res') scf = self._calculate_scaling_factor(ds_bc, climo_coarse, var_name, self._var) return scf - def predict(self, scf, climo_fine, var_name, lat_name="lat", lon_name="lon"): + def predict(self, scf, climo_fine, var_name, lat_name='lat', lon_name='lon'): """ Predict (apply) the scaling factor to the observed climatology. @@ -86,9 +85,9 @@ def predict(self, scf, climo_fine, var_name, lat_name="lat", lon_name="lon"): # check that scale factor has been regridded to obs res if not np.array_equal(scf[lat_name], climo_fine[lat_name]): - raise ValueError("scale factor latitude dimension does not match obs res") + raise ValueError('scale factor latitude dimension does not match obs res') if not np.array_equal(scf[lon_name], climo_fine[lon_name]): - raise ValueError("scale factor longitude dimension does not match obs res") + raise ValueError('scale factor longitude dimension does not match obs res') downscaled = self._apply_scaling_factor(scf, climo_fine, var_name, self._var) @@ -106,9 +105,9 @@ def _calculate_scaling_factor(self, ds_bc, climo, var_name, var): groupby_type = ds_bc.time.dt.dayofyear gb = da.groupby(groupby_type) - if var == "temperature": + if var == 'temperature': return gb - climo - elif var == "precipitation": + elif var == 'precipitation': return gb / climo def _apply_scaling_factor(self, scf, climo, var_name, var): @@ -119,7 +118,7 @@ def _apply_scaling_factor(self, scf, climo, var_name, var): da = scf[var_name] sff_daily = da.groupby(groupby_type) - if var == "temperature": + if var == 'temperature': return sff_daily + climo - elif var == "precipitation": + elif var == 'precipitation': return sff_daily * climo From 78a9d172d20d0a968ba6ffc99cc33d797331903b Mon Sep 17 00:00:00 2001 From: Diana Date: Fri, 15 Jan 2021 16:05:24 -0800 Subject: [PATCH 05/10] more formatting updates --- setup.cfg | 2 +- skdownscale/pointwise_models/arrm.py | 6 +++--- skdownscale/pointwise_models/gard.py | 8 ++++---- skdownscale/pointwise_models/grouping.py | 8 ++++---- skdownscale/pointwise_models/quantile.py | 12 ++++++------ skdownscale/pointwise_models/zscore.py | 6 +++--- skdownscale/test/test_pointwise_models.py | 2 +- 7 files changed, 22 insertions(+), 22 deletions(-) diff --git a/setup.cfg b/setup.cfg index 8118e19..9515b4b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,7 +11,7 @@ select=B,C,E,F,W,T4,B9 [isort] known_first_party=skdownscale -known_third_party = click,matplotlib,numpy,pandas,pkg_resources,probscale,pytest,scipy,seaborn,setuptools,sklearn,xarray,xsd +known_third_party = click,matplotlib,numpy,pandas,pkg_resources,probscale,pytest,scipy,seaborn,setuptools,sklearn,xarray,xesmf,xsd multi_line_output=3 include_trailing_comma=True line_length=100 diff --git a/skdownscale/pointwise_models/arrm.py b/skdownscale/pointwise_models/arrm.py index a1c1190..bf16871 100644 --- a/skdownscale/pointwise_models/arrm.py +++ b/skdownscale/pointwise_models/arrm.py @@ -12,7 +12,7 @@ def arrm_breakpoints(X, y, window_width, max_breakpoints): - '''Calculate breakpoints in x and y + """Calculate breakpoints in x and y Parameters ---------- @@ -26,7 +26,7 @@ def arrm_breakpoints(X, y, window_width, max_breakpoints): Returns ------- breakpoints : ndarray - ''' + """ min_width = 10 npoints = len(X) @@ -97,7 +97,7 @@ def arrm_breakpoints(X, y, window_width, max_breakpoints): class PiecewiseLinearRegression(RegressorMixin, BaseEstimator): - """ Piecewise Linear Regression + """Piecewise Linear Regression Parameters ---------- diff --git a/skdownscale/pointwise_models/gard.py b/skdownscale/pointwise_models/gard.py index ebe4642..cb1ea41 100644 --- a/skdownscale/pointwise_models/gard.py +++ b/skdownscale/pointwise_models/gard.py @@ -22,7 +22,7 @@ class AnalogBase(RegressorMixin, BaseEstimator): _fit_attributes = ['kdtree_', 'X_', 'y_', 'k_'] def fit(self, X, y): - """ Fit Analog model using a KDTree + """Fit Analog model using a KDTree Parameters ---------- @@ -54,7 +54,7 @@ def fit(self, X, y): class AnalogRegression(AnalogBase): - """ AnalogRegression + """AnalogRegression Parameters ---------- @@ -82,7 +82,7 @@ def __init__(self, n_analogs=200, kdtree_kwargs=None, query_kwargs=None, lr_kwar self.lr_kwargs = lr_kwargs def predict(self, X): - """ Predict using the AnalogRegression model + """Predict using the AnalogRegression model Parameters ---------- @@ -130,7 +130,7 @@ def _predict_one_step(self, lr_model, X): class PureAnalog(AnalogBase): - """ PureAnalog + """PureAnalog Attributes ---------- diff --git a/skdownscale/pointwise_models/grouping.py b/skdownscale/pointwise_models/grouping.py index 8df4025..4f0cb16 100644 --- a/skdownscale/pointwise_models/grouping.py +++ b/skdownscale/pointwise_models/grouping.py @@ -5,7 +5,7 @@ class GroupedRegressor: - """ Grouped Regressor + """Grouped Regressor Wrapper supporting fitting seperate estimators distinct groups @@ -45,7 +45,7 @@ def __init__( self.predict_grouper_kwargs = predict_grouper_kwargs def fit(self, X, y, **fit_kwargs): - """ Fit the grouped regressor + """Fit the grouped regressor Parameters ---------- @@ -76,7 +76,7 @@ def fit(self, X, y, **fit_kwargs): return self def predict(self, X): - """ Predict estimator target for X + """Predict estimator target for X Parameters ---------- @@ -100,7 +100,7 @@ def predict(self, X): class PaddedDOYGrouper: - """ Grouper to group an Index by day-of-year +/ pad + """Grouper to group an Index by day-of-year +/ pad Parameters ---------- diff --git a/skdownscale/pointwise_models/quantile.py b/skdownscale/pointwise_models/quantile.py index 15f2c6f..04187a3 100644 --- a/skdownscale/pointwise_models/quantile.py +++ b/skdownscale/pointwise_models/quantile.py @@ -17,7 +17,7 @@ def plotting_positions(n, alpha=0.4, beta=0.4): - '''Returns a monotonic array of plotting positions. + """Returns a monotonic array of plotting positions. Parameters ---------- @@ -35,7 +35,7 @@ def plotting_positions(n, alpha=0.4, beta=0.4): See Also -------- scipy.stats.mstats.plotting_positions - ''' + """ return (np.arange(1, n + 1) - alpha) / (n + 1.0 - alpha - beta) @@ -182,10 +182,10 @@ def fit(self, X, y, **kwargs): self : object """ X = check_array( - X, dtype='numeric', ensure_min_samples=2 * self.n_endpoints + 1, ensure_2d=True + X, dtype='numeric', ensure_min_samples=2 * self.n_endpoints + 1, ensure_2d=True, ) y = check_array( - y, dtype='numeric', ensure_min_samples=2 * self.n_endpoints + 1, ensure_2d=False + y, dtype='numeric', ensure_min_samples=2 * self.n_endpoints + 1, ensure_2d=False, ) X = check_max_features(X, n=1) @@ -287,9 +287,9 @@ def predict(self, X, **kwargs): return y_hat def _calc_extrapolated_cdf( - self, data, sort=True, extrapolate=None, pp_min=SYNTHETIC_MIN, pp_max=SYNTHETIC_MAX + self, data, sort=True, extrapolate=None, pp_min=SYNTHETIC_MIN, pp_max=SYNTHETIC_MAX, ): - """ Calculate a new extrapolated cdf + """Calculate a new extrapolated cdf The goal of this function is to create a CDF with bounds outside the [0, 1] range. This allows for quantile mapping beyond observed data points. diff --git a/skdownscale/pointwise_models/zscore.py b/skdownscale/pointwise_models/zscore.py index 144d7a8..a1c965f 100644 --- a/skdownscale/pointwise_models/zscore.py +++ b/skdownscale/pointwise_models/zscore.py @@ -7,7 +7,7 @@ class ZScoreRegressor(TimeSynchronousDownscaler): - """ Z Score Regressor bias correction model wrapper + """Z Score Regressor bias correction model wrapper Apply a scikit-learn model (e.g. Pipeline) point-by-point. The pipeline must implement the fit and predict methods. @@ -28,7 +28,7 @@ def __init__(self, window_width=31): self.window_width = window_width def fit(self, X, y): - """ Fit Z-Score Model finds the shift and scale parameters + """Fit Z-Score Model finds the shift and scale parameters to inform bias correction. Parameters @@ -65,7 +65,7 @@ def fit(self, X, y): return self def predict(self, X): - """ Predict performs the z-score bias correction + """Predict performs the z-score bias correction on the future model dataset, X. Parameters diff --git a/skdownscale/test/test_pointwise_models.py b/skdownscale/test/test_pointwise_models.py index c876bda..c3cc55b 100644 --- a/skdownscale/test/test_pointwise_models.py +++ b/skdownscale/test/test_pointwise_models.py @@ -92,7 +92,7 @@ def test_quantile_mapper_detrend(): @pytest.mark.parametrize( 'model_cls', - [BcsdTemperature, PureAnalog, AnalogRegression, ZScoreRegressor, QuantileMappingReressor], + [BcsdTemperature, PureAnalog, AnalogRegression, ZScoreRegressor, QuantileMappingReressor,], ) def test_linear_model(model_cls): From 16a83c1f4a8ca1d6ae8b849ccff21119ea8c4510 Mon Sep 17 00:00:00 2001 From: Diana Date: Fri, 15 Jan 2021 16:08:43 -0800 Subject: [PATCH 06/10] flake8 change --- skdownscale/test/test_pointwise_models.py | 78 ++++++++++++++--------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/skdownscale/test/test_pointwise_models.py b/skdownscale/test/test_pointwise_models.py index c3cc55b..376fd5f 100644 --- a/skdownscale/test/test_pointwise_models.py +++ b/skdownscale/test/test_pointwise_models.py @@ -72,7 +72,7 @@ def test_quantile_mapper(): np.testing.assert_almost_equal(actual, expected) -@pytest.mark.xfail(reason='Need 3 part QM routine to handle bias removal') +@pytest.mark.xfail(reason="Need 3 part QM routine to handle bias removal") def test_quantile_mapper_detrend(): n = 100 trend = 1 @@ -91,16 +91,24 @@ def test_quantile_mapper_detrend(): @pytest.mark.parametrize( - 'model_cls', - [BcsdTemperature, PureAnalog, AnalogRegression, ZScoreRegressor, QuantileMappingReressor,], + "model_cls", + [ + BcsdTemperature, + PureAnalog, + AnalogRegression, + ZScoreRegressor, + QuantileMappingReressor, + ], ) def test_linear_model(model_cls): n = 365 # TODO: add test for time other time ranges (e.g. < 365 days) - index = pd.date_range('2019-01-01', periods=n) + index = pd.date_range("2019-01-01", periods=n) - X = pd.DataFrame({'foo': np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 10}, index=index) + X = pd.DataFrame( + {"foo": np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 10}, index=index + ) y = X + 2 model = model_cls() @@ -109,14 +117,14 @@ def test_linear_model(model_cls): assert len(y_hat) == len(X) -@pytest.mark.parametrize('model_cls', [BcsdPrecipitation]) +@pytest.mark.parametrize("model_cls", [BcsdPrecipitation]) def test_linear_model_prec(model_cls): n = 365 # TODO: add test for time other time ranges (e.g. < 365 days) - index = pd.date_range('2019-01-01', periods=n) + index = pd.date_range("2019-01-01", periods=n) - X = pd.DataFrame({'foo': np.random.random(n)}, index=index) + X = pd.DataFrame({"foo": np.random.random(n)}, index=index) y = X + 2 model = model_cls() @@ -126,16 +134,20 @@ def test_linear_model_prec(model_cls): def test_zscore_scale(): - time = pd.date_range(start='2018-01-01', end='2020-01-01') + time = pd.date_range(start="2018-01-01", end="2020-01-01") data_X = np.linspace(0, 1, len(time)) data_y = data_X * 2 - X = xr.DataArray(data_X, name='foo', dims=['index'], coords={'index': time}).to_dataframe() - y = xr.DataArray(data_y, name='foo', dims=['index'], coords={'index': time}).to_dataframe() + X = xr.DataArray( + data_X, name="foo", dims=["index"], coords={"index": time} + ).to_dataframe() + y = xr.DataArray( + data_y, name="foo", dims=["index"], coords={"index": time} + ).to_dataframe() data_scale_expected = [2 for i in np.zeros(364)] scale_expected = xr.DataArray( - data_scale_expected, name='foo', dims=['day'], coords={'day': np.arange(1, 365)} + data_scale_expected, name="foo", dims=["day"], coords={"day": np.arange(1, 365)} ).to_series() zscore = ZScoreRegressor() @@ -145,15 +157,19 @@ def test_zscore_scale(): def test_zscore_shift(): - time = pd.date_range(start='2018-01-01', end='2020-01-01') + time = pd.date_range(start="2018-01-01", end="2020-01-01") data_X = np.zeros(len(time)) data_y = np.ones(len(time)) - X = xr.DataArray(data_X, name='foo', dims=['index'], coords={'index': time}).to_dataframe() - y = xr.DataArray(data_y, name='foo', dims=['index'], coords={'index': time}).to_dataframe() + X = xr.DataArray( + data_X, name="foo", dims=["index"], coords={"index": time} + ).to_dataframe() + y = xr.DataArray( + data_y, name="foo", dims=["index"], coords={"index": time} + ).to_dataframe() shift_expected = xr.DataArray( - np.ones(364), name='foo', dims=['day'], coords={'day': np.arange(1, 365)} + np.ones(364), name="foo", dims=["day"], coords={"day": np.arange(1, 365)} ).to_series() zscore = ZScoreRegressor() @@ -163,16 +179,18 @@ def test_zscore_shift(): def test_zscore_predict(): - time = pd.date_range(start='2018-01-01', end='2020-01-01') + time = pd.date_range(start="2018-01-01", end="2020-01-01") data_X = np.linspace(0, 1, len(time)) - X = xr.DataArray(data_X, name='foo', dims=['index'], coords={'index': time}).to_dataframe() + X = xr.DataArray( + data_X, name="foo", dims=["index"], coords={"index": time} + ).to_dataframe() shift = xr.DataArray( - np.zeros(364), name='foo', dims=['day'], coords={'day': np.arange(1, 365)} + np.zeros(364), name="foo", dims=["day"], coords={"day": np.arange(1, 365)} ).to_series() scale = xr.DataArray( - np.ones(364), name='foo', dims=['day'], coords={'day': np.arange(1, 365)} + np.ones(364), name="foo", dims=["day"], coords={"day": np.arange(1, 365)} ).to_series() zscore = ZScoreRegressor() @@ -181,10 +199,10 @@ def test_zscore_predict(): i = int(zscore.window_width / 2) expected = xr.DataArray( - data_X, name='foo', dims=['index'], coords={'index': time} + data_X, name="foo", dims=["index"], coords={"index": time} ).to_dataframe() - expected[0:i] = 'NaN' - expected[-i:] = 'NaN' + expected[0:i] = "NaN" + expected[-i:] = "NaN" out = zscore.predict(X) @@ -192,8 +210,8 @@ def test_zscore_predict(): def test_paddeddoygrouper(): - index = pd.date_range(start='1980-01-01', end='1982-12-31') - X = pd.DataFrame({'foo': np.random.random(len(index))}, index=index) + index = pd.date_range(start="1980-01-01", end="1982-12-31") + X = pd.DataFrame({"foo": np.random.random(len(index))}, index=index) day_groups = PaddedDOYGrouper(X) doy_group_list = dict(list(day_groups)) @@ -205,8 +223,10 @@ def test_paddeddoygrouper(): def test_BcsdTemperature_nasanex(): - index = pd.date_range(start='1980-01-01', end='1982-12-31') - X = pd.DataFrame({'foo': np.random.random(len(index))}, index=index) - y = pd.DataFrame({'foo': np.random.random(len(index))}, index=index) - model_nasanex = BcsdTemperature(time_grouper='daily_nasa-nex', return_anoms=False).fit(X, y) + index = pd.date_range(start="1980-01-01", end="1982-12-31") + X = pd.DataFrame({"foo": np.random.random(len(index))}, index=index) + y = pd.DataFrame({"foo": np.random.random(len(index))}, index=index) + model_nasanex = BcsdTemperature( + time_grouper="daily_nasa-nex", return_anoms=False + ).fit(X, y) assert issubclass(model_nasanex.time_grouper, PaddedDOYGrouper) From 97b159404a24ed1bda6ad7f9217875b5000f72aa Mon Sep 17 00:00:00 2001 From: Diana Date: Thu, 28 Jan 2021 10:42:17 -0800 Subject: [PATCH 07/10] test pre-commit fix --- skdownscale/test/test_pointwise_models.py | 36 +++++------------------ 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/skdownscale/test/test_pointwise_models.py b/skdownscale/test/test_pointwise_models.py index 376fd5f..f8bc607 100644 --- a/skdownscale/test/test_pointwise_models.py +++ b/skdownscale/test/test_pointwise_models.py @@ -92,13 +92,7 @@ def test_quantile_mapper_detrend(): @pytest.mark.parametrize( "model_cls", - [ - BcsdTemperature, - PureAnalog, - AnalogRegression, - ZScoreRegressor, - QuantileMappingReressor, - ], + [BcsdTemperature, PureAnalog, AnalogRegression, ZScoreRegressor, QuantileMappingReressor,], ) def test_linear_model(model_cls): @@ -106,9 +100,7 @@ def test_linear_model(model_cls): # TODO: add test for time other time ranges (e.g. < 365 days) index = pd.date_range("2019-01-01", periods=n) - X = pd.DataFrame( - {"foo": np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 10}, index=index - ) + X = pd.DataFrame({"foo": np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 10}, index=index) y = X + 2 model = model_cls() @@ -138,12 +130,8 @@ def test_zscore_scale(): data_X = np.linspace(0, 1, len(time)) data_y = data_X * 2 - X = xr.DataArray( - data_X, name="foo", dims=["index"], coords={"index": time} - ).to_dataframe() - y = xr.DataArray( - data_y, name="foo", dims=["index"], coords={"index": time} - ).to_dataframe() + X = xr.DataArray(data_X, name="foo", dims=["index"], coords={"index": time}).to_dataframe() + y = xr.DataArray(data_y, name="foo", dims=["index"], coords={"index": time}).to_dataframe() data_scale_expected = [2 for i in np.zeros(364)] scale_expected = xr.DataArray( @@ -161,12 +149,8 @@ def test_zscore_shift(): data_X = np.zeros(len(time)) data_y = np.ones(len(time)) - X = xr.DataArray( - data_X, name="foo", dims=["index"], coords={"index": time} - ).to_dataframe() - y = xr.DataArray( - data_y, name="foo", dims=["index"], coords={"index": time} - ).to_dataframe() + X = xr.DataArray(data_X, name="foo", dims=["index"], coords={"index": time}).to_dataframe() + y = xr.DataArray(data_y, name="foo", dims=["index"], coords={"index": time}).to_dataframe() shift_expected = xr.DataArray( np.ones(364), name="foo", dims=["day"], coords={"day": np.arange(1, 365)} @@ -182,9 +166,7 @@ def test_zscore_predict(): time = pd.date_range(start="2018-01-01", end="2020-01-01") data_X = np.linspace(0, 1, len(time)) - X = xr.DataArray( - data_X, name="foo", dims=["index"], coords={"index": time} - ).to_dataframe() + X = xr.DataArray(data_X, name="foo", dims=["index"], coords={"index": time}).to_dataframe() shift = xr.DataArray( np.zeros(364), name="foo", dims=["day"], coords={"day": np.arange(1, 365)} @@ -226,7 +208,5 @@ def test_BcsdTemperature_nasanex(): index = pd.date_range(start="1980-01-01", end="1982-12-31") X = pd.DataFrame({"foo": np.random.random(len(index))}, index=index) y = pd.DataFrame({"foo": np.random.random(len(index))}, index=index) - model_nasanex = BcsdTemperature( - time_grouper="daily_nasa-nex", return_anoms=False - ).fit(X, y) + model_nasanex = BcsdTemperature(time_grouper="daily_nasa-nex", return_anoms=False).fit(X, y) assert issubclass(model_nasanex.time_grouper, PaddedDOYGrouper) From 4edee6303bee9370ccc258256fd39d030c310cbc Mon Sep 17 00:00:00 2001 From: Diana Date: Thu, 28 Jan 2021 11:01:36 -0800 Subject: [PATCH 08/10] fix precommit problems --- skdownscale/test/test_pointwise_models.py | 58 +++++++++++------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/skdownscale/test/test_pointwise_models.py b/skdownscale/test/test_pointwise_models.py index f8bc607..c876bda 100644 --- a/skdownscale/test/test_pointwise_models.py +++ b/skdownscale/test/test_pointwise_models.py @@ -72,7 +72,7 @@ def test_quantile_mapper(): np.testing.assert_almost_equal(actual, expected) -@pytest.mark.xfail(reason="Need 3 part QM routine to handle bias removal") +@pytest.mark.xfail(reason='Need 3 part QM routine to handle bias removal') def test_quantile_mapper_detrend(): n = 100 trend = 1 @@ -91,16 +91,16 @@ def test_quantile_mapper_detrend(): @pytest.mark.parametrize( - "model_cls", - [BcsdTemperature, PureAnalog, AnalogRegression, ZScoreRegressor, QuantileMappingReressor,], + 'model_cls', + [BcsdTemperature, PureAnalog, AnalogRegression, ZScoreRegressor, QuantileMappingReressor], ) def test_linear_model(model_cls): n = 365 # TODO: add test for time other time ranges (e.g. < 365 days) - index = pd.date_range("2019-01-01", periods=n) + index = pd.date_range('2019-01-01', periods=n) - X = pd.DataFrame({"foo": np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 10}, index=index) + X = pd.DataFrame({'foo': np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 10}, index=index) y = X + 2 model = model_cls() @@ -109,14 +109,14 @@ def test_linear_model(model_cls): assert len(y_hat) == len(X) -@pytest.mark.parametrize("model_cls", [BcsdPrecipitation]) +@pytest.mark.parametrize('model_cls', [BcsdPrecipitation]) def test_linear_model_prec(model_cls): n = 365 # TODO: add test for time other time ranges (e.g. < 365 days) - index = pd.date_range("2019-01-01", periods=n) + index = pd.date_range('2019-01-01', periods=n) - X = pd.DataFrame({"foo": np.random.random(n)}, index=index) + X = pd.DataFrame({'foo': np.random.random(n)}, index=index) y = X + 2 model = model_cls() @@ -126,16 +126,16 @@ def test_linear_model_prec(model_cls): def test_zscore_scale(): - time = pd.date_range(start="2018-01-01", end="2020-01-01") + time = pd.date_range(start='2018-01-01', end='2020-01-01') data_X = np.linspace(0, 1, len(time)) data_y = data_X * 2 - X = xr.DataArray(data_X, name="foo", dims=["index"], coords={"index": time}).to_dataframe() - y = xr.DataArray(data_y, name="foo", dims=["index"], coords={"index": time}).to_dataframe() + X = xr.DataArray(data_X, name='foo', dims=['index'], coords={'index': time}).to_dataframe() + y = xr.DataArray(data_y, name='foo', dims=['index'], coords={'index': time}).to_dataframe() data_scale_expected = [2 for i in np.zeros(364)] scale_expected = xr.DataArray( - data_scale_expected, name="foo", dims=["day"], coords={"day": np.arange(1, 365)} + data_scale_expected, name='foo', dims=['day'], coords={'day': np.arange(1, 365)} ).to_series() zscore = ZScoreRegressor() @@ -145,15 +145,15 @@ def test_zscore_scale(): def test_zscore_shift(): - time = pd.date_range(start="2018-01-01", end="2020-01-01") + time = pd.date_range(start='2018-01-01', end='2020-01-01') data_X = np.zeros(len(time)) data_y = np.ones(len(time)) - X = xr.DataArray(data_X, name="foo", dims=["index"], coords={"index": time}).to_dataframe() - y = xr.DataArray(data_y, name="foo", dims=["index"], coords={"index": time}).to_dataframe() + X = xr.DataArray(data_X, name='foo', dims=['index'], coords={'index': time}).to_dataframe() + y = xr.DataArray(data_y, name='foo', dims=['index'], coords={'index': time}).to_dataframe() shift_expected = xr.DataArray( - np.ones(364), name="foo", dims=["day"], coords={"day": np.arange(1, 365)} + np.ones(364), name='foo', dims=['day'], coords={'day': np.arange(1, 365)} ).to_series() zscore = ZScoreRegressor() @@ -163,16 +163,16 @@ def test_zscore_shift(): def test_zscore_predict(): - time = pd.date_range(start="2018-01-01", end="2020-01-01") + time = pd.date_range(start='2018-01-01', end='2020-01-01') data_X = np.linspace(0, 1, len(time)) - X = xr.DataArray(data_X, name="foo", dims=["index"], coords={"index": time}).to_dataframe() + X = xr.DataArray(data_X, name='foo', dims=['index'], coords={'index': time}).to_dataframe() shift = xr.DataArray( - np.zeros(364), name="foo", dims=["day"], coords={"day": np.arange(1, 365)} + np.zeros(364), name='foo', dims=['day'], coords={'day': np.arange(1, 365)} ).to_series() scale = xr.DataArray( - np.ones(364), name="foo", dims=["day"], coords={"day": np.arange(1, 365)} + np.ones(364), name='foo', dims=['day'], coords={'day': np.arange(1, 365)} ).to_series() zscore = ZScoreRegressor() @@ -181,10 +181,10 @@ def test_zscore_predict(): i = int(zscore.window_width / 2) expected = xr.DataArray( - data_X, name="foo", dims=["index"], coords={"index": time} + data_X, name='foo', dims=['index'], coords={'index': time} ).to_dataframe() - expected[0:i] = "NaN" - expected[-i:] = "NaN" + expected[0:i] = 'NaN' + expected[-i:] = 'NaN' out = zscore.predict(X) @@ -192,8 +192,8 @@ def test_zscore_predict(): def test_paddeddoygrouper(): - index = pd.date_range(start="1980-01-01", end="1982-12-31") - X = pd.DataFrame({"foo": np.random.random(len(index))}, index=index) + index = pd.date_range(start='1980-01-01', end='1982-12-31') + X = pd.DataFrame({'foo': np.random.random(len(index))}, index=index) day_groups = PaddedDOYGrouper(X) doy_group_list = dict(list(day_groups)) @@ -205,8 +205,8 @@ def test_paddeddoygrouper(): def test_BcsdTemperature_nasanex(): - index = pd.date_range(start="1980-01-01", end="1982-12-31") - X = pd.DataFrame({"foo": np.random.random(len(index))}, index=index) - y = pd.DataFrame({"foo": np.random.random(len(index))}, index=index) - model_nasanex = BcsdTemperature(time_grouper="daily_nasa-nex", return_anoms=False).fit(X, y) + index = pd.date_range(start='1980-01-01', end='1982-12-31') + X = pd.DataFrame({'foo': np.random.random(len(index))}, index=index) + y = pd.DataFrame({'foo': np.random.random(len(index))}, index=index) + model_nasanex = BcsdTemperature(time_grouper='daily_nasa-nex', return_anoms=False).fit(X, y) assert issubclass(model_nasanex.time_grouper, PaddedDOYGrouper) From b4e02625b0850fc1bb38621b28d68c2a702c81c5 Mon Sep 17 00:00:00 2001 From: Diana Date: Mon, 19 Apr 2021 20:01:14 -0700 Subject: [PATCH 09/10] add unit tests for SpatialDisaggregator --- skdownscale/test/test_spatial_models.py | 86 +++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 skdownscale/test/test_spatial_models.py diff --git a/skdownscale/test/test_spatial_models.py b/skdownscale/test/test_spatial_models.py new file mode 100644 index 0000000..15e5564 --- /dev/null +++ b/skdownscale/test/test_spatial_models.py @@ -0,0 +1,86 @@ +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +from skdownscale.spatial_models import SpatialDisaggregator + + +@pytest.fixture +def test_dataset(request): + n = 3650 + x = np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 0.5 + start_time = "1950-01-01" + time = xr.cftime_range( + start=start_time, freq="D", periods=len(x), calendar="standard" + ) + ds = xr.Dataset( + {request.param: (["time", "lon", "lat"], x[:, np.newaxis, np.newaxis])}, + coords={ + "index": time, + "time": time, + "lon": (["lon"], [1.0]), + "lat": (["lat"], [1.0]), + }, + ) + return ds + + +@pytest.mark.parametrize( + "test_dataset, var_name", + [ + pytest.param("temperature", "temperature"), + pytest.param("precipitation", "precipitation"), + ], + indirect=["test_dataset"], +) +def test_spatialdisaggregator_fit(test_dataset, var_name): + time = pd.date_range(start="2017-01-01", end="2020-01-01") + data_X = np.linspace(0, 1, len(time)) + + X = xr.DataArray(data_X, name=var_name, dims=["time"], coords={"time": time}) + # groupby_type = X.time.dt.dayofyear + climo = test_dataset[var_name].groupby("time.dayofyear").mean() + + spatial_disaggregator = SpatialDisaggregator(var_name) + scale_factor = spatial_disaggregator.fit(test_dataset, climo, var_name) + + if var_name == "temperature": + np.testing.assert_allclose( + scale_factor, test_dataset[var_name].groupby("time.dayofyear") - climo + ) + elif var_name == "precipitation": + np.testing.assert_allclose( + scale_factor, test_dataset[var_name].groupby("time.dayofyear") / climo + ) + + +@pytest.mark.parametrize( + "test_dataset, var_name", + [ + pytest.param("temperature", "temperature"), + pytest.param("precipitation", "precipitation"), + ], + indirect=["test_dataset"], +) +def test_spatialdisaggregator_predict(test_dataset, var_name): + time = pd.date_range(start="2017-01-01", end="2020-01-01") + data_X = np.linspace(0, 1, len(time)) + + scale_factor = xr.DataArray( + data_X, name=var_name, dims=["time"], coords={"time": time} + ) + # groupby_type = scale_factor.time.dt.dayofyear + climo = test_dataset[var_name].groupby("time.dayofyear").mean() + + spatial_disaggregator = SpatialDisaggregator(var_name) + data_downscaled = spatial_disaggregator.predict(test_dataset, climo, var_name) + + if var_name == "temperature": + np.testing.assert_allclose( + data_downscaled, test_dataset[var_name].groupby("time.dayofyear") + climo + ) + elif var_name == "precipitation": + np.testing.assert_allclose( + data_downscaled, test_dataset[var_name].groupby("time.dayofyear") * climo + ) From 458f292dab6e8a6584659e97a66c37e028da2b7a Mon Sep 17 00:00:00 2001 From: Diana Date: Mon, 19 Apr 2021 20:12:40 -0700 Subject: [PATCH 10/10] updates per review suggestions --- skdownscale/spatial_models/sd.py | 37 +++++++++++++++----------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/skdownscale/spatial_models/sd.py b/skdownscale/spatial_models/sd.py index 3a73b22..49f7900 100644 --- a/skdownscale/spatial_models/sd.py +++ b/skdownscale/spatial_models/sd.py @@ -17,19 +17,16 @@ class SpatialDisaggregator: temperature and other option is precipitation. """ - def __init__(self, var='temperature'): + def __init__(self, var="temperature"): self._var = var - - if var == 'temperature': - pass - elif var == 'precipitation': - pass - else: + + if var not in ['temperature', 'precipitation']: raise NotImplementedError( - 'functionality for spatial disaggregation' ' of %s has not yet been added' % var + "functionality for spatial disaggregation" + " of %s has not yet been added" % var ) - def fit(self, ds_bc, climo_coarse, var_name, lat_name='lat', lon_name='lon'): + def fit(self, ds_bc, climo_coarse, var_name, lat_name="lat", lon_name="lon"): """ Fit the scaling factor used in spatial disaggregation @@ -52,16 +49,16 @@ def fit(self, ds_bc, climo_coarse, var_name, lat_name='lat', lon_name='lon'): """ # check that climo has been regridded to model res - if not np.array_equal(ds_bc[lat_name], climo_coarse[lat_name]): - raise ValueError('climo latitude dimension does not match model res') - if not np.array_equal(ds_bc[lon_name], climo_coarse[lon_name]): - raise ValueError('climo longitude dimension does not match model res') + if not ds_bc[lat_name].equals(climo_coarse[lat_name]): + raise ValueError("climo latitude dimension does not match model res") + if not ds_bc[lon_name].equals(climo_coarse[lon_name]): + raise ValueError("climo longitude dimension does not match model res") scf = self._calculate_scaling_factor(ds_bc, climo_coarse, var_name, self._var) return scf - def predict(self, scf, climo_fine, var_name, lat_name='lat', lon_name='lon'): + def predict(self, scf, climo_fine, var_name, lat_name="lat", lon_name="lon"): """ Predict (apply) the scaling factor to the observed climatology. @@ -85,9 +82,9 @@ def predict(self, scf, climo_fine, var_name, lat_name='lat', lon_name='lon'): # check that scale factor has been regridded to obs res if not np.array_equal(scf[lat_name], climo_fine[lat_name]): - raise ValueError('scale factor latitude dimension does not match obs res') + raise ValueError("scale factor latitude dimension does not match obs res") if not np.array_equal(scf[lon_name], climo_fine[lon_name]): - raise ValueError('scale factor longitude dimension does not match obs res') + raise ValueError("scale factor longitude dimension does not match obs res") downscaled = self._apply_scaling_factor(scf, climo_fine, var_name, self._var) @@ -105,9 +102,9 @@ def _calculate_scaling_factor(self, ds_bc, climo, var_name, var): groupby_type = ds_bc.time.dt.dayofyear gb = da.groupby(groupby_type) - if var == 'temperature': + if var == "temperature": return gb - climo - elif var == 'precipitation': + elif var == "precipitation": return gb / climo def _apply_scaling_factor(self, scf, climo, var_name, var): @@ -118,7 +115,7 @@ def _apply_scaling_factor(self, scf, climo, var_name, var): da = scf[var_name] sff_daily = da.groupby(groupby_type) - if var == 'temperature': + if var == "temperature": return sff_daily + climo - elif var == 'precipitation': + elif var == "precipitation": return sff_daily * climo