Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Option to output datasets #1625

Merged
merged 5 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ New features and enhancements
* Validate YAML indicators description before trying to build module. (:issue:`1523`, :issue:`1595`, :pull:`1560`, :pull:`1596`, :pull:`1600`).
* Support ``indexer`` keyword in YAML indicator description. (:issue:`1522`, :pull:`1561`).
* New ``xclim.core.calendar.stack_periods`` and ``unstack_periods`` for performing ``rolling(time=...).construct(..., stride=...)`` but with non-uniform temporal periods like years or months. They replace ``xclim.sdba.processing.construct_moving_yearly_window`` and ``unpack_moving_yearly_window`` which are deprecated and will be removed in a future release.
* New ``as_dataset`` option for ``xclim.set_options``. When True, indicators will output Datasets instead of DataArrays. (:issue:`1257`, :pull:`1625`).

Breaking changes
^^^^^^^^^^^^^^^^
Expand Down
26 changes: 23 additions & 3 deletions docs/notebooks/usage.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Finally, `xclim` also allows us to call indicators using datasets and variable names."
"`xclim` also allows us to call indicators using datasets and variable names."
]
},
{
Expand All @@ -255,6 +255,26 @@
"gdd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Finally, we can also get datasets as an output with the `as_dataset` option."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with xclim.set_options(as_dataset=True, cf_compliance=\"log\"):\n",
" gdd_ds = xclim.atmos.growing_degree_days(\n",
" tas=daily_ds.air, thresh=\"10 degC\", freq=\"YS\", date_bounds=(\"04-01\", \"09-30\")\n",
" )\n",
"gdd_ds"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -367,9 +387,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
22 changes: 22 additions & 0 deletions tests/test_indicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,28 @@ def test_keep_attrs(tasmin_series, tasmax_series, xcopt, xropt, exp):
assert "bing" not in tg.attrs


def test_as_dataset(tasmax_series, tasmin_series):
    """Check that ``as_dataset=True`` yields a Dataset carrying the input attributes.

    With ``keep_attrs=True``, the attributes of the dataset passed through
    ``ds`` must be present on the output dataset, and attributes shared by
    the input variables must be present on the output variable.
    """
    tasmax = tasmax_series(np.arange(360.0))
    tasmin = tasmin_series(np.arange(360.0))
    tasmax.attrs.update({"something": "blabla", "bing": "bang", "foo": "bar"})
    tasmin.attrs.update({"something": "blabla", "bing": "bong"})
    source = xr.Dataset(
        {"tasmax": tasmax, "tasmin": tasmin},
        attrs={"fou": "barre"},
    )

    with xclim.set_options(keep_attrs=True, as_dataset=True):
        result = multiOptVar(ds=source)

    assert isinstance(result, xr.Dataset)
    # Global attributes come from the input dataset.
    assert result.attrs["fou"] == "barre"
    # "something" has the same value on both inputs, so it is expected on the output variable.
    assert result["multiopt"].attrs.get("something") == "blabla"


def test_as_dataset_multi(tas_series):
    """Check that a multi-output indicator returns one Dataset holding every output."""
    tas = tas_series(np.arange(360.0))

    with xclim.set_options(as_dataset=True):
        result = multiTemp(tas=tas, freq="YS")

    assert isinstance(result, xr.Dataset)
    # Each output of the indicator must be a data variable of the dataset.
    for expected in ("tmin", "tmax"):
        assert expected in result.data_vars


def test_opt_vars(tasmin_series, tasmax_series):
tn = tasmin_series(np.zeros(365))
tx = tasmax_series(np.zeros(365))
Expand Down
49 changes: 31 additions & 18 deletions xclim/core/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@
read_locale_file,
)
from .options import (
AS_DATASET,
CHECK_MISSING,
KEEP_ATTRS,
METADATA_LOCALES,
Expand Down Expand Up @@ -810,7 +811,7 @@ def __call__(self, *args, **kwds):
if self._version_deprecated:
self._show_deprecation_warning() # noqa

das, params = self._parse_variables_from_call(args, kwds)
das, params, dsattrs = self._parse_variables_from_call(args, kwds)

if OPTIONS[KEEP_ATTRS] is True or (
OPTIONS[KEEP_ATTRS] == "xarray"
Expand Down Expand Up @@ -882,6 +883,20 @@ def __call__(self, *args, **kwds):
out.attrs.update(attrs)
out.name = var_name

if OPTIONS[AS_DATASET]:
out = Dataset({o.name: o for o in outs})
if OPTIONS[KEEP_ATTRS] is True or (
OPTIONS[KEEP_ATTRS] == "xarray"
and xarray.core.options._get_keep_attrs(False)
):
out.attrs.update(dsattrs)
out.attrs["history"] = update_history(
self._history_string(das, params),
out,
new_name=self.identifier,
)
return out

# Return a single DataArray in case of single output, otherwise a tuple
if self.n_outs == 1:
return outs[0]
Expand Down Expand Up @@ -913,7 +928,9 @@ def _parse_variables_from_call(self, args, kwds) -> tuple[OrderedDict, dict]:
else:
params[name] = param.value

return das, params
ds = ba.arguments.get("ds")
dsattrs = ds.attrs if ds is not None else {}
return das, params, dsattrs

def _assign_named_args(self, ba):
"""Assign inputs passed as strings from ds."""
Expand Down Expand Up @@ -1066,28 +1083,24 @@ def _update_attrs(
if "cell_methods" in out:
attrs["cell_methods"] += " " + out.pop("cell_methods")

# Use of OrderedDict to ensure inputs (das) get listed before parameters (args).
# In the history attr, call signature will be all keywords and might be in a
# different order than the real function (but order doesn't really matter with keywords).
kwargs = OrderedDict(**das)
for k, v in args.items():
if self._all_parameters[k].injected:
continue
if self._all_parameters[k].kind == InputKind.KWARGS:
kwargs.update(**v)
elif self._all_parameters[k].kind != InputKind.DATASET:
kwargs[k] = v

attrs["history"] = update_history(
self._history_string(**kwargs),
self._history_string(das, args),
new_name=out.get("var_name"),
**das,
)

attrs.update(out)
return attrs

def _history_string(self, **kwargs):
def _history_string(self, das, params):
    """Build the call string recorded in the ``history`` attribute.

    Parameters
    ----------
    das : Mapping
        Input variables, keyed by argument name. They are listed first in the call string.
    params : Mapping
        Remaining call parameters, keyed by argument name.

    Returns
    -------
    str
        The result of ``gen_call_string`` for this indicator's registry id,
        with all arguments passed as keywords.
    """
    call_kwargs = dict(**das)
    for name, value in params.items():
        spec = self._all_parameters[name]
        if spec.injected:
            # Injected parameters are left out of the call string.
            continue
        if spec.kind == InputKind.KWARGS:
            # A **kwargs-kind parameter is flattened into individual keywords.
            call_kwargs.update(**value)
        elif spec.kind != InputKind.DATASET:
            # Dataset-kind parameters are left out of the call string.
            call_kwargs[name] = value
    return gen_call_string(self._registry_id, **call_kwargs)

@staticmethod
Expand Down Expand Up @@ -1397,7 +1410,7 @@ def __init__(self, **kwds):

super().__init__(**kwds)

def _history_string(self, **kwargs):
def _history_string(self, das, params):
if self.missing == "from_context":
missing = OPTIONS[CHECK_MISSING]
else:
Expand All @@ -1409,7 +1422,7 @@ def _history_string(self, **kwargs):
if mopts:
opt_str += f", missing_options={mopts}"

return super()._history_string(**kwargs) + opt_str
return super()._history_string(das, params) + opt_str

def _get_missing_freq(self, params):
"""Return the resampling frequency to be used in the missing values check."""
Expand Down
7 changes: 7 additions & 0 deletions xclim/core/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
SDBA_EXTRA_OUTPUT = "sdba_extra_output"
SDBA_ENCODE_CF = "sdba_encode_cf"
KEEP_ATTRS = "keep_attrs"
AS_DATASET = "as_dataset"

MISSING_METHODS: dict[str, Callable] = {}

Expand All @@ -37,6 +38,7 @@
SDBA_EXTRA_OUTPUT: False,
SDBA_ENCODE_CF: False,
KEEP_ATTRS: "xarray",
AS_DATASET: False,
}

_LOUDNESS_OPTIONS = frozenset(["log", "warn", "raise"])
Expand Down Expand Up @@ -68,6 +70,7 @@ def _valid_missing_options(mopts):
SDBA_EXTRA_OUTPUT: lambda opt: isinstance(opt, bool),
SDBA_ENCODE_CF: lambda opt: isinstance(opt, bool),
KEEP_ATTRS: _KEEP_ATTRS_OPTIONS.__contains__,
AS_DATASET: lambda opt: isinstance(opt, bool),
}


Expand Down Expand Up @@ -176,8 +179,12 @@ class set_options:
keep_attrs : bool or str
Controls attributes handling in indicators. If True, attributes from all inputs are merged
using the `drop_conflicts` strategy and then updated with xclim-provided attributes.
If ``as_dataset`` is also True and a dataset was passed to the ``ds`` argument of the Indicator,
the dataset's attributes are copied to the indicator's output.
If False, attributes from the inputs are ignored. If "xarray", xclim will use xarray's `keep_attrs` option.
Note that xarray's "default" is equivalent to False. Default: ``"xarray"``.
as_dataset : bool
If True, indicators output datasets. If False, they output DataArrays. Default: ``False``.

Examples
--------
Expand Down
Loading