From 86f8eb36b033d1e6151e8c47b9c0072b2f351218 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 12 Oct 2018 18:13:28 +0100 Subject: [PATCH 01/13] Added a global option to always keep or discard attrs. --- xarray/core/options.py | 15 +++++++++++++++ xarray/tests/test_options.py | 14 +++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/xarray/core/options.py b/xarray/core/options.py index 04ea0be7172..2100897a5d3 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -6,6 +6,8 @@ FILE_CACHE_MAXSIZE = 'file_cache_maxsize' CMAP_SEQUENTIAL = 'cmap_sequential' CMAP_DIVERGENT = 'cmap_divergent' +KEEP_ATTRS = 'keep_attrs' + OPTIONS = { DISPLAY_WIDTH: 80, @@ -14,6 +16,7 @@ FILE_CACHE_MAXSIZE: 128, CMAP_SEQUENTIAL: 'viridis', CMAP_DIVERGENT: 'RdBu_r', + KEEP_ATTRS: 'default' } _JOIN_OPTIONS = frozenset(['inner', 'outer', 'left', 'right', 'exact']) @@ -28,6 +31,7 @@ def _positive_integer(value): ARITHMETIC_JOIN: _JOIN_OPTIONS.__contains__, ENABLE_CFTIMEINDEX: lambda value: isinstance(value, bool), FILE_CACHE_MAXSIZE: _positive_integer, + KEEP_ATTRS: lambda choice: choice in [True, False, 'default'] } @@ -41,6 +45,17 @@ def _set_file_cache_maxsize(value): } +def _set_keep_attrs(func_default): + global_choice = OPTIONS['keep_attrs'] + + if global_choice is 'default': + return func_default + elif global_choice in [True, False]: + return global_choice + else: + raise ValueError('The global option keep_attrs is set to an invalid value.') + + class set_options(object): """Set options for xarray in a controlled context. 
diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py index 4441375a1b1..2c40c9bfb38 100644 --- a/xarray/tests/test_options.py +++ b/xarray/tests/test_options.py @@ -3,7 +3,7 @@ import pytest import xarray -from xarray.core.options import OPTIONS +from xarray.core.options import OPTIONS, _set_keep_attrs from xarray.backends.file_manager import FILE_CACHE @@ -44,6 +44,18 @@ def test_file_cache_maxsize(): assert FILE_CACHE.maxsize == original_size +def test_keep_attrs(): + with pytest.raises(ValueError): + xarray.set_options(keep_attrs='invalid_str') + with xarray.set_options(keep_attrs=True): + assert OPTIONS['keep_attrs'] + with xarray.set_options(keep_attrs=False): + assert not OPTIONS['keep_attrs'] + with xarray.set_options(keep_attrs='default'): + assert _set_keep_attrs(func_default=True) + assert _set_keep_attrs(func_default=False) is False + + def test_nested_options(): original = OPTIONS['display_width'] with xarray.set_options(display_width=1): From 483e28d29b537c2e9a367b881069b8d5096b87b4 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 12 Oct 2018 18:28:06 +0100 Subject: [PATCH 02/13] Updated docs and options docstring to describe new keep_attrs global option --- doc/faq.rst | 3 ++- xarray/core/options.py | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/faq.rst b/doc/faq.rst index 9313481f50a..44bc021024b 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -119,7 +119,8 @@ conventions`_. (An exception is serialization to and from netCDF files.) An implication of this choice is that we do not propagate ``attrs`` through most operations unless explicitly flagged (some methods have a ``keep_attrs`` -option). Similarly, xarray does not check for conflicts between ``attrs`` when +option, and there is a global flag for setting this to be always True or +False). 
Similarly, xarray does not check for conflicts between ``attrs`` when combining arrays and datasets, unless explicitly requested with the option ``compat='identical'``. The guiding principle is that metadata should not be allowed to get in the way. diff --git a/xarray/core/options.py b/xarray/core/options.py index 2100897a5d3..ee7f657847a 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -78,6 +78,11 @@ class set_options(object): - ``cmap_divergent``: colormap to use for divergent data plots. Default: ``RdBu_r``. If string, must be matplotlib built-in colormap. Can also be a Colormap object (e.g. mpl.cm.magma) + - ``keep_attrs``: rule for whether to keep attributes on xarray + Datasets/dataarrays after operations. Either ``True`` to always keep + attrs, ``False`` to always discard them, or ``'default'`` to use original + logic that attrs should only be kept in unambiguous circumstances. + Default: ``'default'``. f You can use ``set_options`` either as a context manager: From 8df30be49281a06c6f92b6ffd4a98cd48dc59b0f Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 12 Oct 2018 19:12:47 +0100 Subject: [PATCH 03/13] Updated all default keep_attrs arguments to check global option --- xarray/core/common.py | 16 ++++++++++------ xarray/core/dataarray.py | 10 ++++++---- xarray/core/dataset.py | 18 ++++++++++-------- xarray/core/groupby.py | 19 ++++++++++--------- xarray/core/missing.py | 7 ++++--- xarray/core/ops.py | 5 +++-- xarray/core/resample.py | 3 ++- xarray/core/variable.py | 5 +++-- 8 files changed, 48 insertions(+), 35 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index c74b1fa080b..b5b3ae5814c 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -11,6 +11,7 @@ from .arithmetic import SupportsArithmetic from .pycompat import OrderedDict, basestring, dask_array_type, suppress from .utils import Frozen, ReprObject, SortedKeysDict, either_dict_or_kwargs +from .options import _set_keep_attrs # Used as a 
sentinel value to indicate a all dimensions ALL_DIMS = ReprObject('') @@ -21,12 +22,12 @@ class ImplementsArrayReduce(object): def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, - keep_attrs=False, **kwargs): + keep_attrs=_set_keep_attrs(False), **kwargs): return self.reduce(func, dim, axis, keep_attrs=keep_attrs, skipna=skipna, allow_lazy=True, **kwargs) else: - def wrapped_func(self, dim=None, axis=None, keep_attrs=False, - **kwargs): + def wrapped_func(self, dim=None, axis=None, + keep_attrs=_set_keep_attrs(False), **kwargs): return self.reduce(func, dim, axis, keep_attrs=keep_attrs, allow_lazy=True, **kwargs) return wrapped_func @@ -51,13 +52,15 @@ class ImplementsDatasetReduce(object): @classmethod def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: - def wrapped_func(self, dim=None, keep_attrs=False, skipna=None, + def wrapped_func(self, dim=None, + keep_attrs=_set_keep_attrs(False), skipna=None, **kwargs): return self.reduce(func, dim, keep_attrs, skipna=skipna, numeric_only=numeric_only, allow_lazy=True, **kwargs) else: - def wrapped_func(self, dim=None, keep_attrs=False, **kwargs): + def wrapped_func(self, dim=None, + keep_attrs=_set_keep_attrs(False), **kwargs): return self.reduce(func, dim, keep_attrs, numeric_only=numeric_only, allow_lazy=True, **kwargs) @@ -590,7 +593,8 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs): center=center) def resample(self, freq=None, dim=None, how=None, skipna=None, - closed=None, label=None, base=0, keep_attrs=False, **indexer): + closed=None, label=None, base=0, + keep_attrs=_set_keep_attrs(False), **indexer): """Returns a Resample object for performing resampling operations. Handles both downsampling and upsampling. 
If any intervals contain no diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f131b003a69..9790479670f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -16,7 +16,7 @@ assert_coordinate_consistent, remap_label_indexers) from .dataset import Dataset, merge_indexes, split_indexes from .formatting import format_item -from .options import OPTIONS +from .options import OPTIONS, _set_keep_attrs from .pycompat import OrderedDict, basestring, iteritems, range, zip from .utils import ( decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution) @@ -1559,7 +1559,8 @@ def combine_first(self, other): """ return ops.fillna(self, other, join="outer") - def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs): + def reduce(self, func, dim=None, axis=None, + keep_attrs=_set_keep_attrs(False), **kwargs): """Reduce this array by applying `func` along some dimension(s). Parameters @@ -2270,7 +2271,8 @@ def sortby(self, variables, ascending=True): ds = self._to_temp_dataset().sortby(variables, ascending=ascending) return self._from_temp_dataset(ds) - def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False): + def quantile(self, q, dim=None, interpolation='linear', + keep_attrs=_set_keep_attrs(False)): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -2316,7 +2318,7 @@ def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False): q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation) return self._from_temp_dataset(ds) - def rank(self, dim, pct=False, keep_attrs=False): + def rank(self, dim, pct=False, keep_attrs=_set_keep_attrs(False)): """Ranks the data. 
Equal values are assigned a rank that is the average of the ranks that diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c8586d1d408..4e3a59312ce 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -28,7 +28,7 @@ from .merge import ( dataset_merge_method, dataset_update_method, merge_data_and_coords, merge_variables) -from .options import OPTIONS +from .options import OPTIONS, _set_keep_attrs from .pycompat import ( OrderedDict, basestring, dask_array_type, integer_types, iteritems, range) from .utils import ( @@ -2870,7 +2870,7 @@ def combine_first(self, other): out = ops.fillna(self, other, join="outer", dataset_join="outer") return out - def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False, + def reduce(self, func, dim=None, keep_attrs=_set_keep_attrs(False), numeric_only=False, allow_lazy=False, **kwargs): """Reduce this dataset by applying `func` along some dimension(s). @@ -2940,7 +2940,7 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False, attrs = self.attrs if keep_attrs else None return self._replace_vars_and_dims(variables, coord_names, attrs=attrs) - def apply(self, func, keep_attrs=False, args=(), **kwargs): + def apply(self, func, keep_attrs=_set_keep_attrs(False), args=(), **kwargs): """Apply a function over the data variables in this dataset. Parameters @@ -3288,7 +3288,7 @@ def from_dict(cls, d): return obj @staticmethod - def _unary_op(f, keep_attrs=False): + def _unary_op(f, keep_attrs=_set_keep_attrs(False)): @functools.wraps(f) def func(self, *args, **kwargs): ds = self.coords.to_dataset() @@ -3649,7 +3649,7 @@ def sortby(self, variables, ascending=True): return aligned_self.isel(**indices) def quantile(self, q, dim=None, interpolation='linear', - numeric_only=False, keep_attrs=False): + numeric_only=False, keep_attrs=_set_keep_attrs(False)): """Compute the qth quantile of the data along the specified dimension. 
Returns the qth quantiles(s) of the array elements for each variable @@ -3735,7 +3735,7 @@ def quantile(self, q, dim=None, interpolation='linear', new.coords['quantile'] = q return new - def rank(self, dim, pct=False, keep_attrs=False): + def rank(self, dim, pct=False, keep_attrs=_set_keep_attrs(False)): """Ranks the data. Equal values are assigned a rank that is the average of the ranks that @@ -3838,11 +3838,13 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None): @property def real(self): - return self._unary_op(lambda x: x.real, keep_attrs=True)(self) + return self._unary_op(lambda x: x.real, + keep_attrs=_set_keep_attrs(True))(self) @property def imag(self): - return self._unary_op(lambda x: x.imag, keep_attrs=True)(self) + return self._unary_op(lambda x: x.imag, + keep_attrs=_set_keep_attrs(True))(self) def filter_by_attrs(self, **kwargs): """Returns a ``Dataset`` with variables that match specific conditions. diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 3842c642047..f4ee763662c 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -13,6 +13,7 @@ from .pycompat import integer_types, range, zip from .utils import hashable, maybe_wrap_array, peek_at, safe_cast_to_index from .variable import IndexVariable, Variable, as_variable +from .options import _set_keep_attrs def unique_value_groups(ar, sort=True): @@ -407,12 +408,12 @@ def _first_or_last(self, op, skipna, keep_attrs): return self.reduce(op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs, allow_lazy=True) - def first(self, skipna=None, keep_attrs=True): + def first(self, skipna=None, keep_attrs=_set_keep_attrs(True)): """Return the first element of each group along the group dimension """ return self._first_or_last(duck_array_ops.first, skipna, keep_attrs) - def last(self, skipna=None, keep_attrs=True): + def last(self, skipna=None, keep_attrs=_set_keep_attrs(True)): """Return the last element of each group along the group dimension """ return 
self._first_or_last(duck_array_ops.last, skipna, keep_attrs) @@ -538,8 +539,8 @@ def _combine(self, applied, shortcut=False): combined = self._maybe_unstack(combined) return combined - def reduce(self, func, dim=None, axis=None, keep_attrs=False, - shortcut=True, **kwargs): + def reduce(self, func, dim=None, axis=None, + keep_attrs=_set_keep_attrs(False), shortcut=True, **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). @@ -589,12 +590,12 @@ def reduce_array(ar): def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, skipna=None, - keep_attrs=False, **kwargs): + keep_attrs=_set_keep_attrs(False), **kwargs): return self.reduce(func, dim, axis, keep_attrs=keep_attrs, skipna=skipna, allow_lazy=True, **kwargs) else: def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, - keep_attrs=False, **kwargs): + keep_attrs=_set_keep_attrs(False), **kwargs): return self.reduce(func, dim, axis, keep_attrs=keep_attrs, allow_lazy=True, **kwargs) return wrapped_func @@ -650,7 +651,7 @@ def _combine(self, applied): combined = self._maybe_unstack(combined) return combined - def reduce(self, func, dim=None, keep_attrs=False, **kwargs): + def reduce(self, func, dim=None, keep_attrs=_set_keep_attrs(False), **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). 
@@ -700,13 +701,13 @@ def reduce_dataset(ds): @classmethod def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: - def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False, + def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=_set_keep_attrs(False), skipna=None, **kwargs): return self.reduce(func, dim, keep_attrs, skipna=skipna, numeric_only=numeric_only, allow_lazy=True, **kwargs) else: - def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False, + def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=_set_keep_attrs(False), **kwargs): return self.reduce(func, dim, keep_attrs, numeric_only=numeric_only, allow_lazy=True, diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 3f4e0fc3ac9..a025c702369 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -14,6 +14,7 @@ from .pycompat import iteritems from .utils import OrderedSet, datetime_to_numeric, is_scalar from .variable import Variable, broadcast_variables +from .options import _set_keep_attrs class BaseInterpolator(object): @@ -218,7 +219,7 @@ def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, output_dtypes=[self.dtype], dask='parallelized', vectorize=True, - keep_attrs=True).transpose(*self.dims) + keep_attrs=_set_keep_attrs(True)).transpose(*self.dims) if limit is not None: arr = arr.where(valids) @@ -269,7 +270,7 @@ def ffill(arr, dim=None, limit=None): return apply_ufunc(bn.push, arr, dask='parallelized', - keep_attrs=True, + keep_attrs=_set_keep_attrs(True), output_dtypes=[arr.dtype], kwargs=dict(n=_limit, axis=axis)).transpose(*arr.dims) @@ -283,7 +284,7 @@ def bfill(arr, dim=None, limit=None): return apply_ufunc(_bfill, arr, dask='parallelized', - keep_attrs=True, + keep_attrs=_set_keep_attrs(True), output_dtypes=[arr.dtype], kwargs=dict(n=_limit, axis=axis)).transpose(*arr.dims) diff --git a/xarray/core/ops.py b/xarray/core/ops.py index a0dd2212a8f..84a521960cb 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ 
-14,6 +14,7 @@ from . import dtypes, duck_array_ops from .nputils import array_eq, array_ne from .pycompat import PY3 +from .options import _set_keep_attrs try: import bottleneck as bn @@ -153,7 +154,7 @@ def fillna(data, other, join="left", dataset_join="left"): dask="allowed", dataset_join=dataset_join, dataset_fill_value=np.nan, - keep_attrs=True) + keep_attrs=_set_keep_attrs(True)) def where_method(self, cond, other=dtypes.NA): @@ -179,7 +180,7 @@ def where_method(self, cond, other=dtypes.NA): join=join, dataset_join=join, dask='allowed', - keep_attrs=True) + keep_attrs=_set_keep_attrs(True)) def _call_possibly_missing_method(arg, name, args, kwargs): diff --git a/xarray/core/resample.py b/xarray/core/resample.py index bd84e04487e..13b5dab2dc0 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -3,6 +3,7 @@ from . import ops from .groupby import DEFAULT_DIMS, DataArrayGroupBy, DatasetGroupBy from .pycompat import OrderedDict, dask_array_type +from .options import _set_keep_attrs RESAMPLE_DIM = '__resample_dim__' @@ -273,7 +274,7 @@ def apply(self, func, **kwargs): return combined.rename({self._resample_dim: self._dim}) - def reduce(self, func, dim=None, keep_attrs=False, **kwargs): + def reduce(self, func, dim=None, keep_attrs=_set_keep_attrs(False), **kwargs): """Reduce the items in this group by applying `func` along the pre-defined resampling dimension. 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c003d52aab2..8b9ff1a07cd 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -18,6 +18,7 @@ from .pycompat import ( OrderedDict, basestring, dask_array_type, integer_types, zip) from .utils import OrderedSet, either_dict_or_kwargs +from .options import _set_keep_attrs try: import dask.array as da @@ -1303,8 +1304,8 @@ def fillna(self, value): def where(self, cond, other=dtypes.NA): return ops.where_method(self, cond, other) - def reduce(self, func, dim=None, axis=None, keep_attrs=False, - allow_lazy=False, **kwargs): + def reduce(self, func, dim=None, axis=None, + keep_attrs=_set_keep_attrs(False), allow_lazy=False, **kwargs): """Reduce this array by applying `func` along some dimension(s). Parameters From 9521d5bee2ba38d8c9b2d898af918c57ace94329 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 13 Oct 2018 13:34:23 +0100 Subject: [PATCH 04/13] New test to check attributes are retained properly --- xarray/core/options.py | 4 ++-- xarray/tests/test_options.py | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/xarray/core/options.py b/xarray/core/options.py index ee7f657847a..65153a42fe0 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -45,11 +45,11 @@ def _set_file_cache_maxsize(value): } -def _set_keep_attrs(func_default): +def _get_keep_attrs(default): global_choice = OPTIONS['keep_attrs'] if global_choice is 'default': - return func_default + return default elif global_choice in [True, False]: return global_choice else: diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py index 2c40c9bfb38..b70046cfe4e 100644 --- a/xarray/tests/test_options.py +++ b/xarray/tests/test_options.py @@ -3,8 +3,9 @@ import pytest import xarray -from xarray.core.options import OPTIONS, _set_keep_attrs +from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.backends.file_manager import 
FILE_CACHE +from xarray.tests.test_dataset import create_test_data def test_invalid_option_raises(): @@ -52,8 +53,35 @@ def test_keep_attrs(): with xarray.set_options(keep_attrs=False): assert not OPTIONS['keep_attrs'] with xarray.set_options(keep_attrs='default'): - assert _set_keep_attrs(func_default=True) - assert _set_keep_attrs(func_default=False) is False + assert _get_keep_attrs(default=True) + assert not _get_keep_attrs(default=False) + + +def create_test_data_attrs(seed=0): + ds = create_test_data(seed) + ds.attrs = {'attr1': 5, 'attr2': 'history', + 'attr3': {'nested': 'more_info'}} + return ds + + +def test_attr_retention(): + ds = create_test_data_attrs() + original_attrs = ds.attrs + + # Test default behaviour + result = ds.mean() + assert result.attrs == {} + with xarray.set_options(keep_attrs='default'): + result = ds.mean() + assert result.attrs == {} + + with xarray.set_options(keep_attrs=True): + result = ds.mean() + assert result.attrs == original_attrs + + with xarray.set_options(keep_attrs=False): + result = ds.mean() + assert result.attrs == {} def test_nested_options(): From 68ce083e403e27b55d38f9ab8fc7b6618f375690 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 13 Oct 2018 13:41:03 +0100 Subject: [PATCH 05/13] Implemented shoyer's suggestion so attribute permanence test now passes for reduce methods --- xarray/core/common.py | 30 +++++++++++++++++++----------- xarray/core/dataarray.py | 20 ++++++++++++++------ xarray/core/dataset.py | 27 ++++++++++++++++++--------- xarray/core/groupby.py | 35 ++++++++++++++++++++++------------- xarray/core/missing.py | 7 +++---- xarray/core/ops.py | 5 ++--- xarray/core/resample.py | 3 +-- xarray/core/variable.py | 6 ++++-- 8 files changed, 83 insertions(+), 50 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index b5b3ae5814c..56ee626ff17 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -11,7 +11,7 @@ from .arithmetic import SupportsArithmetic from .pycompat 
import OrderedDict, basestring, dask_array_type, suppress from .utils import Frozen, ReprObject, SortedKeysDict, either_dict_or_kwargs -from .options import _set_keep_attrs +from .options import _get_keep_attrs # Used as a sentinel value to indicate a all dimensions ALL_DIMS = ReprObject('') @@ -22,12 +22,16 @@ class ImplementsArrayReduce(object): def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, - keep_attrs=_set_keep_attrs(False), **kwargs): + keep_attrs=None, **kwargs): + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) return self.reduce(func, dim, axis, keep_attrs=keep_attrs, skipna=skipna, allow_lazy=True, **kwargs) else: def wrapped_func(self, dim=None, axis=None, - keep_attrs=_set_keep_attrs(False), **kwargs): + keep_attrs=None, **kwargs): + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) return self.reduce(func, dim, axis, keep_attrs=keep_attrs, allow_lazy=True, **kwargs) return wrapped_func @@ -52,16 +56,18 @@ class ImplementsDatasetReduce(object): @classmethod def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: - def wrapped_func(self, dim=None, - keep_attrs=_set_keep_attrs(False), skipna=None, + def wrapped_func(self, dim=None, keep_attrs=None, skipna=None, **kwargs): - return self.reduce(func, dim, keep_attrs, skipna=skipna, + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + return self.reduce(func, dim, keep_attrs=keep_attrs, skipna=skipna, numeric_only=numeric_only, allow_lazy=True, **kwargs) else: - def wrapped_func(self, dim=None, - keep_attrs=_set_keep_attrs(False), **kwargs): - return self.reduce(func, dim, keep_attrs, + def wrapped_func(self, dim=None, keep_attrs=None, **kwargs): + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + return self.reduce(func, dim, keep_attrs=keep_attrs, numeric_only=numeric_only, allow_lazy=True, **kwargs) return 
wrapped_func @@ -593,8 +599,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs): center=center) def resample(self, freq=None, dim=None, how=None, skipna=None, - closed=None, label=None, base=0, - keep_attrs=_set_keep_attrs(False), **indexer): + closed=None, label=None, base=0, keep_attrs=None, **indexer): """Returns a Resample object for performing resampling operations. Handles both downsampling and upsampling. If any intervals contain no @@ -662,6 +667,9 @@ def resample(self, freq=None, dim=None, how=None, skipna=None, from .dataarray import DataArray from .resample import RESAMPLE_DIM + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + if dim is not None: if how is None: how = 'mean' diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 9790479670f..f361331e008 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -16,7 +16,7 @@ assert_coordinate_consistent, remap_label_indexers) from .dataset import Dataset, merge_indexes, split_indexes from .formatting import format_item -from .options import OPTIONS, _set_keep_attrs +from .options import OPTIONS, _get_keep_attrs from .pycompat import OrderedDict, basestring, iteritems, range, zip from .utils import ( decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution) @@ -1559,8 +1559,7 @@ def combine_first(self, other): """ return ops.fillna(self, other, join="outer") - def reduce(self, func, dim=None, axis=None, - keep_attrs=_set_keep_attrs(False), **kwargs): + def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs): """Reduce this array by applying `func` along some dimension(s). Parameters @@ -1589,6 +1588,10 @@ def reduce(self, func, dim=None, axis=None, DataArray with this object's array replaced with an array with summarized data and the indicated dimension(s) removed. 
""" + + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs) return self._replace_maybe_drop_dims(var) @@ -2271,8 +2274,7 @@ def sortby(self, variables, ascending=True): ds = self._to_temp_dataset().sortby(variables, ascending=ascending) return self._from_temp_dataset(ds) - def quantile(self, q, dim=None, interpolation='linear', - keep_attrs=_set_keep_attrs(False)): + def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -2314,11 +2316,14 @@ def quantile(self, q, dim=None, interpolation='linear', numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile """ + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + ds = self._to_temp_dataset().quantile( q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation) return self._from_temp_dataset(ds) - def rank(self, dim, pct=False, keep_attrs=_set_keep_attrs(False)): + def rank(self, dim, pct=False, keep_attrs=None): """Ranks the data. 
Equal values are assigned a rank that is the average of the ranks that @@ -2354,6 +2359,9 @@ def rank(self, dim, pct=False, keep_attrs=_set_keep_attrs(False)): array([ 1., 2., 3.]) Dimensions without coordinates: x """ + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4e3a59312ce..b5057b121f2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -28,7 +28,7 @@ from .merge import ( dataset_merge_method, dataset_update_method, merge_data_and_coords, merge_variables) -from .options import OPTIONS, _set_keep_attrs +from .options import OPTIONS, _get_keep_attrs from .pycompat import ( OrderedDict, basestring, dask_array_type, integer_types, iteritems, range) from .utils import ( @@ -2870,7 +2870,7 @@ def combine_first(self, other): out = ops.fillna(self, other, join="outer", dataset_join="outer") return out - def reduce(self, func, dim=None, keep_attrs=_set_keep_attrs(False), numeric_only=False, + def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False, allow_lazy=False, **kwargs): """Reduce this dataset by applying `func` along some dimension(s). 
@@ -2912,6 +2912,9 @@ def reduce(self, func, dim=None, keep_attrs=_set_keep_attrs(False), numeric_only raise ValueError('Dataset does not contain the dimensions: %s' % missing_dimensions) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + variables = OrderedDict() for name, var in iteritems(self._variables): reduce_dims = [dim for dim in var.dims if dim in dims] @@ -2940,7 +2943,7 @@ def reduce(self, func, dim=None, keep_attrs=_set_keep_attrs(False), numeric_only attrs = self.attrs if keep_attrs else None return self._replace_vars_and_dims(variables, coord_names, attrs=attrs) - def apply(self, func, keep_attrs=_set_keep_attrs(False), args=(), **kwargs): + def apply(self, func, keep_attrs=None, args=(), **kwargs): """Apply a function over the data variables in this dataset. Parameters @@ -2950,7 +2953,7 @@ def apply(self, func, keep_attrs=_set_keep_attrs(False), args=(), **kwargs): transform each DataArray `x` in this dataset into another DataArray. keep_attrs : bool, optional - If True, the dataset's attributes (`attrs`) will be copied from + If True, the dataset's attributes (`attrs`) will be _getcopied from the original object to the new one. If False, the new object will be returned without attributes. 
args : tuple, optional @@ -2985,6 +2988,8 @@ def apply(self, func, keep_attrs=_set_keep_attrs(False), args=(), **kwargs): variables = OrderedDict( (k, maybe_wrap_array(v, func(v, *args, **kwargs))) for k, v in iteritems(self.data_vars)) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) attrs = self.attrs if keep_attrs else None return type(self)(variables, attrs=attrs) @@ -3288,7 +3293,7 @@ def from_dict(cls, d): return obj @staticmethod - def _unary_op(f, keep_attrs=_set_keep_attrs(False)): + def _unary_op(f, keep_attrs=False): @functools.wraps(f) def func(self, *args, **kwargs): ds = self.coords.to_dataset() @@ -3649,7 +3654,7 @@ def sortby(self, variables, ascending=True): return aligned_self.isel(**indices) def quantile(self, q, dim=None, interpolation='linear', - numeric_only=False, keep_attrs=_set_keep_attrs(False)): + numeric_only=False, keep_attrs=None): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements for each variable @@ -3727,6 +3732,8 @@ def quantile(self, q, dim=None, interpolation='linear', # construct the new dataset coord_names = set(k for k in self.coords if k in variables) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) attrs = self.attrs if keep_attrs else None new = self._replace_vars_and_dims(variables, coord_names, attrs=attrs) if 'quantile' in new.dims: @@ -3735,7 +3742,7 @@ def quantile(self, q, dim=None, interpolation='linear', new.coords['quantile'] = q return new - def rank(self, dim, pct=False, keep_attrs=_set_keep_attrs(False)): + def rank(self, dim, pct=False, keep_attrs=None): """Ranks the data. 
Equal values are assigned a rank that is the average of the ranks that @@ -3775,6 +3782,8 @@ def rank(self, dim, pct=False, keep_attrs=_set_keep_attrs(False)): variables[name] = var coord_names = set(self.coords) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) attrs = self.attrs if keep_attrs else None return self._replace_vars_and_dims(variables, coord_names, attrs=attrs) @@ -3839,12 +3848,12 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None): @property def real(self): return self._unary_op(lambda x: x.real, - keep_attrs=_set_keep_attrs(True))(self) + keep_attrs=True)(self) @property def imag(self): return self._unary_op(lambda x: x.imag, - keep_attrs=_set_keep_attrs(True))(self) + keep_attrs=True)(self) def filter_by_attrs(self, **kwargs): """Returns a ``Dataset`` with variables that match specific conditions. diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index f4ee763662c..f93c24f7913 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -13,7 +13,7 @@ from .pycompat import integer_types, range, zip from .utils import hashable, maybe_wrap_array, peek_at, safe_cast_to_index from .variable import IndexVariable, Variable, as_variable -from .options import _set_keep_attrs +from .options import _get_keep_attrs def unique_value_groups(ar, sort=True): @@ -405,15 +405,17 @@ def _first_or_last(self, op, skipna, keep_attrs): # NB. 
this is currently only used for reductions along an existing # dimension return self._obj + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) return self.reduce(op, self._group_dim, skipna=skipna, keep_attrs=keep_attrs, allow_lazy=True) - def first(self, skipna=None, keep_attrs=_set_keep_attrs(True)): + def first(self, skipna=None, keep_attrs=None): """Return the first element of each group along the group dimension """ return self._first_or_last(duck_array_ops.first, skipna, keep_attrs) - def last(self, skipna=None, keep_attrs=_set_keep_attrs(True)): + def last(self, skipna=None, keep_attrs=None): """Return the last element of each group along the group dimension """ return self._first_or_last(duck_array_ops.last, skipna, keep_attrs) @@ -540,7 +542,7 @@ def _combine(self, applied, shortcut=False): return combined def reduce(self, func, dim=None, axis=None, - keep_attrs=_set_keep_attrs(False), shortcut=True, **kwargs): + keep_attrs=None, shortcut=True, **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). 
@@ -580,6 +582,9 @@ def reduce(self, func, dim=None, axis=None, "warning, pass dim=xarray.ALL_DIMS explicitly.", FutureWarning, stacklevel=2) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + def reduce_array(ar): return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) return self.apply(reduce_array, shortcut=shortcut) @@ -590,12 +595,16 @@ def reduce_array(ar): def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, skipna=None, - keep_attrs=_set_keep_attrs(False), **kwargs): + keep_attrs=None, **kwargs): + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) return self.reduce(func, dim, axis, keep_attrs=keep_attrs, skipna=skipna, allow_lazy=True, **kwargs) else: def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, - keep_attrs=_set_keep_attrs(False), **kwargs): + keep_attrs=None, **kwargs): + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) return self.reduce(func, dim, axis, keep_attrs=keep_attrs, allow_lazy=True, **kwargs) return wrapped_func @@ -651,7 +660,7 @@ def _combine(self, applied): combined = self._maybe_unstack(combined) return combined - def reduce(self, func, dim=None, keep_attrs=_set_keep_attrs(False), **kwargs): + def reduce(self, func, dim=None, keep_attrs=None, **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). 
@@ -701,15 +710,15 @@ def reduce_dataset(ds): @classmethod def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: - def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=_set_keep_attrs(False), + def wrapped_func(self, dim=DEFAULT_DIMS, skipna=None, **kwargs): - return self.reduce(func, dim, keep_attrs, skipna=skipna, - numeric_only=numeric_only, allow_lazy=True, - **kwargs) + return self.reduce(func, dim, keep_attrs=False, + skipna=skipna, numeric_only=numeric_only, + allow_lazy=True, **kwargs) else: - def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=_set_keep_attrs(False), + def wrapped_func(self, dim=DEFAULT_DIMS, **kwargs): - return self.reduce(func, dim, keep_attrs, + return self.reduce(func, dim, keep_attrs=False, numeric_only=numeric_only, allow_lazy=True, **kwargs) return wrapped_func diff --git a/xarray/core/missing.py b/xarray/core/missing.py index a025c702369..3f4e0fc3ac9 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -14,7 +14,6 @@ from .pycompat import iteritems from .utils import OrderedSet, datetime_to_numeric, is_scalar from .variable import Variable, broadcast_variables -from .options import _set_keep_attrs class BaseInterpolator(object): @@ -219,7 +218,7 @@ def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, output_dtypes=[self.dtype], dask='parallelized', vectorize=True, - keep_attrs=_set_keep_attrs(True)).transpose(*self.dims) + keep_attrs=True).transpose(*self.dims) if limit is not None: arr = arr.where(valids) @@ -270,7 +269,7 @@ def ffill(arr, dim=None, limit=None): return apply_ufunc(bn.push, arr, dask='parallelized', - keep_attrs=_set_keep_attrs(True), + keep_attrs=True, output_dtypes=[arr.dtype], kwargs=dict(n=_limit, axis=axis)).transpose(*arr.dims) @@ -284,7 +283,7 @@ def bfill(arr, dim=None, limit=None): return apply_ufunc(_bfill, arr, dask='parallelized', - keep_attrs=_set_keep_attrs(True), + keep_attrs=True, output_dtypes=[arr.dtype], kwargs=dict(n=_limit, 
axis=axis)).transpose(*arr.dims) diff --git a/xarray/core/ops.py b/xarray/core/ops.py index 84a521960cb..a0dd2212a8f 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -14,7 +14,6 @@ from . import dtypes, duck_array_ops from .nputils import array_eq, array_ne from .pycompat import PY3 -from .options import _set_keep_attrs try: import bottleneck as bn @@ -154,7 +153,7 @@ def fillna(data, other, join="left", dataset_join="left"): dask="allowed", dataset_join=dataset_join, dataset_fill_value=np.nan, - keep_attrs=_set_keep_attrs(True)) + keep_attrs=True) def where_method(self, cond, other=dtypes.NA): @@ -180,7 +179,7 @@ def where_method(self, cond, other=dtypes.NA): join=join, dataset_join=join, dask='allowed', - keep_attrs=_set_keep_attrs(True)) + keep_attrs=True) def _call_possibly_missing_method(arg, name, args, kwargs): diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 13b5dab2dc0..edf7dfc3d41 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -3,7 +3,6 @@ from . import ops from .groupby import DEFAULT_DIMS, DataArrayGroupBy, DatasetGroupBy from .pycompat import OrderedDict, dask_array_type -from .options import _set_keep_attrs RESAMPLE_DIM = '__resample_dim__' @@ -274,7 +273,7 @@ def apply(self, func, **kwargs): return combined.rename({self._resample_dim: self._dim}) - def reduce(self, func, dim=None, keep_attrs=_set_keep_attrs(False), **kwargs): + def reduce(self, func, dim=None, keep_attrs=None, **kwargs): """Reduce the items in this group by applying `func` along the pre-defined resampling dimension. 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 8b9ff1a07cd..271f00102e0 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -18,7 +18,7 @@ from .pycompat import ( OrderedDict, basestring, dask_array_type, integer_types, zip) from .utils import OrderedSet, either_dict_or_kwargs -from .options import _set_keep_attrs +from .options import _get_keep_attrs try: import dask.array as da @@ -1305,7 +1305,7 @@ def where(self, cond, other=dtypes.NA): return ops.where_method(self, cond, other) def reduce(self, func, dim=None, axis=None, - keep_attrs=_set_keep_attrs(False), allow_lazy=False, **kwargs): + keep_attrs=None, allow_lazy=False, **kwargs): """Reduce this array by applying `func` along some dimension(s). Parameters @@ -1352,6 +1352,8 @@ def reduce(self, func, dim=None, axis=None, dims = [adim for n, adim in enumerate(self.dims) if n not in removed_axes] + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) attrs = self._attrs if keep_attrs else None return Variable(dims, data, attrs=attrs) From ff65ebe0acdc59d75a0863f8009e58043bf0d575 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 14 Oct 2018 14:31:29 +0100 Subject: [PATCH 06/13] Added tests to explicitly check that attrs are propagated correctly --- xarray/tests/test_options.py | 118 ++++++++++++++++++++++++++++------- 1 file changed, 97 insertions(+), 21 deletions(-) diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py index b70046cfe4e..b9eb6ea9d9e 100644 --- a/xarray/tests/test_options.py +++ b/xarray/tests/test_options.py @@ -6,6 +6,7 @@ from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.backends.file_manager import FILE_CACHE from xarray.tests.test_dataset import create_test_data +from xarray import concat, merge def test_invalid_option_raises(): @@ -57,38 +58,113 @@ def test_keep_attrs(): assert not _get_keep_attrs(default=False) -def create_test_data_attrs(seed=0): +def test_nested_options(): + original = 
OPTIONS['display_width'] + with xarray.set_options(display_width=1): + assert OPTIONS['display_width'] == 1 + with xarray.set_options(display_width=2): + assert OPTIONS['display_width'] == 2 + assert OPTIONS['display_width'] == 1 + assert OPTIONS['display_width'] == original + + +def create_test_dataset_attrs(seed=0): ds = create_test_data(seed) ds.attrs = {'attr1': 5, 'attr2': 'history', 'attr3': {'nested': 'more_info'}} return ds -def test_attr_retention(): - ds = create_test_data_attrs() - original_attrs = ds.attrs +def create_test_dataarray_attrs(seed=0, var='var1'): + da = create_test_data(seed)[var] + da.attrs = {'attr1': 5, 'attr2': 'history', + 'attr3': {'nested': 'more_info'}} + return da - # Test default behaviour - result = ds.mean() - assert result.attrs == {} - with xarray.set_options(keep_attrs='default'): + +class TestAttrRetention: + def test_dataset_attr_retention(self): + # Use .mean() for all tests: a typical reduction operation + ds = create_test_dataset_attrs() + original_attrs = ds.attrs + + # Test default behaviour result = ds.mean() assert result.attrs == {} + with xarray.set_options(keep_attrs='default'): + result = ds.mean() + assert result.attrs == {} - with xarray.set_options(keep_attrs=True): - result = ds.mean() - assert result.attrs == original_attrs + with xarray.set_options(keep_attrs=True): + result = ds.mean() + assert result.attrs == original_attrs - with xarray.set_options(keep_attrs=False): - result = ds.mean() + with xarray.set_options(keep_attrs=False): + result = ds.mean() + assert result.attrs == {} + + def test_dataarray_attr_retention(self): + # Use .mean() for all tests: a typical reduction operation + da = create_test_dataarray_attrs() + original_attrs = da.attrs + + # Test default behaviour + result = da.mean() assert result.attrs == {} + with xarray.set_options(keep_attrs='default'): + result = da.mean() + assert result.attrs == {} + + with xarray.set_options(keep_attrs=True): + result = da.mean() + assert 
result.attrs == original_attrs + + with xarray.set_options(keep_attrs=False): + result = da.mean() + assert result.attrs == {} + + def test_groupby_attr_retention(self): + da = xarray.DataArray([1, 2, 3], [('x', [1, 1, 2])]) + da.attrs = {'attr1': 5, 'attr2': 'history', + 'attr3': {'nested': 'more_info'}} + original_attrs = da.attrs + + # Test default behaviour + result = da.groupby('x').sum(keep_attrs=True) + assert result.attrs == original_attrs + with xarray.set_options(keep_attrs='default'): + result = da.groupby('x').sum(keep_attrs=True) + assert result.attrs == original_attrs + + with xarray.set_options(keep_attrs=True): + result1 = da.groupby('x') + result = result1.sum() + assert result.attrs == original_attrs + + with xarray.set_options(keep_attrs=False): + result = da.groupby('x').sum() + assert result.attrs == {} + + def test_concat_attr_retention(self): + ds1 = create_test_dataset_attrs() + ds2 = create_test_dataset_attrs() + ds2.attrs = {'wrong': 'attributes'} + original_attrs = ds1.attrs + + # Test default behaviour of keeping the attrs of the first + # dataset in the supplied list + # global keep_attrs option currently doesn't affect concat + result = concat([ds1, ds2], dim='dim1') + assert result.attrs == original_attrs + @pytest.mark.xfail + def test_merge_attr_retention(self): + da1 = create_test_dataarray_attrs(var='var1') + da2 = create_test_dataarray_attrs(var='var2') + da2.attrs = {'wrong': 'attributes'} + original_attrs = da1.attrs -def test_nested_options(): - original = OPTIONS['display_width'] - with xarray.set_options(display_width=1): - assert OPTIONS['display_width'] == 1 - with xarray.set_options(display_width=2): - assert OPTIONS['display_width'] == 2 - assert OPTIONS['display_width'] == 1 - assert OPTIONS['display_width'] == original + # merge currently discards attrs, and the global keep_attrs + # option doesn't affect this + result = merge([da1, da2]) + assert result.attrs == original_attrs From 
6b5d40353c01c7d9ab9f32a7f28cfb1cbf6e83d3 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 14 Oct 2018 17:21:25 +0100 Subject: [PATCH 07/13] Updated what's new with global keep_attrs option --- doc/whats-new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c8ae5ac43c8..7824ee5d445 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -72,6 +72,11 @@ Enhancements :py:meth:`~xarray.DataArray.interp`, and :py:meth:`~xarray.Dataset.interp`. By `Spencer Clark `_ +- There is now a global option to either always keep or always discard + dataset and dataarray attrs upon operations. The option is set with + ``xarray.set_options(keep_attrs=True)``, and the default is to use the old + behaviour. + By `Tom Nicholas `_. Bug fixes ~~~~~~~~~ From 23a70d6eab5db49fa60e72acf3956595c69272a3 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 15 Oct 2018 10:30:20 +0100 Subject: [PATCH 08/13] Bugfix to stop failing tests in test_dataset --- xarray/core/groupby.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index f93c24f7913..a7f250f55e5 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -701,6 +701,9 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): elif dim is None: dim = self._group_dim + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + def reduce_dataset(ds): return ds.reduce(func, dim, keep_attrs, **kwargs) return self.apply(reduce_dataset) @@ -712,13 +715,13 @@ def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: def wrapped_func(self, dim=DEFAULT_DIMS, skipna=None, **kwargs): - return self.reduce(func, dim, keep_attrs=False, + return self.reduce(func, dim, skipna=skipna, numeric_only=numeric_only, allow_lazy=True, **kwargs) else: def wrapped_func(self, dim=DEFAULT_DIMS, **kwargs): - return self.reduce(func, dim, keep_attrs=False, + return 
self.reduce(func, dim, numeric_only=numeric_only, allow_lazy=True, **kwargs) return wrapped_func From e9c7d26207b66a0fe6c9729287d56bbb5e83e985 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 27 Oct 2018 14:38:56 +0100 Subject: [PATCH 09/13] Test class now inherits from object for python2 compatibility --- xarray/tests/test_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py index b9eb6ea9d9e..a21ea3e6b64 100644 --- a/xarray/tests/test_options.py +++ b/xarray/tests/test_options.py @@ -82,7 +82,7 @@ def create_test_dataarray_attrs(seed=0, var='var1'): return da -class TestAttrRetention: +class TestAttrRetention(object): def test_dataset_attr_retention(self): # Use .mean() for all tests: a typical reduction operation ds = create_test_dataset_attrs() From 5094c1364ff10c2286011fae299af2aba552a208 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 28 Oct 2018 10:22:46 +0000 Subject: [PATCH 10/13] Fixes to documentation --- doc/whats-new.rst | 2 -- xarray/core/options.py | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0704f00bb32..19b50797d24 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -85,8 +85,6 @@ Enhancements ``xarray.set_options(keep_attrs=True)``, and the default is to use the old behaviour. By `Tom Nicholas `_. - - By `Spencer Clark `_. - Added a new backend for the GRIB file format based on ECMWF *cfgrib* python driver and *ecCodes* C-library. 
(:issue:`2475`) By `Alessandro Amici `_, diff --git a/xarray/core/options.py b/xarray/core/options.py index 65153a42fe0..96c24ee7f6d 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -53,7 +53,7 @@ def _get_keep_attrs(default): elif global_choice in [True, False]: return global_choice else: - raise ValueError('The global option keep_attrs is set to an invalid value.') + raise ValueError("The global option keep_attrs must be one of True, False or 'default'.") class set_options(object): @@ -75,7 +75,7 @@ class set_options(object): - ``cmap_sequential``: colormap to use for nondivergent data plots. Default: ``viridis``. If string, must be matplotlib built-in colormap. Can also be a Colormap object (e.g. mpl.cm.magma) - - ``cmap_divergent``: colormap to use for divergent data plots. + - ``cmap_divergent ``: colormap to use for divergent data plots. Default: ``RdBu_r``. If string, must be matplotlib built-in colormap. Can also be a Colormap object (e.g. mpl.cm.magma) - ``keep_attrs``: rule for whether to keep attributes on xarray @@ -84,7 +84,7 @@ class set_options(object): logic that attrs should only be kept in unambiguous circumstances. Default: ``'default'``. 
-f You can use ``set_options`` either as a context manager: + You can use ``set_options`` either as a context manager: >>> ds = xr.Dataset({'x': np.arange(1000)}) >>> with xr.set_options(display_width=40): From fec2d71a192225d1442282b9f453c6c0bed47b61 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 28 Oct 2018 10:49:13 +0000 Subject: [PATCH 11/13] Removed some unnecessary checks of the global keep_attrs option --- xarray/core/common.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 73890acce54..e303c485523 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -22,17 +22,13 @@ class ImplementsArrayReduce(object): def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, - keep_attrs=None, **kwargs): - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - return self.reduce(func, dim, axis, keep_attrs=keep_attrs, + **kwargs): + return self.reduce(func, dim, axis, skipna=skipna, allow_lazy=True, **kwargs) else: def wrapped_func(self, dim=None, axis=None, - keep_attrs=None, **kwargs): - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - return self.reduce(func, dim, axis, keep_attrs=keep_attrs, + **kwargs): + return self.reduce(func, dim, axis, allow_lazy=True, **kwargs) return wrapped_func @@ -56,18 +52,14 @@ class ImplementsDatasetReduce(object): @classmethod def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: - def wrapped_func(self, dim=None, keep_attrs=None, skipna=None, + def wrapped_func(self, dim=None, skipna=None, **kwargs): - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - return self.reduce(func, dim, keep_attrs=keep_attrs, skipna=skipna, + return self.reduce(func, dim, skipna=skipna, numeric_only=numeric_only, allow_lazy=True, **kwargs) else: - def wrapped_func(self, 
dim=None, keep_attrs=None, **kwargs): - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - return self.reduce(func, dim, keep_attrs=keep_attrs, + def wrapped_func(self, dim=None, **kwargs): + return self.reduce(func, dim, numeric_only=numeric_only, allow_lazy=True, **kwargs) return wrapped_func From 5d5e0a6499f84634c32666e92287d3a3ac76399d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 28 Oct 2018 10:52:52 +0000 Subject: [PATCH 12/13] Removed whitespace typo I just created --- xarray/core/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/options.py b/xarray/core/options.py index 96c24ee7f6d..eb3013d5233 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -75,7 +75,7 @@ class set_options(object): - ``cmap_sequential``: colormap to use for nondivergent data plots. Default: ``viridis``. If string, must be matplotlib built-in colormap. Can also be a Colormap object (e.g. mpl.cm.magma) - - ``cmap_divergent ``: colormap to use for divergent data plots. + - ``cmap_divergent``: colormap to use for divergent data plots. Default: ``RdBu_r``. If string, must be matplotlib built-in colormap. Can also be a Colormap object (e.g. 
mpl.cm.magma) - ``keep_attrs``: rule for whether to keep attributes on xarray From cbf227d4bef2cb270e4ec9cfcee7024625041cec Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 29 Oct 2018 15:18:30 +0000 Subject: [PATCH 13/13] Removed some more unnecessary checks of global keep_attrs option (pointed out by dcherian) --- xarray/core/dataarray.py | 8 -------- xarray/core/dataset.py | 2 +- xarray/core/groupby.py | 4 ---- 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f361331e008..61e0e709c36 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1589,9 +1589,6 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs): summarized data and the indicated dimension(s) removed. """ - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs) return self._replace_maybe_drop_dims(var) @@ -2316,9 +2313,6 @@ def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None): numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile """ - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - ds = self._to_temp_dataset().quantile( q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation) return self._from_temp_dataset(ds) @@ -2359,8 +2353,6 @@ def rank(self, dim, pct=False, keep_attrs=None): array([ 1., 2., 3.]) Dimensions without coordinates: x """ - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ccf4bbf7fe1..7bd99968ebb 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2925,7 +2925,7 @@ def apply(self, func, keep_attrs=None, args=(), **kwargs): transform each DataArray `x` in this dataset into another DataArray. 
keep_attrs : bool, optional - If True, the dataset's attributes (`attrs`) will be _getcopied from + If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False, the new object will be returned without attributes. args : tuple, optional diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index df90fc82f11..defe72ab3ee 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -597,15 +597,11 @@ def _reduce_method(cls, func, include_skipna, numeric_only): if include_skipna: def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, skipna=None, keep_attrs=None, **kwargs): - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) return self.reduce(func, dim, axis, keep_attrs=keep_attrs, skipna=skipna, allow_lazy=True, **kwargs) else: def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, keep_attrs=None, **kwargs): - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) return self.reduce(func, dim, axis, keep_attrs=keep_attrs, allow_lazy=True, **kwargs) return wrapped_func