From 2c23117ebbbaeabc107fd607e80f39faa7dadbd3 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Thu, 9 Nov 2023 18:56:55 -0800 Subject: [PATCH 1/2] Rename `to_array` to `to_dataarray` This is a _very_ minor nit, so I'm not sure it's worth changing. What do others think? (I would have opened an issue but it's just as quick to just do the PR) --- doc/api.rst | 2 +- doc/howdoi.rst | 2 +- doc/user-guide/reshaping.rst | 12 ++++++------ doc/whats-new.rst | 12 +++++++++--- xarray/core/common.py | 2 +- xarray/core/computation.py | 2 +- xarray/core/dataset.py | 14 ++++++++++---- xarray/core/groupby.py | 4 ++++ xarray/tests/test_concat.py | 8 ++++---- xarray/tests/test_dask.py | 16 ++++++++-------- xarray/tests/test_dataarray.py | 4 ++-- xarray/tests/test_dataset.py | 6 +++--- xarray/tests/test_groupby.py | 6 +++--- xarray/tests/test_rolling.py | 2 +- xarray/tests/test_sparse.py | 4 ++-- 15 files changed, 56 insertions(+), 40 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 96b4864804f..935d86f2c18 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -602,7 +602,7 @@ Dataset methods Dataset.as_numpy Dataset.from_dataframe Dataset.from_dict - Dataset.to_array + Dataset.to_dataarray Dataset.to_dataframe Dataset.to_dask_dataframe Dataset.to_dict diff --git a/doc/howdoi.rst b/doc/howdoi.rst index 8cc4e9939f2..97b0872fdc4 100644 --- a/doc/howdoi.rst +++ b/doc/howdoi.rst @@ -36,7 +36,7 @@ How do I ... 
* - rename a variable, dimension or coordinate - :py:meth:`Dataset.rename`, :py:meth:`DataArray.rename`, :py:meth:`Dataset.rename_vars`, :py:meth:`Dataset.rename_dims`, * - convert a DataArray to Dataset or vice versa - - :py:meth:`DataArray.to_dataset`, :py:meth:`Dataset.to_array`, :py:meth:`Dataset.to_stacked_array`, :py:meth:`DataArray.to_unstacked_dataset` + - :py:meth:`DataArray.to_dataset`, :py:meth:`Dataset.to_dataarray`, :py:meth:`Dataset.to_stacked_array`, :py:meth:`DataArray.to_unstacked_dataset` * - extract variables that have certain attributes - :py:meth:`Dataset.filter_by_attrs` * - extract the underlying array (e.g. NumPy or Dask arrays) diff --git a/doc/user-guide/reshaping.rst b/doc/user-guide/reshaping.rst index d0b72322218..14b343549e2 100644 --- a/doc/user-guide/reshaping.rst +++ b/doc/user-guide/reshaping.rst @@ -59,11 +59,11 @@ use :py:meth:`~xarray.DataArray.squeeze` Converting between datasets and arrays -------------------------------------- -To convert from a Dataset to a DataArray, use :py:meth:`~xarray.Dataset.to_array`: +To convert from a Dataset to a DataArray, use :py:meth:`~xarray.Dataset.to_dataarray`: .. ipython:: python - arr = ds.to_array() + arr = ds.to_dataarray() arr This method broadcasts all data variables in the dataset against each other, @@ -77,7 +77,7 @@ To convert back from a DataArray to a Dataset, use arr.to_dataset(dim="variable") -The broadcasting behavior of ``to_array`` means that the resulting array +The broadcasting behavior of ``to_dataarray`` means that the resulting array includes the union of data variable dimensions: .. ipython:: python @@ -88,7 +88,7 @@ includes the union of data variable dimensions: ds2 # the resulting array has 6 elements - ds2.to_array() + ds2.to_dataarray() Otherwise, the result could not be represented as an orthogonal array. @@ -161,8 +161,8 @@ arrays as inputs. 
For datasets with only one variable, we only need ``stack`` and ``unstack``, but combining multiple variables in a :py:class:`xarray.Dataset` is more complicated. If the variables in the dataset have matching numbers of dimensions, we can call -:py:meth:`~xarray.Dataset.to_array` and then stack along the the new coordinate. -But :py:meth:`~xarray.Dataset.to_array` will broadcast the dataarrays together, +:py:meth:`~xarray.Dataset.to_dataarray` and then stack along the new coordinate. +But :py:meth:`~xarray.Dataset.to_dataarray` will broadcast the dataarrays together, which will effectively tile the lower dimensional variable along the missing dimensions. The method :py:meth:`xarray.Dataset.to_stacked_array` allows combining variables of differing dimensions without this wasteful copying while diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3a9be494db2..3380392e70d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,12 @@ Deprecations this was one place in the API where dimension positions were used. (:pull:`8341`) By `Maximilian Roos <https://github.com/max-sixty>`_. +- Rename :py:meth:`Dataset.to_array` to :py:meth:`Dataset.to_dataarray` for + consistency with :py:meth:`DataArray.to_dataset` & + :py:func:`open_dataarray` functions. This is a "soft" deprecation — the + existing methods work and don't raise any warnings, given the relatively small + benefits of the change. + By `Maximilian Roos <https://github.com/max-sixty>`_. 
Bug fixes ~~~~~~~~~ @@ -6704,7 +6710,7 @@ Backwards incompatible changes Enhancements ~~~~~~~~~~~~ -- New ``xray.Dataset.to_array`` and enhanced +- New ``xray.Dataset.to_dataarray`` and enhanced ``xray.DataArray.to_dataset`` methods make it easy to switch back and forth between arrays and datasets: @@ -6715,8 +6721,8 @@ Enhancements coords={"c": 42}, attrs={"Conventions": "None"}, ) - ds.to_array() - ds.to_array().to_dataset(dim="variable") + ds.to_dataarray() + ds.to_dataarray().to_dataset(dim="variable") - New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: diff --git a/xarray/core/common.py b/xarray/core/common.py index ab8a4d84261..fef8adb101a 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1173,7 +1173,7 @@ def _dataset_indexer(dim: Hashable) -> DataArray: var for var in cond if dim not in cond[var].dims ) keepany = cond_wdim.any(dim=(d for d in cond.dims.keys() if d != dim)) - return keepany.to_array().any("variable") + return keepany.to_dataarray().any("variable") _get_indexer = ( _dataarray_indexer if isinstance(cond, DataArray) else _dataset_indexer diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 23d54a5779c..08c9ed290f1 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1603,7 +1603,7 @@ def cross( >>> ds_a = xr.Dataset(dict(x=("dim_0", [1]), y=("dim_0", [2]), z=("dim_0", [3]))) >>> ds_b = xr.Dataset(dict(x=("dim_0", [4]), y=("dim_0", [5]), z=("dim_0", [6]))) >>> c = xr.cross( - ... ds_a.to_array("cartesian"), ds_b.to_array("cartesian"), dim="cartesian" + ... ds_a.to_dataarray("cartesian"), ds_b.to_dataarray("cartesian"), dim="cartesian" ... 
) >>> c.to_dataset(dim="cartesian") diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 10deea5f62b..9c30cca5e08 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1502,7 +1502,7 @@ def __array__(self, dtype=None): "cannot directly convert an xarray.Dataset into a " "numpy array. Instead, create an xarray.DataArray " "first, either with indexing on the Dataset or by " - "invoking the `to_array()` method." + "invoking the `to_dataarray()` method." ) @property @@ -5258,7 +5258,7 @@ def to_stacked_array( """Combine variables of differing dimensionality into a DataArray without broadcasting. - This method is similar to Dataset.to_array but does not broadcast the + This method is similar to Dataset.to_dataarray but does not broadcast the variables. Parameters @@ -5287,7 +5287,7 @@ def to_stacked_array( See Also -------- - Dataset.to_array + Dataset.to_dataarray Dataset.stack DataArray.to_unstacked_dataset @@ -7017,7 +7017,7 @@ def assign( return data - def to_array( + def to_dataarray( self, dim: Hashable = "variable", name: Hashable | None = None ) -> DataArray: """Convert this dataset into an xarray.DataArray @@ -7054,6 +7054,12 @@ def to_array( return DataArray._construct_direct(variable, coords, name, indexes) + def to_array( + self, dim: Hashable = "variable", name: Hashable | None = None + ) -> DataArray: + """Deprecated version of to_dataarray""" + return self.to_dataarray(dim=dim, name=name) + def _normalize_dim_order( self, dim_order: Sequence[Hashable] | None = None ) -> dict[Hashable, int]: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 788e1efa80b..8c81d3e6a96 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -251,6 +251,10 @@ def to_dataarray(self) -> DataArray: data=self.data, dims=(self.name,), coords=self.coords, name=self.name ) + def to_array(self) -> DataArray: + """Deprecated version of to_dataarray.""" + return self.to_dataarray() + T_Group = Union["T_DataArray", "IndexVariable", 
_DummyGroup] diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 11d0d38594d..92415631748 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -1070,10 +1070,10 @@ def test_concat_fill_value(self, fill_value) -> None: def test_concat_join_kwarg(self) -> None: ds1 = Dataset( {"a": (("x", "y"), [[0]])}, coords={"x": [0], "y": [0]} - ).to_array() + ).to_dataarray() ds2 = Dataset( {"a": (("x", "y"), [[0]])}, coords={"x": [1], "y": [0.0001]} - ).to_array() + ).to_dataarray() expected: dict[JoinOptions, Any] = {} expected["outer"] = Dataset( @@ -1101,7 +1101,7 @@ def test_concat_join_kwarg(self) -> None: for join in expected: actual = concat([ds1, ds2], join=join, dim="x") - assert_equal(actual, expected[join].to_array()) + assert_equal(actual, expected[join].to_dataarray()) def test_concat_combine_attrs_kwarg(self) -> None: da1 = DataArray([0], coords=[("x", [0])], attrs={"b": 42}) @@ -1224,7 +1224,7 @@ def test_concat_preserve_coordinate_order() -> None: def test_concat_typing_check() -> None: ds = Dataset({"foo": 1}, {"bar": 2}) - da = Dataset({"foo": 3}, {"bar": 4}).to_array(dim="foo") + da = Dataset({"foo": 3}, {"bar": 4}).to_dataarray(dim="foo") # concatenate a list of non-homogeneous types must raise TypeError with pytest.raises( diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 1c2511427ac..c2a77c97d85 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -608,11 +608,11 @@ def test_to_dataset_roundtrip(self): v = self.lazy_array expected = u.assign_coords(x=u["x"]) - self.assertLazyAndEqual(expected, v.to_dataset("x").to_array("x")) + self.assertLazyAndEqual(expected, v.to_dataset("x").to_dataarray("x")) def test_merge(self): def duplicate_and_merge(array): - return xr.merge([array, array.rename("bar")]).to_array() + return xr.merge([array, array.rename("bar")]).to_dataarray() expected = duplicate_and_merge(self.eager_array) actual = duplicate_and_merge(self.lazy_array) 
@@ -1306,12 +1306,12 @@ def test_map_blocks_kwargs(obj): assert_identical(actual, expected) -def test_map_blocks_to_array(map_ds): +def test_map_blocks_to_dataarray(map_ds): with raise_if_dask_computes(): - actual = xr.map_blocks(lambda x: x.to_array(), map_ds) + actual = xr.map_blocks(lambda x: x.to_dataarray(), map_ds) - # to_array does not preserve name, so cannot use assert_identical - assert_equal(actual, map_ds.to_array()) + # to_dataarray does not preserve name, so cannot use assert_identical + assert_equal(actual, map_ds.to_dataarray()) @pytest.mark.parametrize( @@ -1376,8 +1376,8 @@ def test_map_blocks_template_convert_object(): assert_identical(actual, template) ds = da.to_dataset() - func = lambda x: x.to_array().isel(x=[1]) - template = ds.to_array().isel(x=[1, 5, 9]) + func = lambda x: x.to_dataarray().isel(x=[1]) + template = ds.to_dataarray().isel(x=[1, 5, 9]) with raise_if_dask_computes(): actual = xr.map_blocks(func, ds, template=template) assert_identical(actual, template) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 26537766f4d..1fbb834b679 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3801,7 +3801,7 @@ def test_to_dataset_split(self) -> None: with pytest.raises(TypeError): array.to_dataset("x", name="foo") - roundtripped = actual.to_array(dim="x") + roundtripped = actual.to_dataarray(dim="x") assert_identical(array, roundtripped) array = DataArray([1, 2, 3], dims="x") @@ -3818,7 +3818,7 @@ def test_to_dataset_retains_keys(self) -> None: array = DataArray([1, 2, 3], coords=[("x", dates)], attrs={"a": 1}) # convert to dateset and back again - result = array.to_dataset("x").to_array(dim="x") + result = array.to_dataset("x").to_dataarray(dim="x") assert_equal(array, result) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 687aae8f1dc..af4ede15fa4 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4569,7 +4569,7 @@ 
def test_squeeze_drop(self) -> None: selected = data.squeeze(drop=True) assert_identical(data, selected) - def test_to_array(self) -> None: + def test_to_dataarray(self) -> None: ds = Dataset( {"a": 1, "b": ("x", [1, 2, 3])}, coords={"c": 42}, @@ -4579,10 +4579,10 @@ def test_to_array(self) -> None: coords = {"c": 42, "variable": ["a", "b"]} dims = ("variable", "x") expected = DataArray(data, coords, dims, attrs=ds.attrs) - actual = ds.to_array() + actual = ds.to_dataarray() assert_identical(expected, actual) - actual = ds.to_array("abc", name="foo") + actual = ds.to_dataarray("abc", name="foo") expected = expected.rename({"variable": "abc"}).rename("foo") assert_identical(expected, actual) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 320ba999318..8afdf95a082 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -600,19 +600,19 @@ def test_groupby_grouping_errors() -> None: with pytest.raises( ValueError, match=r"None of the data falls within bins with edges" ): - dataset.to_array().groupby_bins("x", bins=[0.1, 0.2, 0.3]) + dataset.to_dataarray().groupby_bins("x", bins=[0.1, 0.2, 0.3]) with pytest.raises(ValueError, match=r"All bin edges are NaN."): dataset.groupby_bins("x", bins=[np.nan, np.nan, np.nan]) with pytest.raises(ValueError, match=r"All bin edges are NaN."): - dataset.to_array().groupby_bins("x", bins=[np.nan, np.nan, np.nan]) + dataset.to_dataarray().groupby_bins("x", bins=[np.nan, np.nan, np.nan]) with pytest.raises(ValueError, match=r"Failed to group data."): dataset.groupby(dataset.foo * np.nan) with pytest.raises(ValueError, match=r"Failed to group data."): - dataset.to_array().groupby(dataset.foo * np.nan) + dataset.to_dataarray().groupby(dataset.foo * np.nan) def test_groupby_reduce_dimension_error(array) -> None: diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index 3b213db0b88..cb7b723a208 100644 --- a/xarray/tests/test_rolling.py +++ 
b/xarray/tests/test_rolling.py @@ -631,7 +631,7 @@ def test_rolling_construct(self, center: bool, window: int) -> None: ds_rolling_mean = ds_rolling.construct("window", stride=2, fill_value=0.0).mean( "window" ) - assert (ds_rolling_mean.isnull().sum() == 0).to_array(dim="vars").all() + assert (ds_rolling_mean.isnull().sum() == 0).to_dataarray(dim="vars").all() assert (ds_rolling_mean["x"] == 0.0).sum() >= 0 @pytest.mark.parametrize("center", (True, False)) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 489836b70fd..5b75c10631a 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -578,7 +578,7 @@ def setUp(self): def test_to_dataset_roundtrip(self): x = self.sp_xr - assert_equal(x, x.to_dataset("x").to_array("x")) + assert_equal(x, x.to_dataset("x").to_dataarray("x")) def test_align(self): a1 = xr.DataArray( @@ -830,7 +830,7 @@ def test_reindex(self): @pytest.mark.xfail def test_merge(self): x = self.sp_xr - y = xr.merge([x, x.rename("bar")]).to_array() + y = xr.merge([x, x.rename("bar")]).to_dataarray() assert isinstance(y, sparse.SparseArray) @pytest.mark.xfail From 9f3c2b17249b4f564034b564a2f3e46a3c79d710 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 10 Nov 2023 02:58:58 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/computation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 08c9ed290f1..0c5c9d6d5cb 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1603,7 +1603,9 @@ def cross( >>> ds_a = xr.Dataset(dict(x=("dim_0", [1]), y=("dim_0", [2]), z=("dim_0", [3]))) >>> ds_b = xr.Dataset(dict(x=("dim_0", [4]), y=("dim_0", [5]), z=("dim_0", [6]))) >>> c = xr.cross( - ... 
ds_a.to_dataarray("cartesian"), ds_b.to_dataarray("cartesian"), dim="cartesian" + ... ds_a.to_dataarray("cartesian"), + ... ds_b.to_dataarray("cartesian"), + ... dim="cartesian", ... ) >>> c.to_dataset(dim="cartesian")