Skip to content

Commit

Permalink
Fix save_to_table with small datasets, better make_toc (#473)
Browse files Browse the repository at this point in the history
<!-- Please ensure the PR fulfills the following requirements! -->
<!-- If this is your first PR, make sure to add your details to the
AUTHORS.rst! -->
### Pull Request Checklist:
- [x] This PR addresses an already opened issue (for bug fixes /
features)
    - This PR fixes #xyz
- [x] (If applicable) Documentation has been added / updated (for bug
fixes / features).
- [x] (If applicable) Tests have been added.
- [x] This PR does not seem to break the templates.
- [x] CHANGELOG.rst has been updated (with summary of main changes).
- [x] Link to issue (:issue:`number`) and pull request (:pull:`number`)
has been added.

### What kind of change does this PR introduce?

* Bugfix: `save_to_table` now works as intended when given DataArrays or
Datasets with a single variable.
* `xs.io.make_toc` now includes the global attributes.

### Does this PR introduce a breaking change?

- No.


### Other information:
  • Loading branch information
RondeauG authored Oct 2, 2024
2 parents ef0f493 + c13543b commit 014e3b5
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 15 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@
Changelog
=========

v0.11.0 (unreleased)
--------------------
Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`).

New features and enhancements
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* ``xs.io.make_toc`` now includes the global attributes of the dataset after the information about the variables (:pull:`473`).

Bug fixes
^^^^^^^^^
* ``xs.io.save_to_table`` now correctly handles the case where the input is a ``DataArray`` or a ``Dataset`` with a single variable (:pull:`473`).

v0.10.0 (2024-09-30)
--------------------
Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Gabriel Rondeau-Genesse (:user:`RondeauG`), Trevor James Smith (:user:`Zeitsperre`).
Expand Down
Binary file modified src/xscen/data/fr/LC_MESSAGES/xscen.mo
Binary file not shown.
3 changes: 3 additions & 0 deletions src/xscen/data/fr/LC_MESSAGES/xscen.po
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ msgstr "Unités"

msgid "Content"
msgstr "Contenu"

msgid "Global attributes"
msgstr "Attributs globaux"
38 changes: 34 additions & 4 deletions src/xscen/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,11 @@ def to_table(
if isinstance(ds, xr.Dataset):
da = ds.to_array(name="data")
if len(ds) == 1:
da = da.isel(variable=0).rename(data=da.variable.values[0])
da = da.isel(variable=0)
da.name = str(da["variable"].values)
da = da.drop_vars("variable")
else:
da = ds

def _ensure_list(seq):
if isinstance(seq, str):
Expand All @@ -689,7 +693,13 @@ def _ensure_list(seq):
row = [d for d in da.dims if d != "variable" and d not in passed_dims]
row = _ensure_list(row)
if column is None:
column = ["variable"] if len(ds) > 1 and "variable" not in passed_dims else []
column = (
["variable"]
if isinstance(ds, xr.Dataset)
and len(ds) > 1
and "variable" not in passed_dims
else []
)
column = _ensure_list(column)
if sheet is None:
sheet = []
Expand All @@ -708,10 +718,10 @@ def _ensure_list(seq):

if coords is not True:
coords = _ensure_list(coords or [])
drop = set(ds.coords.keys()) - set(da.dims) - set(coords)
drop = set(da.coords.keys()) - set(da.dims) - set(coords)
da = da.drop_vars(drop)
else:
coords = list(set(ds.coords.keys()) - set(da.dims))
coords = list(set(da.coords.keys()) - set(da.dims))
if len(coords) > 1 and ("variable" in row or "variable" in sheet):
raise NotImplementedError(
"Keeping auxiliary coords is not implemented when 'variable' is in the row or in the sheets."
Expand Down Expand Up @@ -774,6 +784,26 @@ def make_toc(ds: xr.Dataset | xr.DataArray, loc: str | None = None) -> pd.DataFr
],
).set_index(_("Variable"))
toc.attrs["name"] = _("Content")

# Add global attributes by using a fake variable and description
if len(ds.attrs) > 0:
globattr = pd.DataFrame.from_records(
[
{
_("Variable"): vv,
_("Description"): da,
_("Units"): "",
}
for vv, da in ds.attrs.items()
],
).set_index(_("Variable"))
globattr.attrs["name"] = _("Global attributes")

# Empty row to separate global attributes from variables
toc = pd.concat([toc, pd.DataFrame(index=[""])])
toc = pd.concat([toc, pd.DataFrame(index=[_("Global attributes")])])
toc = pd.concat([toc, globattr])

return toc


Expand Down
111 changes: 100 additions & 11 deletions tests/test_io.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import pytest
import xarray as xr
import xclim as xc

import xscen as xs

Expand Down Expand Up @@ -90,12 +91,24 @@ class TestToTable:
.reset_index("site")
.assign_coords(site=list("abcdef"))
).transpose("season", "time", "site")
ds.attrs = {"foo": "bar", "baz": 1, "qux": 2.0}

@pytest.mark.parametrize(
"multiple, as_dataset", [(True, True), (False, True), (False, False)]
)
def test_normal(self, multiple, as_dataset):
if multiple is False:
if as_dataset:
ds = self.ds[["tas"]].copy()
else:
ds = self.ds["tas"].copy()
else:
ds = self.ds.copy()

def test_normal(self):
# Default
tab = xs.io.to_table(self.ds)
assert tab.shape == (120, 5) # 3 vars + 2 aux coords
assert tab.columns.names == ["variable"]
tab = xs.io.to_table(ds)
assert tab.shape == (120, 5 if multiple else 3) # 3 vars + 2 aux coords
assert tab.columns.names == ["variable"] if multiple else [None]
assert tab.index.names == ["season", "time", "site"]
# Season order is chronological, rather than alphabetical
np.testing.assert_array_equal(
Expand All @@ -105,15 +118,91 @@ def test_normal(self):
["JFM", "AMJ", "JAS", "OND"],
)

# Variable in the index, thus no coords
if multiple:
# Variable in the index, thus no coords
tab = xs.io.to_table(
ds, row=["time", "variable"], column=["season", "site"], coords=False
)
assert tab.shape == (15, 24)
assert tab.columns.names == ["season", "site"]
np.testing.assert_array_equal(
tab.loc[("1993", "pr"), ("JFM",)], ds.pr.sel(time="1993", season="JFM")
)
# Ensure that the coords are not present
assert (
len(
set(tab.index.get_level_values("variable").unique()).difference(
["tas", "pr", "snw"]
)
)
== 0
)

def test_sheet(self):
tab = xs.io.to_table(
self.ds, row=["time", "variable"], column=["season", "site"], coords=False
)
assert tab.shape == (15, 24)
assert tab.columns.names == ["season", "site"]
np.testing.assert_array_equal(
tab.loc[("1993", "pr"), ("JFM",)], self.ds.pr.sel(time="1993", season="JFM")
self.ds,
row=["time", "variable"],
column=["season"],
sheet="site",
coords=False,
)
assert set(tab.keys()) == {("a",), ("b",), ("c",), ("d",), ("e",), ("f",)}
assert tab[("a",)].shape == (15, 4) # 5 time * 3 variable X 4 season

def test_error(self):
    """Check that ``to_table`` rejects invalid row/column layouts.

    Each case pairs the expected exception type and message pattern with
    the keyword arguments that should trigger it. ``row`` is rebuilt for
    every call so that no list object is shared between invocations.
    """
    failing_cases = [
        # "time" is requested both as a row and as a column.
        (
            ValueError,
            "Repeated dimension names.",
            {"column": ["season", "site", "time"]},
        ),
        # "foo" is an unknown name mixed in with the real dimensions.
        (
            ValueError,
            "Passed row, column and sheet",
            {"column": ["season", "site", "foo"]},
        ),
        # Auxiliary coords requested while "variable" is part of the rows.
        (
            NotImplementedError,
            "Keeping auxiliary coords is not implemented when",
            {"column": ["season", "site"], "coords": True},
        ),
    ]

    for expected_exc, message, extra_kwargs in failing_cases:
        with pytest.raises(expected_exc, match=message):
            xs.io.to_table(self.ds, row=["time", "variable"], **extra_kwargs)

@pytest.mark.parametrize("as_dataset", [True, False])
def test_make_toc(self, as_dataset):
    """Check the French table of content built from a Dataset or a DataArray.

    With a Dataset, the table is expected to list the three variables,
    followed by an empty separator row, a "Attributs globaux" header row
    and one row per global attribute. With a DataArray, only the
    variable itself is expected.
    """
    source = self.ds.copy()
    for name in source.data_vars:
        source[name].attrs["long_name"] = f"Long name for {name}"
        source[name].attrs["long_name_fr"] = f"Nom long pour {name}"

    target = source if as_dataset else source["tas"]

    with xc.set_options(metadata_locales="fr"):
        toc = xs.io.make_toc(target)

    if as_dataset:
        expected_index = [
            "tas",
            "pr",
            "snw",
            "",
            "Attributs globaux",
            "foo",
            "baz",
            "qux",
        ]
    else:
        expected_index = ["tas"]

    # The checks below are shared by both cases; only the index differs.
    assert toc.shape == (len(expected_index), 2)
    assert toc.columns.tolist() == ["Description", "Unités"]
    assert toc.index.tolist() == expected_index
    assert toc.loc["tas", "Description"] == "Nom long pour tas"
    assert toc.loc["tas", "Unités"] == "K"


def test_round_bits(datablock_3d):
Expand Down

0 comments on commit 014e3b5

Please sign in to comment.