Skip to content

Commit

Permalink
Merge branch 'main' into numpy2-iris
Browse files Browse the repository at this point in the history
  • Loading branch information
keewis authored Jul 11, 2024
2 parents 9cd4d08 + 7087ca4 commit b506a6c
Show file tree
Hide file tree
Showing 50 changed files with 1,468 additions and 566 deletions.
113 changes: 112 additions & 1 deletion asv_bench/benchmarks/dataset_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import pandas as pd

import xarray as xr
from xarray.backends.api import open_datatree
from xarray.core.datatree import DataTree

from . import _skip_slow, parameterized, randint, randn, requires_dask

Expand All @@ -16,7 +18,6 @@
except ImportError:
pass


os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"

_ENGINES = tuple(xr.backends.list_engines().keys() - {"store"})
Expand Down Expand Up @@ -469,6 +470,116 @@ def create_delayed_write():
return ds.to_netcdf("file.nc", engine="netcdf4", compute=False)


class IONestedDataTree:
    """
    A few examples that benchmark reading/writing a heavily nested netCDF datatree with
    xarray
    """

    # Class-level timing settings (presumably consumed by the asv benchmark
    # runner -- confirm against its documentation).
    timeout = 300.0
    repeat = 1
    number = 5

    def make_datatree(self, nchildren=10):
        """Build the benchmark dataset and the nested tree that reuses it.

        Populates ``self.ds`` with three variables (``foo`` and ``bar`` on
        ``(time, lon, lat)``, ``baz`` on ``(lon, lat)``), stores chunking and
        indexer dictionaries on the instance, and finally assembles
        ``self.dtree`` with the following layout for every top-level group::

            group_<i>                            -> self.ds
            group_<i>/subgroup_1                 -> empty Dataset
            group_<i>/subgroup_2                 -> Dataset with variable "a"
            group_<i>/subgroup_2/sub-subgroup_1  -> self.ds

        Parameters
        ----------
        nchildren : int, default: 10
            Number of top-level ``group_<i>`` nodes to create.
        """
        # multiple Dataset
        self.ds = xr.Dataset()
        self.nt = 1000
        self.nx = 90
        self.ny = 45
        self.nchildren = nchildren

        # Chunk sizes are element counts, so use integer division: true
        # division would store floats (250.0, 30.0, 15.0) here.
        self.block_chunks = {
            "time": self.nt // 4,
            "lon": self.nx // 3,
            "lat": self.ny // 3,
        }

        self.time_chunks = {"time": int(self.nt / 36)}

        times = pd.date_range("1970-01-01", periods=self.nt, freq="D")
        lons = xr.DataArray(
            np.linspace(0, 360, self.nx),
            dims=("lon",),
            attrs={"units": "degrees east", "long_name": "longitude"},
        )
        lats = xr.DataArray(
            np.linspace(-90, 90, self.ny),
            dims=("lat",),
            attrs={"units": "degrees north", "long_name": "latitude"},
        )
        # NOTE: the variables below are created in a fixed order (foo, bar,
        # baz, then the indexers) so the sequence of random draws is stable.
        self.ds["foo"] = xr.DataArray(
            randn((self.nt, self.nx, self.ny), frac_nan=0.2),
            coords={"lon": lons, "lat": lats, "time": times},
            dims=("time", "lon", "lat"),
            name="foo",
            attrs={"units": "foo units", "description": "a description"},
        )
        self.ds["bar"] = xr.DataArray(
            randn((self.nt, self.nx, self.ny), frac_nan=0.2),
            coords={"lon": lons, "lat": lats, "time": times},
            dims=("time", "lon", "lat"),
            name="bar",
            attrs={"units": "bar units", "description": "a description"},
        )
        self.ds["baz"] = xr.DataArray(
            randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32),
            coords={"lon": lons, "lat": lats},
            dims=("lon", "lat"),
            name="baz",
            attrs={"units": "baz units", "description": "a description"},
        )

        self.ds.attrs = {"history": "created for xarray benchmarking"}

        # Indexers stored on the instance; they are not used inside this
        # method.
        self.oinds = {
            "time": randint(0, self.nt, 120),
            "lon": randint(0, self.nx, 20),
            "lat": randint(0, self.ny, 10),
        }
        self.vinds = {
            "time": xr.DataArray(randint(0, self.nt, 120), dims="x"),
            "lon": xr.DataArray(randint(0, self.nx, 120), dims="x"),
            "lat": slice(3, 20),
        }

        # Path -> Dataset mappings, one entry per top-level group; they are
        # merged into a single dict and handed to ``DataTree.from_dict``.
        root = {f"group_{group}": self.ds for group in range(self.nchildren)}
        nested_tree1 = {
            f"group_{group}/subgroup_1": xr.Dataset() for group in range(self.nchildren)
        }
        nested_tree2 = {
            f"group_{group}/subgroup_2": xr.DataArray(np.arange(1, 10)).to_dataset(
                name="a"
            )
            for group in range(self.nchildren)
        }
        nested_tree3 = {
            f"group_{group}/subgroup_2/sub-subgroup_1": self.ds
            for group in range(self.nchildren)
        }
        dtree = root | nested_tree1 | nested_tree2 | nested_tree3
        self.dtree = DataTree.from_dict(dtree)


class IOReadDataTreeNetCDF4(IONestedDataTree):
    """Time opening and loading the nested datatree from a netCDF file."""

    def setup(self):
        """Create the nested tree and write it to the file the benchmarks read."""
        # TODO: Lazily skipped in CI as it is very demanding and slow.
        # Improve times and remove errors.
        _skip_slow()

        requires_dask()

        self.make_datatree()
        self.format = "NETCDF4"
        self.filepath = "datatree.nc4.nc"
        self.dtree.to_netcdf(filepath=self.filepath)

    def time_load_datatree_netcdf4(self):
        """Open the file with the netcdf4 engine and call ``load`` on the result."""
        tree = open_datatree(self.filepath, engine="netcdf4")
        tree.load()

    def time_open_datatree_netcdf4(self):
        """Open the file with the netcdf4 engine without calling ``load``."""
        open_datatree(self.filepath, engine="netcdf4")


class IOWriteNetCDFDask:
timeout = 60
repeat = 1
Expand Down
3 changes: 1 addition & 2 deletions ci/install-upstream-wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ python -m pip install \
numpy \
scipy \
matplotlib \
pandas \
h5py
pandas
# for some reason pandas depends on pyarrow already.
# Remove once a `pyarrow` version compiled with `numpy>=2.0` is on `conda-forge`
python -m pip install \
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/all-but-dask.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dependencies:
- pandas
- pint>=0.22
- pip
- pydap
# - pydap
- pytest
- pytest-cov
- pytest-env
Expand Down
3 changes: 2 additions & 1 deletion ci/requirements/doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
- nbsphinx
- netcdf4>=1.5
- numba
- numpy>=1.21
- numpy>=1.21,<2
- packaging>=21.3
- pandas>=1.4,!=2.1.0
- pooch
Expand All @@ -42,5 +42,6 @@ dependencies:
- sphinxext-rediraffe
- zarr>=2.10
- pip:
- sphinxcontrib-mermaid
# relative to this file. Needs to be editable to be accepted.
- -e ../..
2 changes: 1 addition & 1 deletion ci/requirements/environment-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ dependencies:
# - pint>=0.22
- pip
- pre-commit
- pydap
# - pydap
- pytest
- pytest-cov
- pytest-env
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ dependencies:
- pooch
- pre-commit
- pyarrow # pandas raises a deprecation warning without this, breaking doctests
- pydap
# - pydap
- pytest
- pytest-cov
- pytest-env
Expand Down
5 changes: 5 additions & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
)

nbsphinx_allow_errors = False
nbsphinx_requirejs_path = ""

# -- General configuration ------------------------------------------------

Expand All @@ -68,7 +69,9 @@
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.

extensions = [
"sphinxcontrib.mermaid",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.intersphinx",
Expand Down Expand Up @@ -175,6 +178,8 @@
"pd.NaT": "~pandas.NaT",
}

# mermaid config
mermaid_version = "10.9.1"

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates", sphinx_autosummary_accessors.templates_path]
Expand Down
75 changes: 75 additions & 0 deletions doc/help-diagram.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
Getting Help
============

Navigating the wealth of resources available for Xarray can be overwhelming.
We've created this flow chart to help guide you towards the best way to get help, depending on what you're working towards.
The links to each resource are provided below the diagram.
Regardless of how you interact with us, we're always thrilled to hear from you!

.. mermaid::
:alt: Flowchart illustrating the different ways to access help using or contributing to Xarray.

flowchart TD
intro[Welcome to Xarray! How can we help?]:::quesNodefmt
usage(["fa:fa-chalkboard-user Xarray Tutorials
fab:fa-readme Xarray Docs
fab:fa-google Google/fab:fa-stack-overflow Stack Exchange
fa:fa-robot Ask AI/a Large Language Model (LLM)"]):::ansNodefmt
API([fab:fa-readme Xarray Docs
fab:fa-readme extension's docs]):::ansNodefmt
help([fab:fa-github Xarray Discussions
fab:fa-discord Xarray Discord
fa:fa-users Xarray Office Hours
fa:fa-globe Pangeo Discourse]):::ansNodefmt
bug([Report and Propose here:
fab:fa-github Xarray Issues]):::ansNodefmt
contrib([fa:fa-book-open Xarray Contributor's Guide]):::ansNodefmt
pr(["fab:fa-github Pull Request (PR)"]):::ansNodefmt
dev([fab:fa-github Comment on your PR
fa:fa-users Developer's Meeting]):::ansNodefmt
report[Thanks for letting us know!]:::quesNodefmt
merged[fa:fa-hands-clapping Your PR was merged.
Thanks for contributing to Xarray!]:::quesNodefmt


intro -->|How do I use Xarray?| usage
usage -->|"with extensions (like Dask)"| API

usage -->|I'd like some more help| help
intro -->|I found a bug| bug
intro -->|I'd like to make a small change| contrib
subgraph bugcontrib[Bugs and Contributions]
bug
contrib
bug -->|I just wanted to tell you| report
bug<-->|I'd like to fix the bug!| contrib
pr -->|my PR was approved| merged
end


intro -->|I wish Xarray could...| bug


pr <-->|my PR is quiet| dev
contrib -->pr

classDef quesNodefmt fill:#9DEEF4,stroke:#206C89

classDef ansNodefmt fill:#FFAA05,stroke:#E37F17

classDef boxfmt fill:#FFF5ED,stroke:#E37F17
class bugcontrib boxfmt

linkStyle default font-size:20pt,color:#206C89


- `Xarray Tutorials <https://tutorial.xarray.dev/>`__
- `Xarray Docs <https://docs.xarray.dev/en/stable/>`__
- `Google/Stack Exchange <https://stackoverflow.com/questions/tagged/python-xarray>`__
- `Xarray Discussions <https://github.com/pydata/xarray/discussions>`__
- `Xarray Discord <https://discord.com/invite/wEKPCt4PDu>`__
- `Xarray Office Hours <https://github.com/pydata/xarray/discussions/categories/office-hours>`__
- `Pangeo Discourse <https://discourse.pangeo.io/>`__
- `Xarray Issues <https://github.com/pydata/xarray/issues>`__
- `Xarray Contributor's Guide <https://docs.xarray.dev/en/stable/contributing.html>`__
- `Developer's Meeting <https://docs.xarray.dev/en/stable/developers-meeting.html>`__
4 changes: 3 additions & 1 deletion doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ efficient, and fun!
`Releases <https://github.com/pydata/xarray/releases>`__ |
`Stack Overflow <https://stackoverflow.com/questions/tagged/python-xarray>`__ |
`Mailing List <https://groups.google.com/g/xarray>`__ |
`Blog <https://xarray.dev/blog>`__
`Blog <https://xarray.dev/blog>`__ |
`Tutorials <https://tutorial.xarray.dev/>`__


.. grid:: 1 1 2 2
Expand Down Expand Up @@ -65,6 +66,7 @@ efficient, and fun!
Tutorials & Videos <tutorials-and-videos>
API Reference <api>
How do I ... <howdoi>
Getting Help <help-diagram>
Ecosystem <ecosystem>

.. toctree::
Expand Down
31 changes: 31 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ New Features
~~~~~~~~~~~~
- Allow chunking for arrays with duplicated dimension names (:issue:`8759`, :pull:`9099`).
By `Martin Raspaud <https://github.com/mraspaud>`_.
- Extract the source url from fsspec objects (:issue:`9142`, :pull:`8923`).
By `Justus Magin <https://github.com/keewis>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand All @@ -35,11 +37,40 @@ Deprecations

Bug fixes
~~~~~~~~~
- Fix scatter plot broadcasting unnecessarily. (:issue:`9129`, :pull:`9206`)
By `Jimmy Westling <https://github.com/illviljan>`_.
- Don't convert custom indexes to ``pandas`` indexes when computing a diff (:pull:`9157`)
By `Justus Magin <https://github.com/keewis>`_.
- Make :py:func:`testing.assert_allclose` work with numpy 2.0 (:issue:`9165`, :pull:`9166`).
By `Pontus Lurcock <https://github.com/pont-us>`_.
- Allow diffing objects with array attributes on variables (:issue:`9153`, :pull:`9169`).
By `Justus Magin <https://github.com/keewis>`_.
- ``numpy>=2`` compatibility in the ``netcdf4`` backend (:pull:`9136`).
By `Justus Magin <https://github.com/keewis>`_ and `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
- Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`).
By `Justus Magin <https://github.com/keewis>`_.
- Address regression introduced in :pull:`9002` that prevented objects returned
by :py:meth:`DataArray.convert_calendar` from being indexed by a time index in
certain circumstances (:issue:`9138`, :pull:`9192`). By `Mark Harfouche
<https://github.com/hmaarrfk>`_ and `Spencer Clark
<https://github.com/spencerkclark>`_.

- Fix static typing of tolerance arguments by allowing ``str`` type (:issue:`8892`, :pull:`9194`).
By `Michael Niklas <https://github.com/headtr1ck>`_.
- Dark themes are now properly detected for ``html[data-theme=dark]``-tags (:pull:`9200`).
By `Dieter Werthmüller <https://github.com/prisae>`_.
- Reductions no longer fail for ``np.complex_`` dtype arrays when numbagg is
installed.
By `Maximilian Roos <https://github.com/max-sixty>`_.

Documentation
~~~~~~~~~~~~~

- Adds a flow-chart diagram to help users navigate help resources (`Discussion #8990 <https://github.com/pydata/xarray/discussions/8990>`_).
By `Jessica Scheick <https://github.com/jessicas11>`_.
- Improvements to Zarr & chunking docs (:pull:`9139`, :pull:`9140`, :pull:`9132`)
By `Maximilian Roos <https://github.com/max-sixty>`_.


Internal Changes
~~~~~~~~~~~~~~~~
Expand Down
Loading

0 comments on commit b506a6c

Please sign in to comment.