Skip to content

Commit

Permalink
Enable numbagg for reductions (#8316)
Browse files Browse the repository at this point in the history
Co-authored-by: Anderson Banihirwe <[email protected]>
  • Loading branch information
dcherian and andersy005 authored Oct 18, 2023
1 parent 087fe45 commit ae41d82
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 15 deletions.
57 changes: 42 additions & 15 deletions xarray/core/nputils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
import pandas as pd
from numpy.core.multiarray import normalize_axis_index # type: ignore[attr-defined]
from packaging.version import Version

# remove once numpy 2.0 is the oldest supported version
try:
Expand All @@ -18,11 +19,20 @@
try:
import bottleneck as bn

_USE_BOTTLENECK = True
_BOTTLENECK_AVAILABLE = True
except ImportError:
# use numpy methods instead
bn = np
_USE_BOTTLENECK = False
_BOTTLENECK_AVAILABLE = False

try:
import numbagg

_HAS_NUMBAGG = Version(numbagg.__version__) >= Version("0.5.0")
except ImportError:
# use numpy methods instead
numbagg = np
_HAS_NUMBAGG = False


def _select_along_axis(values, idx, axis):
Expand Down Expand Up @@ -161,13 +171,30 @@ def __setitem__(self, key, value):
self._array[key] = np.moveaxis(value, vindex_positions, mixed_positions)


def _create_bottleneck_method(name, npmodule=np):
def _create_method(name, npmodule=np):
def f(values, axis=None, **kwargs):
dtype = kwargs.get("dtype", None)
bn_func = getattr(bn, name, None)
nba_func = getattr(numbagg, name, None)

if (
_USE_BOTTLENECK
_HAS_NUMBAGG
and OPTIONS["use_numbagg"]
and isinstance(values, np.ndarray)
and nba_func is not None
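# numbagg uses ddof=1 only, but numpy uses ddof=0 by default
# NOTE(review): as written, the next condition is False for every name that
# contains neither "var" nor "std", so only nanvar/nanstd with ddof=1 can
# reach the numbagg branch -- confirm whether the intent was
# `("var" not in name and "std" not in name) or kwargs.get("ddof", 0) == 1`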
and (("var" in name or "std" in name) and kwargs.get("ddof", 0) == 1)
# TODO: bool?
and values.dtype.kind in "uifc"
# and values.dtype.isnative
and (dtype is None or np.dtype(dtype) == values.dtype)
):
# numbagg does not accept the dtype and ddof keyword arguments, so drop them
kwargs.pop("dtype", None)
kwargs.pop("ddof", None)
result = nba_func(values, axis=axis, **kwargs)
elif (
_BOTTLENECK_AVAILABLE
and OPTIONS["use_bottleneck"]
and isinstance(values, np.ndarray)
and bn_func is not None
Expand Down Expand Up @@ -233,14 +260,14 @@ def least_squares(lhs, rhs, rcond=None, skipna=False):
return coeffs, residuals


nanmin = _create_bottleneck_method("nanmin")
nanmax = _create_bottleneck_method("nanmax")
nanmean = _create_bottleneck_method("nanmean")
nanmedian = _create_bottleneck_method("nanmedian")
nanvar = _create_bottleneck_method("nanvar")
nanstd = _create_bottleneck_method("nanstd")
nanprod = _create_bottleneck_method("nanprod")
nancumsum = _create_bottleneck_method("nancumsum")
nancumprod = _create_bottleneck_method("nancumprod")
nanargmin = _create_bottleneck_method("nanargmin")
nanargmax = _create_bottleneck_method("nanargmax")
nanmin = _create_method("nanmin")
nanmax = _create_method("nanmax")
nanmean = _create_method("nanmean")
nanmedian = _create_method("nanmedian")
nanvar = _create_method("nanvar")
nanstd = _create_method("nanstd")
nanprod = _create_method("nanprod")
nancumsum = _create_method("nancumsum")
nancumprod = _create_method("nancumprod")
nanargmin = _create_method("nanargmin")
nanargmax = _create_method("nanargmax")
7 changes: 7 additions & 0 deletions xarray/core/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"keep_attrs",
"warn_for_unclosed_files",
"use_bottleneck",
"use_numbagg",
"use_flox",
]

Expand All @@ -50,6 +51,7 @@ class T_Options(TypedDict):
warn_for_unclosed_files: bool
use_bottleneck: bool
use_flox: bool
use_numbagg: bool


OPTIONS: T_Options = {
Expand All @@ -72,6 +74,7 @@ class T_Options(TypedDict):
"warn_for_unclosed_files": False,
"use_bottleneck": True,
"use_flox": True,
"use_numbagg": True,
}

_JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"])
Expand All @@ -98,6 +101,7 @@ def _positive_integer(value: int) -> bool:
"file_cache_maxsize": _positive_integer,
"keep_attrs": lambda choice: choice in [True, False, "default"],
"use_bottleneck": lambda value: isinstance(value, bool),
"use_numbagg": lambda value: isinstance(value, bool),
"use_flox": lambda value: isinstance(value, bool),
"warn_for_unclosed_files": lambda value: isinstance(value, bool),
}
Expand Down Expand Up @@ -230,6 +234,9 @@ class set_options:
use_flox : bool, default: True
Whether to use ``numpy_groupies`` and ``flox`` to
accelerate groupby and resampling reductions.
use_numbagg : bool, default: True
Whether to use ``numbagg`` to accelerate reductions.
Takes precedence over ``use_bottleneck`` when both are True.
warn_for_unclosed_files : bool, default: False
Whether or not to issue a warning when unclosed files are
deallocated. This is mostly useful for debugging.
Expand Down

0 comments on commit ae41d82

Please sign in to comment.