From d1444a4f9edfd6572617bae9d0fc41cf898831c5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 12 Aug 2024 18:18:07 +0100
Subject: [PATCH 1/2] chore: update pre-commit hooks (#21)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/astral-sh/ruff-pre-commit: v0.5.6 → v0.5.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.6...v0.5.7)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7bbd928..98bd64b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -40,7 +40,7 @@ repos:
         args: [--prose-wrap=always]
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.5.6"
+    rev: "v0.5.7"
     hooks:
       - id: ruff
         args: ["--fix", "--show-fixes"]

From bcc7a947eb3d46141e719499ebc2b0ce4ee6056d Mon Sep 17 00:00:00 2001
From: Saransh <saransh0701@gmail.com>
Date: Thu, 15 Aug 2024 05:50:18 +0530
Subject: [PATCH 2/2] refactor!: match API with Scikit-HEP

---
 .pre-commit-config.yaml             |   1 +
 README.md                           |  82 +++
 src/cuda_histogram/axis/__init__.py | 160 ++++--
 src/cuda_histogram/hist.py          | 781 ++++++----------------------
 4 files changed, 342 insertions(+), 682 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 98bd64b..f25e20b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,6 +7,7 @@ repos:
     rev: "1.18.0"
     hooks:
       - id: blacken-docs
+        args: ["-E"]
         additional_dependencies: [black==24.*]
 
   - repo: https://github.com/pre-commit/pre-commit-hooks
diff --git a/README.md b/README.md
index 5ff37b5..8594c6c 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,88 @@
 
 The package is under active development at the moment.
 
+## Example
+
+```py
+In [1]: import cuda_histogram; import cupy as cp
+
+In [2]: a = cuda_histogram.axis.Regular(10, 0, 1)
+
+In [3]: b = cuda_histogram.axis.Variable([0, 2, 3, 6])
+
+In [4]: c = cuda_histogram.Hist(a, b)
+
+In [5]: a, b, c
+Out[5]:
+(Regular(10, 0, 1),
+ Variable([0. 2. 3. 6.]),
+ Hist(Regular(10, 0, 1), Variable([0. 2. 3. 6.])))
+
+In [6]: c.fill(cp.random.normal(size=1_000_000), cp.random.normal(size=1_000_000))
+
+In [7]: c.values(), type(c.values())
+Out[7]:
+(array([[28493.,  1282.,    96.],
+        [29645.,  1366.,    91.],
+        [30465.,  1397.,    80.],
+        [31537.,  1473.,    81.],
+        [32608.,  1454.,   102.],
+        [33015.,  1440.,    83.],
+        [33992.,  1482.,    87.],
+        [34388.,  1482.,   111.],
+        [34551.,  1517.,    90.],
+        [35177.,  1515.,    85.]]),
+ cupy.ndarray)
+
+In [8]: c[0, 0], type(c[0, 0])
+Out[8]: (array(28493.), cupy.ndarray)
+
+In [9]: c[0:2, 0], type(c[0, 0]) # should ideally return a reduced histogram
+Out[9]: (array([28493., 29645.]), cupy.ndarray)
+
+In [10]: c.to_boost()
+Out[10]:
+Histogram(
+  Regular(10, 0, 1),
+  Variable([0, 2, 3, 6]),
+  storage=Double()) # Sum: 339185.0 (945991.0 with flow)
+
+In [11]: c.to_boost().values(), type(c.to_boost().values())
+Out[11]:
+(array([[28493.,  1282.,    96.],
+        [29645.,  1366.,    91.],
+        [30465.,  1397.,    80.],
+        [31537.,  1473.,    81.],
+        [32608.,  1454.,   102.],
+        [33015.,  1440.,    83.],
+        [33992.,  1482.,    87.],
+        [34388.,  1482.,   111.],
+        [34551.,  1517.,    90.],
+        [35177.,  1515.,    85.]]),
+ numpy.ndarray)
+
+In [12]: c.to_hist()
+Out[12]:
+Hist(
+  Regular(10, 0, 1, label='Axis 0'),
+  Variable([0, 2, 3, 6], label='Axis 1'),
+  storage=Double()) # Sum: 339185.0 (945991.0 with flow)
+
+In [13]: c.to_hist().values(), type(c.to_hist().values())
+Out[13]:
+(array([[28493.,  1282.,    96.],
+        [29645.,  1366.,    91.],
+        [30465.,  1397.,    80.],
+        [31537.,  1473.,    81.],
+        [32608.,  1454.,   102.],
+        [33015.,  1440.,    83.],
+        [33992.,  1482.,    87.],
+        [34388.,  1482.,   111.],
+        [34551.,  1517.,    90.],
+        [35177.,  1515.,    85.]]),
+    numpy.ndarray)
+```
+
 <!-- prettier-ignore-start -->
 [actions-badge]:            https://github.com/Saransh-cpp/cuda-histogram/workflows/CI/badge.svg
 [actions-link]:             https://github.com/Saransh-cpp/cuda-histogram/actions
diff --git a/src/cuda_histogram/axis/__init__.py b/src/cuda_histogram/axis/__init__.py
index 0613cc2..50bced5 100644
--- a/src/cuda_histogram/axis/__init__.py
+++ b/src/cuda_histogram/axis/__init__.py
@@ -4,13 +4,13 @@
 import numbers
 import re
 import warnings
+from typing import Iterable
 
 import awkward
 import cupy
 import numpy as np
 
 __all__: list[str] = [
-    "Bin",
     "Regular",
     "Variable",
     "Cat",
@@ -32,21 +32,11 @@
 )
 
 
-def _overflow_behavior(overflow):
-    if overflow == "none":
+def _overflow_behavior(overflow: bool):
+    if not overflow:
         return slice(1, -2)
-    elif overflow == "under":
-        return slice(None, -2)
-    elif overflow == "over":
-        return slice(1, -1)
-    elif overflow == "all":
-        return slice(None, -1)
-    elif overflow == "allnan":
-        return slice(None)
-    elif overflow == "justnan":
-        return slice(-1, None)
     else:
-        raise ValueError(f"Unrecognized overflow behavior: {overflow}")
+        return slice(None, None)
 
 
 @functools.total_ordering
@@ -397,9 +387,6 @@ class DenseAxis(Axis):
         **_ireduce(slice)** - return a slice or list of indices, input slice to be interpred as values
 
         **reduced(islice)** - return a new axis with binning corresponding to the index slice (from _ireduce)
-
-    TODO: hasoverflow(), not all dense axes might have an overflow concept,
-    currently it is implicitly assumed they do (as the only dense type is a numeric axis)
     """
 
 
@@ -408,10 +395,6 @@ class Bin(DenseAxis):
 
     Parameters
     ----------
-        name : str
-            is used as a keyword in histogram filling, immutable
-        label : str
-            describes the meaning of the axis, can be changed
         n_or_arr : int or list or np.ndarray
             Integer number of bins, if uniform binning. Otherwise, a list or
             numpy 1D array of bin boundaries.
@@ -419,14 +402,17 @@ class Bin(DenseAxis):
             lower boundary of bin range, if uniform binning
         hi : float, optional
             upper boundary of bin range, if uniform binning
+        name : str
+            is used as a keyword in histogram filling, immutable
+        label : str
+            describes the meaning of the axis, can be changed
 
     This axis will generate frequencies for n+3 bins, special bin indices:
     ``0 = underflow, n+1 = overflow, n+2 = nanflow``
     Bin boundaries are [lo, hi)
     """
 
-    def __init__(self, name, label, n_or_arr, lo=None, hi=None):
-        super().__init__(name, label)
+    def __init__(self, n_or_arr, lo=None, hi=None, *, name="", label=""):
         self._lazy_intervals = None
         if isinstance(n_or_arr, (list, np.ndarray, cupy.ndarray)):
             self._uniform = False
@@ -440,10 +426,6 @@ def __init__(self, name, label, n_or_arr, lo=None, hi=None):
             self._interval_bins = cupy.r_[-cupy.inf, self._bins, cupy.nan]
             self._bin_names = np.full(self._interval_bins[:-1].size, None)
         elif isinstance(n_or_arr, numbers.Integral):
-            if lo is None or hi is None:
-                raise TypeError(
-                    "Interpreting n_or_arr as uniform binning, please specify lo and hi values"
-                )
             self._uniform = True
             self._lo = lo
             self._hi = hi
@@ -455,10 +437,16 @@ def __init__(self, name, label, n_or_arr, lo=None, hi=None):
                 cupy.nan,
             ]
             self._bin_names = np.full(self._interval_bins[:-1].size, None)
-        else:
-            raise TypeError(
-                f"Cannot understand n_or_arr (nbins or binning array) type {n_or_arr!r}"
-            )
+        self._label = label
+        self._name = name
+
+    def __repr__(self):
+        class_name = self.__class__.__name__
+        return (
+            f"{class_name}({self._bins[:-1]})"
+            if not self._uniform
+            else f"{class_name}{self._bins, self._lo, self._hi}"
+        )
 
     @property
     def _intervals(self):
@@ -480,11 +468,6 @@ def __getstate__(self):
         return self.__dict__
 
     def __setstate__(self, d):
-        if "_intervals" in d:  # convert old hists to new serialization format
-            _old_intervals = d.pop("_intervals")
-            interval_bins = [i._lo for i in _old_intervals] + [_old_intervals[-1]._hi]
-            d["_interval_bins"] = cupy.array(interval_bins)
-            d["_bin_names"] = np.array([interval._label for interval in _old_intervals])
         if "_interval_bins" in d and "_bin_names" not in d:
             d["_bin_names"] = np.full(d["_interval_bins"][:-1].size, None)
         self.__dict__ = d
@@ -501,7 +484,9 @@ def index(self, identifier):
         Returns an integer corresponding to the index in the axis where the histogram would be filled.
         The integer range includes flow bins: ``0 = underflow, n+1 = overflow, n+2 = nanflow``
         """
-        isarray = isinstance(identifier, (awkward.Array, cupy.ndarray, np.ndarray))
+        isarray = isinstance(
+            identifier, (awkward.Array, cupy.ndarray, np.ndarray, list)
+        )
         if isarray or isinstance(identifier, numbers.Number):
             identifier = awkward.to_cupy(identifier)  # cupy.asarray(identifier)
             if self._uniform:
@@ -638,10 +623,8 @@ def _ireduce(self, the_slice):
 
     def reduced(self, islice):
         """Return a new axis with reduced binning
-
         The new binning corresponds to the slice made on this axis.
         Overflow will be taken care of by ``Hist.__getitem__``
-
         Parameters
         ----------
             islice : slice
@@ -669,22 +652,23 @@ def reduced(self, islice):
             # TODO: remove this once satisfied it works
             rbins = (hi - lo) * self._bins / (self._hi - self._lo)
             assert abs(bins - rbins) < 1e-14, "%d %f %r" % (bins, rbins, self)
-            return Bin(self._name, self._label, bins, lo, hi)
+            return Regular(bins, lo, hi, name=self._name, label=self._label)
         else:
             lo = None if islice.start is None else islice.start - 1
             hi = -1 if islice.stop is None else islice.stop
             bins = self._bins[slice(lo, hi)]
-            return Bin(self._name, self._label, bins)
+            return Variable(bins, name=self._name, label=self._label)
 
     @property
     def size(self):
-        """Number of bins, including overflow (i.e. ``n + 3``)"""
-        if self._uniform:
-            return self._bins + 3
-        # (inf added at constructor)
-        return len(self._bins) + 1
+        """Number of bins"""
+        return (
+            self._bins
+            if isinstance(self._bins, (int, np.integer, cupy.integer))
+            else len(self._bins)
+        )
 
-    def edges(self, overflow="none"):
+    def edges(self, flow=False):
         """Bin boundaries
 
         Parameters
@@ -700,9 +684,9 @@ def edges(self, overflow="none"):
         out = cupy.r_[
             2 * out[0] - out[1], out, 2 * out[-1] - out[-2], 3 * out[-1] - 2 * out[-2]
         ]
-        return out[_overflow_behavior(overflow)]
+        return out[_overflow_behavior(flow)]
 
-    def centers(self, overflow="none"):
+    def centers(self, flow=False):
         """Bin centers
 
         Parameters
@@ -711,9 +695,81 @@ def centers(self, overflow="none"):
                 Create overflow and/or underflow bins by adding a bin of same width to each end.
                 See `Hist.sum` description for the allowed values.
         """
-        edges = self.edges(overflow)
+        edges = self.edges(flow)
         return (edges[:-1] + edges[1:]) / 2
 
-    def identifiers(self, overflow="none"):
+    def identifiers(self, flow=False):
         """List of `Interval` identifiers"""
-        return self._intervals[_overflow_behavior(overflow)]
+        return self._intervals[_overflow_behavior(flow)]
+
+
+class Regular(Bin):
+    """A binned axis with name, label, and binning.
+
+    Parameters
+    ----------
+        name : str
+            is used as a keyword in histogram filling, immutable
+        label : str
+            describes the meaning of the axis, can be changed
+        n_or_arr : int or list or np.ndarray
+            Integer number of bins, if uniform binning. Otherwise, a list or
+            numpy 1D array of bin boundaries.
+        lo : float, optional
+            lower boundary of bin range, if uniform binning
+        hi : float, optional
+            upper boundary of bin range, if uniform binning
+
+    This axis will generate frequencies for n+3 bins, special bin indices:
+    ``0 = underflow, n+1 = overflow, n+2 = nanflow``
+    Bin boundaries are [lo, hi)
+    """
+
+    def __init__(
+        self,
+        bins: int,
+        start: float,
+        stop: float,
+        *,
+        name: str = "",
+        label: str = "",
+    ) -> None:
+        super().__init__(
+            bins,
+            start,
+            stop,
+            name=name,
+            label=label,
+        )
+
+
+class Variable(Bin):
+    """A binned axis with name, label, and binning.
+
+    Parameters
+    ----------
+        name : str
+            is used as a keyword in histogram filling, immutable
+        label : str
+            describes the meaning of the axis, can be changed
+        n_or_arr : int or list or np.ndarray
+            Integer number of bins, if uniform binning. Otherwise, a list or
+            numpy 1D array of bin boundaries.
+        lo : float, optional
+            lower boundary of bin range, if uniform binning
+        hi : float, optional
+            upper boundary of bin range, if uniform binning
+
+    This axis will generate frequencies for n+3 bins, special bin indices:
+    ``0 = underflow, n+1 = overflow, n+2 = nanflow``
+    Bin boundaries are [lo, hi)
+    """
+
+    def __init__(
+        self,
+        edges: Iterable[float],
+        *,
+        name: str = "",
+        label: str = "",
+    ) -> None:
+        super().__init__(edges, name=name, label=label)
diff --git a/src/cuda_histogram/hist.py b/src/cuda_histogram/hist.py
index 1f28c2e..527d2ed 100644
--- a/src/cuda_histogram/hist.py
+++ b/src/cuda_histogram/hist.py
@@ -1,23 +1,17 @@
 from __future__ import annotations
 
-import copy
-import numbers
-import warnings
-from abc import ABCMeta, abstractmethod
 from collections import namedtuple
-from collections.abc import Sequence
 
-import awkward
 import cupy
 import numpy as np
 
 from cuda_histogram.axis import (
     Axis,
-    Bin,
     Cat,
     DenseAxis,
-    Interval,
+    Regular,
     SparseAxis,
+    Variable,
     _overflow_behavior,
 )
 
@@ -55,177 +49,55 @@ def _assemble_blocks(array, ndslice, depth=0):
     return slist
 
 
-class AccumulatorABC(metaclass=ABCMeta):
-    """Abstract base class for an accumulator
-
-    Accumulators are abstract objects that enable the reduce stage of the typical map-reduce
-    scaleout that we do in Coffea. One concrete example is a histogram. The idea is that an
-    accumulator definition holds enough information to be able to create an empty accumulator
-    (the ``identity()`` method) and add two compatible accumulators together (the ``add()`` method).
-    The former is not strictly necessary, but helps with book-keeping. Here we show an example usage
-    of a few accumulator types. An arbitrary-depth nesting of dictionary accumulators is supported, much
-    like the behavior of directories in ROOT hadd.
-
-    After defining an accumulator::
-
-        from coffea.processor import dict_accumulator, column_accumulator, defaultdict_accumulator
-        from cuda_histogram import Hist, Bin
-        import numpy as np
-
-        adef = dict_accumulator({
-            'cutflow': defaultdict_accumulator(int),
-            'pt': Hist("counts", Bin("pt", "$p_T$", 100, 0, 100)),
-            'final_pt': column_accumulator(np.zeros(shape=(0,))),
-        })
-
-    Notice that this function does not mutate ``adef``::
-
-        def fill(n):
-            ptvals = np.random.exponential(scale=30, size=n)
-            cut = ptvals > 200.
-            acc = adef.identity()
-            acc['cutflow']['pt>200'] += cut.sum()
-            acc['pt'].fill(pt=ptvals)
-            acc['final_pt'] += column_accumulator(ptvals[cut])
-            return acc
-
-    As such, we can execute it several times in parallel and reduce the result::
-
-        import concurrent.futures
-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            outputs = executor.map(fill, [2000, 2000])
-
-        combined = sum(outputs, adef.identity())
-
-
-    Derived classes must implement
-        - ``identity()``: returns a new object of same type as self,
-          such that ``self + self.identity() == self``
-        - ``add(other)``: adds an object of same type as self to self
-
-    Concrete implementations are then provided for ``__add__``, ``__radd__``, and ``__iadd__``.
+class Hist:
     """
+    Construct a new histogram.
 
-    @abstractmethod
-    def identity(self):
-        """Identity of the accumulator
-
-        A value such that any other value added to it will return
-        the other value
-        """
-
-    @abstractmethod
-    def add(self, other):
-        """Add another accumulator to this one in-place"""
-
-    def __add__(self, other):
-        ret = self.identity()
-        ret.add(self)
-        ret.add(other)
-        return ret
-
-    def __radd__(self, other):
-        ret = self.identity()
-        ret.add(other)
-        ret.add(self)
-        return ret
-
-    def __iadd__(self, other):
-        self.add(other)
-        return self
-
-
-class Hist(AccumulatorABC):
-    """
-    Specify a multidimensional histogram.
+    If you pass in a single argument, this will be treated as a
+    histogram and this will convert the histogram to this type of
+    histogram.
 
     Parameters
     ----------
-        label : str
-            A description of the meaning of the sum of weights
-        ``*axes``
-            positional list of `Cat` or `Bin` objects, denoting the axes of the histogram
-        axes : collections.abc.Sequence
-            list of `Cat` or `Bin` objects, denoting the axes of the histogram (overridden by ``*axes``)
-        dtype : str
-            Underlying numpy dtype to use for storing sum of weights
-
-    Examples
-    --------
-
-    Creating a histogram with a sparse axis, and two dense axes::
-
-        import cuda_histogram as chist
-
-        h = chist.Hist("Observed bird count",
-                             chist.Cat("species", "Bird species"),
-                             chist.Bin("x", "x coordinate [m]", 20, -5, 5),
-                             chist.Bin("y", "y coordinate [m]", 20, -5, 5),
-                             )
-
-        # or
-
-        h = chist.Hist(label="Observed bird count",
-                             axes=(chist.Cat("species", "Bird species"),
-                                   chist.Bin("x", "x coordinate [m]", 20, -5, 5),
-                                   chist.Bin("y", "y coordinate [m]", 20, -5, 5),
-                                  )
-                             )
-
-        # or
-
-        h = chist.Hist(axes=[chist.Cat("species", "Bird species"),
-                                   chist.Bin("x", "x coordinate [m]", 20, -5, 5),
-                                   chist.Bin("y", "y coordinate [m]", 20, -5, 5),
-                                  ],
-                             label="Observed bird count",
-                             )
-
-    which produces:
-
-    >>> h
-    <Hist (species,x,y) instance at 0x10d84b550>
-
+    *args : Axis
+        Provide 1 or more axis instances.
+    label: str = None
+        Histogram's label
+    name: str = None
+        Histogram's name
     """
 
-    #: Default numpy dtype to store sum of weights
     DEFAULT_DTYPE = "d"
 
-    def __init__(self, label, *axes, **kwargs):
-        if not isinstance(label, str):
-            raise TypeError("label must be a string")
+    def __init__(
+        self,
+        *axes,
+        label: str | None = None,
+        name: str | None = None,
+    ) -> None:
         self._label = label
-        self._dtype = kwargs.get("dtype", Hist.DEFAULT_DTYPE)
+        self._name = name
+        self._dtype = Hist.DEFAULT_DTYPE
         self._axes = axes
-        if len(axes) == 0 and "axes" in kwargs:
-            if not isinstance(kwargs["axes"], Sequence):
-                raise TypeError("axes must be a sequence type! (tuple, list, etc.)")
-            self._axes = tuple(kwargs["axes"])
-        elif len(axes) != 0 and "axes" in kwargs:
-            warnings.warn(
-                "axes defined by both positional arguments and keyword argument, using positional arguments"
-            )
 
         if not all(isinstance(ax, Axis) for ax in self._axes):
-            del self._axes
             raise TypeError("All axes must be derived from Axis class")
-        # if we stably partition axes to sparse, then dense, some things simplify
-        # ..but then the user would then see the order change under them
         self._dense_shape = tuple(
-            [ax.size for ax in self._axes if isinstance(ax, DenseAxis)]
+            [
+                ax.size + 3 if isinstance(ax, Regular) else ax.size + 1
+                for ax in self._axes
+                if isinstance(ax, DenseAxis)
+            ]
         )
-        if np.prod(self._dense_shape) > 10000000:
-            warnings.warn("Allocating a large (>10M bin) histogram!", RuntimeWarning)
         self._sumw = {}
         # Storage of sumw2 starts at first use of weight keyword in fill()
         self._sumw2 = None
 
     def __repr__(self):
-        return "<{} ({}) instance at 0x{:0x}>".format(
-            self.__class__.__name__,
-            ",".join(d.name for d in self.axes()),
-            id(self),
-        )
+        repr_str = "Hist("
+        for ax in self._axes:
+            repr_str += f"{ax!r}, "
+        return f"{repr_str[:-2]})"
 
     @property
     def label(self):
@@ -236,31 +108,14 @@ def label(self):
     def label(self, label):
         self._label = label
 
-    def copy(self, content=True):
-        """Create a deep copy
-
-        Parameters
-        ----------
-            content : bool
-                If set false, only the histogram definition is copied, resetting
-                the sum of weights to zero
-        """
-        out = Hist(self._label, *self._axes, dtype=self._dtype)
-        if self._sumw2 is not None:
-            out._sumw2 = {}
-        if content:
-            out._sumw = copy.deepcopy(self._sumw)
-            out._sumw2 = copy.deepcopy(self._sumw2)
-        return out
-
-    def identity(self):
-        """The identity (zero value) of this accumulator"""
-        return self.copy(content=False)
+    @property
+    def name(self):
+        """A label describing the meaning of the sum of weights"""
+        return self._name
 
-    def clear(self):
-        """Clear all content in this histogram"""
-        self._sumw = {}
-        self._sumw2 = None
+    @label.setter
+    def name(self, name):  # noqa: F811
+        self._name = name
 
     def axis(self, axis_name):
         """Get an ``Axis`` object"""
@@ -272,11 +127,6 @@ def axes(self):
         """Get all axes in this histogram"""
         return self._axes
 
-    @property
-    def fields(self):
-        """This is a stub for histbook compatibility"""
-        return [ax.name for ax in self._axes]
-
     def dim(self):
         """Dimension of this histogram (number of axes)"""
         return len(self._axes)
@@ -285,25 +135,10 @@ def dense_dim(self):
         """Dense dimension of this histogram (number of non-sparse axes)"""
         return len(self._dense_shape)
 
-    def sparse_dim(self):
-        """Sparse dimension of this histogram (number of sparse axes)"""
-        return self.dim() - self.dense_dim()
-
-    def dense_axes(self):
-        """All dense axes"""
-        return [ax for ax in self._axes if isinstance(ax, DenseAxis)]
-
     def sparse_axes(self):
         """All sparse axes"""
         return [ax for ax in self._axes if isinstance(ax, SparseAxis)]
 
-    def sparse_nbins(self):
-        """Total number of sparse bins"""
-        return len(self._sumw)
-
-    def _idense(self, axis):
-        return self.dense_axes().index(axis)
-
     def _isparse(self, axis):
         return self.sparse_axes().index(axis)
 
@@ -312,97 +147,62 @@ def _init_sumw2(self):
         for key in self._sumw:
             self._sumw2[key] = self._sumw[key].copy()
 
-    def compatible(self, other):
-        """Checks if this histogram is compatible with another, i.e. they have identical binning"""
-        if self.dim() != other.dim():
-            return False
-        if {d.name for d in self.sparse_axes()} != {
-            d.name for d in other.sparse_axes()
-        }:
-            return False
-        if not all(d1 == d2 for d1, d2 in zip(self.dense_axes(), other.dense_axes())):  # noqa: SIM103
-            return False
-        return True
-
-    def add(self, other):
-        """Add another histogram into this one, in-place"""
-        if not self.compatible(other):
-            raise ValueError(
-                f"Cannot add this histogram with histogram {other!r} of dissimilar dimensions"
-            )
-
-        raxes = other.sparse_axes()
-
-        def add_dict(left, right):
-            for rkey in right:
-                lkey = tuple(
-                    self.axis(rax).index(rax[ridx]) for rax, ridx in zip(raxes, rkey)
-                )
-                if lkey in left:
-                    left[lkey] += right[rkey]
-                else:
-                    left[lkey] = copy.deepcopy(right[rkey])
-
-        if self._sumw2 is None and other._sumw2 is None:
-            pass
-        elif self._sumw2 is None:
-            self._init_sumw2()
-            add_dict(self._sumw2, other._sumw2)
-        elif other._sumw2 is None:
-            add_dict(self._sumw2, other._sumw)
-        else:
-            add_dict(self._sumw2, other._sumw2)
-        add_dict(self._sumw, other._sumw)
-        return self
-
+    # fix this
     def __getitem__(self, keys):
-        if not isinstance(keys, tuple):
-            keys = (keys,)
-        if len(keys) > self.dim():
-            raise IndexError("Too many indices for this histogram")
-        elif len(keys) < self.dim():
-            if Ellipsis in keys:
-                idx = keys.index(Ellipsis)
-                slices = (slice(None),) * (self.dim() - len(keys) + 1)
-                keys = keys[:idx] + slices + keys[idx + 1 :]
-            else:
-                slices = (slice(None),) * (self.dim() - len(keys))
-                keys += slices
-        sparse_idx = []
-        dense_idx = []
-        new_dims = []
-        for s, ax in zip(keys, self._axes):
-            if isinstance(ax, SparseAxis):
-                sparse_idx.append(ax._ireduce(s))
-                new_dims.append(ax)
-            else:
-                islice = ax._ireduce(s)
-                dense_idx.append(islice)
-                new_dims.append(ax.reduced(islice))
-        dense_idx = tuple(dense_idx)
-
-        def dense_op(array):
-            as_numpy = array.get()
-            blocked = np.block(_assemble_blocks(as_numpy, dense_idx))
-            return cupy.asarray(blocked)
-
-        out = Hist(self._label, *new_dims, dtype=self._dtype)
-        if self._sumw2 is not None:
-            out._init_sumw2()
-        for sparse_key in self._sumw:
-            if not all(k in idx for k, idx in zip(sparse_key, sparse_idx)):
-                continue
-            if sparse_key in out._sumw:
-                out._sumw[sparse_key] += dense_op(self._sumw[sparse_key])
-                if self._sumw2 is not None:
-                    out._sumw2[sparse_key] += dense_op(self._sumw2[sparse_key])
-            else:
-                out._sumw[sparse_key] = dense_op(self._sumw[sparse_key]).copy()
-                if self._sumw2 is not None:
-                    out._sumw2[sparse_key] = dense_op(self._sumw2[sparse_key]).copy()
-        return out
-
-    def fill(self, **values):
+        if isinstance(keys, slice) and not all(
+            isinstance(s, int) or s is None for s in [keys.start, keys.stop, keys.step]
+        ):
+            raise ValueError("use to_boost/to_hist to access other UHI functionalities")
+        if not isinstance(keys, slice) and not isinstance(keys, (int, tuple)):
+            raise ValueError("use to_boost/to_hist to access other UHI functionalities")
+        return self.values()[keys]
+        # if not isinstance(keys, tuple):
+        #     keys = (keys,)
+        # if len(keys) > self.dim():
+        #     raise IndexError("Too many indices for this histogram")
+        # elif len(keys) < self.dim():
+        #     if Ellipsis in keys:
+        #         idx = keys.index(Ellipsis)
+        #         slices = (slice(None),) * (self.dim() - len(keys) + 1)
+        #         keys = keys[:idx] + slices + keys[idx + 1 :]
+        #     else:
+        #         slices = (slice(None),) * (self.dim() - len(keys))
+        #         keys += slices
+        # sparse_idx = []
+        # dense_idx = []
+        # new_dims = []
+        # for s, ax in zip(keys, self._axes):
+        #     if isinstance(ax, SparseAxis):
+        #         sparse_idx.append(ax._ireduce(s))
+        #         new_dims.append(ax)
+        #     else:
+        #         islice = ax._ireduce(s)
+        #         dense_idx.append(islice)
+        #         new_dims.append(ax.reduced(islice))
+        # dense_idx = tuple(dense_idx)
+
+        # def dense_op(array):
+        #     as_numpy = array.get()
+        #     blocked = np.block(_assemble_blocks(as_numpy, dense_idx))
+        #     return cupy.asarray(blocked)
+
+        # out = Hist(*new_dims, label=self._label)
+        # if self._sumw2 is not None:
+        #     out._init_sumw2()
+        # for sparse_key in self._sumw:
+        #     if not all(k in idx for k, idx in zip(sparse_key, sparse_idx)):
+        #         continue
+        #     if sparse_key in out._sumw:
+        #         out._sumw[sparse_key] += dense_op(self._sumw[sparse_key])
+        #         if self._sumw2 is not None:
+        #             out._sumw2[sparse_key] += dense_op(self._sumw2[sparse_key])
+        #     else:
+        #         out._sumw[sparse_key] = dense_op(self._sumw[sparse_key]).copy()
+        #         if self._sumw2 is not None:
+        #             out._sumw2[sparse_key] = dense_op(self._sumw2[sparse_key]).copy()
+        # return out
+
+    def fill(self, *args, weight=None):
         """Fill sum of weights from columns
 
         Parameters
@@ -427,26 +227,23 @@ def fill(self, **values):
         >>> h.fill(species='ducks', x=np.random.normal(size=10), y=np.random.normal(size=10), weight=np.ones(size=10) * 3)
 
         """
-        weight = values.pop("weight", None)
-        if isinstance(weight, (awkward.Array, cupy.ndarray, np.ndarray)):
-            weight = cupy.array(weight)
-        if isinstance(weight, numbers.Number):
-            weight = cupy.atleast_1d(weight)
-        if not all(d.name in values for d in self._axes):
-            missing = ", ".join(d.name for d in self._axes if d.name not in values)
-            raise ValueError(
-                f"Not all axes specified for {self!r}.  Missing: {missing}"
-            )
-        if not all(name in self._axes for name in values):
-            extra = ", ".join(name for name in values if name not in self._axes)
-            raise ValueError(
-                f"Unrecognized axes specified for {self!r}.  Extraneous: {extra}"
-            )
+        if not all(isinstance(a, (cupy.ndarray, str)) for a in args):
+            raise TypeError("pass CuPy arrays")
+        if weight is not None and not isinstance(weight, cupy.ndarray):
+            raise TypeError("pass CuPy arrays")
+
+        if len(self._axes) != len(args):
+            raise ValueError("mismatching dimensions for provided values and axes")
 
         if weight is not None and self._sumw2 is None:
             self._init_sumw2()
 
-        sparse_key = tuple(d.index(values[d.name]) for d in self.sparse_axes())
+        sparse_key = tuple(
+            d.index(value)
+            for d, value in zip(self._axes, args)
+            if isinstance(d, SparseAxis)
+        )
+
         if sparse_key not in self._sumw:
             self._sumw[sparse_key] = cupy.zeros(
                 shape=self._dense_shape, dtype=self._dtype
@@ -458,8 +255,8 @@ def fill(self, **values):
 
         if self.dense_dim() > 0:
             dense_indices = tuple(
-                cupy.asarray(d.index(values[d.name]))
-                for d in self._axes
+                cupy.asarray(d.index(value))
+                for d, value in zip(self._axes, args)
                 if isinstance(d, DenseAxis)
             )
             xy = cupy.atleast_1d(
@@ -492,322 +289,33 @@ def fill(self, **values):
             if self._sumw2 is not None:
                 self._sumw2[sparse_key] += 1.0
 
-    def sum(self, *axes, **kwargs):
-        """Integrates out a set of axes, producing a new histogram
-
-        Parameters
-        ----------
-            ``*axes``
-                Positional list of axes to integrate out (either a string or an Axis object)
-
-            overflow : {'none', 'under', 'over', 'all', 'allnan'}, optional
-                How to treat the overflow bins in the sum.  Only applies to dense axes.
-                'all' includes both under- and over-flow but not nan-flow bins.
-                Default is 'none'.
-        """
-        overflow = kwargs.pop("overflow", "none")
-        axes = [self.axis(ax) for ax in axes]
-        reduced_dims = [ax for ax in self._axes if ax not in axes]
-        out = Hist(self._label, *reduced_dims, dtype=self._dtype)
-        if self._sumw2 is not None:
-            out._init_sumw2()
-
-        sparse_drop = []
-        dense_slice = [slice(None)] * self.dense_dim()
-        dense_sum_dim = []
-        for axis in axes:
-            if isinstance(axis, DenseAxis):
-                idense = self._idense(axis)
-                dense_sum_dim.append(idense)
-                dense_slice[idense] = _overflow_behavior(overflow)
-            elif isinstance(axis, SparseAxis):
-                isparse = self._isparse(axis)
-                sparse_drop.append(isparse)
-        dense_slice = tuple(dense_slice)
-        dense_sum_dim = tuple(dense_sum_dim)
-
-        def dense_op(array):
-            if len(dense_sum_dim) > 0:
-                return np.sum(array[dense_slice], axis=dense_sum_dim)
-            return array
-
-        for key in self._sumw:
-            new_key = tuple(k for i, k in enumerate(key) if i not in sparse_drop)
-            if new_key in out._sumw:
-                out._sumw[new_key] += dense_op(self._sumw[key])
-                if self._sumw2 is not None:
-                    out._sumw2[new_key] += dense_op(self._sumw2[key])
-            else:
-                out._sumw[new_key] = dense_op(self._sumw[key]).copy()
-                if self._sumw2 is not None:
-                    out._sumw2[new_key] = dense_op(self._sumw2[key]).copy()
-        return out
-
-    def project(self, *axes, **kwargs):
-        """Project histogram onto a subset of its axes
-
-        Parameters
-        ----------
-            ``*axes`` : str or Axis
-                Positional list of axes to project on to
-            overflow : str
-                Controls behavior of integration over remaining axes.
-                See `sum` description for meaning of allowed values
-                Default is to *not include* overflow bins
-        """
-        overflow = kwargs.pop("overflow", "none")
-        axes = [self.axis(ax) for ax in axes]
-        toremove = [ax for ax in self.axes() if ax not in axes]
-        return self.sum(*toremove, overflow=overflow)
-
-    def integrate(self, axis_name, int_range=None, overflow="none"):
-        """Integrates current histogram along one dimension
-
-        Parameters
-        ----------
-            axis_name : str or Axis
-                Which dimension to reduce on
-            int_range : slice
-                Any slice, list, string, or other object that the axis will understand
-                Default is to integrate over the whole range
-            overflow : str
-                See `sum` description for meaning of allowed values
-                Default is to *not include* overflow bins
-
-        """
-        if int_range is None:
-            int_range = slice(None)
-        axis = self.axis(axis_name)
-        full_slice = tuple(
-            slice(None) if ax != axis else int_range for ax in self._axes
-        )
-        if isinstance(int_range, Interval):
-            # Handle overflow intervals nicely
-            if int_range.nan():
-                overflow = "justnan"
-            elif int_range.lo == -np.inf:
-                overflow = "under"
-            elif int_range.hi == np.inf:
-                overflow = "over"
-        return self[full_slice].sum(
-            axis.name, overflow=overflow
-        )  # slice may make new axis, use name
-
-    def remove(self, bins, axis):
-        """Remove bins from a sparse axis
-
-        Parameters
-        ----------
-            bins : iterable
-                A list of bin identifiers to remove
-            axis : str or Axis
-                Axis name or SparseAxis instance
-
-        Returns a *copy* of the histogram with specified bins removed, not an in-place operation
-        """
-        axis = self.axis(axis)
-        if not isinstance(axis, SparseAxis):
-            raise NotImplementedError(
-                "Hist.remove() only supports removing items from a sparse axis."
-            )
-        bins = [axis.index(binid) for binid in bins]
-        keep = [binid.name for binid in self.identifiers(axis) if binid not in bins]
-        full_slice = tuple(slice(None) if ax != axis else keep for ax in self._axes)
-        return self[full_slice]
-
-    def group(self, old_axes, new_axis, mapping, overflow="none"):
-        """Group a set of slices on old axes into a single new axis
-
-        Parameters
-        ----------
-            old_axes
-                Axis or tuple of axes which are being grouped
-            new_axis
-                A new sparse dimension definition, e.g. a `Cat` instance
-            mapping : dict
-                A mapping ``{'new_bin': (slice, ...), ...}`` where each
-                slice is on the axes being re-binned.  In the case of
-                a single axis for ``old_axes``, ``{'new_bin': slice, ...}``
-                is admissible.
-            overflow : str
-                See `sum` description for meaning of allowed values
-                Default is to *not include* overflow bins
-
-        Returns a new histogram object
-        """
-        if not isinstance(new_axis, SparseAxis):
-            raise TypeError(
-                "New axis must be a sparse axis.  Note: Hist.group() signature has changed to group(old_axes, new_axis, ...)!"
-            )
-        if new_axis in self.axes() and self.axis(new_axis) is new_axis:
-            raise RuntimeError(
-                "new_axis is already in the list of axes.  Note: Hist.group() signature has changed to group(old_axes, new_axis, ...)!"
-            )
-        if not isinstance(old_axes, tuple):
-            old_axes = (old_axes,)
-        old_axes = [self.axis(ax) for ax in old_axes]
-        old_indices = [i for i, ax in enumerate(self._axes) if ax in old_axes]
-        new_dims = [new_axis] + [ax for ax in self._axes if ax not in old_axes]
-        out = Hist(self._label, *new_dims, dtype=self._dtype)
-        if self._sumw2 is not None:
-            out._init_sumw2()
-        for new_cat in mapping:
-            the_slice = mapping[new_cat]
-            if not isinstance(the_slice, tuple):
-                the_slice = (the_slice,)
-            if len(the_slice) != len(old_axes):
-                raise Exception("Slicing does not match number of axes being rebinned")
-            full_slice = [slice(None)] * self.dim()
-            for idx, s in zip(old_indices, the_slice):
-                full_slice[idx] = s
-            full_slice = tuple(full_slice)
-            reduced_hist = self[full_slice].sum(
-                *tuple(ax.name for ax in old_axes), overflow=overflow
-            )  # slice may change old axis binning
-            new_idx = new_axis.index(new_cat)
-            for key in reduced_hist._sumw:
-                new_key = (new_idx, *key)
-                out._sumw[new_key] = reduced_hist._sumw[key]
-                if self._sumw2 is not None:
-                    out._sumw2[new_key] = reduced_hist._sumw2[key]
-        return out
-
-    def rebin(self, old_axis, new_axis):
-        """Rebin a dense axis
-
-        This function will construct the mapping from old to new axis, and
-        constructs a new histogram, rebinning the sum of weights along that dimension.
-
-        Note
-        ----
-        No interpolation is performed, so the user must be sure the old
-        and new axes have compatible bin boundaries, e.g. that they evenly
-        divide each other.
-
-        Parameters
-        ----------
-            old_axis : str or Axis
-                Axis to rebin
-            new_axis : str or Axis or int
-                A DenseAxis object defining the new axis (e.g. a `Bin` instance).
-                If a number N is supplied, the old axis edges are downsampled by N,
-                resulting in a histogram with ``old_nbins // N`` bins.
-
-        Returns a new `Hist` object.
-        """
-        old_axis = self.axis(old_axis)
-        if isinstance(new_axis, numbers.Integral):
-            new_axis = Bin(old_axis.name, old_axis.label, old_axis.edges()[::new_axis])
-        new_dims = [ax if ax != old_axis else new_axis for ax in self._axes]
-        out = Hist(self._label, *new_dims, dtype=self._dtype)
-        if self._sumw2 is not None:
-            out._init_sumw2()
-
-        # would have been nice to use ufunc.reduceat, but we should support arbitrary reshuffling
-        idense = self._idense(old_axis)
-
-        def view_ax(idx):
-            fullindex = [slice(None)] * self.dense_dim()
-            fullindex[idense] = idx
-            return tuple(fullindex)
-
-        binmap = [new_axis.index(i) for i in old_axis.identifiers(overflow="allnan")]
-
-        def dense_op(array):
-            anew = np.zeros(out._dense_shape, dtype=out._dtype)
-            for iold, inew in enumerate(binmap):
-                anew[view_ax(inew)] += array[view_ax(iold)]
-            return anew
-
-        for key in self._sumw:
-            out._sumw[key] = dense_op(self._sumw[key])
-            if self._sumw2 is not None:
-                out._sumw2[key] = dense_op(self._sumw2[key])
-        return out
+    def _view_dim(self, arr, flow):
+        if self.dense_dim() == 0:
+            return arr
+        else:
+            return arr[tuple(_overflow_behavior(flow) for _ in range(self.dense_dim()))]
 
-    def values(self, sumw2=False, overflow="none"):
+    def values(self, flow=False):
         """Extract the sum of weights arrays from this histogram
 
         Parameters
         ----------
-            sumw2 : bool
-                If True, frequencies is a tuple of arrays (sum weights, sum squared weights)
-            overflow
+            flow : bool
                 See `sum` description for meaning of allowed values
-
-        Returns a mapping ``{(sparse identifier, ...): np.array(...), ...}``
-        where each array has dimension `dense_dim` and shape matching
-        the number of bins per axis, plus 0-3 overflow bins depending
-        on the ``overflow`` argument.
         """
 
-        def view_dim(arr):
-            if self.dense_dim() == 0:
-                return arr
-            else:
-                return arr[
-                    tuple(_overflow_behavior(overflow) for _ in range(self.dense_dim()))
-                ]
-
-        out = {}
-        for sparse_key in self._sumw:
-            id_key = tuple(ax[k] for ax, k in zip(self.sparse_axes(), sparse_key))
-            if sumw2:
-                if self._sumw2 is not None:
-                    w2 = view_dim(self._sumw2[sparse_key])
-                else:
-                    w2 = view_dim(self._sumw[sparse_key])
-                out[id_key] = (view_dim(self._sumw[sparse_key]), w2)
-            else:
-                out[id_key] = view_dim(self._sumw[sparse_key])
-        return out
-
-    def scale(self, factor, axis=None):
-        """Scale histogram in-place by factor
-
-        Parameters
-        ----------
-            factor : float or dict
-                A number or mapping of identifier to number
-            axis : optional
-                Which (sparse) axis the dict applies to, may be a tuples of axes.
-                The dict keys must follow the same structure.
-
-        Examples
-        --------
-        This function is useful to quickly reweight according to some
-        weight mapping along a sparse axis, such as the ``species`` axis
-        in the `Hist` example:
+        return (
+            self._view_dim(cupy.zeros(shape=self._dense_shape), flow)
+            if self._sumw == {}
+            else self._view_dim(next(iter(self._sumw.values())), flow)
+        )
 
-        >>> h.scale({'ducks': 0.3, 'geese': 1.2}, axis='species')
-        >>> h.scale({('ducks',): 0.5}, axis=('species',))
-        >>> h.scale({('geese', 'honk'): 5.0}, axis=('species', 'vocalization'))
-        """
-        if self._sumw2 is None:
-            self._init_sumw2()
-        if isinstance(factor, numbers.Number) and axis is None:
-            for key in self._sumw:
-                self._sumw[key] *= factor
-                self._sumw2[key] *= factor**2
-        elif isinstance(factor, dict):
-            if not isinstance(axis, tuple):
-                axis = (axis,)
-                factor = {(k,): v for k, v in factor.items()}
-            axis = tuple(map(self.axis, axis))
-            isparse = list(map(self._isparse, axis))
-            factor = {
-                tuple(a.index(e) for a, e in zip(axis, k)): v for k, v in factor.items()
-            }
-            for key in self._sumw:
-                factor_key = tuple(key[i] for i in isparse)
-                if factor_key in factor:
-                    self._sumw[key] *= factor[factor_key]
-                    self._sumw2[key] *= factor[factor_key] ** 2
-        elif isinstance(factor, np.ndarray):
-            axis = self.axis(axis)
-            raise NotImplementedError("Scale dense dimension by a factor")
-        else:
-            raise TypeError("Could not interpret scale factor")
+    def variance(self, flow=False):
+        return (
+            None
+            if self._sumw2 is None
+            else self._view_dim(next(iter(self._sumw2.values())), flow)
+        )
 
     def identifiers(self, axis, overflow="none"):
         """Return a list of identifiers for an axis
@@ -842,7 +350,7 @@ def to_boost(self):
 
         newaxes = []
         for axis in self.axes():
-            if isinstance(axis, Bin) and axis._uniform:
+            if isinstance(axis, Regular):
                 newaxis = boost_histogram.axis.Regular(
                     axis._bins,
                     axis._lo,
@@ -853,7 +361,7 @@ def to_boost(self):
                 newaxis.name = axis.name
                 newaxis.label = axis.label
                 newaxes.append(newaxis)
-            elif isinstance(axis, Bin) and not axis._uniform:
+            elif isinstance(axis, Variable):
                 newaxis = boost_histogram.axis.Variable(
                     axis.edges(),
                     underflow=True,
@@ -889,19 +397,32 @@ def expandkey(key):
                 else:
                     yield slice(None)
 
+        view = out.view(flow=True)
+        nonan = [slice(None, -1, None)] * (len(newaxes))
         if self._sumw2 is None:
-            values = self.values(overflow="all")
-            for sparse_key, sumw in values.items():
-                index = tuple(expandkey(sparse_key))
-                view = out.view(flow=True)
-                view[index] = sumw.get()
+            view[:] = self.values(flow=True)[(*nonan,)].get()
         else:
-            values = self.values(sumw2=True, overflow="all")
-            for sparse_key, (sumw, sumw2) in values.items():
-                index = tuple(expandkey(sparse_key))
-                view = out.view(flow=True)
-                view[index].value = sumw.get()
-                view[index].variance = sumw2.get()
+            view[:] = cupy.stack(
+                (
+                    self.values(flow=True)[(*nonan,)],
+                    self.variance(flow=True)[(*nonan,)],
+                ),
+                axis=len(newaxes),
+            ).get()
+
+        # if self._sumw2 is None:
+        #     values = self.values(overflow="all")
+        #     for sparse_key, sumw in values.items():
+        #         index = tuple(expandkey(sparse_key))
+        #         view = out.view(flow=True)
+        #         view[index] = sumw.get()
+        # else:
+        #     values = self.values(sumw2=True, overflow="all")
+        #     for sparse_key, (sumw, sumw2) in values.items():
+        #         index = tuple(expandkey(sparse_key))
+        #         view = out.view(flow=True)
+        #         view[index].value = sumw.get()
+        #         view[index].variance = sumw2.get()
 
         return out